diff options
Diffstat (limited to 'third_party/aom/av1/common/x86/convolve_2d_sse2.c')
-rw-r--r-- | third_party/aom/av1/common/x86/convolve_2d_sse2.c | 26 |
1 file changed, 20 insertions, 6 deletions
diff --git a/third_party/aom/av1/common/x86/convolve_2d_sse2.c b/third_party/aom/av1/common/x86/convolve_2d_sse2.c index 46c2674ca7..e4d352c0e0 100644 --- a/third_party/aom/av1/common/x86/convolve_2d_sse2.c +++ b/third_party/aom/av1/common/x86/convolve_2d_sse2.c @@ -31,6 +31,7 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, int i, j; const int fo_vert = filter_params_y->taps / 2 - 1; const int fo_horiz = filter_params_x->taps / 2 - 1; + const int do_average = conv_params->do_average; const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; const __m128i zero = _mm_setzero_si128(); @@ -181,9 +182,15 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, // Accumulate values into the destination buffer __m128i *const p = (__m128i *)&dst[i * dst_stride + j]; - _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round)); - _mm_storeu_si128(p + 1, - _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round)); + if (do_average) { + _mm_storeu_si128(p + 0, + _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round)); + _mm_storeu_si128(p + 1, + _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round)); + } else { + _mm_storeu_si128(p + 0, res_lo_round); + _mm_storeu_si128(p + 1, res_hi_round); + } } } } @@ -204,6 +211,7 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, int i, j; const int fo_vert = filter_params_y->taps / 2 - 1; const int fo_horiz = filter_params_x->taps / 2 - 1; + const int do_average = conv_params->do_average; const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz; const __m128i zero = _mm_setzero_si128(); @@ -357,9 +365,15 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, // Accumulate values into the destination buffer __m128i *const p = (__m128i *)&dst[i * dst_stride + j]; - _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round)); - _mm_storeu_si128(p + 1, - _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round)); + if (do_average) { + _mm_storeu_si128(p + 0, + 
_mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round)); + _mm_storeu_si128(p + 1, + _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round)); + } else { + _mm_storeu_si128(p + 0, res_lo_round); + _mm_storeu_si128(p + 1, res_hi_round); + } } } } |