summary | refs | log | tree | commit | diff
path: root/third_party/aom/aom_dsp/x86/variance_sse2.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/aom_dsp/x86/variance_sse2.c')
-rw-r--r-- third_party/aom/aom_dsp/x86/variance_sse2.c 30
1 file changed, 28 insertions, 2 deletions
diff --git a/third_party/aom/aom_dsp/x86/variance_sse2.c b/third_party/aom/aom_dsp/x86/variance_sse2.c
index 918844185b..211fad3f80 100644
--- a/third_party/aom/aom_dsp/x86/variance_sse2.c
+++ b/third_party/aom/aom_dsp/x86/variance_sse2.c
@@ -382,6 +382,28 @@ unsigned int aom_variance32x8_sse2(const uint8_t *src, int src_stride,
assert(sum >= -255 * 32 * 8);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 8);
}
+
+unsigned int aom_variance16x64_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 64, sse, &sum,
+ aom_get16x16var_sse2, 16);
+ assert(sum <= 255 * 16 * 64);
+ assert(sum >= -255 * 16 * 64);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
+}
+
+unsigned int aom_variance64x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 64, 16, sse, &sum,
+ aom_get16x16var_sse2, 16);
+ assert(sum <= 255 * 64 * 16);
+ assert(sum >= -255 * 64 * 16);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
+}
#endif
// The 2 unused parameters are place holders for PIC enabled build.
@@ -451,7 +473,9 @@ DECLS(ssse3);
FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
FN(8, 32, 8, 3, 5, opt, (int32_t), (int32_t)); \
- FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t))
+ FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t)); \
+ FN(16, 64, 16, 4, 6, opt, (int32_t), (int32_t)); \
+ FN(64, 16, 16, 6, 4, opt, (int32_t), (int32_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
@@ -543,7 +567,9 @@ DECLS(ssse3);
FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
FN(8, 32, 8, 3, 5, opt, (int32_t), (int32_t)); \
- FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t))
+ FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t)); \
+ FN(16, 64, 16, 4, 6, opt, (int32_t), (int32_t)); \
+ FN(64, 16, 16, 6, 4, opt, (int32_t), (int32_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \