summary | refs | log | tree | commit | diff
path: root/third_party
diff options
context:
space:
mode:
Diffstat (limited to 'third_party')
-rw-r--r-- third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c | 1
-rw-r--r-- third_party/aom/aom_ports/msvc.h | 20
-rw-r--r-- third_party/aom/av1/common/reconinter.c | 6
-rw-r--r-- third_party/aom/av1/common/reconinter.h | 6
-rw-r--r-- third_party/aom/av1/common/thread_common.c | 15
-rw-r--r-- third_party/aom/av1/common/thread_common.h | 2
-rw-r--r-- third_party/aom/av1/common/x86/selfguided_sse4.c | 30
7 files changed, 51 insertions, 29 deletions
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
index 41b55c9852..e001a1d701 100644
--- a/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
@@ -11,6 +11,7 @@
#include <immintrin.h>
+#include "aom_ports/msvc.h"
#include "./aom_dsp_rtcd.h"
// -----------------------------------------------------------------------------
diff --git a/third_party/aom/aom_ports/msvc.h b/third_party/aom/aom_ports/msvc.h
index 2d3ab9b653..5a41d29d2b 100644
--- a/third_party/aom/aom_ports/msvc.h
+++ b/third_party/aom/aom_ports/msvc.h
@@ -43,5 +43,25 @@ static INLINE long lroundf(float x) {
}
#endif // _MSC_VER < 1800
+#if HAVE_AVX
+#include <immintrin.h>
+// Note:
+// The _mm256_insert_epi16 intrinsic is available from vs2017.
+// We define this macro for vs2015 and earlier. The
+// intrinsics used here are in vs2015 document:
+// https://msdn.microsoft.com/en-us/library/hh977022.aspx
+// Input parameters:
+// a: __m256i,
+// d: int16_t,
+// indx: imm8 (0 - 15)
+#if _MSC_VER <= 1900
+#define _mm256_insert_epi16(a, d, indx) \
+ _mm256_insertf128_si256( \
+ a, \
+ _mm_insert_epi16(_mm256_extractf128_si256(a, indx >> 3), d, indx % 8), \
+ indx >> 3)
+#endif // _MSC_VER <= 1900
+#endif // HAVE_AVX
+
#endif // _MSC_VER
#endif // AOM_PORTS_MSVC_H_
diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c
index d7e39b45ca..a1a22a0af3 100644
--- a/third_party/aom/av1/common/reconinter.c
+++ b/third_party/aom/av1/common/reconinter.c
@@ -1728,9 +1728,9 @@ void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
}
-void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
- BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
- int mi_row, int mi_col) {
+void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col) {
const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
src->uv_crop_width };
const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
diff --git a/third_party/aom/av1/common/reconinter.h b/third_party/aom/av1/common/reconinter.h
index fd69f9db37..0c33333397 100644
--- a/third_party/aom/av1/common/reconinter.h
+++ b/third_party/aom/av1/common/reconinter.h
@@ -446,9 +446,9 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
dst->stride = stride;
}
-void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
- BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
- int mi_row, int mi_col);
+void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col);
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
diff --git a/third_party/aom/av1/common/thread_common.c b/third_party/aom/av1/common/thread_common.c
index eec8629ff9..4c9fa69625 100644
--- a/third_party/aom/av1/common/thread_common.c
+++ b/third_party/aom/av1/common/thread_common.c
@@ -86,7 +86,7 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
#if !CONFIG_EXT_PARTITION_TYPES
static INLINE enum lf_path get_loop_filter_path(
- int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
+ int y_only, struct macroblockd_plane *planes) {
if (y_only)
return LF_PATH_444;
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
@@ -98,7 +98,7 @@ static INLINE enum lf_path get_loop_filter_path(
}
static INLINE void loop_filter_block_plane_ver(
- AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
+ AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
LOOP_FILTER_MASK *lfm) {
if (plane == 0) {
@@ -120,7 +120,7 @@ static INLINE void loop_filter_block_plane_ver(
}
static INLINE void loop_filter_block_plane_hor(
- AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
+ AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
LOOP_FILTER_MASK *lfm) {
if (plane == 0) {
@@ -286,10 +286,9 @@ static int loop_filter_row_worker(AV1LfSync *const lf_sync,
#endif // CONFIG_PARALLEL_DEBLOCKING
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only,
- AVxWorker *workers, int nworkers,
- AV1LfSync *lf_sync) {
+ struct macroblockd_plane *planes, int start,
+ int stop, int y_only, AVxWorker *workers,
+ int nworkers, AV1LfSync *lf_sync) {
#if CONFIG_EXT_PARTITION
printf(
"STOPPING: This code has not been modified to work with the "
@@ -415,7 +414,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
+ struct macroblockd_plane *planes,
int frame_filter_level,
#if CONFIG_LOOPFILTER_LEVEL
int frame_filter_level_r,
diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h
index 6d118e60b1..7eddc662ce 100644
--- a/third_party/aom/av1/common/thread_common.h
+++ b/third_party/aom/av1/common/thread_common.h
@@ -49,7 +49,7 @@ void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
// Multi-threaded loopfilter that uses the tile threads.
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
+ struct macroblockd_plane *planes,
int frame_filter_level,
#if CONFIG_LOOPFILTER_LEVEL
int frame_filter_level_r,
diff --git a/third_party/aom/av1/common/x86/selfguided_sse4.c b/third_party/aom/av1/common/x86/selfguided_sse4.c
index 4006b85181..9de9177c1d 100644
--- a/third_party/aom/av1/common/x86/selfguided_sse4.c
+++ b/third_party/aom/av1/common/x86/selfguided_sse4.c
@@ -10,9 +10,11 @@
av1_selfguided_restoration)
*/
static void calc_block(__m128i sum, __m128i sum_sq, __m128i n,
- __m128i one_over_n, __m128i s, int bit_depth, int idx,
- int32_t *A, int32_t *B) {
+ __m128i *one_over_n_, __m128i *s_, int bit_depth,
+ int idx, int32_t *A, int32_t *B) {
__m128i a, b, p;
+ __m128i one_over_n = *one_over_n_;
+ __m128i s = *s_;
#if CONFIG_HIGHBITDEPTH
if (bit_depth > 8) {
__m128i rounding_a = _mm_set1_epi32((1 << (2 * (bit_depth - 8))) >> 1);
@@ -147,7 +149,7 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
__m128i s = _mm_set_epi32(
sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][2 * h - 1]);
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
B);
n = _mm_set1_epi32(3 * h);
@@ -178,8 +180,8 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
_mm_alignr_epi8(b2, b1, 8)));
sum_sq_ = _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
_mm_alignr_epi8(a2, a1, 8)));
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
- A, B);
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+ i * buf_stride + j, A, B);
}
__m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 3]);
__m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 3]);
@@ -227,7 +229,7 @@ static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
s = _mm_set_epi32(
sgrproj_mtable[eps - 1][2 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
A, B);
}
}
@@ -342,7 +344,7 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
__m128i s = _mm_set_epi32(
sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
B);
// Re-align a1 and b1 so that they start at index i * buf_stride + 2
@@ -372,8 +374,8 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
_mm_alignr_epi8(a2, a1, 8))),
_mm_add_epi32(_mm_alignr_epi8(a2, a1, 12), a2));
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
- A, B);
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+ i * buf_stride + j, A, B);
}
// If the width is not a multiple of 4, we need to reset j to width - 4
// and adjust a1, a2, b1, b2 so that the loop invariant above is maintained
@@ -428,7 +430,7 @@ static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
s = _mm_set_epi32(
sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1],
sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1]);
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
A, B);
}
}
@@ -562,7 +564,7 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
__m128i s = _mm_set_epi32(
sgrproj_mtable[eps - 1][7 * h - 1], sgrproj_mtable[eps - 1][6 * h - 1],
sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1]);
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride, A,
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
B);
// Re-align a1 and b1 so that they start at index i * buf_stride + 1
@@ -599,8 +601,8 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
_mm_add_epi32(_mm_add_epi32(a2, _mm_alignr_epi8(a3, a2, 4)),
_mm_alignr_epi8(a3, a2, 8)));
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
- A, B);
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
+ i * buf_stride + j, A, B);
}
__m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 1]);
__m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 1]);
@@ -657,7 +659,7 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
s = _mm_set_epi32(
sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
sgrproj_mtable[eps - 1][6 * h - 1], sgrproj_mtable[eps - 1][7 * h - 1]);
- calc_block(sum_, sum_sq_, n, one_over_n, s, bit_depth, i * buf_stride + j,
+ calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
A, B);
}
}