diff options
Diffstat (limited to 'third_party/aom/av1/encoder/dct.c')
-rw-r--r-- | third_party/aom/av1/encoder/dct.c | 888 |
1 files changed, 520 insertions, 368 deletions
diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c index 850b84ca95..a04d46b725 100644 --- a/third_party/aom/av1/encoder/dct.c +++ b/third_party/aom/av1/encoder/dct.c @@ -21,7 +21,8 @@ #include "av1/common/av1_fwd_txfm1d.h" #include "av1/common/av1_fwd_txfm1d_cfg.h" #include "av1/common/idct.h" -#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 +#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \ + CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64 #include "av1/common/daala_tx.h" #endif @@ -42,18 +43,6 @@ static INLINE void range_check(const tran_low_t *input, const int size, #endif } -#if CONFIG_DAALA_DCT4 -static void fdct4(const tran_low_t *input, tran_low_t *output) { - int i; - od_coeff x[4]; - od_coeff y[4]; - for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i]; - od_bin_fdct4(y, x, 1); - for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i]; -} - -#else - static void fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; tran_low_t step[4]; @@ -89,19 +78,6 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) { range_check(output, 4, 16); } -#endif - -#if CONFIG_DAALA_DCT8 -static void fdct8(const tran_low_t *input, tran_low_t *output) { - int i; - od_coeff x[8]; - od_coeff y[8]; - for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; - od_bin_fdct8(y, x, 1); - for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; -} - -#else static void fdct8(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; @@ -180,7 +156,6 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) { range_check(output, 8, 16); } -#endif static void fdct16(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; @@ -755,7 +730,6 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) { } #ifndef AV1_DCT_GTEST - static void fadst4(const tran_low_t *input, tran_low_t *output) { tran_high_t x0, x1, x2, x3; tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; @@ -796,18 +770,6 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) { output[3] = (tran_low_t)fdct_round_shift(s3); } -#if CONFIG_DAALA_DCT8 -static void fadst8(const tran_low_t *input, tran_low_t *output) { - int i; - od_coeff x[8]; - od_coeff y[8]; - for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; - od_bin_fdst8(y, x, 1); - for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; -} - -#else - static void fadst8(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; @@ -878,7 +840,6 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) { output[6] = (tran_low_t)x5; output[7] = (tran_low_t)-x1; } -#endif static void fadst16(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; @@ -1066,9 +1027,27 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) { #if CONFIG_MRC_TX static void get_masked_residual32(const int16_t **input, int *input_stride, const uint8_t *pred, int pred_stride, - int16_t *masked_input) { - int mrc_mask[32 * 32]; - get_mrc_mask(pred, pred_stride, mrc_mask, 32, 32, 32); + int16_t *masked_input, + TxfmParam *txfm_param) { + int n_masked_vals = 0; + uint8_t *mrc_mask; + uint8_t mask_tmp[32 * 32]; + if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) || + (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) { + mrc_mask = txfm_param->mask; + n_masked_vals = get_mrc_diff_mask(*input, *input_stride, mrc_mask, 32, 32, + 32, txfm_param->is_inter); + } else { + mrc_mask = mask_tmp; + n_masked_vals = get_mrc_pred_mask(pred, pred_stride, mrc_mask, 32, 32, 32, + txfm_param->is_inter); + } + + // Do not use MRC_DCT if mask is invalid. DCT_DCT will be used instead. + if (!is_valid_mrc_mask(n_masked_vals, 32, 32)) { + *txfm_param->valid_mask = 0; + return; + } int32_t sum = 0; int16_t avg; // Get the masked average of the prediction @@ -1077,7 +1056,7 @@ static void get_masked_residual32(const int16_t **input, int *input_stride, sum += mrc_mask[i * 32 + j] * (*input)[i * (*input_stride) + j]; } } - avg = ROUND_POWER_OF_TWO_SIGNED(sum, 10); + avg = sum / n_masked_vals; // Replace all of the unmasked pixels in the prediction with the average // of the masked pixels for (int i = 0; i < 32; ++i) { @@ -1087,16 +1066,24 @@ static void get_masked_residual32(const int16_t **input, int *input_stride, } *input = masked_input; *input_stride = 32; + *txfm_param->valid_mask = 1; } #endif // CONFIG_MRC_TX -#if CONFIG_LGT +#if CONFIG_LGT || CONFIG_LGT_FROM_PRED static void flgt4(const tran_low_t *input, tran_low_t *output, const tran_high_t *lgtmtx) { - if (!(input[0] | input[1] | input[2] | input[3])) { - output[0] = output[1] = output[2] = output[3] = 0; + if (!lgtmtx) assert(0); +#if CONFIG_LGT_FROM_PRED + // For DCT/ADST, use butterfly implementations + if (lgtmtx[0] == DCT4) { + fdct4(input, output); + return; + } else if (lgtmtx[0] == ADST4) { + fadst4(input, output); return; } +#endif // CONFIG_LGT_FROM_PRED // evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,4 tran_high_t s[4] = { 0 }; @@ -1108,6 +1095,18 @@ static void flgt4(const tran_low_t *input, tran_low_t *output, static void flgt8(const tran_low_t *input, tran_low_t *output, const tran_high_t *lgtmtx) { + if (!lgtmtx) assert(0); +#if CONFIG_LGT_FROM_PRED + // For DCT/ADST, use butterfly implementations + if (lgtmtx[0] == DCT8) { + fdct8(input, output); + return; + } else if (lgtmtx[0] == ADST8) { + fadst8(input, output); + return; + } +#endif // CONFIG_LGT_FROM_PRED + // evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,8 tran_high_t s[8] = { 0 }; for (int i = 0; i < 8; ++i) @@ -1115,30 +1114,140 @@ static void flgt8(const tran_low_t *input, tran_low_t *output, for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]); } +#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED -// The get_fwd_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise -int get_fwd_lgt4(transform_1d tx_orig, TxfmParam *txfm_param, - const tran_high_t *lgtmtx[], int ntx) { - // inter/intra split - if (tx_orig == &fadst4) { - for (int i = 0; i < ntx; ++i) - lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0]; - return 1; +#if CONFIG_LGT_FROM_PRED +static void flgt16up(const tran_low_t *input, tran_low_t *output, + const tran_high_t *lgtmtx) { + if (lgtmtx[0] == DCT16) { + fdct16(input, output); + return; + } else if (lgtmtx[0] == ADST16) { + fadst16(input, output); + return; + } else if (lgtmtx[0] == DCT32) { + fdct32(input, output); + return; + } else if (lgtmtx[0] == ADST32) { + fhalfright32(input, output); + return; + } else { + assert(0); + } +} + +typedef void (*FlgtFunc)(const tran_low_t *input, tran_low_t *output, + const tran_high_t *lgtmtx); + +static FlgtFunc flgt_func[4] = { flgt4, flgt8, flgt16up, flgt16up }; + +typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col, + const tran_high_t *lgtmtx[], int ntx); + +static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred, + get_lgt16up_from_pred, + get_lgt16up_from_pred }; + +// this inline function corresponds to the up scaling before the first +// transform in the av1_fht* functions +static INLINE tran_low_t fwd_upscale_wrt_txsize(const tran_high_t val, + const TX_SIZE tx_size) { + switch (tx_size) { + case TX_4X4: return (tran_low_t)val << 4; + case TX_8X8: + case TX_4X16: + case TX_16X4: + case TX_8X32: + case TX_32X8: return (tran_low_t)val << 2; + case TX_4X8: + case TX_8X4: + case TX_8X16: + case TX_16X8: return (tran_low_t)fdct_round_shift(val * 4 * Sqrt2); + default: assert(0); break; } return 0; } -int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param, - const tran_high_t *lgtmtx[], int ntx) { - // inter/intra split - if (tx_orig == &fadst8) { - for (int i = 0; i < ntx; ++i) - lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0]; - return 1; +// This inline function corresponds to the bit shift after the second +// transform in the av1_fht* functions +static INLINE tran_low_t fwd_downscale_wrt_txsize(const tran_low_t val, + const TX_SIZE tx_size) { + switch (tx_size) { + case TX_4X4: return (val + 1) >> 2; + case TX_4X8: + case TX_8X4: + case TX_8X8: + case TX_4X16: + case TX_16X4: return (val + (val < 0)) >> 1; + case TX_8X16: + case TX_16X8: return val; + case TX_8X32: + case TX_32X8: return ROUND_POWER_OF_TWO_SIGNED(val, 2); + default: assert(0); break; } return 0; } -#endif // CONFIG_LGT + +void flgt2d_from_pred_c(const int16_t *input, tran_low_t *output, int stride, + TxfmParam *txfm_param) { + const TX_SIZE tx_size = txfm_param->tx_size; + const int w = tx_size_wide[tx_size]; + const int h = tx_size_high[tx_size]; + const int wlog2 = tx_size_wide_log2[tx_size]; + const int hlog2 = tx_size_high_log2[tx_size]; + assert(w <= 8 || h <= 8); + + int i, j; + tran_low_t out[256]; // max size: 8x32 and 32x8 + tran_low_t temp_in[32], temp_out[32]; + const tran_high_t *lgtmtx_col[1]; + const tran_high_t *lgtmtx_row[1]; + get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w); + get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h); + + // For forward transforms, to be consistent with av1_fht functions, we apply + // short transform first and long transform second. + if (w < h) { + // Row transforms + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) + temp_in[j] = fwd_upscale_wrt_txsize(input[i * stride + j], tx_size); + flgt_func[wlog2 - 2](temp_in, temp_out, lgtmtx_row[0]); + // right shift of 2 bits here in fht8x16 and fht16x8 + for (j = 0; j < w; ++j) + out[j * h + i] = (tx_size == TX_16X8 || tx_size == TX_8X16) + ? ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2) + : temp_out[j]; + } + // Column transforms + for (i = 0; i < w; ++i) { + for (j = 0; j < h; ++j) temp_in[j] = out[j + i * h]; + flgt_func[hlog2 - 2](temp_in, temp_out, lgtmtx_col[0]); + for (j = 0; j < h; ++j) + output[j * w + i] = fwd_downscale_wrt_txsize(temp_out[j], tx_size); + } + } else { + // Column transforms + for (i = 0; i < w; ++i) { + for (j = 0; j < h; ++j) + temp_in[j] = fwd_upscale_wrt_txsize(input[j * stride + i], tx_size); + flgt_func[hlog2 - 2](temp_in, temp_out, lgtmtx_col[0]); + // fht8x16 and fht16x8 have right shift of 2 bits here + for (j = 0; j < h; ++j) + out[j * w + i] = (tx_size == TX_16X8 || tx_size == TX_8X16) + ? ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2) + : temp_out[j]; + } + // Row transforms + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) temp_in[j] = out[j + i * w]; + flgt_func[wlog2 - 2](temp_in, temp_out, lgtmtx_row[0]); + for (j = 0; j < w; ++j) + output[j + i * w] = fwd_downscale_wrt_txsize(temp_out[j], tx_size); + } + } +} +#endif // CONFIG_LGT_FROM_PRED #if CONFIG_EXT_TX // TODO(sarahparker) these functions will be removed once the highbitdepth @@ -1148,34 +1257,29 @@ int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param, static void fidtx4(const tran_low_t *input, tran_low_t *output) { int i; for (i = 0; i < 4; ++i) { -#if CONFIG_DAALA_DCT4 - output[i] = input[i]; -#else output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2); -#endif } } static void fidtx8(const tran_low_t *input, tran_low_t *output) { int i; for (i = 0; i < 8; ++i) { -#if CONFIG_DAALA_DCT8 - output[i] = input[i]; -#else output[i] = input[i] * 2; -#endif } } static void fidtx16(const tran_low_t *input, tran_low_t *output) { int i; - for (i = 0; i < 16; ++i) + for (i = 0; i < 16; ++i) { output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2); + } } static void fidtx32(const tran_low_t *input, tran_low_t *output) { int i; - for (i = 0; i < 32; ++i) output[i] = input[i] * 4; + for (i = 0; i < 32; ++i) { + output[i] = input[i] * 4; + } } static void copy_block(const int16_t *src, int src_stride, int l, int w, @@ -1238,7 +1342,7 @@ static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w, } static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w, - int16_t *buff, int tx_type) { + int16_t *buff, TX_TYPE tx_type) { switch (tx_type) { #if CONFIG_MRC_TX case MRC_DCT: @@ -1278,7 +1382,7 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w, void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif @@ -1293,6 +1397,26 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, #endif { static const transform_2d FHT[] = { +#if CONFIG_DAALA_DCT4 + { daala_fdct4, daala_fdct4 }, // DCT_DCT + { daala_fdst4, daala_fdct4 }, // ADST_DCT + { daala_fdct4, daala_fdst4 }, // DCT_ADST + { daala_fdst4, daala_fdst4 }, // ADST_ADST +#if CONFIG_EXT_TX + { daala_fdst4, daala_fdct4 }, // FLIPADST_DCT + { daala_fdct4, daala_fdst4 }, // DCT_FLIPADST + { daala_fdst4, daala_fdst4 }, // FLIPADST_FLIPADST + { daala_fdst4, daala_fdst4 }, // ADST_FLIPADST + { daala_fdst4, daala_fdst4 }, // FLIPADST_ADST + { daala_idtx4, daala_idtx4 }, // IDTX + { daala_fdct4, daala_idtx4 }, // V_DCT + { daala_idtx4, daala_fdct4 }, // H_DCT + { daala_fdst4, daala_idtx4 }, // V_ADST + { daala_idtx4, daala_fdst4 }, // H_ADST + { daala_fdst4, daala_idtx4 }, // V_FLIPADST + { daala_idtx4, daala_fdst4 }, // H_FLIPADST +#endif +#else { fdct4, fdct4 }, // DCT_DCT { fadst4, fdct4 }, // ADST_DCT { fdct4, fadst4 }, // DCT_ADST @@ -1311,6 +1435,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, { fadst4, fidtx4 }, // V_FLIPADST { fidtx4, fadst4 }, // H_FLIPADST #endif +#endif }; const transform_2d ht = FHT[tx_type]; tran_low_t out[4 * 4]; @@ -1325,10 +1450,10 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, #if CONFIG_LGT // Choose LGT adaptive to the prediction. We may apply different LGTs for // different rows/columns, indicated by the pointers to 2D arrays - const tran_high_t *lgtmtx_col[4]; - const tran_high_t *lgtmtx_row[4]; - int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 4); - int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 4); + const tran_high_t *lgtmtx_col[1]; + const tran_high_t *lgtmtx_row[1]; + int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); + int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row); #endif // Columns @@ -1340,7 +1465,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT if (use_lgt_col) - flgt4(temp_in, temp_out, lgtmtx_col[i]); + flgt4(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -1352,7 +1477,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4]; #if CONFIG_LGT if (use_lgt_row) - flgt4(temp_in, temp_out, lgtmtx_row[i]); + flgt4(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -1369,7 +1494,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1408,10 +1533,10 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_col[4]; - const tran_high_t *lgtmtx_row[8]; - int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 4); - int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 8); + const tran_high_t *lgtmtx_col[1]; + const tran_high_t *lgtmtx_row[1]; + int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); + int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row); #endif // Rows @@ -1421,7 +1546,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2); #if CONFIG_LGT if (use_lgt_row) - flgt4(temp_in, temp_out, lgtmtx_row[i]); + flgt4(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -1433,7 +1558,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; #if CONFIG_LGT if (use_lgt_col) - flgt8(temp_in, temp_out, lgtmtx_col[i]); + flgt8(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -1445,7 +1570,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1484,10 +1609,10 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_col[8]; - const tran_high_t *lgtmtx_row[4]; - int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 8); - int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 4); + const tran_high_t *lgtmtx_col[1]; + const tran_high_t *lgtmtx_row[1]; + int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); + int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); #endif // Columns @@ -1497,7 +1622,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2); #if CONFIG_LGT if (use_lgt_col) - flgt4(temp_in, temp_out, lgtmtx_col[i]); + flgt4(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -1509,7 +1634,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; #if CONFIG_LGT if (use_lgt_row) - flgt8(temp_in, temp_out, lgtmtx_row[i]); + flgt8(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -1521,7 +1646,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1560,8 +1685,8 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_row[16]; - int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 16); + const tran_high_t *lgtmtx_row[1]; + int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row); #endif // Rows @@ -1569,7 +1694,7 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4; #if CONFIG_LGT if (use_lgt_row) - flgt4(temp_in, temp_out, lgtmtx_row[i]); + flgt4(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -1588,7 +1713,7 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1627,8 +1752,8 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_col[16]; - int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 16); + const tran_high_t *lgtmtx_col[1]; + int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col); #endif // Columns @@ -1636,7 +1761,7 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4; #if CONFIG_LGT if (use_lgt_col) - flgt4(temp_in, temp_out, lgtmtx_col[i]); + flgt4(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -1655,7 +1780,7 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1694,8 +1819,8 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_row[16]; - int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 16); + const tran_high_t *lgtmtx_row[1]; + int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); #endif // Rows @@ -1705,7 +1830,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2); #if CONFIG_LGT if (use_lgt_row) - flgt8(temp_in, temp_out, lgtmtx_row[i]); + flgt8(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -1724,7 +1849,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1763,8 +1888,8 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_col[16]; - int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 16); + const tran_high_t *lgtmtx_col[1]; + int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); #endif // Columns @@ -1774,7 +1899,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2); #if CONFIG_LGT if (use_lgt_col) - flgt8(temp_in, temp_out, lgtmtx_col[i]); + flgt8(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -1793,7 +1918,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1832,8 +1957,8 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_row[32]; - int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 32); + const tran_high_t *lgtmtx_row[1]; + int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); #endif // Rows @@ -1841,7 +1966,7 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4; #if CONFIG_LGT if (use_lgt_row) - flgt8(temp_in, temp_out, lgtmtx_row[i]); + flgt8(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -1855,12 +1980,12 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n4; ++j) output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); } - // Note: overall scale factor of transform is 4 times unitary + // Note: overall scale factor of transform is 8 times unitary } void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1899,8 +2024,8 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_col[32]; - int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 32); + const tran_high_t *lgtmtx_col[1]; + int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); #endif // Columns @@ -1908,7 +2033,7 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4; #if CONFIG_LGT if (use_lgt_col) - flgt8(temp_in, temp_out, lgtmtx_col[i]); + flgt8(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -1922,12 +2047,12 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < n4; ++j) output[j + i * n4] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); } - // Note: overall scale factor of transform is 4 times unitary + // Note: overall scale factor of transform is 8 times unitary } void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -1986,7 +2111,7 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -2043,134 +2168,9 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride, // Note: overall scale factor of transform is 4 times unitary } -void av1_fdct8x8_quant_c(const int16_t *input, int stride, - tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan -#if CONFIG_AOM_QM - , - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr -#endif - ) { - int eob = -1; - - int i, j; - tran_low_t intermediate[64]; - - // Transform columns - { - tran_low_t *output = intermediate; - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 - tran_high_t t0, t1, t2, t3; // needs32 - tran_high_t x0, x1, x2, x3; // canbe16 - - for (i = 0; i < 8; i++) { - // stage 1 - s0 = (input[0 * stride] + input[7 * stride]) * 4; - s1 = (input[1 * stride] + input[6 * stride]) * 4; - s2 = (input[2 * stride] + input[5 * stride]) * 4; - s3 = (input[3 * stride] + input[4 * stride]) * 4; - s4 = (input[3 * stride] - input[4 * stride]) * 4; - s5 = (input[2 * stride] - input[5 * stride]) * 4; - s6 = (input[1 * stride] - input[6 * stride]) * 4; - s7 = (input[0 * stride] - input[7 * stride]) * 4; - - // fdct4(step, step); - x0 = s0 + s3; - x1 = s1 + s2; - x2 = s1 - s2; - x3 = s0 - s3; - t0 = (x0 + x1) * cospi_16_64; - t1 = (x0 - x1) * cospi_16_64; - t2 = x2 * cospi_24_64 + x3 * cospi_8_64; - t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0 * 8] = (tran_low_t)fdct_round_shift(t0); - output[2 * 8] = (tran_low_t)fdct_round_shift(t2); - output[4 * 8] = (tran_low_t)fdct_round_shift(t1); - output[6 * 8] = (tran_low_t)fdct_round_shift(t3); - - // stage 2 - t0 = (s6 - s5) * cospi_16_64; - t1 = (s6 + s5) * cospi_16_64; - t2 = fdct_round_shift(t0); - t3 = fdct_round_shift(t1); - - // stage 3 - x0 = s4 + t2; - x1 = s4 - t2; - x2 = s7 - t3; - x3 = s7 + t3; - - // stage 4 - t0 = x0 * cospi_28_64 + x3 * cospi_4_64; - t1 = x1 * cospi_12_64 + x2 * cospi_20_64; - t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; - t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1 * 8] = (tran_low_t)fdct_round_shift(t0); - output[3 * 8] = (tran_low_t)fdct_round_shift(t2); - output[5 * 8] = (tran_low_t)fdct_round_shift(t1); - output[7 * 8] = (tran_low_t)fdct_round_shift(t3); - input++; - output++; - } - } - - // Rows - for (i = 0; i < 8; ++i) { - fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]); - for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2; - } - - // TODO(jingning) Decide the need of these arguments after the - // quantization process is completed. - (void)zbin_ptr; - (void)quant_shift_ptr; - (void)iscan; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - for (i = 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; -#if CONFIG_AOM_QM - const qm_val_t wt = qm_ptr[rc]; - const qm_val_t iwt = iqm_ptr[rc]; - const int dequant = - (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; -#endif - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); - int tmp32; -#if CONFIG_AOM_QM - tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; -#else - tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; -#endif - - if (tmp32) eob = i; - } - } - *eob_ptr = eob + 1; -} - void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -2185,6 +2185,26 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, #endif { static const transform_2d FHT[] = { +#if CONFIG_DAALA_DCT8 + { daala_fdct8, daala_fdct8 }, // DCT_DCT + { daala_fdst8, daala_fdct8 }, // ADST_DCT + { daala_fdct8, daala_fdst8 }, // DCT_ADST + { daala_fdst8, daala_fdst8 }, // ADST_ADST +#if CONFIG_EXT_TX + { daala_fdst8, daala_fdct8 }, // FLIPADST_DCT + { daala_fdct8, daala_fdst8 }, // DCT_FLIPADST + { daala_fdst8, daala_fdst8 }, // FLIPADST_FLIPADST + { daala_fdst8, daala_fdst8 }, // ADST_FLIPADST + { daala_fdst8, daala_fdst8 }, // FLIPADST_ADST + { daala_idtx8, daala_idtx8 }, // IDTX + { daala_fdct8, daala_idtx8 }, // V_DCT + { daala_idtx8, daala_fdct8 }, // H_DCT + { daala_fdst8, daala_idtx8 }, // V_ADST + { daala_idtx8, daala_fdst8 }, // H_ADST + { daala_fdst8, daala_idtx8 }, // V_FLIPADST + { daala_idtx8, daala_fdst8 }, // H_FLIPADST +#endif +#else { fdct8, fdct8 }, // DCT_DCT { fadst8, fdct8 }, // ADST_DCT { fdct8, fadst8 }, // DCT_ADST @@ -2203,6 +2223,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, { fadst8, fidtx8 }, // V_FLIPADST { fidtx8, fadst8 }, // H_FLIPADST #endif +#endif }; const transform_2d ht = FHT[tx_type]; tran_low_t out[64]; @@ -2215,10 +2236,10 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT - const tran_high_t *lgtmtx_col[8]; - const tran_high_t *lgtmtx_row[8]; - int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 8); - int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 8); + const tran_high_t *lgtmtx_col[1]; + const tran_high_t *lgtmtx_row[1]; + int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col); + int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row); #endif // Columns @@ -2230,7 +2251,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, #endif #if CONFIG_LGT if (use_lgt_col) - flgt8(temp_in, temp_out, lgtmtx_col[i]); + flgt8(temp_in, temp_out, lgtmtx_col[0]); else #endif ht.cols(temp_in, temp_out); @@ -2242,7 +2263,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8]; #if CONFIG_LGT if (use_lgt_row) - flgt8(temp_in, temp_out, lgtmtx_row[i]); + flgt8(temp_in, temp_out, lgtmtx_row[0]); else #endif ht.rows(temp_in, temp_out); @@ -2315,7 +2336,7 @@ void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -2323,6 +2344,26 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, assert(tx_type == DCT_DCT); #endif static const transform_2d FHT[] = { +#if CONFIG_DAALA_DCT16 + { daala_fdct16, daala_fdct16 }, // DCT_DCT + { daala_fdst16, daala_fdct16 }, // ADST_DCT + { daala_fdct16, daala_fdst16 }, // DCT_ADST + { daala_fdst16, daala_fdst16 }, // ADST_ADST +#if CONFIG_EXT_TX + { daala_fdst16, daala_fdct16 }, // FLIPADST_DCT + { daala_fdct16, daala_fdst16 }, // DCT_FLIPADST + { daala_fdst16, daala_fdst16 }, // FLIPADST_FLIPADST + { daala_fdst16, daala_fdst16 }, // ADST_FLIPADST + { daala_fdst16, daala_fdst16 }, // FLIPADST_ADST + { daala_idtx16, daala_idtx16 }, // IDTX + { daala_fdct16, daala_idtx16 }, // V_DCT + { daala_idtx16, daala_fdct16 }, // H_DCT + { daala_fdst16, daala_idtx16 }, // V_ADST + { daala_idtx16, daala_fdst16 }, // H_ADST + { daala_fdst16, daala_idtx16 }, // V_FLIPADST + { daala_idtx16, daala_fdst16 }, // H_FLIPADST +#endif +#else { fdct16, fdct16 }, // DCT_DCT { fadst16, fdct16 }, // ADST_DCT { fdct16, fadst16 }, // DCT_ADST @@ -2341,6 +2382,7 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, { fadst16, fidtx16 }, // V_FLIPADST { fidtx16, fadst16 }, // H_FLIPADST #endif +#endif }; const transform_2d ht = FHT[tx_type]; tran_low_t out[256]; @@ -2354,17 +2396,34 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, // Columns for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4; + for (j = 0; j < 16; ++j) { +#if CONFIG_DAALA_DCT16 + temp_in[j] = input[j * stride + i] * 16; +#else + temp_in[j] = input[j * stride + i] * 4; +#endif + } ht.cols(temp_in, temp_out); - for (j = 0; j < 16; ++j) + for (j = 0; j < 16; ++j) { +#if CONFIG_DAALA_DCT16 + out[j * 16 + i] = temp_out[j]; +#else out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; +#endif + } } // Rows for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16]; ht.rows(temp_in, temp_out); - for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j]; + for (j = 0; j < 16; ++j) { +#if CONFIG_DAALA_DCT16 + output[j + i * 16] = (temp_out[j] + (temp_out[j] < 0)) >> 1; +#else + output[j + i * 16] = temp_out[j]; +#endif + } } } @@ -2375,12 +2434,32 @@ void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_DCT_ONLY assert(tx_type == DCT_DCT); #endif static const transform_2d FHT[] = { - { fdct32, fdct32 }, // DCT_DCT +#if CONFIG_DAALA_DCT32 + { daala_fdct32, daala_fdct32 }, // DCT_DCT +#if CONFIG_EXT_TX + { daala_fdst32, daala_fdct32 }, // ADST_DCT + { daala_fdct32, daala_fdst32 }, // DCT_ADST + { daala_fdst32, daala_fdst32 }, // ADST_ADST + { daala_fdst32, daala_fdct32 }, // FLIPADST_DCT + { daala_fdct32, daala_fdst32 }, // DCT_FLIPADST + { daala_fdst32, daala_fdst32 }, // FLIPADST_FLIPADST + { daala_fdst32, daala_fdst32 }, // ADST_FLIPADST + { daala_fdst32, daala_fdst32 }, // FLIPADST_ADST + { daala_idtx32, daala_idtx32 }, // IDTX + { daala_fdct32, daala_idtx32 }, // V_DCT + { daala_idtx32, daala_fdct32 }, // H_DCT + { daala_fdst32, daala_idtx32 }, // V_ADST + { daala_idtx32, daala_fdst32 }, // H_ADST + { daala_fdst32, daala_idtx32 }, // V_FLIPADST + { daala_idtx32, daala_fdst32 }, // H_FLIPADST +#endif +#else + { fdct32, fdct32 }, // DCT_DCT #if CONFIG_EXT_TX { fhalfright32, fdct32 }, // ADST_DCT { fdct32, fhalfright32 }, // DCT_ADST @@ -2398,6 +2477,7 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, { fhalfright32, fidtx32 }, // V_FLIPADST { fidtx32, fhalfright32 }, // H_FLIPADST #endif +#endif #if CONFIG_MRC_TX { fdct32, fdct32 }, // MRC_TX #endif // CONFIG_MRC_TX @@ -2416,27 +2496,41 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, if (tx_type == MRC_DCT) { int16_t masked_input[32 * 32]; get_masked_residual32(&input, &stride, txfm_param->dst, txfm_param->stride, - masked_input); + masked_input, txfm_param); } #endif // CONFIG_MRC_TX // Columns for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; + for (j = 0; j < 32; ++j) { +#if CONFIG_DAALA_DCT32 + temp_in[j] = input[j * stride + i] * 16; +#else + temp_in[j] = input[j * stride + i] * 4; +#endif + } ht.cols(temp_in, temp_out); - for (j = 0; j < 32; ++j) + for (j = 0; j < 32; ++j) { +#if CONFIG_DAALA_DCT32 + out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); +#else out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4); +#endif + } } // Rows for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32]; ht.rows(temp_in, temp_out); - for (j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j]; + for (j = 0; j < 32; ++j) { + output[j + i * 32] = temp_out[j]; + } } } #if CONFIG_TX64X64 +#if !CONFIG_DAALA_DCT64 #if CONFIG_EXT_TX static void fidtx64(const tran_low_t *input, tran_low_t *output) { int i; @@ -2475,10 +2569,11 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) { av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64); for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i]; } +#endif void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, TxfmParam *txfm_param) { - int tx_type = txfm_param->tx_type; + const TX_TYPE tx_type = txfm_param->tx_type; #if CONFIG_MRC_TX assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); #endif // CONFIG_MRC_TX @@ -2486,7 +2581,27 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, assert(tx_type == DCT_DCT); #endif static const transform_2d FHT[] = { - { fdct64_col, fdct64_row }, // DCT_DCT +#if CONFIG_DAALA_DCT64 + { daala_fdct64, daala_fdct64 }, // DCT_DCT +#if CONFIG_EXT_TX + { daala_fdst64, daala_fdct64 }, // ADST_DCT + { daala_fdct64, daala_fdst64 }, // DCT_ADST + { daala_fdst64, daala_fdst64 }, // ADST_ADST + { daala_fdst64, daala_fdct64 }, // FLIPADST_DCT + { daala_fdct64, daala_fdst64 }, // DCT_FLIPADST + { daala_fdst64, daala_fdst64 }, // FLIPADST_FLIPADST + { daala_fdst64, daala_fdst64 }, // ADST_FLIPADST + { daala_fdst64, daala_fdst64 }, // FLIPADST_ADST + { daala_idtx64, daala_idtx64 }, // IDTX + { daala_fdct64, daala_idtx64 }, // V_DCT + { daala_idtx64, daala_fdct64 }, // H_DCT + { daala_fdst64, daala_idtx64 }, // V_ADST + { daala_idtx64, daala_fdst64 }, // H_ADST + { daala_fdst64, daala_idtx64 }, // V_FLIPADST + { daala_idtx64, daala_fdst64 }, // H_FLIPADST +#endif // CONFIG_EXT_TX +#else + { fdct64_col, fdct64_row }, // DCT_DCT #if CONFIG_EXT_TX { fhalfright64, fdct64_row }, // ADST_DCT { fdct64_col, fhalfright64 }, // DCT_ADST @@ -2503,7 +2618,8 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, { fidtx64, fhalfright64 }, // H_ADST { fhalfright64, fidtx64 }, // V_FLIPADST { fidtx64, fhalfright64 }, // H_FLIPADST -#endif +#endif // CONFIG_EXT_TX +#endif // CONFIG_DAALA_DCT64 }; const transform_2d ht = FHT[tx_type]; tran_low_t out[4096]; @@ -2516,10 +2632,18 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, // Columns for (i = 0; i < 64; ++i) { +#if CONFIG_DAALA_DCT64 + for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i] * 16; + ht.cols(temp_in, temp_out); + for (j = 0; j < 64; ++j) + out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 3; + +#else for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 64; ++j) out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; +#endif } // Rows @@ -2527,8 +2651,129 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, for (j = 0; j < 64; ++j) temp_in[j] = out[j + i * 64]; ht.rows(temp_in, temp_out); for (j = 0; j < 64; ++j) +#if CONFIG_DAALA_DCT64 + output[j + i * 64] = temp_out[j]; +#else output[j + i * 64] = (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); +#endif + } +} + +void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride, + TxfmParam *txfm_param) { + const TX_TYPE tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif + static const transform_2d FHT[] = { + { fdct32, fdct64_row }, // DCT_DCT +#if CONFIG_EXT_TX + { fhalfright32, fdct64_row }, // ADST_DCT + { fdct32, fhalfright64 }, // DCT_ADST + { fhalfright32, fhalfright64 }, // ADST_ADST + { fhalfright32, fdct64_row }, // FLIPADST_DCT + { fdct32, fhalfright64 }, // DCT_FLIPADST + { fhalfright32, fhalfright64 }, // FLIPADST_FLIPADST + { fhalfright32, fhalfright64 }, // ADST_FLIPADST + { fhalfright32, fhalfright64 }, // FLIPADST_ADST + { fidtx32, fidtx64 }, // IDTX + { fdct32, fidtx64 }, // V_DCT + { fidtx32, fdct64_row }, // H_DCT + { fhalfright32, fidtx64 }, // V_ADST + { fidtx32, fhalfright64 }, // H_ADST + { fhalfright32, fidtx64 }, // V_FLIPADST + { fidtx32, fhalfright64 }, // H_FLIPADST +#endif // CONFIG_EXT_TX + }; + const transform_2d ht = FHT[tx_type]; + tran_low_t out[2048]; + int i, j; + tran_low_t temp_in[64], temp_out[64]; + const int n = 32; + const int n2 = 64; +#if CONFIG_EXT_TX + int16_t flipped_input[32 * 64]; + maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type); +#endif + + // Columns + for (i = 0; i < n2; ++i) { + for (j = 0; j < n; ++j) + temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2); + ht.cols(temp_in, temp_out); + for (j = 0; j < n; ++j) + out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); + } + + // Rows + for (i = 0; i < n; ++i) { + for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; + ht.rows(temp_in, temp_out); + for (j = 0; j < n2; ++j) + output[j + i * n2] = + (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); + } +} + +void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride, + TxfmParam *txfm_param) { + const TX_TYPE tx_type = txfm_param->tx_type; +#if CONFIG_MRC_TX + assert(tx_type != MRC_DCT && "Invalid tx type for tx size"); +#endif // CONFIG_MRC_TX +#if CONFIG_DCT_ONLY + assert(tx_type == DCT_DCT); +#endif + static const transform_2d FHT[] = { + { fdct64_row, fdct32 }, // DCT_DCT +#if CONFIG_EXT_TX + { fhalfright64, fdct32 }, // ADST_DCT + { fdct64_row, fhalfright32 }, // DCT_ADST + { fhalfright64, fhalfright32 }, // ADST_ADST + { fhalfright64, fdct32 }, // FLIPADST_DCT + { fdct64_row, fhalfright32 }, // DCT_FLIPADST + { fhalfright64, fhalfright32 }, // FLIPADST_FLIPADST + { fhalfright64, fhalfright32 }, // ADST_FLIPADST + { fhalfright64, fhalfright32 }, // FLIPADST_ADST + { fidtx64, fidtx32 }, // IDTX + { fdct64_row, fidtx32 }, // V_DCT + { fidtx64, fdct32 }, // H_DCT + { fhalfright64, fidtx32 }, // V_ADST + { fidtx64, fhalfright32 }, // H_ADST + { fhalfright64, fidtx32 }, // V_FLIPADST + { fidtx64, fhalfright32 }, // H_FLIPADST +#endif // CONFIG_EXT_TX + }; + const transform_2d ht = FHT[tx_type]; + tran_low_t out[32 * 64]; + int i, j; + tran_low_t temp_in[64], temp_out[64]; + const int n = 32; + const int n2 = 64; +#if CONFIG_EXT_TX + int16_t flipped_input[32 * 64]; + maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type); +#endif + + // Rows + for (i = 0; i < n2; ++i) { + for (j = 0; j < n; ++j) + temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * Sqrt2); + ht.rows(temp_in, temp_out); + for (j = 0; j < n; ++j) + out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); + } + + // Columns + for (i = 0; i < n; ++i) { + for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2]; + ht.cols(temp_in, temp_out); + for (j = 0; j < n2; ++j) + output[i + j * n] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2); } } #endif // CONFIG_TX64X64 @@ -2536,110 +2781,17 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, #if CONFIG_EXT_TX // Forward identity transform. void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, - int bs, int tx_type) { + int bsx, int bsy, TX_TYPE tx_type) { int r, c; - const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1); + const int pels = bsx * bsy; + const int shift = 3 - ((pels > 256) + (pels > 1024)); if (tx_type == IDTX) { - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] * (1 << shift); + for (r = 0; r < bsy; ++r) { + for (c = 0; c < bsx; ++c) coeff[c] = src_diff[c] * (1 << shift); src_diff += stride; - coeff += bs; + coeff += bsx; } } } #endif // CONFIG_EXT_TX - -#if CONFIG_DPCM_INTRA -void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output) { - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[4]; - for (int i = 0; i < 4; ++i) - temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2); - ft(temp_in, output); -} - -void av1_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output) { - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[8]; - for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4; - ft(temp_in, output); -} - -void av1_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output) { - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[16]; - for (int i = 0; i < 16; ++i) - temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2); - ft(temp_in, output); -} - -void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output) { - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32, - fidtx32 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[32]; - for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride]; - ft(temp_in, output); -} - -#if CONFIG_HIGHBITDEPTH -void av1_hbd_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output, int dir) { - (void)dir; - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[4]; - for (int i = 0; i < 4; ++i) - temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2); - ft(temp_in, output); -} - -void av1_hbd_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output, int dir) { - (void)dir; - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[8]; - for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4; - ft(temp_in, output); -} - -void av1_hbd_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output, int dir) { - (void)dir; - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[16]; - for (int i = 0; i < 16; ++i) - temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2); - ft(temp_in, output); -} - -void av1_hbd_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, - tran_low_t *output, int dir) { - (void)dir; - assert(tx_type < TX_TYPES_1D); - static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32, - fidtx32 }; - const transform_1d ft = FHT[tx_type]; - tran_low_t temp_in[32]; - for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride]; - ft(temp_in, output); -} -#endif // CONFIG_HIGHBITDEPTH -#endif // CONFIG_DPCM_INTRA #endif // !AV1_DCT_GTEST |