Diffstat (limited to 'third_party/aom/av1/encoder/dct.c')
-rw-r--r--  third_party/aom/av1/encoder/dct.c  888
1 file changed, 520 insertions(+), 368 deletions(-)
diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c
index 850b84ca95..a04d46b725 100644
--- a/third_party/aom/av1/encoder/dct.c
+++ b/third_party/aom/av1/encoder/dct.c
@@ -21,7 +21,8 @@
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
-#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
+#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
+ CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
#include "av1/common/daala_tx.h"
#endif
@@ -42,18 +43,6 @@ static INLINE void range_check(const tran_low_t *input, const int size,
#endif
}
-#if CONFIG_DAALA_DCT4
-static void fdct4(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct4(y, x, 1);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
-}
-
-#else
-
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -89,19 +78,6 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) {
range_check(output, 4, 16);
}
-#endif
-
-#if CONFIG_DAALA_DCT8
-static void fdct8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct8(y, x, 1);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
-}
-
-#else
static void fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
@@ -180,7 +156,6 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
range_check(output, 8, 16);
}
-#endif
static void fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
@@ -755,7 +730,6 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) {
}
#ifndef AV1_DCT_GTEST
-
static void fadst4(const tran_low_t *input, tran_low_t *output) {
tran_high_t x0, x1, x2, x3;
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
@@ -796,18 +770,6 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) {
output[3] = (tran_low_t)fdct_round_shift(s3);
}
-#if CONFIG_DAALA_DCT8
-static void fadst8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
- od_bin_fdst8(y, x, 1);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
-}
-
-#else
-
static void fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
@@ -878,7 +840,6 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
output[6] = (tran_low_t)x5;
output[7] = (tran_low_t)-x1;
}
-#endif
static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
@@ -1066,9 +1027,27 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
#if CONFIG_MRC_TX
static void get_masked_residual32(const int16_t **input, int *input_stride,
const uint8_t *pred, int pred_stride,
- int16_t *masked_input) {
- int mrc_mask[32 * 32];
- get_mrc_mask(pred, pred_stride, mrc_mask, 32, 32, 32);
+ int16_t *masked_input,
+ TxfmParam *txfm_param) {
+ int n_masked_vals = 0;
+ uint8_t *mrc_mask;
+ uint8_t mask_tmp[32 * 32];
+ if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) ||
+ (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) {
+ mrc_mask = txfm_param->mask;
+ n_masked_vals = get_mrc_diff_mask(*input, *input_stride, mrc_mask, 32, 32,
+ 32, txfm_param->is_inter);
+ } else {
+ mrc_mask = mask_tmp;
+ n_masked_vals = get_mrc_pred_mask(pred, pred_stride, mrc_mask, 32, 32, 32,
+ txfm_param->is_inter);
+ }
+
+ // Do not use MRC_DCT if mask is invalid. DCT_DCT will be used instead.
+ if (!is_valid_mrc_mask(n_masked_vals, 32, 32)) {
+ *txfm_param->valid_mask = 0;
+ return;
+ }
int32_t sum = 0;
int16_t avg;
// Get the masked average of the input
@@ -1077,7 +1056,7 @@ static void get_masked_residual32(const int16_t **input, int *input_stride,
sum += mrc_mask[i * 32 + j] * (*input)[i * (*input_stride) + j];
}
}
- avg = ROUND_POWER_OF_TWO_SIGNED(sum, 10);
+ avg = sum / n_masked_vals;
// Replace all of the unmasked pixels in the input with the average
// of the masked pixels
for (int i = 0; i < 32; ++i) {
@@ -1087,16 +1066,24 @@ static void get_masked_residual32(const int16_t **input, int *input_stride,
}
*input = masked_input;
*input_stride = 32;
+ *txfm_param->valid_mask = 1;
}
#endif // CONFIG_MRC_TX
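/*
 * A minimal standalone sketch of the masked-average step above, shrunk from
 * 32x32 to 4x4 for brevity; masked_average_sketch_4x4 and its parameters are
 * hypothetical stand-ins for the mrc_mask / *input / masked_input plumbing,
 * and the mask is assumed binary (0/1) as in the multiply-accumulate above.
 */
static void masked_average_sketch_4x4(const int16_t *input, int input_stride,
                                      const uint8_t *mask, int16_t *out) {
  int32_t sum = 0;
  int n_masked_vals = 0;
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j) {
      if (mask[i * 4 + j]) {
        sum += input[i * input_stride + j];
        ++n_masked_vals;
      }
    }
  }
  // Integer average of the masked samples; the real code rejects empty or
  // too-small masks via is_valid_mrc_mask() before dividing.
  const int16_t avg = n_masked_vals ? (int16_t)(sum / n_masked_vals) : 0;
  // Masked samples keep their value; unmasked ones take the average.
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j)
      out[i * 4 + j] = mask[i * 4 + j] ? input[i * input_stride + j] : avg;
}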
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_LGT_FROM_PRED
static void flgt4(const tran_low_t *input, tran_low_t *output,
const tran_high_t *lgtmtx) {
- if (!(input[0] | input[1] | input[2] | input[3])) {
- output[0] = output[1] = output[2] = output[3] = 0;
+ if (!lgtmtx) assert(0);
+#if CONFIG_LGT_FROM_PRED
+ // For DCT/ADST, use butterfly implementations
+ if (lgtmtx[0] == DCT4) {
+ fdct4(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST4) {
+ fadst4(input, output);
return;
}
+#endif // CONFIG_LGT_FROM_PRED
// evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i = 0, ..., 3
tran_high_t s[4] = { 0 };
@@ -1108,6 +1095,18 @@ static void flgt4(const tran_low_t *input, tran_low_t *output,
static void flgt8(const tran_low_t *input, tran_low_t *output,
const tran_high_t *lgtmtx) {
+ if (!lgtmtx) assert(0);
+#if CONFIG_LGT_FROM_PRED
+ // For DCT/ADST, use butterfly implementations
+ if (lgtmtx[0] == DCT8) {
+ fdct8(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST8) {
+ fadst8(input, output);
+ return;
+ }
+#endif // CONFIG_LGT_FROM_PRED
+
// evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,8
tran_high_t s[8] = { 0 };
for (int i = 0; i < 8; ++i)
@@ -1115,30 +1114,140 @@ static void flgt8(const tran_low_t *input, tran_low_t *output,
for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
}
+#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
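/*
 * Shape of the kernel the flgt* helpers assume, written out as a generic
 * sketch (flgt_sketch is hypothetical; lgtmtx is taken to be a row-major
 * n x n integer matrix, as in the loops above): an LGT is a plain
 * matrix-vector product with fdct_round_shift() applied to each output.
 */
static void flgt_sketch(const tran_low_t *input, tran_low_t *output,
                        const tran_high_t *lgtmtx, int n) {
  for (int j = 0; j < n; ++j) {
    tran_high_t s = 0;  // accumulates row j of the matrix-vector product
    for (int i = 0; i < n; ++i) s += lgtmtx[j * n + i] * input[i];
    output[j] = (tran_low_t)fdct_round_shift(s);
  }
}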
-// The get_fwd_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
-int get_fwd_lgt4(transform_1d tx_orig, TxfmParam *txfm_param,
- const tran_high_t *lgtmtx[], int ntx) {
- // inter/intra split
- if (tx_orig == &fadst4) {
- for (int i = 0; i < ntx; ++i)
- lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
- return 1;
+#if CONFIG_LGT_FROM_PRED
+static void flgt16up(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx) {
+ if (lgtmtx[0] == DCT16) {
+ fdct16(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST16) {
+ fadst16(input, output);
+ return;
+ } else if (lgtmtx[0] == DCT32) {
+ fdct32(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST32) {
+ fhalfright32(input, output);
+ return;
+ } else {
+ assert(0);
+ }
+}
+
+typedef void (*FlgtFunc)(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx);
+
+static FlgtFunc flgt_func[4] = { flgt4, flgt8, flgt16up, flgt16up };
+
+typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t *lgtmtx[], int ntx);
+
+static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred,
+ get_lgt16up_from_pred,
+ get_lgt16up_from_pred };
+
+// This inline function corresponds to the upscaling before the first
+// transform in the av1_fht* functions
+static INLINE tran_low_t fwd_upscale_wrt_txsize(const tran_high_t val,
+ const TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_4X4: return (tran_low_t)val << 4;
+ case TX_8X8:
+ case TX_4X16:
+ case TX_16X4:
+ case TX_8X32:
+ case TX_32X8: return (tran_low_t)val << 2;
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X16:
+ case TX_16X8: return (tran_low_t)fdct_round_shift(val * 4 * Sqrt2);
+ default: assert(0); break;
}
return 0;
}
-int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param,
- const tran_high_t *lgtmtx[], int ntx) {
- // inter/intra split
- if (tx_orig == &fadst8) {
- for (int i = 0; i < ntx; ++i)
- lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
- return 1;
+// This inline function corresponds to the bit shift after the second
+// transform in the av1_fht* functions
+static INLINE tran_low_t fwd_downscale_wrt_txsize(const tran_low_t val,
+ const TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_4X4: return (val + 1) >> 2;
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X8:
+ case TX_4X16:
+ case TX_16X4: return (val + (val < 0)) >> 1;
+ case TX_8X16:
+ case TX_16X8: return val;
+ case TX_8X32:
+ case TX_32X8: return ROUND_POWER_OF_TWO_SIGNED(val, 2);
+ default: assert(0); break;
}
return 0;
}
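/*
 * Worked example of how the two helpers pair up (inferred from the av1_fht*
 * kernels they mirror): for TX_4X4, fwd_upscale_wrt_txsize() is << 4 and
 * fwd_downscale_wrt_txsize() is (val + 1) >> 2, matching the `* 16` input
 * scaling and final 2-bit rounding in av1_fht4x4_c. For TX_8X16/TX_16X8 the
 * pair is the Sqrt2 pre-scale and a pass-through, because those kernels take
 * their 2-bit shift between the two 1-D passes instead.
 */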
-#endif // CONFIG_LGT
+
+void flgt2d_from_pred_c(const int16_t *input, tran_low_t *output, int stride,
+ TxfmParam *txfm_param) {
+ const TX_SIZE tx_size = txfm_param->tx_size;
+ const int w = tx_size_wide[tx_size];
+ const int h = tx_size_high[tx_size];
+ const int wlog2 = tx_size_wide_log2[tx_size];
+ const int hlog2 = tx_size_high_log2[tx_size];
+ assert(w <= 8 || h <= 8);
+
+ int i, j;
+ tran_low_t out[256]; // max size: 8x32 and 32x8
+ tran_low_t temp_in[32], temp_out[32];
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w);
+ get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h);
+
+  // For forward transforms, to be consistent with the av1_fht* functions, we
+  // apply the short transform first and the long transform second.
+ if (w < h) {
+ // Row transforms
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j)
+ temp_in[j] = fwd_upscale_wrt_txsize(input[i * stride + j], tx_size);
+ flgt_func[wlog2 - 2](temp_in, temp_out, lgtmtx_row[0]);
+ // right shift of 2 bits here in fht8x16 and fht16x8
+ for (j = 0; j < w; ++j)
+ out[j * h + i] = (tx_size == TX_16X8 || tx_size == TX_8X16)
+ ? ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2)
+ : temp_out[j];
+ }
+ // Column transforms
+ for (i = 0; i < w; ++i) {
+ for (j = 0; j < h; ++j) temp_in[j] = out[j + i * h];
+ flgt_func[hlog2 - 2](temp_in, temp_out, lgtmtx_col[0]);
+ for (j = 0; j < h; ++j)
+ output[j * w + i] = fwd_downscale_wrt_txsize(temp_out[j], tx_size);
+ }
+ } else {
+ // Column transforms
+ for (i = 0; i < w; ++i) {
+ for (j = 0; j < h; ++j)
+ temp_in[j] = fwd_upscale_wrt_txsize(input[j * stride + i], tx_size);
+ flgt_func[hlog2 - 2](temp_in, temp_out, lgtmtx_col[0]);
+ // fht8x16 and fht16x8 have right shift of 2 bits here
+ for (j = 0; j < h; ++j)
+ out[j * w + i] = (tx_size == TX_16X8 || tx_size == TX_8X16)
+ ? ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2)
+ : temp_out[j];
+ }
+ // Row transforms
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) temp_in[j] = out[j + i * w];
+ flgt_func[wlog2 - 2](temp_in, temp_out, lgtmtx_row[0]);
+ for (j = 0; j < w; ++j)
+ output[j + i * w] = fwd_downscale_wrt_txsize(temp_out[j], tx_size);
+ }
+ }
+}
+#endif // CONFIG_LGT_FROM_PRED
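/*
 * Worked example for the dispatch above (a sketch): for TX_8X16, w = 8 and
 * h = 16, so wlog2 = 3 and hlog2 = 4; get_lgt_func[wlog2 - 2] selects the
 * 8-point LGT for the rows and get_lgt_func[hlog2 - 2] the 16-point kernel
 * for the columns. Since w < h, the short (row) pass runs first and the
 * 2-bit right shift lands between the passes, as in av1_fht8x16_c.
 */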
#if CONFIG_EXT_TX
// TODO(sarahparker) these functions will be removed once the highbitdepth
@@ -1148,34 +1257,29 @@ int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param,
static void fidtx4(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i) {
-#if CONFIG_DAALA_DCT4
- output[i] = input[i];
-#else
output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);
-#endif
}
}
static void fidtx8(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 8; ++i) {
-#if CONFIG_DAALA_DCT8
- output[i] = input[i];
-#else
output[i] = input[i] * 2;
-#endif
}
}
static void fidtx16(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 16; ++i)
+ for (i = 0; i < 16; ++i) {
output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2);
+ }
}
static void fidtx32(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
+ for (i = 0; i < 32; ++i) {
+ output[i] = input[i] * 4;
+ }
}
static void copy_block(const int16_t *src, int src_stride, int l, int w,
@@ -1238,7 +1342,7 @@ static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w,
}
static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
- int16_t *buff, int tx_type) {
+ int16_t *buff, TX_TYPE tx_type) {
switch (tx_type) {
#if CONFIG_MRC_TX
case MRC_DCT:
@@ -1278,7 +1382,7 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -1293,6 +1397,26 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
{
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_DCT4
+ { daala_fdct4, daala_fdct4 }, // DCT_DCT
+ { daala_fdst4, daala_fdct4 }, // ADST_DCT
+ { daala_fdct4, daala_fdst4 }, // DCT_ADST
+ { daala_fdst4, daala_fdst4 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst4, daala_fdct4 }, // FLIPADST_DCT
+ { daala_fdct4, daala_fdst4 }, // DCT_FLIPADST
+ { daala_fdst4, daala_fdst4 }, // FLIPADST_FLIPADST
+ { daala_fdst4, daala_fdst4 }, // ADST_FLIPADST
+ { daala_fdst4, daala_fdst4 }, // FLIPADST_ADST
+ { daala_idtx4, daala_idtx4 }, // IDTX
+ { daala_fdct4, daala_idtx4 }, // V_DCT
+ { daala_idtx4, daala_fdct4 }, // H_DCT
+ { daala_fdst4, daala_idtx4 }, // V_ADST
+ { daala_idtx4, daala_fdst4 }, // H_ADST
+ { daala_fdst4, daala_idtx4 }, // V_FLIPADST
+ { daala_idtx4, daala_fdst4 }, // H_FLIPADST
+#endif
+#else
{ fdct4, fdct4 }, // DCT_DCT
{ fadst4, fdct4 }, // ADST_DCT
{ fdct4, fadst4 }, // DCT_ADST
@@ -1311,6 +1435,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
{ fadst4, fidtx4 }, // V_FLIPADST
{ fidtx4, fadst4 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[4 * 4];
@@ -1325,10 +1450,10 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#if CONFIG_LGT
// Choose LGT adaptive to the prediction. We may apply different LGTs for
// different rows/columns, indicated by the pointers to 2D arrays
- const tran_high_t *lgtmtx_col[4];
- const tran_high_t *lgtmtx_row[4];
- int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 4);
- int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 4);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// Columns
@@ -1340,7 +1465,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
if (use_lgt_col)
- flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ flgt4(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1352,7 +1477,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4];
#if CONFIG_LGT
if (use_lgt_row)
- flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ flgt4(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1369,7 +1494,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1408,10 +1533,10 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[4];
- const tran_high_t *lgtmtx_row[8];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 4);
- int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 8);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1421,7 +1546,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_row)
- flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ flgt4(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1433,7 +1558,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1445,7 +1570,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1484,10 +1609,10 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[8];
- const tran_high_t *lgtmtx_row[4];
- int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 8);
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 4);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Columns
@@ -1497,7 +1622,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_col)
- flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ flgt4(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1509,7 +1634,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1521,7 +1646,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1560,8 +1685,8 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[16];
- int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 16);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1569,7 +1694,7 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
#if CONFIG_LGT
if (use_lgt_row)
- flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ flgt4(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1588,7 +1713,7 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1627,8 +1752,8 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[16];
- int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 16);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
#endif
// Columns
@@ -1636,7 +1761,7 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
#if CONFIG_LGT
if (use_lgt_col)
- flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ flgt4(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1655,7 +1780,7 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1694,8 +1819,8 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[16];
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 16);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1705,7 +1830,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1724,7 +1849,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1763,8 +1888,8 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[16];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 16);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// Columns
@@ -1774,7 +1899,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1793,7 +1918,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1832,8 +1957,8 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[32];
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 32);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1841,7 +1966,7 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1855,12 +1980,12 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n4; ++j)
output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
- // Note: overall scale factor of transform is 4 times unitary
+ // Note: overall scale factor of transform is 8 times unitary
}
void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1899,8 +2024,8 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[32];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 32);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// Columns
@@ -1908,7 +2033,7 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1922,12 +2047,12 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n4; ++j)
output[j + i * n4] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
- // Note: overall scale factor of transform is 4 times unitary
+ // Note: overall scale factor of transform is 8 times unitary
}
void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1986,7 +2111,7 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2043,134 +2168,9 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
// Note: overall scale factor of transform is 4 times unitary
}
-void av1_fdct8x8_quant_c(const int16_t *input, int stride,
- tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
- int eob = -1;
-
- int i, j;
- tran_low_t intermediate[64];
-
- // Transform columns
- {
- tran_low_t *output = intermediate;
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- for (i = 0; i < 8; i++) {
- // stage 1
- s0 = (input[0 * stride] + input[7 * stride]) * 4;
- s1 = (input[1 * stride] + input[6 * stride]) * 4;
- s2 = (input[2 * stride] + input[5 * stride]) * 4;
- s3 = (input[3 * stride] + input[4 * stride]) * 4;
- s4 = (input[3 * stride] - input[4 * stride]) * 4;
- s5 = (input[2 * stride] - input[5 * stride]) * 4;
- s6 = (input[1 * stride] - input[6 * stride]) * 4;
- s7 = (input[0 * stride] - input[7 * stride]) * 4;
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
-
- // stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
- input++;
- output++;
- }
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
- for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2;
- }
-
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
-#endif
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- int tmp32;
-#if CONFIG_AOM_QM
- tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
-#else
- tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-#endif
-
- if (tmp32) eob = i;
- }
- }
- *eob_ptr = eob + 1;
-}
-
void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2185,6 +2185,26 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
{
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_DCT8
+ { daala_fdct8, daala_fdct8 }, // DCT_DCT
+ { daala_fdst8, daala_fdct8 }, // ADST_DCT
+ { daala_fdct8, daala_fdst8 }, // DCT_ADST
+ { daala_fdst8, daala_fdst8 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst8, daala_fdct8 }, // FLIPADST_DCT
+ { daala_fdct8, daala_fdst8 }, // DCT_FLIPADST
+ { daala_fdst8, daala_fdst8 }, // FLIPADST_FLIPADST
+ { daala_fdst8, daala_fdst8 }, // ADST_FLIPADST
+ { daala_fdst8, daala_fdst8 }, // FLIPADST_ADST
+ { daala_idtx8, daala_idtx8 }, // IDTX
+ { daala_fdct8, daala_idtx8 }, // V_DCT
+ { daala_idtx8, daala_fdct8 }, // H_DCT
+ { daala_fdst8, daala_idtx8 }, // V_ADST
+ { daala_idtx8, daala_fdst8 }, // H_ADST
+ { daala_fdst8, daala_idtx8 }, // V_FLIPADST
+ { daala_idtx8, daala_fdst8 }, // H_FLIPADST
+#endif
+#else
{ fdct8, fdct8 }, // DCT_DCT
{ fadst8, fdct8 }, // ADST_DCT
{ fdct8, fadst8 }, // DCT_ADST
@@ -2203,6 +2223,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
{ fadst8, fidtx8 }, // V_FLIPADST
{ fidtx8, fadst8 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[64];
@@ -2215,10 +2236,10 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[8];
- const tran_high_t *lgtmtx_row[8];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 8);
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 8);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Columns
@@ -2230,7 +2251,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -2242,7 +2263,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -2315,7 +2336,7 @@ void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2323,6 +2344,26 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_DCT16
+ { daala_fdct16, daala_fdct16 }, // DCT_DCT
+ { daala_fdst16, daala_fdct16 }, // ADST_DCT
+ { daala_fdct16, daala_fdst16 }, // DCT_ADST
+ { daala_fdst16, daala_fdst16 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst16, daala_fdct16 }, // FLIPADST_DCT
+ { daala_fdct16, daala_fdst16 }, // DCT_FLIPADST
+ { daala_fdst16, daala_fdst16 }, // FLIPADST_FLIPADST
+ { daala_fdst16, daala_fdst16 }, // ADST_FLIPADST
+ { daala_fdst16, daala_fdst16 }, // FLIPADST_ADST
+ { daala_idtx16, daala_idtx16 }, // IDTX
+ { daala_fdct16, daala_idtx16 }, // V_DCT
+ { daala_idtx16, daala_fdct16 }, // H_DCT
+ { daala_fdst16, daala_idtx16 }, // V_ADST
+ { daala_idtx16, daala_fdst16 }, // H_ADST
+ { daala_fdst16, daala_idtx16 }, // V_FLIPADST
+ { daala_idtx16, daala_fdst16 }, // H_FLIPADST
+#endif
+#else
{ fdct16, fdct16 }, // DCT_DCT
{ fadst16, fdct16 }, // ADST_DCT
{ fdct16, fadst16 }, // DCT_ADST
@@ -2341,6 +2382,7 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
{ fadst16, fidtx16 }, // V_FLIPADST
{ fidtx16, fadst16 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[256];
@@ -2354,17 +2396,34 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4;
+ for (j = 0; j < 16; ++j) {
+#if CONFIG_DAALA_DCT16
+ temp_in[j] = input[j * stride + i] * 16;
+#else
+ temp_in[j] = input[j * stride + i] * 4;
+#endif
+ }
ht.cols(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
+ for (j = 0; j < 16; ++j) {
+#if CONFIG_DAALA_DCT16
+ out[j * 16 + i] = temp_out[j];
+#else
out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+#endif
+ }
}
// Rows
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16];
ht.rows(temp_in, temp_out);
- for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j];
+ for (j = 0; j < 16; ++j) {
+#if CONFIG_DAALA_DCT16
+ output[j + i * 16] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+#else
+ output[j + i * 16] = temp_out[j];
+#endif
+ }
}
}
@@ -2375,12 +2434,32 @@ void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_DCT_ONLY
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
- { fdct32, fdct32 }, // DCT_DCT
+#if CONFIG_DAALA_DCT32
+ { daala_fdct32, daala_fdct32 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { daala_fdst32, daala_fdct32 }, // ADST_DCT
+ { daala_fdct32, daala_fdst32 }, // DCT_ADST
+ { daala_fdst32, daala_fdst32 }, // ADST_ADST
+ { daala_fdst32, daala_fdct32 }, // FLIPADST_DCT
+ { daala_fdct32, daala_fdst32 }, // DCT_FLIPADST
+ { daala_fdst32, daala_fdst32 }, // FLIPADST_FLIPADST
+ { daala_fdst32, daala_fdst32 }, // ADST_FLIPADST
+ { daala_fdst32, daala_fdst32 }, // FLIPADST_ADST
+ { daala_idtx32, daala_idtx32 }, // IDTX
+ { daala_fdct32, daala_idtx32 }, // V_DCT
+ { daala_idtx32, daala_fdct32 }, // H_DCT
+ { daala_fdst32, daala_idtx32 }, // V_ADST
+ { daala_idtx32, daala_fdst32 }, // H_ADST
+ { daala_fdst32, daala_idtx32 }, // V_FLIPADST
+ { daala_idtx32, daala_fdst32 }, // H_FLIPADST
+#endif
+#else
+ { fdct32, fdct32 }, // DCT_DCT
#if CONFIG_EXT_TX
{ fhalfright32, fdct32 }, // ADST_DCT
{ fdct32, fhalfright32 }, // DCT_ADST
@@ -2398,6 +2477,7 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
{ fhalfright32, fidtx32 }, // V_FLIPADST
{ fidtx32, fhalfright32 }, // H_FLIPADST
#endif
+#endif
#if CONFIG_MRC_TX
{ fdct32, fdct32 }, // MRC_DCT
#endif // CONFIG_MRC_TX
@@ -2416,27 +2496,41 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
if (tx_type == MRC_DCT) {
int16_t masked_input[32 * 32];
get_masked_residual32(&input, &stride, txfm_param->dst, txfm_param->stride,
- masked_input);
+ masked_input, txfm_param);
}
#endif // CONFIG_MRC_TX
// Columns
for (i = 0; i < 32; ++i) {
- for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
+ for (j = 0; j < 32; ++j) {
+#if CONFIG_DAALA_DCT32
+ temp_in[j] = input[j * stride + i] * 16;
+#else
+ temp_in[j] = input[j * stride + i] * 4;
+#endif
+ }
ht.cols(temp_in, temp_out);
- for (j = 0; j < 32; ++j)
+ for (j = 0; j < 32; ++j) {
+#if CONFIG_DAALA_DCT32
+ out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+#else
out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
+#endif
+ }
}
// Rows
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
ht.rows(temp_in, temp_out);
- for (j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j];
+ for (j = 0; j < 32; ++j) {
+ output[j + i * 32] = temp_out[j];
+ }
}
}
#if CONFIG_TX64X64
+#if !CONFIG_DAALA_DCT64
#if CONFIG_EXT_TX
static void fidtx64(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -2475,10 +2569,11 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
+#endif
void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2486,7 +2581,27 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
- { fdct64_col, fdct64_row }, // DCT_DCT
+#if CONFIG_DAALA_DCT64
+ { daala_fdct64, daala_fdct64 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { daala_fdst64, daala_fdct64 }, // ADST_DCT
+ { daala_fdct64, daala_fdst64 }, // DCT_ADST
+ { daala_fdst64, daala_fdst64 }, // ADST_ADST
+ { daala_fdst64, daala_fdct64 }, // FLIPADST_DCT
+ { daala_fdct64, daala_fdst64 }, // DCT_FLIPADST
+ { daala_fdst64, daala_fdst64 }, // FLIPADST_FLIPADST
+ { daala_fdst64, daala_fdst64 }, // ADST_FLIPADST
+ { daala_fdst64, daala_fdst64 }, // FLIPADST_ADST
+ { daala_idtx64, daala_idtx64 }, // IDTX
+ { daala_fdct64, daala_idtx64 }, // V_DCT
+ { daala_idtx64, daala_fdct64 }, // H_DCT
+ { daala_fdst64, daala_idtx64 }, // V_ADST
+ { daala_idtx64, daala_fdst64 }, // H_ADST
+ { daala_fdst64, daala_idtx64 }, // V_FLIPADST
+ { daala_idtx64, daala_fdst64 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+#else
+ { fdct64_col, fdct64_row }, // DCT_DCT
#if CONFIG_EXT_TX
{ fhalfright64, fdct64_row }, // ADST_DCT
{ fdct64_col, fhalfright64 }, // DCT_ADST
@@ -2503,7 +2618,8 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx64, fhalfright64 }, // H_ADST
{ fhalfright64, fidtx64 }, // V_FLIPADST
{ fidtx64, fhalfright64 }, // H_FLIPADST
-#endif
+#endif // CONFIG_EXT_TX
+#endif // CONFIG_DAALA_DCT64
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[4096];
@@ -2516,10 +2632,18 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 64; ++i) {
+#if CONFIG_DAALA_DCT64
+ for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i] * 16;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 64; ++j)
+ out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 3;
+
+#else
for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 64; ++j)
out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+#endif
}
// Rows
@@ -2527,8 +2651,129 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < 64; ++j) temp_in[j] = out[j + i * 64];
ht.rows(temp_in, temp_out);
for (j = 0; j < 64; ++j)
+#if CONFIG_DAALA_DCT64
+ output[j + i * 64] = temp_out[j];
+#else
output[j + i * 64] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+#endif
+ }
+}
+
+void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride,
+ TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+ static const transform_2d FHT[] = {
+ { fdct32, fdct64_row }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { fhalfright32, fdct64_row }, // ADST_DCT
+ { fdct32, fhalfright64 }, // DCT_ADST
+ { fhalfright32, fhalfright64 }, // ADST_ADST
+ { fhalfright32, fdct64_row }, // FLIPADST_DCT
+ { fdct32, fhalfright64 }, // DCT_FLIPADST
+ { fhalfright32, fhalfright64 }, // FLIPADST_FLIPADST
+ { fhalfright32, fhalfright64 }, // ADST_FLIPADST
+ { fhalfright32, fhalfright64 }, // FLIPADST_ADST
+ { fidtx32, fidtx64 }, // IDTX
+ { fdct32, fidtx64 }, // V_DCT
+ { fidtx32, fdct64_row }, // H_DCT
+ { fhalfright32, fidtx64 }, // V_ADST
+ { fidtx32, fhalfright64 }, // H_ADST
+ { fhalfright32, fidtx64 }, // V_FLIPADST
+ { fidtx32, fhalfright64 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ const transform_2d ht = FHT[tx_type];
+ tran_low_t out[2048];
+ int i, j;
+ tran_low_t temp_in[64], temp_out[64];
+ const int n = 32;
+ const int n2 = 64;
+#if CONFIG_EXT_TX
+ int16_t flipped_input[32 * 64];
+ maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
+#endif
+
+ // Columns
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j)
+ temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < n; ++j)
+ out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+ }
+
+ // Rows
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < n2; ++j)
+ output[j + i * n2] =
+ (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+ }
+}
+
+void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride,
+ TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+ static const transform_2d FHT[] = {
+ { fdct64_row, fdct32 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { fhalfright64, fdct32 }, // ADST_DCT
+ { fdct64_row, fhalfright32 }, // DCT_ADST
+ { fhalfright64, fhalfright32 }, // ADST_ADST
+ { fhalfright64, fdct32 }, // FLIPADST_DCT
+ { fdct64_row, fhalfright32 }, // DCT_FLIPADST
+ { fhalfright64, fhalfright32 }, // FLIPADST_FLIPADST
+ { fhalfright64, fhalfright32 }, // ADST_FLIPADST
+ { fhalfright64, fhalfright32 }, // FLIPADST_ADST
+ { fidtx64, fidtx32 }, // IDTX
+ { fdct64_row, fidtx32 }, // V_DCT
+ { fidtx64, fdct32 }, // H_DCT
+ { fhalfright64, fidtx32 }, // V_ADST
+ { fidtx64, fhalfright32 }, // H_ADST
+ { fhalfright64, fidtx32 }, // V_FLIPADST
+ { fidtx64, fhalfright32 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ const transform_2d ht = FHT[tx_type];
+ tran_low_t out[32 * 64];
+ int i, j;
+ tran_low_t temp_in[64], temp_out[64];
+ const int n = 32;
+ const int n2 = 64;
+#if CONFIG_EXT_TX
+ int16_t flipped_input[32 * 64];
+ maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
+#endif
+
+ // Rows
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j)
+ temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * Sqrt2);
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < n; ++j)
+ out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+ }
+
+ // Columns
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < n2; ++j)
+ output[i + j * n] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
}
#endif // CONFIG_TX64X64
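/*
 * Note on the Sqrt2 pre-scale in av1_fht64x32_c/av1_fht32x64_c above (an
 * inferred rationale, consistent with the other rectangular kernels such as
 * av1_fht4x8_c): a 2:1 block has a 2-D transform gain proportional to
 * sqrt(w * h), an irrational sqrt(2) multiple of a power of two, so folding
 * one Sqrt2 factor into the input scaling keeps the overall output scale a
 * clean power of two.
 */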
@@ -2536,110 +2781,17 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
#if CONFIG_EXT_TX
// Forward identity transform.
void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
- int bs, int tx_type) {
+ int bsx, int bsy, TX_TYPE tx_type) {
int r, c;
- const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
+ const int pels = bsx * bsy;
+ const int shift = 3 - ((pels > 256) + (pels > 1024));
if (tx_type == IDTX) {
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] * (1 << shift);
+ for (r = 0; r < bsy; ++r) {
+ for (c = 0; c < bsx; ++c) coeff[c] = src_diff[c] * (1 << shift);
src_diff += stride;
- coeff += bs;
+ coeff += bsx;
}
}
}
#endif // CONFIG_EXT_TX
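/*
 * Worked example for the new shift rule: pels = bsx * bsy and
 * shift = 3 - ((pels > 256) + (pels > 1024)), so
 *   16x16: pels = 256  -> shift 3     32x8 : pels = 256  -> shift 3
 *   32x16: pels = 512  -> shift 2     32x32: pels = 1024 -> shift 2
 *   64x32: pels = 2048 -> shift 1     64x64: pels = 4096 -> shift 1
 * On square blocks this matches the old `bs < 32 ? 3 : (bs < 64 ? 2 : 1)`
 * rule and extends it to rectangular sizes by pixel count.
 */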
-
-#if CONFIG_DPCM_INTRA
-void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[4];
- for (int i = 0; i < 4; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[8];
- for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
- ft(temp_in, output);
-}
-
-void av1_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[16];
- for (int i = 0; i < 16; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
- fidtx32 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[32];
- for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
- ft(temp_in, output);
-}
-
-#if CONFIG_HIGHBITDEPTH
-void av1_hbd_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[4];
- for (int i = 0; i < 4; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_hbd_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[8];
- for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
- ft(temp_in, output);
-}
-
-void av1_hbd_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[16];
- for (int i = 0; i < 16; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_hbd_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
- fidtx32 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[32];
- for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
- ft(temp_in, output);
-}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_DPCM_INTRA
#endif // !AV1_DCT_GTEST