vp9/encoder: apply clang-format

Change-Id: I45d9fb4013f50766b24363a86365e8063e8954c2
clang-format
2016-07-26 20:43:23 -07:00
committed by James Zern
parent 3a04c9c9c4
commit e0cc52db3f
81 changed files with 6382 additions and 8308 deletions
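
This is a purely mechanical pass: no logic changes, only reformatting, of the kind typically produced by running clang-format -i over the affected sources against the style file checked into the tree. The recurring rules are easiest to see in a composed sketch (the fragment below is written for this note under that assumption, not excerpted from any file in the diff): the '*' in a pointer declaration binds to the declarator, short switch cases and single-statement bodies collapse onto one line, braced initializers gain inner spaces, and argument lists re-wrap at the 80-column limit.

/* style_sketch.c -- composed illustration of the conventions this commit
 * applies throughout vp9/encoder. Builds standalone: cc -c style_sketch.c */
#include <stdint.h>

/* Pointer '*' binds to the name: "const int16_t *p", not "const int16_t* p". */
int16_t sum8(const int16_t *p) {
  int16_t s = 0;
  int i;
  for (i = 0; i < 8; ++i) s += p[i]; /* single-statement body, one line */
  if (s < 0) s = -s;                 /* single-statement if, one line */
  return s;
}

/* Braced initializers take inner spaces: { 1.0, 0.75, ... }. */
static const double ratios[4] = { 1.0, 0.75, 0.6, 0.5 };

/* Short switch cases collapse onto a single line. */
double pick(intptr_t blk_size) {
  switch (blk_size) {
    case 16: return ratios[0];
    case 64: return ratios[1];
    case 256: return ratios[2];
    default: return ratios[3];
  }
}

These few rules account for nearly all of the 6382 additions and 8308 deletions below.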

View File

@@ -18,14 +18,13 @@
#include "vpx_dsp/txfm_common.h"
void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
const int16_t* dequant_ptr, uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
int16_t temp_buffer[64];
(void)coeff_ptr;

View File

@@ -26,8 +26,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
// TODO(jingning) Decide whether these arguments are needed after the
// quantization process is completed.
(void)zbin_ptr;
@@ -54,12 +54,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
vget_high_s16(v_quant));
const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
vshrn_n_s32(v_tmp_hi, 16));
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
const int16x8_t v_tmp2 =
vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
@@ -79,12 +79,12 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
vget_low_s16(v_quant));
const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
vget_high_s16(v_quant));
const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
vshrn_n_s32(v_tmp_hi, 16));
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
const int16x8_t v_tmp2 =
vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
@@ -96,9 +96,8 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
}
{
const int16x4_t v_eobmax_3210 =
vmax_s16(vget_low_s16(v_eobmax_76543210),
vget_high_s16(v_eobmax_76543210));
const int16x4_t v_eobmax_3210 = vmax_s16(
vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210));
const int64x1_t v_eobmax_xx32 =
vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
const int16x4_t v_eobmax_tmp =

View File

@@ -11,74 +11,73 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \
const int16_t *dq_coeff_ptr, \
int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \
sq_coeff_r, sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
static int64_t block_error_##BSize##size_msa( \
const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
int64_t err = 0; \
uint32_t loop_cnt; \
v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
v2i64 sq_coeff_r, sq_coeff_l; \
v2i64 err0, err_dup0, err1, err_dup1; \
\
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
sq_coeff_l); \
DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
\
for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
coeff = LD_SH(coeff_ptr); \
dq_coeff = LD_SH(dq_coeff_ptr); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff = LD_SH(coeff_ptr + 8); \
dq_coeff = LD_SH(dq_coeff_ptr + 8); \
UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
\
coeff_ptr += 16; \
dq_coeff_ptr += 16; \
} \
\
err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
sq_coeff_r += err_dup0; \
sq_coeff_l += err_dup1; \
*ssz = __msa_copy_s_d(sq_coeff_r, 0); \
*ssz += __msa_copy_s_d(sq_coeff_l, 0); \
\
err_dup0 = __msa_splati_d(err0, 1); \
err_dup1 = __msa_splati_d(err1, 1); \
err0 += err_dup0; \
err1 += err_dup1; \
err = __msa_copy_s_d(err0, 0); \
err += __msa_copy_s_d(err1, 0); \
\
return err; \
}
BLOCK_ERROR_BLOCKSIZE_MSA(16);
BLOCK_ERROR_BLOCKSIZE_MSA(64);
@@ -86,25 +85,17 @@ BLOCK_ERROR_BLOCKSIZE_MSA(256);
BLOCK_ERROR_BLOCKSIZE_MSA(1024);
int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
const tran_low_t *dq_coeff_ptr,
intptr_t blk_size, int64_t *ssz) {
const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
int64_t *ssz) {
int64_t err;
const int16_t *coeff = (const int16_t *)coeff_ptr;
const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
switch (blk_size) {
case 16:
err = block_error_16size_msa(coeff, dq_coeff, ssz);
break;
case 64:
err = block_error_64size_msa(coeff, dq_coeff, ssz);
break;
case 256:
err = block_error_256size_msa(coeff, dq_coeff, ssz);
break;
case 1024:
err = block_error_1024size_msa(coeff, dq_coeff, ssz);
break;
case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
default:
err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
break;

View File

@@ -159,8 +159,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
@@ -169,8 +169,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
@@ -181,8 +181,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
/* load input data */
input += 128;
LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
FDCT_POSTPROC_2V_NEG_H(r0, r1);
FDCT_POSTPROC_2V_NEG_H(r2, r3);
FDCT_POSTPROC_2V_NEG_H(r4, r5);
@@ -191,8 +191,8 @@ static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
out += 64;
LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
FDCT_POSTPROC_2V_NEG_H(r8, r9);
FDCT_POSTPROC_2V_NEG_H(r10, r11);
FDCT_POSTPROC_2V_NEG_H(r12, r13);
@@ -339,24 +339,24 @@ static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
/* load input data */
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
l4, l12, l5, l13, l6, l14, l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
out += 16 * 8;
/* load input data */
input += 128;
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
l4, l12, l5, l13, l6, l14, l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
r0, r1, r2, r3, r4, r5, r6, r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
r8, r9, r10, r11, r12, r13, r14, r15);
LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
l7, l15);
TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
r7);
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
r12, r13, r14, r15);
ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
}
@@ -371,10 +371,10 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7);
temp = intermediate + 8;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
in8, in9, in10, in11, in12, in13, in14, in15);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
in10, in11, in12, in13, in14, in15);
FDCT_POSTPROC_2V_NEG_H(in0, in1);
FDCT_POSTPROC_2V_NEG_H(in2, in3);
FDCT_POSTPROC_2V_NEG_H(in4, in5);
@@ -383,29 +383,28 @@ static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
FDCT_POSTPROC_2V_NEG_H(in10, in11);
FDCT_POSTPROC_2V_NEG_H(in12, in13);
FDCT_POSTPROC_2V_NEG_H(in14, in15);
BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
in8, in9, in10, in11, in12, in13, in14, in15,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
in8, in9, in10, in11, in12, in13, in14, in15);
BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11,
in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6,
tmp7, in8, in9, in10, in11, in12, in13, in14, in15);
temp = intermediate;
ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16);
FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1,
tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
temp = intermediate;
LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3,
tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3);
FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3,
in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0,
tmp1, in1, tmp2, in2, tmp3, in3);
ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16);
TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7,
tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7);
TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4,
tmp5, in5, tmp6, in6, tmp7, in7);
out = output + 8;
ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16);
}
void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
int32_t stride, int32_t tx_type) {
void vp9_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride,
int32_t tx_type) {
DECLARE_ALIGNED(32, int16_t, tmp[256]);
DECLARE_ALIGNED(32, int16_t, trans_buf[256]);
DECLARE_ALIGNED(32, int16_t, tmp_buf[128]);
@@ -413,35 +412,31 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
int16_t *ptmpbuf = &tmp_buf[0];
int16_t *trans = &trans_buf[0];
const int32_t const_arr[29 * 4] = {
52707308, 52707308, 52707308, 52707308,
-1072430300, -1072430300, -1072430300, -1072430300,
795618043, 795618043, 795618043, 795618043,
-721080468, -721080468, -721080468, -721080468,
459094491, 459094491, 459094491, 459094491,
-970646691, -970646691, -970646691, -970646691,
1010963856, 1010963856, 1010963856, 1010963856,
-361743294, -361743294, -361743294, -361743294,
209469125, 209469125, 209469125, 209469125,
-1053094788, -1053094788, -1053094788, -1053094788,
1053160324, 1053160324, 1053160324, 1053160324,
639644520, 639644520, 639644520, 639644520,
-862444000, -862444000, -862444000, -862444000,
1062144356, 1062144356, 1062144356, 1062144356,
-157532337, -157532337, -157532337, -157532337,
260914709, 260914709, 260914709, 260914709,
-1041559667, -1041559667, -1041559667, -1041559667,
920985831, 920985831, 920985831, 920985831,
-551995675, -551995675, -551995675, -551995675,
596522295, 596522295, 596522295, 596522295,
892853362, 892853362, 892853362, 892853362,
-892787826, -892787826, -892787826, -892787826,
410925857, 410925857, 410925857, 410925857,
-992012162, -992012162, -992012162, -992012162,
992077698, 992077698, 992077698, 992077698,
759246145, 759246145, 759246145, 759246145,
-759180609, -759180609, -759180609, -759180609,
-759222975, -759222975, -759222975, -759222975,
759288511, 759288511, 759288511, 759288511 };
52707308, 52707308, 52707308, 52707308, -1072430300,
-1072430300, -1072430300, -1072430300, 795618043, 795618043,
795618043, 795618043, -721080468, -721080468, -721080468,
-721080468, 459094491, 459094491, 459094491, 459094491,
-970646691, -970646691, -970646691, -970646691, 1010963856,
1010963856, 1010963856, 1010963856, -361743294, -361743294,
-361743294, -361743294, 209469125, 209469125, 209469125,
209469125, -1053094788, -1053094788, -1053094788, -1053094788,
1053160324, 1053160324, 1053160324, 1053160324, 639644520,
639644520, 639644520, 639644520, -862444000, -862444000,
-862444000, -862444000, 1062144356, 1062144356, 1062144356,
1062144356, -157532337, -157532337, -157532337, -157532337,
260914709, 260914709, 260914709, 260914709, -1041559667,
-1041559667, -1041559667, -1041559667, 920985831, 920985831,
920985831, 920985831, -551995675, -551995675, -551995675,
-551995675, 596522295, 596522295, 596522295, 596522295,
892853362, 892853362, 892853362, 892853362, -892787826,
-892787826, -892787826, -892787826, 410925857, 410925857,
410925857, 410925857, -992012162, -992012162, -992012162,
-992012162, 992077698, 992077698, 992077698, 992077698,
759246145, 759246145, 759246145, 759246145, -759180609,
-759180609, -759180609, -759180609, -759222975, -759222975,
-759222975, -759222975, 759288511, 759288511, 759288511,
759288511
};
switch (tx_type) {
case DCT_DCT:
@@ -500,8 +495,6 @@ void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
fadst16_transpose_msa(tmp, output);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}

View File

@@ -86,9 +86,7 @@ void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
break;
default:
assert(0);
break;
default: assert(0); break;
}
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);

View File

@@ -23,44 +23,42 @@ void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
switch (tx_type) {
case DCT_DCT:
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case ADST_DCT:
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case DCT_ADST:
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
case ADST_ADST:
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
break;
default:
assert(0);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
in3, in4, in5, in6, in7);
VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
in5, in6, in7);
break;
default: assert(0); break;
}
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
in4, in5, in6, in7);
SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7);
ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
}

View File

@@ -15,103 +15,102 @@
#include "vpx_dsp/mips/txfm_macros_msa.h"
#include "vpx_ports/mem.h"
#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, out4, out5, out6, out7) { \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
cospi_24_64, -cospi_24_64, 0, 0 }; \
\
SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
cnst1_m, cnst2_m, cnst3_m, in7, in0, \
in4, in3); \
\
SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
\
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
cnst1_m, cnst2_m, cnst3_m, in5, in2, \
in6, in1); \
BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
out7 = -s0_m; \
out0 = s1_m; \
\
SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
\
ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
cnst1_m = cnst0_m; \
\
ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
cnst2_m, cnst3_m, cnst1_m, out1, out6, \
s0_m, s1_m); \
\
SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
\
out1 = -out1; \
out3 = -out3; \
out5 = -out5; \
}
#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
out3, out4, out5, out6, out7) \
{ \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
cospi_24_64, -cospi_24_64, 0, 0 }; \
\
SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
cnst2_m, cnst3_m, in7, in0, in4, in3); \
\
SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
cnst2_m = -cnst0_m; \
ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
cnst4_m = -cnst2_m; \
ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
\
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
cnst2_m, cnst3_m, in5, in2, in6, in1); \
BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
out7 = -s0_m; \
out0 = s1_m; \
\
SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
\
ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
cnst1_m = cnst0_m; \
\
ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \
cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \
\
SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
\
ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
\
out1 = -out1; \
out3 = -out3; \
out5 = -out5; \
}
#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) { \
v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
\
UNPCK_R_SH_SW(in0, in0_r_m); \
UNPCK_R_SH_SW(in1, in1_r_m); \
UNPCK_R_SH_SW(in2, in2_r_m); \
UNPCK_R_SH_SW(in3, in3_r_m); \
\
constant_m = __msa_fill_w(sinpi_4_9); \
MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
\
constant_m = __msa_fill_w(sinpi_1_9); \
s0_m += in0_r_m * constant_m; \
s1_m -= in1_r_m * constant_m; \
\
constant_m = __msa_fill_w(sinpi_2_9); \
s0_m += in1_r_m * constant_m; \
s1_m += in3_r_m * constant_m; \
\
s2_m = in0_r_m + in1_r_m - in3_r_m; \
\
constant_m = __msa_fill_w(sinpi_3_9); \
MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
\
in0_r_m = s0_m + s3_m; \
s2_m = s1_m - s3_m; \
s3_m = s1_m - s0_m + s3_m; \
\
SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, \
s3_m, s3_m, out0, out1, out2, out3); \
}
#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */
#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
\
UNPCK_R_SH_SW(in0, in0_r_m); \
UNPCK_R_SH_SW(in1, in1_r_m); \
UNPCK_R_SH_SW(in2, in2_r_m); \
UNPCK_R_SH_SW(in3, in3_r_m); \
\
constant_m = __msa_fill_w(sinpi_4_9); \
MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
\
constant_m = __msa_fill_w(sinpi_1_9); \
s0_m += in0_r_m * constant_m; \
s1_m -= in1_r_m * constant_m; \
\
constant_m = __msa_fill_w(sinpi_2_9); \
s0_m += in1_r_m * constant_m; \
s1_m += in3_r_m * constant_m; \
\
s2_m = in0_r_m + in1_r_m - in3_r_m; \
\
constant_m = __msa_fill_w(sinpi_3_9); \
MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
\
in0_r_m = s0_m + s3_m; \
s2_m = s1_m - s3_m; \
s3_m = s1_m - s0_m + s3_m; \
\
SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \
out0, out1, out2, out3); \
}
#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */

View File

@@ -11,12 +11,9 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
uint32_t stride,
uint8_t *frm2_ptr,
int32_t filt_sth,
int32_t filt_wgt,
uint32_t *acc,
static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
uint8_t *frm2_ptr, int32_t filt_sth,
int32_t filt_wgt, uint32_t *acc,
uint16_t *cnt) {
uint32_t row;
uint64_t f0, f1, f2, f3;
@@ -54,10 +51,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
UNPCK_SH_SW(diff0, diff0_r, diff0_l);
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -65,8 +62,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -85,8 +82,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@@ -101,10 +98,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
UNPCK_SH_SW(diff0, diff0_r, diff0_l);
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -112,8 +109,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -131,8 +128,8 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@@ -141,13 +138,10 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
}
}
static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
uint32_t stride,
static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
uint8_t *frm2_ptr,
int32_t filt_sth,
int32_t filt_wgt,
uint32_t *acc,
uint16_t *cnt) {
int32_t filt_sth, int32_t filt_wgt,
uint32_t *acc, uint16_t *cnt) {
uint32_t row;
v16i8 frm1, frm2, frm3, frm4;
v16u8 frm_r, frm_l;
@@ -183,8 +177,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -192,8 +186,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -212,8 +206,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
@@ -230,8 +224,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(diff1, diff1_r, diff1_l);
MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
mod0_w, mod1_w, mod2_w, mod3_w);
MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
mod1_w, mod2_w, mod3_w);
SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
diff0_r = (mod0_w < cnst16);
@@ -239,8 +233,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
diff1_r = (mod2_w < cnst16);
diff1_l = (mod3_w < cnst16);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
mod0_w, mod1_w, mod2_w, mod3_w);
SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
mod1_w, mod2_w, mod3_w);
mod0_w = diff0_r & mod0_w;
mod1_w = diff0_l & mod1_w;
@@ -259,8 +253,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
mod0_w, mod1_w, mod2_w, mod3_w);
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
mod2_w, mod3_w);
ST_SW2(mod0_w, mod1_w, acc, 4);
acc += 8;
ST_SW2(mod2_w, mod3_w, acc, 4);
@@ -277,11 +271,11 @@ void vp9_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
int32_t filt_wgt, uint32_t *accu,
uint16_t *cnt) {
if (8 == (blk_w * blk_h)) {
temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr,
strength, filt_wgt, accu, cnt);
temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
filt_wgt, accu, cnt);
} else if (16 == (blk_w * blk_h)) {
temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr,
strength, filt_wgt, accu, cnt);
temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
filt_wgt, accu, cnt);
} else {
vp9_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
strength, filt_wgt, accu, cnt);

View File

@@ -22,8 +22,8 @@
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_segmentation.h"
static const double rate_ratio[MAX_SEGMENTS] =
{1.0, 0.75, 0.6, 0.5, 0.4, 0.3, 0.25};
static const double rate_ratio[MAX_SEGMENTS] = { 1.0, 0.75, 0.6, 0.5,
0.4, 0.3, 0.25 };
// Sets segment id 0 for the equatorial region, 1 for the temperate region,
// and 2 for the polar regions.

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_AQ_360_H_
#define VP9_ENCODER_VP9_AQ_360_H_

View File

@@ -19,21 +19,24 @@
#include "vp9/common/vp9_seg_common.h"
#include "vp9/encoder/vp9_segmentation.h"
#define AQ_C_SEGMENTS 5
#define DEFAULT_AQ2_SEG 3 // Neutral Q segment
#define AQ_C_SEGMENTS 5
#define DEFAULT_AQ2_SEG 3 // Neutral Q segment
#define AQ_C_STRENGTHS 3
static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{ {1.75, 1.25, 1.05, 1.00, 0.90},
{2.00, 1.50, 1.15, 1.00, 0.85},
{2.50, 1.75, 1.25, 1.00, 0.80} };
static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{ {0.15, 0.30, 0.55, 2.00, 100.0},
{0.20, 0.40, 0.65, 2.00, 100.0},
{0.25, 0.50, 0.75, 2.00, 100.0} };
static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{ {-4.0, -3.0, -2.0, 100.00, 100.0},
{-3.5, -2.5, -1.5, 100.00, 100.0},
{-3.0, -2.0, -1.0, 100.00, 100.0} };
static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
{ 1.75, 1.25, 1.05, 1.00, 0.90 },
{ 2.00, 1.50, 1.15, 1.00, 0.85 },
{ 2.50, 1.75, 1.25, 1.00, 0.80 }
};
static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
{ 0.15, 0.30, 0.55, 2.00, 100.0 },
{ 0.20, 0.40, 0.65, 2.00, 100.0 },
{ 0.25, 0.50, 0.75, 2.00, 100.0 }
};
static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
{ -4.0, -3.0, -2.0, 100.00, 100.0 },
{ -3.5, -2.5, -1.5, 100.00, 100.0 },
{ -3.0, -2.0, -1.0, 100.00, 100.0 }
};
static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) {
// Approximate base quantizer (truncated to int)
@@ -78,14 +81,11 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
for (segment = 0; segment < AQ_C_SEGMENTS; ++segment) {
int qindex_delta;
if (segment == DEFAULT_AQ2_SEG)
continue;
qindex_delta =
vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
aq_c_q_adj_factor[aq_strength][segment],
cm->bit_depth);
if (segment == DEFAULT_AQ2_SEG) continue;
qindex_delta = vp9_compute_qdelta_by_rate(
&cpi->rc, cm->frame_type, cm->base_qindex,
aq_c_q_adj_factor[aq_strength][segment], cm->bit_depth);
// For AQ complexity mode, we don't allow Q0 in a segment if the base
// Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment
@@ -125,26 +125,25 @@ void vp9_caq_select_segment(VP9_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
} else {
// Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
// It is converted to bits * 256 units.
const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
(bw * bh);
const int target_rate =
(cpi->rc.sb64_target_rate * xmis * ymis * 256) / (bw * bh);
double logvar;
double low_var_thresh;
const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
vpx_clear_system_state();
low_var_thresh = (cpi->oxcf.pass == 2)
? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
low_var_thresh = (cpi->oxcf.pass == 2) ? VPXMAX(cpi->twopass.mb_av_energy,
MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
logvar = vp9_log_block_var(cpi, mb, bs);
segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
for (i = 0; i < AQ_C_SEGMENTS; ++i) {
// Test rate against a threshold value and variance against a threshold.
// Increasing segment number (higher variance and complexity) = higher Q.
if ((projected_rate <
target_rate * aq_c_transitions[aq_strength][i]) &&
if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
(logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
segment = i;
break;

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
@@ -23,8 +22,8 @@ struct macroblock;
// Select a segment for the current Block.
void vp9_caq_select_segment(struct VP9_COMP *cpi, struct macroblock *,
BLOCK_SIZE bs,
int mi_row, int mi_col, int projected_rate);
BLOCK_SIZE bs, int mi_row, int mi_col,
int projected_rate);
// This function sets up a set of segments with delta Q values around
// the baseline frame quantizer.

View File

@@ -24,8 +24,7 @@
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
if (cr == NULL) return NULL;
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
if (cr->map == NULL) {
@@ -53,11 +52,8 @@ void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
// mode, and rate/distortion.
static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
const MODE_INFO *mi,
int64_t rate,
int64_t dist,
int bsize) {
static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, const MODE_INFO *mi,
int64_t rate, int64_t dist, int bsize) {
MV mv = mi->mv[0].as_mv;
// Reject the block for lower-qp coding if projected distortion
// is above the threshold, and any of the following is true:
@@ -69,11 +65,9 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh ||
!is_inter_block(mi)))
return CR_SEGMENT_ID_BASE;
else if (bsize >= BLOCK_16X16 &&
rate < cr->thresh_rate_sb &&
is_inter_block(mi) &&
mi->mv[0].as_int == 0 &&
cr->rate_boost_fac > 10)
else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb &&
is_inter_block(mi) && mi->mv[0].as_int == 0 &&
cr->rate_boost_fac > 10)
// More aggressive delta-q for bigger blocks with zero motion.
return CR_SEGMENT_ID_BOOST2;
else
@@ -84,9 +78,8 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
static int compute_deltaq(const VP9_COMP *cpi, int q, double rate_factor) {
const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const RATE_CONTROL *const rc = &cpi->rc;
int deltaq = vp9_compute_qdelta_by_rate(rc, cpi->common.frame_type,
q, rate_factor,
cpi->common.bit_depth);
int deltaq = vp9_compute_qdelta_by_rate(rc, cpi->common.frame_type, q,
rate_factor, cpi->common.bit_depth);
if ((-deltaq) > cr->max_qdelta_perc * q / 100) {
deltaq = -cr->max_qdelta_perc * q / 100;
}
@@ -109,17 +102,18 @@ int vp9_cyclic_refresh_estimate_bits_at_q(const VP9_COMP *cpi,
double weight_segment1 = (double)cr->actual_num_seg1_blocks / num8x8bl;
double weight_segment2 = (double)cr->actual_num_seg2_blocks / num8x8bl;
// Take segment weighted average for estimated bits.
estimated_bits = (int)((1.0 - weight_segment1 - weight_segment2) *
vp9_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
correction_factor, cm->bit_depth) +
weight_segment1 *
vp9_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[1], mbs,
correction_factor, cm->bit_depth) +
weight_segment2 *
vp9_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[2], mbs,
correction_factor, cm->bit_depth));
estimated_bits =
(int)((1.0 - weight_segment1 - weight_segment2) *
vp9_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
correction_factor, cm->bit_depth) +
weight_segment1 *
vp9_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[1],
mbs, correction_factor, cm->bit_depth) +
weight_segment2 *
vp9_estimate_bits_at_q(cm->frame_type,
cm->base_qindex + cr->qindex_delta[2],
mbs, correction_factor, cm->bit_depth));
return estimated_bits;
}
@@ -137,30 +131,29 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const VP9_COMP *cpi, int i,
// Weight for segment prior to encoding: take the average of the target
// number for the frame to be encoded and the actual from the previous frame.
int target_refresh = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
double weight_segment = (double)((target_refresh +
cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) >> 1) /
double weight_segment =
(double)((target_refresh + cr->actual_num_seg1_blocks +
cr->actual_num_seg2_blocks) >>
1) /
num8x8bl;
// Compute delta-q corresponding to qindex i.
int deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta);
// Take segment weighted average for bits per mb.
bits_per_mb = (int)((1.0 - weight_segment) *
vp9_rc_bits_per_mb(cm->frame_type, i, correction_factor, cm->bit_depth) +
weight_segment *
vp9_rc_bits_per_mb(cm->frame_type, i + deltaq, correction_factor,
cm->bit_depth));
vp9_rc_bits_per_mb(cm->frame_type, i,
correction_factor, cm->bit_depth) +
weight_segment *
vp9_rc_bits_per_mb(cm->frame_type, i + deltaq,
correction_factor, cm->bit_depth));
return bits_per_mb;
}
// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
MODE_INFO *const mi,
int mi_row, int mi_col,
BLOCK_SIZE bsize,
int64_t rate,
int64_t dist,
int skip,
void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, MODE_INFO *const mi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip,
struct macroblock_plane *const p) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
@@ -172,22 +165,16 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
int refresh_this_block = candidate_refresh_aq(cr, mi, rate, dist, bsize);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
int x = 0;
int y = 0;
int is_skin = 0;
if (refresh_this_block == 0 &&
bsize <= BLOCK_16X16 &&
if (refresh_this_block == 0 && bsize <= BLOCK_16X16 &&
cpi->use_skin_detection) {
is_skin = vp9_compute_skin_block(p[0].src.buf,
p[1].src.buf,
p[2].src.buf,
p[0].src.stride,
p[1].src.stride,
bsize,
0,
0);
if (is_skin)
refresh_this_block = 1;
is_skin =
vp9_compute_skin_block(p[0].src.buf, p[1].src.buf, p[2].src.buf,
p[0].src.stride, p[1].src.stride, bsize, 0, 0);
if (is_skin) refresh_this_block = 1;
}
if (cpi->oxcf.rc_mode == VPX_VBR && mi->ref_frame[0] == GOLDEN_FRAME)
@@ -198,8 +185,7 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
if (cyclic_refresh_segment_id_boosted(mi->segment_id)) {
mi->segment_id = refresh_this_block;
// Reset segment_id if it will be skipped.
if (skip)
mi->segment_id = CR_SEGMENT_ID_BASE;
if (skip) mi->segment_id = CR_SEGMENT_ID_BASE;
}
// Update the cyclic refresh map, to be used for setting segmentation map
@@ -212,8 +198,7 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
// Else if it is accepted as candidate for refresh, and has not already
// been refreshed (marked as 1) then mark it as a candidate for cleanup
// for future time (marked as 0), otherwise don't update it.
if (cr->map[block_index] == 1)
new_map_value = 0;
if (cr->map[block_index] == 1) new_map_value = 0;
} else {
// Leave it marked as block that is not candidate for refresh.
new_map_value = 1;
@@ -250,13 +235,12 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
// the map for this spatial location is not entirely correct.
if ((!is_inter_block(mi) || !mi->skip) &&
mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
cr->last_coded_q_map[map_offset] = clamp(
cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ);
cr->last_coded_q_map[map_offset] =
clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ);
} else if (is_inter_block(mi) && mi->skip &&
mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
cr->last_coded_q_map[map_offset] = VPXMIN(
clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id],
0, MAXQ),
clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ),
cr->last_coded_q_map[map_offset]);
}
}
@@ -272,11 +256,12 @@ void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) {
cr->actual_num_seg2_blocks = 0;
for (mi_row = 0; mi_row < cm->mi_rows; mi_row++)
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
if (cyclic_refresh_segment_id(
seg_map[mi_row * cm->mi_cols + mi_col]) == CR_SEGMENT_ID_BOOST1)
if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) ==
CR_SEGMENT_ID_BOOST1)
cr->actual_num_seg1_blocks++;
else if (cyclic_refresh_segment_id(
seg_map[mi_row * cm->mi_cols + mi_col]) == CR_SEGMENT_ID_BOOST2)
seg_map[mi_row * cm->mi_cols + mi_col]) ==
CR_SEGMENT_ID_BOOST2)
cr->actual_num_seg2_blocks++;
}
}
@@ -292,8 +277,7 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) {
rc->baseline_gf_interval = VPXMIN(4 * (100 / cr->percent_refresh), 40);
else
rc->baseline_gf_interval = 40;
if (cpi->oxcf.rc_mode == VPX_VBR)
rc->baseline_gf_interval = 20;
if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20;
}
// Update some encoding stats (from the just encoded frame). If this frame's
@@ -315,21 +299,21 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
for (mi_row = 0; mi_row < rows; mi_row++) {
for (mi_col = 0; mi_col < cols; mi_col++) {
if (flag_force_gf_high_motion == 1) {
int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0 ?
mi[0]->mv[0].as_mv.row : -1 * mi[0]->mv[0].as_mv.row;
int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0 ?
mi[0]->mv[0].as_mv.col : -1 * mi[0]->mv[0].as_mv.col;
int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0
? mi[0]->mv[0].as_mv.row
: -1 * mi[0]->mv[0].as_mv.row;
int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0
? mi[0]->mv[0].as_mv.col
: -1 * mi[0]->mv[0].as_mv.col;
// Calculate the motion of the background.
if (abs_mvr <= 16 && abs_mvc <= 16) {
cnt1++;
if (abs_mvr == 0 && abs_mvc == 0)
cnt2++;
if (abs_mvr == 0 && abs_mvc == 0) cnt2++;
}
}
mi++;
// Accumulate low_content_frame.
if (cr->map[mi_row * cols + mi_col] < 1)
low_content_frame++;
if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++;
}
mi += 8;
}
@@ -339,7 +323,7 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
// Also, force this frame as a golden update frame if this frame will change
// the resolution (resize_pending != 0).
if (cpi->resize_pending != 0 ||
(cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
(cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
vp9_cyclic_refresh_set_golden_update(cpi);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -348,8 +332,7 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
cpi->refresh_golden_frame = 1;
force_gf_refresh = 1;
}
fraction_low =
(double)low_content_frame / (rows * cols);
fraction_low = (double)low_content_frame / (rows * cols);
// Update average.
cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
if (!force_gf_refresh && cpi->refresh_golden_frame == 1) {
@@ -396,14 +379,14 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
}
qindex_thresh =
cpi->oxcf.content == VP9E_CONTENT_SCREEN
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
: vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
: vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
// More aggressive settings for noisy content.
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
consec_zero_mv_thresh = 80;
qindex_thresh =
VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex),
7 * cm->base_qindex >> 3);
7 * cm->base_qindex >> 3);
}
do {
int sum_map = 0;
@@ -454,8 +437,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
} while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
cr->sb_index = i;
cr->reduce_refresh = 0;
if (count_sel < (3 * count_tot) >> 2)
cr->reduce_refresh = 1;
if (count_sel<(3 * count_tot)>> 2) cr->reduce_refresh = 1;
}
// Set cyclic refresh parameters.
@@ -464,8 +446,7 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
cr->percent_refresh = 10;
if (cr->reduce_refresh)
cr->percent_refresh = 5;
if (cr->reduce_refresh) cr->percent_refresh = 5;
cr->max_qdelta_perc = 50;
cr->time_for_refresh = 0;
cr->motion_thresh = 32;
@@ -474,8 +455,8 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
// periods of the refresh cycle, after a key frame.
// Account for larger interval on base layer for temporal layers.
if (cr->percent_refresh > 0 &&
rc->frames_since_key < (4 * cpi->svc.number_temporal_layers) *
(100 / cr->percent_refresh)) {
rc->frames_since_key <
(4 * cpi->svc.number_temporal_layers) * (100 / cr->percent_refresh)) {
cr->rate_ratio_qdelta = 3.0;
} else {
cr->rate_ratio_qdelta = 2.0;
@@ -486,9 +467,7 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
}
}
// Adjust some parameters for low resolutions at low bitrates.
if (cm->width <= 352 &&
cm->height <= 288 &&
rc->avg_frame_bandwidth < 3400) {
if (cm->width <= 352 && cm->height <= 288 && rc->avg_frame_bandwidth < 3400) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 10;
}
@@ -520,11 +499,9 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// instead of completely shutting off at low bitrates. For now keep it on.
// const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
const int apply_cyclic_refresh = 1;
if (cm->current_video_frame == 0)
cr->low_content_avg = 0.0;
if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
// Don't apply refresh on key frame or temporal enhancement layer frames.
if (!apply_cyclic_refresh ||
(cm->frame_type == KEY_FRAME) ||
if (!apply_cyclic_refresh || (cm->frame_type == KEY_FRAME) ||
(cpi->svc.temporal_layer_id > 0)) {
// Set segmentation map to 0 and disable.
unsigned char *const seg_map = cpi->segmentation_map;
@@ -590,8 +567,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
// Reset if a resolution change has occurred.
if (cpi->resize_pending != 0)
vp9_cyclic_refresh_reset_resize(cpi);
if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi);
// Update the segmentation and refresh map.
cyclic_refresh_update_map(cpi);

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
@@ -23,9 +22,9 @@ extern "C" {
// The segment ids used in cyclic refresh: from base (no boost) to increasing
// boost (higher delta-qp).
#define CR_SEGMENT_ID_BASE 0
#define CR_SEGMENT_ID_BOOST1 1
#define CR_SEGMENT_ID_BOOST2 2
#define CR_SEGMENT_ID_BASE 0
#define CR_SEGMENT_ID_BOOST1 1
#define CR_SEGMENT_ID_BOOST2 2
// Maximum rate target ratio for setting segment delta-qp.
#define CR_MAX_RATE_TARGET_RATIO 4.0
@@ -91,8 +90,8 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i,
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
MODE_INFO *const mi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
MODE_INFO *const mi, int mi_row,
int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip,
struct macroblock_plane *const p);

View File

@@ -23,19 +23,19 @@
#define ENERGY_MIN (-4)
#define ENERGY_MAX (1)
#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
#define ENERGY_IN_BOUNDS(energy)\
#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
#define ENERGY_IN_BOUNDS(energy) \
assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
static const double rate_ratio[MAX_SEGMENTS] =
{2.5, 2.0, 1.5, 1.0, 0.75, 1.0, 1.0, 1.0};
static const int segment_id[ENERGY_SPAN] = {0, 1, 1, 2, 3, 4};
static const double rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0,
0.75, 1.0, 1.0, 1.0 };
static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };
#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN]
#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0};
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = {0};
DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = { 0 };
#endif
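
Taken together, the constants above fix a mapping from a block's energy in [ENERGY_MIN, ENERGY_MAX] to a segment id and a rate ratio. A minimal standalone sketch of that lookup, with the table values copied from the hunk above and MAX_SEGMENTS written out as its VP9 value of 8:

#include <stdio.h>

#define ENERGY_MIN (-4)
#define ENERGY_MAX (1)
#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]

static const double rate_ratio[8] = { 2.5, 2.0, 1.5, 1.0, 0.75, 1.0, 1.0, 1.0 };
static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };

int main(void) {
  int energy;
  // Lower-energy (flatter) blocks map to segments with larger rate ratios.
  for (energy = ENERGY_MIN; energy <= ENERGY_MAX; ++energy)
    printf("energy %2d -> segment %d, rate ratio %.2f\n", energy,
           SEGMENT_ID(energy), rate_ratio[SEGMENT_ID(energy)]);
  return 0;
}

For example, energy -4 resolves to segment 0 and ratio 2.5, while energy 1 resolves to segment 4 and ratio 0.75.
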
unsigned int vp9_vaq_segment_id(int energy) {
@@ -85,9 +85,9 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
/* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions
* of variance() and highbd_8_variance(). It should not.
*/
static void aq_variance(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int w, int h, unsigned int *sse, int *sum) {
static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, int w, int h, unsigned int *sse,
int *sum) {
int i, j;
*sum = 0;
@@ -106,9 +106,9 @@ static void aq_variance(const uint8_t *a, int a_stride,
}
#if CONFIG_VP9_HIGHBITDEPTH
static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint64_t *sse, uint64_t *sum) {
static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int w, int h,
uint64_t *sse, uint64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -127,9 +127,9 @@ static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
}
}
static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, unsigned int *sse, int *sum) {
static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int w, int h,
unsigned int *sse, int *sum) {
uint64_t sse_long = 0;
uint64_t sum_long = 0;
aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
@@ -142,10 +142,10 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bs) {
MACROBLOCKD *xd = &x->e_mbd;
unsigned int var, sse;
int right_overflow = (xd->mb_to_right_edge < 0) ?
((-xd->mb_to_right_edge) >> 3) : 0;
int bottom_overflow = (xd->mb_to_bottom_edge < 0) ?
((-xd->mb_to_bottom_edge) >> 3) : 0;
int right_overflow =
(xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
int bottom_overflow =
(xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
if (right_overflow || bottom_overflow) {
const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow;
@@ -159,30 +159,27 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8);
} else {
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, bw, bh, &sse, &avg);
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0,
bw, bh, &sse, &avg);
}
#else
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, bw, bh, &sse, &avg);
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0,
bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (unsigned int)(((uint64_t)256 * var) / (bw * bh));
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros),
0, &sse);
var =
cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, &sse);
} else {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
}
#else
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride,
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH
return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]);
@@ -201,7 +198,7 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
double energy_midpoint;
vpx_clear_system_state();
energy_midpoint =
(cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
(cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
energy = vp9_log_block_var(cpi, x, bs) - energy_midpoint;
return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
}


@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_
#define VP9_ENCODER_VP9_AQ_VARIANCE_H_


@@ -36,14 +36,17 @@
#include "vp9/encoder/vp9_tokenize.h"
static const struct vp9_token intra_mode_encodings[INTRA_MODES] = {
{0, 1}, {6, 3}, {28, 5}, {30, 5}, {58, 6}, {59, 6}, {126, 7}, {127, 7},
{62, 6}, {2, 2}};
{ 0, 1 }, { 6, 3 }, { 28, 5 }, { 30, 5 }, { 58, 6 },
{ 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 }
};
static const struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
{{0, 1}, {2, 2}, {3, 2}};
static const struct vp9_token partition_encodings[PARTITION_TYPES] =
{{0, 1}, {2, 2}, {6, 3}, {7, 3}};
static const struct vp9_token inter_mode_encodings[INTER_MODES] =
{{2, 2}, {6, 3}, {0, 1}, {7, 3}};
{ { 0, 1 }, { 2, 2 }, { 3, 2 } };
static const struct vp9_token partition_encodings[PARTITION_TYPES] = {
{ 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
};
static const struct vp9_token inter_mode_encodings[INTER_MODES] = {
{ 2, 2 }, { 6, 3 }, { 0, 1 }, { 7, 3 }
};
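
Each vp9_token above pairs a codeword value with a bit length, and the writer emits the low len bits of value most-significant bit first. A self-contained sketch that dumps the intra-mode codewords; the two-field struct is re-declared locally on that assumption, so nothing from the encoder is linked in:

#include <stdio.h>

struct tok { int value; int len; };  // assumed layout of struct vp9_token

static const struct tok intra_modes[10] = {
  { 0, 1 },  { 6, 3 },   { 28, 5 },  { 30, 5 }, { 58, 6 },
  { 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 }
};

int main(void) {
  int i, b;
  for (i = 0; i < 10; ++i) {
    printf("mode %d: ", i);
    // { 28, 5 } prints as 11100, { 2, 2 } as 10, and so on.
    for (b = intra_modes[i].len - 1; b >= 0; --b)
      putchar('0' + ((intra_modes[i].value >> b) & 1));
    putchar('\n');
  }
  return 0;
}

The resulting strings form a prefix-free code, which is what lets the decoder walk the corresponding tree one bit at a time.
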
static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
const vpx_prob *probs) {
@@ -57,15 +60,15 @@ static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode,
&inter_mode_encodings[INTER_OFFSET(mode)]);
}
static void encode_unsigned_max(struct vpx_write_bit_buffer *wb,
int data, int max) {
static void encode_unsigned_max(struct vpx_write_bit_buffer *wb, int data,
int max) {
vpx_wb_write_literal(wb, data, get_unsigned_bits(max));
}
static void prob_diff_update(const vpx_tree_index *tree,
vpx_prob probs[/*n - 1*/],
const unsigned int counts[/*n - 1*/],
int n, vpx_writer *w) {
const unsigned int counts[/*n - 1*/], int n,
vpx_writer *w) {
int i;
unsigned int branch_ct[32][2];
@@ -77,13 +80,13 @@ static void prob_diff_update(const vpx_tree_index *tree,
vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
}
static void write_selected_tx_size(const VP9_COMMON *cm,
const MACROBLOCKD *xd, vpx_writer *w) {
static void write_selected_tx_size(const VP9_COMMON *cm, const MACROBLOCKD *xd,
vpx_writer *w) {
TX_SIZE tx_size = xd->mi[0]->tx_size;
BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const vpx_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
&cm->fc->tx_probs);
const vpx_prob *const tx_probs =
get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
@@ -120,18 +123,18 @@ static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w,
counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
}
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp,
const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
const TOKENEXTRA *p;
const vp9_extra_bit *const extra_bits =
#if CONFIG_VP9_HIGHBITDEPTH
(bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 :
(bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 :
vp9_extra_bits;
(bit_depth == VPX_BITS_12)
? vp9_extra_bits_high12
: (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 : vp9_extra_bits;
#else
vp9_extra_bits;
(void) bit_depth;
vp9_extra_bits;
(void)bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
@@ -144,7 +147,7 @@ static void pack_mb_tokens(vpx_writer *w,
vpx_write(w, 0, p->context_tree[1]);
++p;
if (p == stop || p->token == EOSB_TOKEN) {
*tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
*tp = (TOKENEXTRA *)(uintptr_t)p + (p->token == EOSB_TOKEN);
return;
}
}
@@ -182,7 +185,7 @@ static void pack_mb_tokens(vpx_writer *w,
}
}
}
*tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
*tp = (TOKENEXTRA *)(uintptr_t)p + (p->token == EOSB_TOKEN);
}
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
@@ -203,7 +206,7 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
assert(!is_compound);
assert(mi->ref_frame[0] ==
get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
} else {
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
@@ -248,8 +251,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
const int pred_flag = mi->seg_id_predicted;
vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
vpx_write(w, pred_flag, pred_prob);
if (!pred_flag)
write_segment_id(w, seg, segment_id);
if (!pred_flag) write_segment_id(w, seg, segment_id);
} else {
write_segment_id(w, seg, segment_id);
}
@@ -338,8 +340,7 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const MODE_INFO *const left_mi = xd->left_mi;
const BLOCK_SIZE bsize = mi->sb_type;
if (seg->update_map)
write_segment_id(w, seg, mi->segment_id);
if (seg->update_map) write_segment_id(w, seg, mi->segment_id);
write_skip(cm, xd, mi->segment_id, mi, w);
@@ -367,8 +368,8 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
vpx_writer *w, TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end,
int mi_row, int mi_col) {
const TOKENEXTRA *const tok_end, int mi_row,
int mi_col) {
const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
MODE_INFO *m;
@@ -376,13 +377,12 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
cpi->td.mb.mbmi_ext = cpi->td.mb.mbmi_ext_base +
(mi_row * cm->mi_cols + mi_col);
cpi->td.mb.mbmi_ext =
cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
set_mi_row_col(xd, tile,
mi_row, num_8x8_blocks_high_lookup[m->sb_type],
mi_col, num_8x8_blocks_wide_lookup[m->sb_type],
cm->mi_rows, cm->mi_cols);
set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->sb_type],
mi_col, num_8x8_blocks_wide_lookup[m->sb_type], cm->mi_rows,
cm->mi_cols);
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w);
} else {
@@ -394,9 +394,9 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
}
static void write_partition(const VP9_COMMON *const cm,
const MACROBLOCKD *const xd,
int hbs, int mi_row, int mi_col,
PARTITION_TYPE p, BLOCK_SIZE bsize, vpx_writer *w) {
const MACROBLOCKD *const xd, int hbs, int mi_row,
int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
vpx_writer *w) {
const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
const vpx_prob *const probs = xd->partition_probs[ctx];
const int has_rows = (mi_row + hbs) < cm->mi_rows;
@@ -415,10 +415,10 @@ static void write_partition(const VP9_COMMON *const cm,
}
}
static void write_modes_sb(VP9_COMP *cpi,
const TileInfo *const tile, vpx_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
vpx_writer *w, TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end, int mi_row,
int mi_col, BLOCK_SIZE bsize) {
const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -428,8 +428,7 @@ static void write_modes_sb(VP9_COMP *cpi,
BLOCK_SIZE subsize;
const MODE_INFO *m = NULL;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
@@ -462,8 +461,7 @@ static void write_modes_sb(VP9_COMP *cpi,
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
subsize);
break;
default:
assert(0);
default: assert(0);
}
}
@@ -473,9 +471,9 @@ static void write_modes_sb(VP9_COMP *cpi,
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
static void write_modes(VP9_COMP *cpi,
const TileInfo *const tile, vpx_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
static void write_modes(VP9_COMP *cpi, const TileInfo *const tile,
vpx_writer *w, TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end) {
const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int mi_row, mi_col;
@@ -487,8 +485,7 @@ static void write_modes(VP9_COMP *cpi,
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col,
BLOCK_64X64);
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64);
}
}
@@ -496,7 +493,7 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size,
vp9_coeff_stats *coef_branch_ct,
vp9_coeff_probs_model *coef_probs) {
vp9_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size];
unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
cpi->common.counts.eob_branch[tx_size];
int i, j, k, l, m;
@@ -507,19 +504,19 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size,
vp9_tree_probs_from_distribution(vp9_coef_tree,
coef_branch_ct[i][j][k][l],
coef_counts[i][j][k][l]);
coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
coef_branch_ct[i][j][k][l][0][0];
coef_branch_ct[i][j][k][l][0][1] =
eob_branch_ct[i][j][k][l] - coef_branch_ct[i][j][k][l][0][0];
for (m = 0; m < UNCONSTRAINED_NODES; ++m)
coef_probs[i][j][k][l][m] = get_binary_prob(
coef_branch_ct[i][j][k][l][m][0],
coef_branch_ct[i][j][k][l][m][1]);
coef_probs[i][j][k][l][m] =
get_binary_prob(coef_branch_ct[i][j][k][l][m][0],
coef_branch_ct[i][j][k][l][m][1]);
}
}
}
}
}
static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
TX_SIZE tx_size,
vp9_coeff_stats *frame_branch_ct,
vp9_coeff_probs_model *new_coef_probs) {
@@ -533,7 +530,7 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
case TWO_LOOP: {
/* dry run to see if there is any update at all needed */
int savings = 0;
int update[2] = {0, 0};
int update[2] = { 0, 0 };
for (i = 0; i < PLANE_TYPES; ++i) {
for (j = 0; j < REF_TYPES; ++j) {
for (k = 0; k < COEF_BANDS; ++k) {
@@ -550,8 +547,7 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
else
s = vp9_prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
if (s > 0 && newp != oldp)
u = 1;
if (s > 0 && newp != oldp) u = 1;
if (u)
savings += s - (int)(vp9_cost_zero(upd));
else
@@ -583,14 +579,12 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
int u = 0;
if (t == PIVOT_NODE)
s = vp9_prob_diff_update_savings_search_model(
frame_branch_ct[i][j][k][l][0],
*oldp, &newp, upd, stepsize);
frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
stepsize);
else
s = vp9_prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t],
*oldp, &newp, upd);
if (s > 0 && newp != *oldp)
u = 1;
frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
if (s > 0 && newp != *oldp) u = 1;
vpx_write(bc, u, upd);
if (u) {
/* send/use new probability */
@@ -621,16 +615,14 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
if (t == PIVOT_NODE) {
s = vp9_prob_diff_update_savings_search_model(
frame_branch_ct[i][j][k][l][0],
*oldp, &newp, upd, stepsize);
frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
stepsize);
} else {
s = vp9_prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t],
*oldp, &newp, upd);
frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
}
if (s > 0 && newp != *oldp)
u = 1;
if (s > 0 && newp != *oldp) u = 1;
updates += u;
if (u == 0 && updates == 0) {
noupdates_before_first++;
@@ -659,12 +651,11 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
}
return;
}
default:
assert(0);
default: assert(0);
}
}
static void update_coef_probs(VP9_COMP *cpi, vpx_writer* w) {
static void update_coef_probs(VP9_COMP *cpi, vpx_writer *w) {
const TX_MODE tx_mode = cpi->common.tx_mode;
const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
TX_SIZE tx_size;
@@ -675,8 +666,7 @@ static void update_coef_probs(VP9_COMP *cpi, vpx_writer* w) {
(tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
vpx_write_bit(w, 0);
} else {
build_tree_distribution(cpi, tx_size, frame_branch_ct,
frame_coef_probs);
build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs);
update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
frame_coef_probs);
}
@@ -748,8 +738,7 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
const struct segmentation *seg = &cm->seg;
vpx_wb_write_bit(wb, seg->enabled);
if (!seg->enabled)
return;
if (!seg->enabled) return;
// Segmentation map
vpx_wb_write_bit(wb, seg->update_map);
@@ -761,8 +750,7 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
const int prob = seg->tree_probs[i];
const int update = prob != MAX_PROB;
vpx_wb_write_bit(wb, update);
if (update)
vpx_wb_write_literal(wb, prob, 8);
if (update) vpx_wb_write_literal(wb, prob, 8);
}
// Write out the chosen coding method.
@@ -772,8 +760,7 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
const int prob = seg->pred_probs[i];
const int update = prob != MAX_PROB;
vpx_wb_write_bit(wb, update);
if (update)
vpx_wb_write_literal(wb, prob, 8);
if (update) vpx_wb_write_literal(wb, prob, 8);
}
}
}
@@ -817,7 +804,6 @@ static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w,
unsigned int ct_16x16p[TX_SIZES - 2][2];
unsigned int ct_32x32p[TX_SIZES - 1][2];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p);
for (j = 0; j < TX_SIZES - 3; j++)
@@ -879,16 +865,13 @@ static void write_tile_info(const VP9_COMMON *const cm,
// columns
ones = cm->log2_tile_cols - min_log2_tile_cols;
while (ones--)
vpx_wb_write_bit(wb, 1);
while (ones--) vpx_wb_write_bit(wb, 1);
if (cm->log2_tile_cols < max_log2_tile_cols)
vpx_wb_write_bit(wb, 0);
if (cm->log2_tile_cols < max_log2_tile_cols) vpx_wb_write_bit(wb, 0);
// rows
vpx_wb_write_bit(wb, cm->log2_tile_rows != 0);
if (cm->log2_tile_rows != 0)
vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
if (cm->log2_tile_rows != 0) vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
}
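
The column count above is coded in unary: one 1-bit per step over the minimum, closed by a 0-bit unless the maximum was reached. A small sketch that prints the emitted bits instead of writing them to the bit buffer; the min/max arguments in the calls are hypothetical:

#include <stdio.h>

static void print_tile_cols_bits(int log2_tile_cols, int min_log2_tile_cols,
                                 int max_log2_tile_cols) {
  int ones = log2_tile_cols - min_log2_tile_cols;
  while (ones--) putchar('1');
  // No terminator is needed when the maximum is already implied.
  if (log2_tile_cols < max_log2_tile_cols) putchar('0');
  putchar('\n');
}

int main(void) {
  print_tile_cols_bits(2, 0, 6);  // prints 110
  print_tile_cols_bits(6, 0, 6);  // prints 111111
  return 0;
}
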
int vp9_get_refresh_mask(VP9_COMP *cpi) {
@@ -935,15 +918,15 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
tok_end = cpi->tile_tok[tile_row][tile_col] +
cpi->tok_count[tile_row][tile_col];
cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
else
vpx_start_encode(&residual_bc, data_ptr + total_size);
write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
&residual_bc, &tok, tok_end);
write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &residual_bc, &tok,
tok_end);
assert(tok == tok_end);
vpx_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
@@ -961,8 +944,8 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
static void write_render_size(const VP9_COMMON *cm,
struct vpx_write_bit_buffer *wb) {
const int scaling_active = cm->width != cm->render_width ||
cm->height != cm->render_height;
const int scaling_active =
cm->width != cm->render_width || cm->height != cm->render_height;
vpx_wb_write_bit(wb, scaling_active);
if (scaling_active) {
vpx_wb_write_literal(wb, cm->render_width - 1, 16);
@@ -990,17 +973,17 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
// Set "found" to 0 for temporal svc and for spatial svc key frame
if (cpi->use_svc &&
((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
(cpi->svc.number_spatial_layers > 1 &&
cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
(is_two_pass_svc(cpi) &&
cpi->svc.encode_empty_frame_state == ENCODING &&
cpi->svc.layer_context[0].frames_from_key_frame <
cpi->svc.number_temporal_layers + 1))) {
cpi->oxcf.rc_mode == VPX_CBR) ||
(cpi->svc.number_spatial_layers > 1 &&
cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
(is_two_pass_svc(cpi) &&
cpi->svc.encode_empty_frame_state == ENCODING &&
cpi->svc.layer_context[0].frames_from_key_frame <
cpi->svc.number_temporal_layers + 1))) {
found = 0;
} else if (cfg != NULL) {
found = cm->width == cfg->y_crop_width &&
cm->height == cfg->y_crop_height;
found =
cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height;
}
vpx_wb_write_bit(wb, found);
if (found) {
@@ -1025,20 +1008,11 @@ static void write_sync_code(struct vpx_write_bit_buffer *wb) {
static void write_profile(BITSTREAM_PROFILE profile,
struct vpx_write_bit_buffer *wb) {
switch (profile) {
case PROFILE_0:
vpx_wb_write_literal(wb, 0, 2);
break;
case PROFILE_1:
vpx_wb_write_literal(wb, 2, 2);
break;
case PROFILE_2:
vpx_wb_write_literal(wb, 1, 2);
break;
case PROFILE_3:
vpx_wb_write_literal(wb, 6, 3);
break;
default:
assert(0);
case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break;
case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break;
case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break;
case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break;
default: assert(0);
}
}
@@ -1093,8 +1067,7 @@ static void write_uncompressed_header(VP9_COMP *cpi,
// will change to show_frame flag to 0, then add an one byte frame with
// show_existing_frame flag which tells the decoder which frame we want to
// show.
if (!cm->show_frame)
vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
vpx_wb_write_literal(wb, cm->reset_frame_context, 2);
@@ -1222,7 +1195,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
uint8_t *data = dest;
size_t first_part_size, uncompressed_hdr_size;
struct vpx_write_bit_buffer wb = {data, 0};
struct vpx_write_bit_buffer wb = { data, 0 };
struct vpx_write_bit_buffer saved_wb;
write_uncompressed_header(cpi, &wb);


@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_BITSTREAM_H_
#define VP9_ENCODER_VP9_BITSTREAM_H_
@@ -25,10 +24,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
cpi->rc.is_src_frame_alt_ref &&
(!cpi->use_svc || // Add spatial svc base layer case here
(is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id == 0 &&
cpi->svc.layer_context[0].gold_ref_idx >=0 &&
(!cpi->use_svc || // Add spatial svc base layer case here
(is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id == 0 &&
cpi->svc.layer_context[0].gold_ref_idx >= 0 &&
cpi->oxcf.ss_enable_auto_arf[0]));
}


@@ -77,8 +77,8 @@ struct macroblock {
int rddiv;
int rdmult;
int mb_energy;
int * m_search_count_ptr;
int * ex_search_count_ptr;
int *m_search_count_ptr;
int *ex_search_count_ptr;
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
@@ -130,9 +130,9 @@ struct macroblock {
// skip forward transform and quantization
uint8_t skip_txfm[MAX_MB_PLANE << 2];
#define SKIP_TXFM_NONE 0
#define SKIP_TXFM_AC_DC 1
#define SKIP_TXFM_AC_ONLY 2
#define SKIP_TXFM_NONE 0
#define SKIP_TXFM_AC_DC 1
#define SKIP_TXFM_AC_ONLY 2
int64_t bsse[MAX_MB_PLANE << 2];


@@ -63,9 +63,9 @@ static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r,
s_blockiness += horizontal_filter(s);
r_blockiness += horizontal_filter(r);
sum_0 += s[0];
sum_sq_0 += s[0]*s[0];
sum_sq_0 += s[0] * s[0];
sum_1 += s[-1];
sum_sq_1 += s[-1]*s[-1];
sum_sq_1 += s[-1] * s[-1];
}
var_0 = variance(sum_0, sum_sq_0, size);
var_1 = variance(sum_1, sum_sq_1, size);
@@ -113,19 +113,19 @@ static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r,
// This function currently returns the blockiness for the entire frame by
// looking at all borders in steps of 4.
double vp9_get_blockiness(const uint8_t *img1, int img1_pitch,
const uint8_t *img2, int img2_pitch,
int width, int height) {
const uint8_t *img2, int img2_pitch, int width,
int height) {
double blockiness = 0;
int i, j;
vpx_clear_system_state();
for (i = 0; i < height; i += 4, img1 += img1_pitch * 4,
img2 += img2_pitch * 4) {
for (i = 0; i < height;
i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
for (j = 0; j < width; j += 4) {
if (i > 0 && i < height && j > 0 && j < width) {
blockiness += blockiness_vertical(img1 + j, img1_pitch,
img2 + j, img2_pitch, 4);
blockiness += blockiness_horizontal(img1 + j, img1_pitch,
img2 + j, img2_pitch, 4);
blockiness +=
blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4);
blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j,
img2_pitch, 4);
}
}
}
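
The reformatted signature is all a caller needs. A minimal usage sketch with hypothetical flat buffers standing in for the source and reconstructed frames; linking against the encoder supplies the function itself:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Prototype exactly as reformatted above.
double vp9_get_blockiness(const uint8_t *img1, int img1_pitch,
                          const uint8_t *img2, int img2_pitch, int width,
                          int height);

int main(void) {
  const int w = 64, h = 64;
  uint8_t *src = calloc(w * h, 1);    // hypothetical source frame
  uint8_t *recon = calloc(w * h, 1);  // hypothetical reconstructed frame
  if (!src || !recon) return 1;
  // Larger values indicate stronger blocking artifacts in recon.
  printf("blockiness: %f\n", vp9_get_blockiness(src, w, recon, w, w, h));
  free(src);
  free(recon);
  return 0;
}
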


@@ -12,10 +12,7 @@
#include "vp9/encoder/vp9_encoder.h"
static const BLOCK_SIZE square[] = {
BLOCK_8X8,
BLOCK_16X16,
BLOCK_32X32,
BLOCK_64X64,
BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
};
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
@@ -25,8 +22,7 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
int i, k;
ctx->num_4x4_blk = num_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_blk, sizeof(uint8_t)));
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
@@ -37,10 +33,10 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
}
}
}
@@ -66,12 +62,12 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *tree,
int num_4x4_blk) {
alloc_mode_context(cm, num_4x4_blk, &tree->none);
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]);
if (num_4x4_blk > 4) {
alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[1]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[1]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[1]);
} else {
memset(&tree->horizontal[1], 0, sizeof(tree->horizontal[1]));
memset(&tree->vertical[1], 0, sizeof(tree->vertical[1]));
@@ -101,19 +97,18 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) {
int nodes;
vpx_free(td->leaf_tree);
CHECK_MEM_ERROR(cm, td->leaf_tree, vpx_calloc(leaf_nodes,
sizeof(*td->leaf_tree)));
CHECK_MEM_ERROR(cm, td->leaf_tree,
vpx_calloc(leaf_nodes, sizeof(*td->leaf_tree)));
vpx_free(td->pc_tree);
CHECK_MEM_ERROR(cm, td->pc_tree, vpx_calloc(tree_nodes,
sizeof(*td->pc_tree)));
CHECK_MEM_ERROR(cm, td->pc_tree,
vpx_calloc(tree_nodes, sizeof(*td->pc_tree)));
this_pc = &td->pc_tree[0];
this_leaf = &td->leaf_tree[0];
// 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
// context so we only need to allocate 1 for each 8x8 block.
for (i = 0; i < leaf_nodes; ++i)
alloc_mode_context(cm, 1, &td->leaf_tree[i]);
for (i = 0; i < leaf_nodes; ++i) alloc_mode_context(cm, 1, &td->leaf_tree[i]);
// Sets up all the leaf nodes in the tree.
for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
@@ -121,8 +116,7 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) {
tree->block_size = square[0];
alloc_tree_contexts(cm, tree, 4);
tree->leaf_split[0] = this_leaf++;
for (j = 1; j < 4; j++)
tree->leaf_split[j] = tree->leaf_split[0];
for (j = 1; j < 4; j++) tree->leaf_split[j] = tree->leaf_split[0];
}
// Each node has 4 leaf nodes, fill each block_size level of the tree
@@ -132,8 +126,7 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) {
PC_TREE *const tree = &td->pc_tree[pc_tree_index];
alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
tree->block_size = square[square_index];
for (j = 0; j < 4; j++)
tree->split[j] = this_pc++;
for (j = 0; j < 4; j++) tree->split[j] = this_pc++;
++pc_tree_index;
}
++square_index;
@@ -147,12 +140,10 @@ void vp9_free_pc_tree(ThreadData *td) {
int i;
// Free all 4x4 mode contexts
for (i = 0; i < 64; ++i)
free_mode_context(&td->leaf_tree[i]);
for (i = 0; i < 64; ++i) free_mode_context(&td->leaf_tree[i]);
// Frees the contexts of all the tree nodes.
for (i = 0; i < tree_nodes; ++i)
free_tree_contexts(&td->pc_tree[i]);
for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]);
vpx_free(td->pc_tree);
td->pc_tree = NULL;


@@ -14,31 +14,30 @@
/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
Begins with a bogus entry for simpler addressing. */
const uint16_t vp9_prob_cost[256] = {
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
1449, 1429, 1409, 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252,
1236, 1221, 1206, 1192, 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084,
1072, 1059, 1047, 1036, 1024, 1013, 1001, 990, 979, 968, 958, 947,
937, 927, 917, 907, 897, 887, 878, 868, 859, 850, 841, 832,
823, 814, 806, 797, 789, 780, 772, 764, 756, 748, 740, 732,
724, 717, 709, 702, 694, 687, 680, 673, 665, 658, 651, 644,
637, 631, 624, 617, 611, 604, 598, 591, 585, 578, 572, 566,
560, 554, 547, 541, 535, 530, 524, 518, 512, 506, 501, 495,
489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, 430,
425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316,
311, 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264,
260, 256, 252, 248, 244, 240, 236, 232, 228, 224, 220, 216,
212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, 171,
168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129,
125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89,
86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51,
48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
12, 9, 6, 3};
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, 2260,
2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, 1748, 1718,
1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, 1449, 1429, 1409,
1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252, 1236, 1221, 1206, 1192,
1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084, 1072, 1059, 1047, 1036, 1024,
1013, 1001, 990, 979, 968, 958, 947, 937, 927, 917, 907, 897, 887,
878, 868, 859, 850, 841, 832, 823, 814, 806, 797, 789, 780, 772,
764, 756, 748, 740, 732, 724, 717, 709, 702, 694, 687, 680, 673,
665, 658, 651, 644, 637, 631, 624, 617, 611, 604, 598, 591, 585,
578, 572, 566, 560, 554, 547, 541, 535, 530, 524, 518, 512, 506,
501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435,
430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311,
307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264, 260, 256,
252, 248, 244, 240, 236, 232, 228, 224, 220, 216, 212, 209, 205,
201, 197, 194, 190, 186, 182, 179, 175, 171, 168, 164, 161, 157,
153, 150, 146, 143, 139, 136, 132, 129, 125, 122, 119, 115, 112,
109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, 70,
66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29,
26, 23, 20, 18, 15, 12, 9, 6, 3
};
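
The comment's formula fully determines this table, and the entries themselves (4096 at index 1, 512 at index 128) imply that VP9_PROB_COST_SHIFT is 9, i.e. costs are in 1/512th-bit units. A small generator that reproduces every entry, with the shift inferred from the values rather than taken from the headers:

#include <math.h>
#include <stdio.h>

#define VP9_PROB_COST_SHIFT 9  // inferred from the table entries above

int main(void) {
  int i;
  // i = 0 would need log2(0); the table repeats the i = 1 value instead
  // (the "bogus entry for simpler addressing").
  printf("4096");
  for (i = 1; i < 256; ++i)
    printf(", %.0f", round(-log2(i / 256.) * (1 << VP9_PROB_COST_SHIFT)));
  printf("\n");
  return 0;
}

Compile with -lm; the output matches the 256 values above.
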
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
int i, int c) {
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i,
int c) {
const vpx_prob prob = probs[i / 2];
int b;


@@ -27,16 +27,15 @@ extern const uint16_t vp9_prob_cost[256];
#define vp9_cost_one(prob) vp9_cost_zero(256 - (prob))
#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 256 - (prob) \
: (prob))
#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 256 - (prob) : (prob))
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
vpx_prob p) {
return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
}
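
cost_branch256 prices a binary branch: ct[0] zeros at vp9_cost_zero(p) each plus ct[1] ones at vp9_cost_one(p) each, in the same 1/512th-bit units as the table. Assuming vp9_cost_zero(p) simply indexes vp9_prob_cost[p] (its definition falls outside this hunk), a worked instance: for p = 192, vp9_prob_cost[192] = 212 and vp9_prob_cost[256 - 192] = 1024, so counts { 10, 5 } cost 10 * 212 + 5 * 1024 = 7240, roughly 14.1 bits.
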
static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs,
int bits, int len) {
static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs, int bits,
int len) {
int cost = 0;
vpx_tree_index i = 0;


@@ -61,8 +61,8 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
output[0] = (tran_low_t)fdct_round_shift(t0);
output[2] = (tran_low_t)fdct_round_shift(t2);
output[4] = (tran_low_t)fdct_round_shift(t1);
@@ -81,10 +81,10 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
x3 = s7 + t3;
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
output[1] = (tran_low_t)fdct_round_shift(t0);
output[3] = (tran_low_t)fdct_round_shift(t2);
output[5] = (tran_low_t)fdct_round_shift(t1);
@@ -105,11 +105,11 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
input[3] = in[3] + in[12];
input[4] = in[4] + in[11];
input[5] = in[5] + in[10];
input[6] = in[6] + in[ 9];
input[7] = in[7] + in[ 8];
input[6] = in[6] + in[9];
input[7] = in[7] + in[8];
step1[0] = in[7] - in[ 8];
step1[1] = in[6] - in[ 9];
step1[0] = in[7] - in[8];
step1[1] = in[6] - in[9];
step1[2] = in[5] - in[10];
step1[3] = in[4] - in[11];
step1[4] = in[3] - in[12];
@@ -140,7 +140,7 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
out[0] = (tran_low_t)fdct_round_shift(t0);
out[4] = (tran_low_t)fdct_round_shift(t2);
@@ -160,10 +160,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
x3 = s7 + t3;
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
out[2] = (tran_low_t)fdct_round_shift(t0);
out[6] = (tran_low_t)fdct_round_shift(t2);
out[10] = (tran_low_t)fdct_round_shift(t1);
@@ -191,12 +191,12 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
step3[7] = step1[7] + step2[4];
// step 4
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
step2[1] = fdct_round_shift(temp1);
step2[2] = fdct_round_shift(temp2);
temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
step2[5] = fdct_round_shift(temp1);
step2[6] = fdct_round_shift(temp2);
@@ -211,23 +211,23 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
step1[7] = step3[7] + step2[6];
// step 6
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[9] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
out[5] = (tran_low_t)fdct_round_shift(temp1);
out[13] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
out[3] = (tran_low_t)fdct_round_shift(temp1);
out[11] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
out[7] = (tran_low_t)fdct_round_shift(temp1);
out[15] = (tran_low_t)fdct_round_shift(temp2);
}
@@ -285,14 +285,14 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t x7 = input[6];
// stage 1
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
x0 = fdct_round_shift(s0 + s4);
x1 = fdct_round_shift(s1 + s5);
@@ -308,10 +308,10 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
s1 = x1;
s2 = x2;
s3 = x3;
s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
x0 = s0 + s2;
x1 = s1 + s3;
@@ -365,11 +365,11 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t x15 = input[14];
// stage 1
s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
@@ -378,9 +378,9 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
x0 = fdct_round_shift(s0 + s8);
x1 = fdct_round_shift(s1 + s9);
@@ -390,8 +390,8 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
x5 = fdct_round_shift(s5 + s13);
x6 = fdct_round_shift(s6 + s14);
x7 = fdct_round_shift(s7 + s15);
x8 = fdct_round_shift(s0 - s8);
x9 = fdct_round_shift(s1 - s9);
x8 = fdct_round_shift(s0 - s8);
x9 = fdct_round_shift(s1 - s9);
x10 = fdct_round_shift(s2 - s10);
x11 = fdct_round_shift(s3 - s11);
x12 = fdct_round_shift(s4 - s12);
@@ -408,14 +408,14 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s5 = x5;
s6 = x6;
s7 = x7;
s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
x0 = s0 + s4;
x1 = s1 + s5;
@@ -439,18 +439,18 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
s1 = x1;
s2 = x2;
s3 = x3;
s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
s8 = x8;
s9 = x9;
s10 = x10;
s11 = x11;
s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
x0 = s0 + s2;
x1 = s1 + s3;
@@ -470,13 +470,13 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
x15 = fdct_round_shift(s13 - s15);
// stage 4
s2 = (- cospi_16_64) * (x2 + x3);
s2 = (-cospi_16_64) * (x2 + x3);
s3 = cospi_16_64 * (x2 - x3);
s6 = cospi_16_64 * (x6 + x7);
s7 = cospi_16_64 * (- x6 + x7);
s7 = cospi_16_64 * (-x6 + x7);
s10 = cospi_16_64 * (x10 + x11);
s11 = cospi_16_64 * (- x10 + x11);
s14 = (- cospi_16_64) * (x14 + x15);
s11 = cospi_16_64 * (-x10 + x11);
s14 = (-cospi_16_64) * (x14 + x15);
s15 = cospi_16_64 * (x14 - x15);
x2 = fdct_round_shift(s2);
@@ -507,28 +507,28 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
}
static const transform_2d FHT_4[] = {
{ fdct4, fdct4 }, // DCT_DCT = 0
{ fadst4, fdct4 }, // ADST_DCT = 1
{ fdct4, fadst4 }, // DCT_ADST = 2
{ fadst4, fadst4 } // ADST_ADST = 3
{ fdct4, fdct4 }, // DCT_DCT = 0
{ fadst4, fdct4 }, // ADST_DCT = 1
{ fdct4, fadst4 }, // DCT_ADST = 2
{ fadst4, fadst4 } // ADST_ADST = 3
};
static const transform_2d FHT_8[] = {
{ fdct8, fdct8 }, // DCT_DCT = 0
{ fadst8, fdct8 }, // ADST_DCT = 1
{ fdct8, fadst8 }, // DCT_ADST = 2
{ fadst8, fadst8 } // ADST_ADST = 3
{ fdct8, fdct8 }, // DCT_DCT = 0
{ fadst8, fdct8 }, // ADST_DCT = 1
{ fdct8, fadst8 }, // DCT_ADST = 2
{ fadst8, fadst8 } // ADST_ADST = 3
};
static const transform_2d FHT_16[] = {
{ fdct16, fdct16 }, // DCT_DCT = 0
{ fadst16, fdct16 }, // ADST_DCT = 1
{ fdct16, fadst16 }, // DCT_ADST = 2
{ fadst16, fadst16 } // ADST_ADST = 3
{ fdct16, fdct16 }, // DCT_DCT = 0
{ fadst16, fdct16 }, // ADST_DCT = 1
{ fdct16, fadst16 }, // DCT_ADST = 2
{ fadst16, fadst16 } // ADST_ADST = 3
};
void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct4x4_c(input, output, stride);
} else {
@@ -539,36 +539,29 @@ void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = input[j * stride + i] * 16;
if (i == 0 && temp_in[0])
temp_in[0] += 1;
for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16;
if (i == 0 && temp_in[0]) temp_in[0] += 1;
ht.cols(temp_in, temp_out);
for (j = 0; j < 4; ++j)
out[j * 4 + i] = temp_out[j];
for (j = 0; j < 4; ++j) out[j * 4 + i] = temp_out[j];
}
// Rows
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j + i * 4];
for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4];
ht.rows(temp_in, temp_out);
for (j = 0; j < 4; ++j)
output[j + i * 4] = (temp_out[j] + 1) >> 2;
for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2;
}
}
}
void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
int eob = -1;
int i, j;
@@ -600,8 +593,8 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
@@ -620,10 +613,10 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
x3 = s7 + t3;
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
@@ -636,8 +629,7 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
// Rows
for (i = 0; i < 8; ++i) {
fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
for (j = 0; j < 8; ++j)
coeff_ptr[j + i * 8] /= 2;
for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2;
}
// TODO(jingning) Decide the need of these arguments after the
@@ -664,15 +656,14 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
if (tmp)
eob = i;
if (tmp) eob = i;
}
}
*eob_ptr = eob + 1;
}
void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct8x8_c(input, output, stride);
} else {
@@ -683,17 +674,14 @@ void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j)
out[j * 8 + i] = temp_out[j];
for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j];
}
// Rows
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j + i * 8];
for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
ht.rows(temp_in, temp_out);
for (j = 0; j < 8; ++j)
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
@@ -757,8 +745,8 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
}
}
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct16x16_c(input, output, stride);
} else {
@@ -769,8 +757,7 @@ void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
// Columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = input[j * stride + i] * 4;
for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
@@ -778,23 +765,21 @@ void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
// Rows
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j + i * 16];
for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16];
ht.rows(temp_in, temp_out);
for (j = 0; j < 16; ++j)
output[j + i * 16] = temp_out[j];
for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j];
}
}
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp9_fht4x4_c(input, output, stride, tx_type);
}
void vp9_highbd_fht8x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp9_fht8x8_c(input, output, stride, tx_type);
}
@@ -803,8 +788,8 @@ void vp9_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
vp9_fwht4x4_c(input, output, stride);
}
void vp9_highbd_fht16x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp9_fht16x16_c(input, output, stride, tx_type);
}
#endif // CONFIG_VP9_HIGHBITDEPTH


@@ -48,8 +48,7 @@ static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
int motion_magnitude) {
if (motion_magnitude >
noise_motion_thresh(bs, increase_denoising)) {
if (motion_magnitude > noise_motion_thresh(bs, increase_denoising)) {
if (increase_denoising)
return (1 << num_pels_log2_lookup[bs]) << 2;
else
@@ -67,18 +66,15 @@ static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
// we might need to update the code for calculating 'total_adj' in
// case the C code is not bit-exact with corresponding sse2 code.
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
const uint8_t *mc_avg,
int mc_avg_stride,
uint8_t *avg, int avg_stride,
int increase_denoising,
BLOCK_SIZE bs,
int motion_magnitude) {
const uint8_t *mc_avg, int mc_avg_stride,
uint8_t *avg, int avg_stride, int increase_denoising,
BLOCK_SIZE bs, int motion_magnitude) {
int r, c;
const uint8_t *sig_start = sig;
const uint8_t *mc_avg_start = mc_avg;
uint8_t *avg_start = avg;
int diff, adj, absdiff, delta;
int adj_val[] = {3, 4, 6};
int adj_val[] = { 3, 4, 6 };
int total_adj = 0;
int shift_inc = 1;
@@ -105,15 +101,19 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
total_adj += diff;
} else {
switch (absdiff) {
case 4: case 5: case 6: case 7:
adj = adj_val[0];
break;
case 8: case 9: case 10: case 11:
case 12: case 13: case 14: case 15:
adj = adj_val[1];
break;
default:
adj = adj_val[2];
case 4:
case 5:
case 6:
case 7: adj = adj_val[0]; break;
case 8:
case 9:
case 10:
case 11:
case 12:
case 13:
case 14:
case 15: adj = adj_val[1]; break;
default: adj = adj_val[2];
}
if (diff > 0) {
avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj);
@@ -135,14 +135,15 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
}
// Otherwise, we try to dampen the filter if the delta is not too high.
delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising))
>> num_pels_log2_lookup[bs]) + 1;
delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising)) >>
num_pels_log2_lookup[bs]) +
1;
if (delta >= delta_thresh(bs, increase_denoising)) {
return COPY_BLOCK;
}
mc_avg = mc_avg_start;
mc_avg = mc_avg_start;
avg = avg_start;
sig = sig_start;
for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
@@ -178,22 +179,15 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
return COPY_BLOCK;
}
static uint8_t *block_start(uint8_t *framebuf, int stride,
int mi_row, int mi_col) {
return framebuf + (stride * mi_row << 3) + (mi_col << 3);
static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row,
int mi_col) {
return framebuf + (stride * mi_row << 3) + (mi_col << 3);
}
static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
MACROBLOCK *mb,
BLOCK_SIZE bs,
int increase_denoising,
int mi_row,
int mi_col,
PICK_MODE_CONTEXT *ctx,
int motion_magnitude,
int is_skin,
int *zeromv_filter,
int consec_zeromv) {
static VP9_DENOISER_DECISION perform_motion_compensation(
VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs,
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv) {
int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
MV_REFERENCE_FRAME frame;
MACROBLOCKD *filter_mbd = &mb->e_mbd;
@@ -206,20 +200,18 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
frame = ctx->best_reference_frame;
saved_mi = *mi;
if (is_skin && (motion_magnitude > 0 || consec_zeromv < 4))
return COPY_BLOCK;
if (is_skin && (motion_magnitude > 0 || consec_zeromv < 4)) return COPY_BLOCK;
// Avoid denoising for small block (unless motion is small).
// Small blocks are selected in variance partition (before encoding) and
// will typically lie on moving areas.
if (denoiser->denoising_level < kDenHigh &&
motion_magnitude > 16 && bs <= BLOCK_8X8)
if (denoiser->denoising_level < kDenHigh && motion_magnitude > 16 &&
bs <= BLOCK_8X8)
return COPY_BLOCK;
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME &&
ctx->newmv_sse != UINT_MAX &&
if (frame != INTRA_FRAME && ctx->newmv_sse != UINT_MAX &&
sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
mi->ref_frame[0] = ctx->best_reference_frame;
mi->mode = ctx->best_sse_inter_mode;
@@ -230,7 +222,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
ctx->newmv_sse = ctx->zeromv_sse;
// Bias to last reference.
if (frame != LAST_FRAME &&
((ctx->zeromv_lastref_sse < (5 * ctx->zeromv_sse) >> 2) ||
((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) ||
denoiser->denoising_level >= kDenHigh)) {
frame = LAST_FRAME;
ctx->newmv_sse = ctx->zeromv_lastref_sse;
@@ -251,8 +243,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
*mi = saved_mi;
return COPY_BLOCK;
}
if (motion_magnitude >
(noise_motion_thresh(bs, increase_denoising) << 3)) {
if (motion_magnitude > (noise_motion_thresh(bs, increase_denoising) << 3)) {
// Restore everything to its original state
*mi = saved_mi;
return COPY_BLOCK;
@@ -268,37 +259,28 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
// struct.
filter_mbd->plane[0].pre[0].buf =
block_start(denoiser->running_avg_y[frame].y_buffer,
denoiser->running_avg_y[frame].y_stride,
mi_row, mi_col);
filter_mbd->plane[0].pre[0].stride =
denoiser->running_avg_y[frame].y_stride;
denoiser->running_avg_y[frame].y_stride, mi_row, mi_col);
filter_mbd->plane[0].pre[0].stride = denoiser->running_avg_y[frame].y_stride;
filter_mbd->plane[1].pre[0].buf =
block_start(denoiser->running_avg_y[frame].u_buffer,
denoiser->running_avg_y[frame].uv_stride,
mi_row, mi_col);
filter_mbd->plane[1].pre[0].stride =
denoiser->running_avg_y[frame].uv_stride;
block_start(denoiser->running_avg_y[frame].u_buffer,
denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
filter_mbd->plane[1].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
filter_mbd->plane[2].pre[0].buf =
block_start(denoiser->running_avg_y[frame].v_buffer,
denoiser->running_avg_y[frame].uv_stride,
mi_row, mi_col);
filter_mbd->plane[2].pre[0].stride =
denoiser->running_avg_y[frame].uv_stride;
denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
filter_mbd->plane[0].dst.buf =
block_start(denoiser->mc_running_avg_y.y_buffer,
denoiser->mc_running_avg_y.y_stride,
mi_row, mi_col);
denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);
filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
filter_mbd->plane[1].dst.buf =
block_start(denoiser->mc_running_avg_y.u_buffer,
denoiser->mc_running_avg_y.uv_stride,
mi_row, mi_col);
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
filter_mbd->plane[2].dst.buf =
block_start(denoiser->mc_running_avg_y.v_buffer,
denoiser->mc_running_avg_y.uv_stride,
mi_row, mi_col);
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
@@ -313,9 +295,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
return FILTER_BLOCK;
}
void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx,
void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
VP9_DENOISER_DECISION *denoiser_decision) {
int mv_col, mv_row;
int motion_magnitude = 0;
@@ -325,8 +306,8 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride,
mi_row, mi_col);
uint8_t *mc_avg_start =
block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
int is_skin = 0;
int consec_zeromv = 0;
@@ -334,8 +315,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
mv_row = ctx->best_sse_mv.as_mv.row;
motion_magnitude = mv_row * mv_row + mv_col * mv_col;
if (cpi->use_skin_detection &&
bs <= BLOCK_32X32 &&
if (cpi->use_skin_detection && bs <= BLOCK_32X32 &&
denoiser->denoising_level < kDenHigh) {
int motion_level = (motion_magnitude < 16) ? 0 : 1;
// If motion for current block is small/zero, compute consec_zeromv for
@@ -343,7 +323,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
// consec_zeromv when current block has small/zero motion).
consec_zeromv = 0;
if (motion_level == 0) {
VP9_COMMON * const cm = &cpi->common;
VP9_COMMON *const cm = &cpi->common;
int j, i;
// Loop through the 8x8 sub-blocks.
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
@@ -367,48 +347,35 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
}
}
// TODO(marpan): Compute skin detection over sub-blocks.
is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
mb->plane[1].src.buf,
mb->plane[2].src.buf,
mb->plane[0].src.stride,
mb->plane[1].src.stride,
bs,
consec_zeromv,
motion_level);
is_skin = vp9_compute_skin_block(
mb->plane[0].src.buf, mb->plane[1].src.buf, mb->plane[2].src.buf,
mb->plane[0].src.stride, mb->plane[1].src.stride, bs, consec_zeromv,
motion_level);
}
if (!is_skin &&
denoiser->denoising_level == kDenHigh) {
if (!is_skin && denoiser->denoising_level == kDenHigh) {
denoiser->increase_denoising = 1;
} else {
denoiser->increase_denoising = 0;
}
if (denoiser->denoising_level >= kDenLow)
decision = perform_motion_compensation(denoiser, mb, bs,
denoiser->increase_denoising,
mi_row, mi_col, ctx,
motion_magnitude,
is_skin,
&zeromv_filter,
consec_zeromv);
decision = perform_motion_compensation(
denoiser, mb, bs, denoiser->increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride,
mc_avg_start, mc_avg.y_stride,
avg_start, avg.y_stride,
denoiser->increase_denoising,
bs, motion_magnitude);
decision = vp9_denoiser_filter(
src.buf, src.stride, mc_avg_start, mc_avg.y_stride, avg_start,
avg.y_stride, denoiser->increase_denoising, bs, motion_magnitude);
}
if (decision == FILTER_BLOCK) {
vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride,
NULL, 0, NULL, 0,
num_4x4_blocks_wide_lookup[bs] << 2,
vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0,
NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
num_4x4_blocks_high_lookup[bs] << 2);
} else { // COPY_BLOCK
vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride,
NULL, 0, NULL, 0,
num_4x4_blocks_wide_lookup[bs] << 2,
vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0,
NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
num_4x4_blocks_high_lookup[bs] << 2);
}
*denoiser_decision = decision;
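The convolve copies above span the full luma block: the lookup tables give the block size in 4x4 units, and the << 2 scales back to pixels. A worked case (illustrative):

/* For bs == BLOCK_16X16: num_4x4_blocks_wide_lookup[bs] == 4, so the
 * copied region is 4 << 2 == 16 pixels wide (and likewise 16 high). */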
@@ -416,8 +383,8 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
*denoiser_decision = FILTER_ZEROMV_BLOCK;
}
static void copy_frame(YV12_BUFFER_CONFIG * const dest,
const YV12_BUFFER_CONFIG * const src) {
static void copy_frame(YV12_BUFFER_CONFIG *const dest,
const YV12_BUFFER_CONFIG *const src) {
int r;
const uint8_t *srcbuf = src->y_buffer;
uint8_t *destbuf = dest->y_buffer;
@@ -432,8 +399,8 @@ static void copy_frame(YV12_BUFFER_CONFIG * const dest,
}
}
static void swap_frame_buffer(YV12_BUFFER_CONFIG * const dest,
YV12_BUFFER_CONFIG * const src) {
static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest,
YV12_BUFFER_CONFIG *const src) {
uint8_t *tmp_buf = dest->y_buffer;
assert(dest->y_width == src->y_width);
assert(dest->y_height == src->y_height);
@@ -446,8 +413,7 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
FRAME_TYPE frame_type,
int refresh_alt_ref_frame,
int refresh_golden_frame,
int refresh_last_frame,
int resized) {
int refresh_last_frame, int resized) {
// Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized.
if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset) {
@@ -460,8 +426,7 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
}
// If more than one refresh occurs, must copy frame buffer.
if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame)
> 1) {
if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
if (refresh_alt_ref_frame) {
copy_frame(&denoiser->running_avg_y[ALTREF_FRAME],
&denoiser->running_avg_y[INTRA_FRAME]);
@@ -503,8 +468,7 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
ctx->zeromv_sse = sse;
ctx->best_zeromv_reference_frame = mi->ref_frame[0];
if (mi->ref_frame[0] == LAST_FRAME)
ctx->zeromv_lastref_sse = sse;
if (mi->ref_frame[0] == LAST_FRAME) ctx->zeromv_lastref_sse = sse;
}
if (mi->mv[0].as_int != 0 && sse < ctx->newmv_sse) {
@@ -515,8 +479,8 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
}
}
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
int ssx, int ssy,
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, int ssx,
int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
#endif
@@ -541,8 +505,8 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
#endif
}
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height,
ssx, ssy,
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,
ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
@@ -552,8 +516,7 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
return 1;
}
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height,
ssx, ssy,
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
@@ -586,8 +549,7 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
vpx_free_frame_buffer(&denoiser->last_source);
}
void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser,
int noise_level) {
void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) {
denoiser->denoising_level = noise_level;
if (denoiser->denoising_level > kDenLowLow &&
denoiser->prev_denoising_level == kDenLowLow)

View File

@@ -64,22 +64,20 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
FRAME_TYPE frame_type,
int refresh_alt_ref_frame,
int refresh_golden_frame,
int refresh_last_frame,
int resized);
int refresh_last_frame, int resized);
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx ,
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
VP9_DENOISER_DECISION *denoiser_decision);
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
void vp9_denoiser_update_frame_stats(MODE_INFO *mi,
unsigned int sse, PREDICTION_MODE mode,
void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PREDICTION_MODE mode,
PICK_MODE_CONTEXT *ctx);
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
int ssx, int ssy,
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, int ssx,
int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
#endif
@@ -97,8 +95,7 @@ static INLINE int total_adj_strong_thresh(BLOCK_SIZE bs,
void vp9_denoiser_free(VP9_DENOISER *denoiser);
void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser,
int noise_level);
void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level);
#ifdef __cplusplus
} // extern "C"

File diff suppressed because it is too large

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_
#define VP9_ENCODER_VP9_ENCODEFRAME_H_
@@ -31,14 +30,14 @@ struct ThreadData;
#define VAR_HIST_SMALL_CUT_OFF 45
void vp9_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mi_row, int mi_col);
const struct yv12_buffer_config *src, int mi_row,
int mi_col);
void vp9_encode_frame(struct VP9_COMP *cpi);
void vp9_init_tile_data(struct VP9_COMP *cpi);
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td,
int tile_row, int tile_col);
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row,
int tile_col);
void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q);

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
@@ -51,29 +50,29 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
typedef struct vp9_token_state {
int64_t error;
int rate;
int16_t next;
int16_t token;
tran_low_t qc;
tran_low_t dqc;
uint8_t best_index;
int64_t error;
int rate;
int16_t next;
int16_t token;
tran_low_t qc;
tran_low_t dqc;
uint8_t best_index;
} vp9_token_state;
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {10, 6}, {8, 5}, };
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
{ 10, 6 }, { 8, 5 },
};
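The initializer above shows the spaced braced-list style; together with the right-aligned pointers and single-line short case labels elsewhere in this patch, it matches a Google-based configuration. A sketch of a .clang-format that would produce these patterns (an assumption for illustration; the commit itself does not show its config):

# Hypothetical .clang-format (assumed, not part of this commit):
Language: Cpp
BasedOnStyle: Google
AllowShortCaseLabelsOnASingleLine: true  # yields "default: assert(0); break;"
Cpp11BracedListStyle: false              # yields "{ 10, 6 }"
DerivePointerAlignment: false
PointerAlignment: Right                  # yields "int16_t *coeff_ptr"
SortIncludes: false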
#define UPDATE_RD_COST()\
{\
rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
}
#define UPDATE_RD_COST() \
{ \
rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
}
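UPDATE_RD_COST packs two rate-distortion evaluations into one macro. Assuming the usual vp9_rd.h definition of RDCOST (not shown in this diff), each line expands to a cost of this shape:

/* Assumed shape of RDCOST (from vp9_rd.h, not part of this hunk):
 *   RDCOST(RM, DM, R, D) ~ ROUND_POWER_OF_TWO((int64_t)(R) * (RM), 8)
 *                          + ((D) << (DM))
 * i.e. rate scaled by the Lagrangian multiplier plus shifted distortion. */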
// This function is a placeholder for now but may ultimately need
// to scan previous tokens to work out the correct context.
static int trellis_get_coeff_context(const int16_t *scan,
const int16_t *nb,
int idx, int token,
uint8_t *token_cache) {
static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb,
int idx, int token, uint8_t *token_cache) {
int bak = token_cache[scan[idx]], pt;
token_cache[scan[idx]] = vp9_pt_energy_class[token];
pt = get_coef_context(nb, token_cache, idx + 1);
@@ -81,8 +80,8 @@ static int trellis_get_coeff_context(const int16_t *scan,
return pt;
}
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
TX_SIZE tx_size, int ctx) {
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -96,8 +95,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
const PLANE_TYPE type = get_plane_type(plane);
const int default_eob = 16 << (tx_size << 1);
const int shift = (tx_size == TX_32X32);
const int16_t* const dequant_ptr = pd->dequant;
const uint8_t* const band_translate = get_band_translate(tx_size);
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
const scan_order *const so = get_scan(xd, tx_size, type, block);
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
@@ -130,8 +129,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
tokens[eob][1] = tokens[eob][0];
for (i = 0; i < eob; i++)
token_cache[scan[i]] =
vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
token_cache[scan[i]] = vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
for (i = eob; i-- > 0;) {
int base_bits, d2, dx;
@@ -150,10 +148,12 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
if (next < default_eob) {
band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
[tokens[next][0].token];
rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
[tokens[next][1].token];
rate0 +=
mb->token_costs[tx_size][type][ref][band][0][pt][tokens[next][0]
.token];
rate1 +=
mb->token_costs[tx_size][type][ref][band][0][pt][tokens[next][1]
.token];
}
UPDATE_RD_COST();
/* And pick the best. */
@@ -179,8 +179,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
rate1 = tokens[next][1].rate;
if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
(abs(x) * dequant_ptr[rc != 0] < (abs(coeff[rc]) << shift) +
dequant_ptr[rc != 0]))
(abs(x) * dequant_ptr[rc != 0] <
(abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
shortcut = 1;
else
shortcut = 0;
@@ -210,13 +210,15 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
band = band_translate[i + 1];
if (t0 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
[tokens[next][0].token];
rate0 +=
mb->token_costs[tx_size][type][ref][band][!x][pt][tokens[next][0]
.token];
}
if (t1 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
[tokens[next][1].token];
rate1 +=
mb->token_costs[tx_size][type][ref][band][!x][pt][tokens[next][1]
.token];
}
}
@@ -249,8 +251,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
// Account for the rounding difference in the dequantized coefficient
// value when the quantization index is dropped from an even number
// to an odd number.
if (shift & x)
offset += (dequant_ptr[rc != 0] & 0x01);
if (shift & x) offset += (dequant_ptr[rc != 0] & 0x01);
if (sz == 0)
tokens[i][1].dqc = dqcoeff[rc] - offset;
@@ -316,9 +317,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
return final_eob;
}
static INLINE void fdct32x32(int rd_transform,
const int16_t *src, tran_low_t *dst,
int src_stride) {
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
tran_low_t *dst, int src_stride) {
if (rd_transform)
vpx_fdct32x32_rd(src, dst, src_stride);
else
@@ -356,33 +356,31 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant,
eob, scan_order->scan,
scan_order->iscan);
qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_8X8:
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
default:
assert(0);
default: assert(0);
}
return;
}
@@ -399,27 +397,22 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
case TX_16X16:
vpx_fdct16x16(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant,
eob, scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
p->zbin, p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant,
eob, scan_order->scan, scan_order->iscan);
break;
default: assert(0); break;
}
}
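Across all branches the coefficient count handed to the quantizer is the square of the transform dimension, which is also what default_eob computes in vp9_optimize_b above:

/* TX_4X4 -> 16, TX_8X8 -> 64, TX_16X16 -> 256, TX_32X32 -> 1024;
 * equivalently 16 << (tx_size << 1), matching default_eob above. */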
@@ -447,23 +440,22 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
case TX_16X16:
vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
case TX_8X8:
vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
default:
assert(0);
default: assert(0);
}
return;
}
@@ -472,31 +464,25 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
switch (tx_size) {
case TX_32X32:
vpx_fdct32x32_1(src_diff, coeff, diff_stride);
vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
case TX_16X16:
vpx_fdct16x16_1(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
case TX_8X8:
vpx_fdct8x8_1(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
default:
assert(0);
vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
default: assert(0); break;
}
}
@@ -516,37 +502,36 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_16X16:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_8X8:
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
default:
assert(0);
default: assert(0);
}
return;
}
@@ -562,34 +547,28 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
break;
case TX_16X16:
vpx_fdct16x16(src_diff, coeff, diff_stride);
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
vpx_fdct8x8(src_diff, coeff, diff_stride);
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
static void encode_block(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -649,35 +628,32 @@ static void encode_block(int plane, int block, int row, int col,
*a = *l = p->eobs[block] > 0;
}
if (p->eobs[block])
*(args->skip) = 0;
if (p->eobs[block]) *(args->skip) = 0;
if (x->skip_encode || p->eobs[block] == 0)
return;
if (x->skip_encode || p->eobs[block] == 0) return;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
xd->bd);
break;
case TX_16X16:
vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
xd->bd);
break;
case TX_8X8:
vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
xd->bd);
break;
case TX_4X4:
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
xd->bd);
break;
default:
assert(0 && "Invalid transform size");
default: assert(0 && "Invalid transform size");
}
return;
}
@@ -699,15 +675,13 @@ static void encode_block(int plane, int block, int row, int col,
// case.
x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
default:
assert(0 && "Invalid transform size");
break;
default: assert(0 && "Invalid transform size"); break;
}
}
static void encode_block_pass1(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
MACROBLOCK *const x = (MACROBLOCK *)arg;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
@@ -721,8 +695,8 @@ static void encode_block_pass1(int plane, int block, int row, int col,
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
@@ -739,23 +713,21 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MODE_INFO *mi = xd->mi[0];
struct encode_b_args arg = {x, 1, NULL, NULL, &mi->skip};
struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
int plane;
mi->skip = 1;
if (x->skip)
return;
if (x->skip) return;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
if (!x->skip_recode)
vp9_subtract_plane(x, bsize, plane);
if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
vp9_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
ctx.tl[plane]);
arg.enable_coeff_opt = 1;
} else {
arg.enable_coeff_opt = 0;
@@ -769,9 +741,9 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
}
void vp9_encode_block_intra(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
@@ -817,16 +789,16 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
}
vp9_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, col, row, plane);
x->skip_encode ? src_stride : dst_stride, dst,
dst_stride, col, row, plane);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
case TX_32X32:
if (!x->skip_recode) {
vpx_highbd_subtract_block(32, 32, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift,
@@ -839,34 +811,34 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
break;
case TX_16X16:
if (!x->skip_recode) {
vpx_highbd_subtract_block(16, 16, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
if (tx_type == DCT_DCT)
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
else
vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob) {
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
*eob, xd->bd);
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob,
xd->bd);
}
break;
case TX_8X8:
if (!x->skip_recode) {
vpx_highbd_subtract_block(8, 8, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
if (tx_type == DCT_DCT)
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
else
vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob) {
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
@@ -875,16 +847,16 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
break;
case TX_4X4:
if (!x->skip_recode) {
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
if (tx_type != DCT_DCT)
vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob) {
@@ -898,12 +870,9 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
}
}
break;
default:
assert(0);
return;
default: assert(0); return;
}
if (*eob)
*(args->skip) = 0;
if (*eob) *(args->skip) = 0;
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -911,8 +880,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
switch (tx_size) {
case TX_32X32:
if (!x->skip_recode) {
vpx_subtract_block(32, 32, src_diff, diff_stride,
src, src_stride, dst, dst_stride);
vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
@@ -920,20 +889,19 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
}
if (!x->skip_encode && *eob)
vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
break;
case TX_16X16:
if (!x->skip_recode) {
vpx_subtract_block(16, 16, src_diff, diff_stride,
src, src_stride, dst, dst_stride);
vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -943,13 +911,12 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
break;
case TX_8X8:
if (!x->skip_recode) {
vpx_subtract_block(8, 8, src_diff, diff_stride,
src, src_stride, dst, dst_stride);
vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -959,16 +926,15 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
break;
case TX_4X4:
if (!x->skip_recode) {
vpx_subtract_block(4, 4, src_diff, diff_stride,
src, src_stride, dst, dst_stride);
vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
if (tx_type != DCT_DCT)
vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
@@ -983,27 +949,23 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
}
break;
default:
assert(0);
break;
default: assert(0); break;
}
if (*eob)
*(args->skip) = 0;
if (*eob) *(args->skip) = 0;
}
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
int enable_optimize_b) {
const MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
struct encode_b_args arg = {x, enable_optimize_b,
ctx.ta[plane], ctx.tl[plane],
&xd->mi[0]->skip};
struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
ctx.tl[plane], &xd->mi[0]->skip };
if (enable_optimize_b && x->optimize &&
(!x->skip_recode || !x->skip_optimize)) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(xd->mi[0], pd) :
xd->mi[0]->tx_size;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size =
plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
} else {
arg.enable_coeff_opt = 0;

View File

@@ -25,8 +25,8 @@ struct encode_b_args {
ENTROPY_CONTEXT *tl;
int8_t *skip;
};
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block,
TX_SIZE tx_size, int ctx);
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int ctx);
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
@@ -39,8 +39,7 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
void vp9_encode_block_intra(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg);
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
int enable_optimize_b);

View File

@@ -30,15 +30,15 @@ void vp9_entropy_mv_init(void) {
vp9_tokens_from_tree(mv_fp_encodings, vp9_mv_fp_tree);
}
static void encode_mv_component(vpx_writer* w, int comp,
const nmv_component* mvcomp, int usehp) {
static void encode_mv_component(vpx_writer *w, int comp,
const nmv_component *mvcomp, int usehp) {
int offset;
const int sign = comp < 0;
const int mag = sign ? -comp : comp;
const int mv_class = vp9_get_mv_class(mag - 1, &offset);
const int d = offset >> 3; // int mv data
const int fr = (offset >> 1) & 3; // fractional mv data
const int hp = offset & 1; // high precision mv data
const int d = offset >> 3; // int mv data
const int fr = (offset >> 1) & 3; // fractional mv data
const int hp = offset & 1; // high precision mv data
assert(comp != 0);
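The class offset is split into integer, fractional, and high-precision fields by the three extractions above; a worked example with illustrative values:

/* offset == 18:
 *   d  = 18 >> 3       == 2  (integer mv data)
 *   fr = (18 >> 1) & 3 == 1  (quarter-pel fraction)
 *   hp = 18 & 1        == 0  (eighth-pel bit)
 * and indeed (2 << 3) | (1 << 1) | 0 == 18. */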
@@ -56,24 +56,21 @@ static void encode_mv_component(vpx_writer* w, int comp,
} else {
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
for (i = 0; i < n; ++i)
vpx_write(w, (d >> i) & 1, mvcomp->bits[i]);
for (i = 0; i < n; ++i) vpx_write(w, (d >> i) & 1, mvcomp->bits[i]);
}
// Fractional bits
vp9_write_token(w, vp9_mv_fp_tree,
mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp,
&mv_fp_encodings[fr]);
// High precision bit
if (usehp)
vpx_write(w, hp,
mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
vpx_write(w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
}
static void build_nmv_component_cost_table(int *mvcost,
const nmv_component* const mvcomp,
const nmv_component *const mvcomp,
int usehp) {
int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
int bits_cost[MV_OFFSET_BITS][2];
@@ -107,12 +104,12 @@ static void build_nmv_component_cost_table(int *mvcost,
int d, e, f;
int cost = class_cost[MV_CLASS_0];
int v = o + 1;
d = (o >> 3); /* int mv data */
f = (o >> 1) & 3; /* fractional pel mv data */
cost += class0_cost[d];
cost += class0_fp_cost[d][f];
if (usehp) {
e = (o & 1); /* high precision mv data */
cost += class0_hp_cost[e];
}
mvcost[v] = cost + sign_cost[0];
@@ -123,9 +120,8 @@ static void build_nmv_component_cost_table(int *mvcost,
for (d = 0; d < (1 << c); ++d) {
int f;
int whole_cost = class_cost[c];
int b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i)
whole_cost += bits_cost[i][((d >> i) & 1)];
int b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i) whole_cost += bits_cost[i][((d >> i) & 1)];
for (f = 0; f < 4; ++f) {
int cost = whole_cost + fp_cost[f];
int v = (CLASS0_SIZE << (c + 2)) + d * 8 + f * 2 /* + e */ + 1;
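The index formula maps (class, integer bits, fraction) back to a motion-vector magnitude. A quick sanity check, assuming CLASS0_BITS == 1 and hence CLASS0_SIZE == 2 as in vp9's entropymv.h:

/* Class 0 enumerates 2 * 4 * 2 == 16 magnitudes, v = 1..16; the first
 * class-1 entry, c == 1, d == 0, f == 0:
 *   v = (2 << (1 + 2)) + 0 * 8 + 0 * 2 + 1 == 17
 * lands immediately after them. */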
@@ -163,8 +159,8 @@ static int update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
static void write_mv_update(const vpx_tree_index *tree,
vpx_prob probs[/*n - 1*/],
const unsigned int counts[/*n - 1*/],
int n, vpx_writer *w) {
const unsigned int counts[/*n - 1*/], int n,
vpx_writer *w) {
int i;
unsigned int branch_ct[32][2];
@@ -214,11 +210,9 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w,
}
}
void vp9_encode_mv(VP9_COMP* cpi, vpx_writer* w,
const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp) {
const MV diff = {mv->row - ref->row,
mv->col - ref->col};
void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
const nmv_context *mvctx, int usehp) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff);
usehp = usehp && use_mv_hp(ref);
@@ -238,21 +232,20 @@ void vp9_encode_mv(VP9_COMP* cpi, vpx_writer* w,
}
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context* ctx, int usehp) {
const nmv_context *ctx, int usehp) {
vp9_cost_tokens(mvjoint, ctx->joints, vp9_mv_joint_tree);
build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp);
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
static void inc_mvs(const MODE_INFO *mi, const MB_MODE_INFO_EXT *mbmi_ext,
const int_mv mvs[2],
nmv_context_counts *counts) {
const int_mv mvs[2], nmv_context_counts *counts) {
int i;
for (i = 0; i < 1 + has_second_ref(mi); ++i) {
const MV *ref = &mbmi_ext->ref_mvs[mi->ref_frame[i]][0].as_mv;
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
vp9_inc_mv(&diff, counts);
}
}
@@ -275,8 +268,6 @@ void vp9_update_mv_count(ThreadData *td) {
}
}
} else {
if (mi->mode == NEWMV)
inc_mvs(mi, mbmi_ext, mi->mv, &td->counts->mv);
if (mi->mode == NEWMV) inc_mvs(mi, mbmi_ext, mi->mv, &td->counts->mv);
}
}

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_ENCODEMV_H_
#define VP9_ENCODER_VP9_ENCODEMV_H_
@@ -23,11 +22,11 @@ void vp9_entropy_mv_init(void);
void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w,
nmv_context_counts *const counts);
void vp9_encode_mv(VP9_COMP *cpi, vpx_writer* w, const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp);
void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
const nmv_context *mvctx, int usehp);
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context* mvctx, int usehp);
const nmv_context *mvctx, int usehp);
void vp9_update_mv_count(ThreadData *td);

File diff suppressed because it is too large

View File

@@ -72,7 +72,6 @@ typedef struct {
FRAME_CONTEXT fc;
} CODING_CONTEXT;
typedef enum {
// encode_breakout is disabled.
ENCODE_BREAKOUT_DISABLED = 0,
@@ -83,10 +82,10 @@ typedef enum {
} ENCODE_BREAKOUT_TYPE;
typedef enum {
NORMAL = 0,
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
} VPX_SCALING;
typedef enum {
@@ -106,7 +105,7 @@ typedef enum {
} MODE;
typedef enum {
FRAMEFLAGS_KEY = 1 << 0,
FRAMEFLAGS_GOLDEN = 1 << 1,
FRAMEFLAGS_ALTREF = 1 << 2,
} FRAMETYPE_FLAGS;
@@ -129,14 +128,14 @@ typedef enum {
typedef struct VP9EncoderConfig {
BITSTREAM_PROFILE profile;
vpx_bit_depth_t bit_depth; // Codec bit-depth.
int width; // width of data passed to the compressor
int height; // height of data passed to the compressor
unsigned int input_bit_depth; // Input bit depth.
double init_framerate; // set to passed in framerate
int64_t target_bandwidth; // bandwidth to be used in bits per second
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0:
int speed;
// maximum allowed bitrate for any intra frame in % of bitrate target.
unsigned int rc_max_intra_bitrate_pct;
@@ -188,7 +187,7 @@ typedef struct VP9EncoderConfig {
int frame_periodic_boost;
// two pass datarate control
int two_pass_vbrbias; // two pass datarate control tweaks
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
// END DATARATE CONTROL OPTIONS
@@ -289,15 +288,10 @@ typedef struct ActiveMap {
unsigned char *map;
} ActiveMap;
typedef enum {
Y,
U,
V,
ALL
} STAT_TYPE;
typedef enum { Y, U, V, ALL } STAT_TYPE;
typedef struct IMAGE_STAT {
double stat[ALL+1];
double stat[ALL + 1];
double worst;
} ImageStat;
@@ -333,7 +327,7 @@ typedef struct {
uint64_t max_luma_sample_rate;
uint32_t max_luma_picture_size;
double average_bitrate; // in kilobits per second
double max_cpb_size; // in kilobits
double max_cpb_size; // in kilobits
double compression_ratio;
uint8_t max_col_tiles;
uint32_t min_altref_distance;
@@ -375,8 +369,8 @@ typedef struct VP9_COMP {
DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
VP9_COMMON common;
VP9EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
struct lookahead_entry *alt_ref_source;
YV12_BUFFER_CONFIG *Source;
YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames
@@ -439,11 +433,11 @@ typedef struct VP9_COMP {
int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE];
struct vpx_codec_pkt_list *output_pkt_list;
MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
int mbgraph_n_frames; // number of frames filled in the above
int static_mb_pct; // % forced skip mbs by segmentation
int ref_frame_flags;
SPEED_FEATURES sf;
@@ -500,7 +494,7 @@ typedef struct VP9_COMP {
double total_blockiness;
double worst_blockiness;
int bytes;
double summed_quality;
double summed_weights;
double summedp_quality;
@@ -606,15 +600,15 @@ void vp9_remove_compressor(VP9_COMP *cpi);
void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf);
// receive a frame's worth of data. caller can assume that a copy of this
// frame is made and not just a copy of the pointer.
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time_stamp);
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush);
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush);
int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *flags);
@@ -635,8 +629,8 @@ int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
int vp9_set_internal_size(VP9_COMP *cpi,
VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
VPX_SCALING vert_mode);
int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
unsigned int height);
@@ -646,8 +640,7 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc);
int vp9_get_quantizer(struct VP9_COMP *cpi);
static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
return frame_is_intra_only(&cpi->common) ||
cpi->refresh_alt_ref_frame ||
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
}
@@ -673,8 +666,8 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
VP9_COMMON *const cm = &cpi->common;
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
return
buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf
: NULL;
}
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
@@ -737,10 +730,10 @@ static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0,
MV_REFERENCE_FRAME ref1) {
xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
: 0];
xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
: 0];
xd->block_refs[0] =
&cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0];
xd->block_refs[1] =
&cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME : 0];
}
static INLINE int get_chessboard_index(const int frame_index) {

View File

@@ -43,10 +43,10 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
const int tile_rows = 1 << cm->log2_tile_rows;
int t;
(void) unused;
(void)unused;
for (t = thread_data->start; t < tile_rows * tile_cols;
t += cpi->num_workers) {
t += cpi->num_workers) {
int tile_row = t / tile_cols;
int tile_col = t % tile_cols;
@@ -63,8 +63,8 @@ static int get_max_tile_cols(VP9_COMP *cpi) {
int log2_tile_cols;
vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols);
log2_tile_cols =
clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
return (1 << log2_tile_cols);
}
@@ -92,8 +92,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
vpx_calloc(allocated_workers,
sizeof(*cpi->tile_thr_data)));
vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));
for (i = 0; i < allocated_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
@@ -140,7 +139,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
worker->hook = (VPxWorkerHook)enc_worker_hook;
worker->data1 = &cpi->tile_thr_data[i];
worker->data2 = NULL;
thread_data = (EncWorkerData*)worker->data1;
thread_data = (EncWorkerData *)worker->data1;
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@@ -173,7 +172,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
// Encode a frame
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
// Set the starting tile for each thread.
thread_data->start = i;
@@ -192,7 +191,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
// Accumulate counters.
if (i < cpi->num_workers - 1) {

View File

@@ -16,8 +16,7 @@
#include "vp9/encoder/vp9_extend.h"
static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
uint8_t *dst, int dst_pitch,
int w, int h,
uint8_t *dst, int dst_pitch, int w, int h,
int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int i, linesize;
@@ -43,7 +42,7 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
src_ptr1 = dst - extend_left;
src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
dst_ptr2 = dst + dst_pitch * (h) - extend_left;
dst_ptr2 = dst + dst_pitch * (h)-extend_left;
linesize = extend_left + extend_right + w;
for (i = 0; i < extend_top; i++) {
@@ -59,9 +58,8 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
uint8_t *dst8, int dst_pitch,
int w, int h,
int extend_top, int extend_left,
uint8_t *dst8, int dst_pitch, int w,
int h, int extend_top, int extend_left,
int extend_bottom, int extend_right) {
int i, linesize;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@@ -88,7 +86,7 @@ static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
src_ptr1 = dst - extend_left;
src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
dst_ptr2 = dst + dst_pitch * (h) - extend_left;
dst_ptr2 = dst + dst_pitch * (h)-extend_left;
linesize = extend_left + extend_right + w;
for (i = 0; i < extend_top; i++) {
@@ -127,51 +125,46 @@ void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
#if CONFIG_VP9_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_crop_width, src->y_crop_height,
et_y, el_y, eb_y, er_y);
highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_crop_width,
src->y_crop_height, et_y, el_y, eb_y, er_y);
highbd_copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(
src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
highbd_copy_and_extend_plane(
src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_crop_width, src->y_crop_height,
copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, src->y_crop_width, src->y_crop_height,
et_y, el_y, eb_y, er_y);
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_crop_width, src->uv_crop_height,
copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
et_uv, el_uv, eb_uv, er_uv);
}
void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw) {
YV12_BUFFER_CONFIG *dst, int srcy,
int srcx, int srch, int srcw) {
// If the side is not touching the border then don't extend.
const int et_y = srcy ? 0 : dst->border;
const int el_y = srcx ? 0 : dst->border;
const int eb_y = srcy + srch != src->y_height ? 0 :
dst->border + dst->y_height - src->y_height;
const int er_y = srcx + srcw != src->y_width ? 0 :
dst->border + dst->y_width - src->y_width;
const int eb_y = srcy + srch != src->y_height
? 0
: dst->border + dst->y_height - src->y_height;
const int er_y = srcx + srcw != src->y_width
? 0
: dst->border + dst->y_width - src->y_width;
const int src_y_offset = srcy * src->y_stride + srcx;
const int dst_y_offset = srcy * dst->y_stride + srcx;
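Only the sides of the rectangle that touch the source boundary are extended, and the bottom/right amounts also absorb any dst/src size difference. A worked case (illustrative):

/* A rect flush with the bottom (srcy + srch == src->y_height), with
 * dst->border == 32 and dst->y_height == src->y_height, gets
 *   eb_y = 32 + dst->y_height - src->y_height == 32
 * rows of replicated extension, while an interior rect gets eb_y == 0. */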
@@ -185,17 +178,14 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride,
srcw, srch,
dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch,
et_y, el_y, eb_y, er_y);
copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
dst->u_buffer + dst_uv_offset, dst->uv_stride,
srcw_uv, srch_uv,
et_uv, el_uv, eb_uv, er_uv);
dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
srch_uv, et_uv, el_uv, eb_uv, er_uv);
copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
dst->v_buffer + dst_uv_offset, dst->uv_stride,
srcw_uv, srch_uv,
et_uv, el_uv, eb_uv, er_uv);
dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
srch_uv, et_uv, el_uv, eb_uv, er_uv);
}

View File

@@ -18,14 +18,12 @@
extern "C" {
#endif
void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw);
YV12_BUFFER_CONFIG *dst, int srcy,
int srcx, int srch, int srcw);
#ifdef __cplusplus
} // extern "C"
#endif

File diff suppressed because it is too large

View File

@@ -154,8 +154,7 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
void calculate_coded_size(struct VP9_COMP *cpi,
int *scaled_frame_width,
void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
#ifdef __cplusplus

View File

@@ -19,33 +19,28 @@
#include "vp9/encoder/vp9_lookahead.h"
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
int *idx) {
static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) {
int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
if (++index >= ctx->max_sz)
index -= ctx->max_sz;
if (++index >= ctx->max_sz) index -= ctx->max_sz;
*idx = index;
return buf;
}
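pop advances the caller's cursor with wraparound, so read_idx and write_idx traverse the buffer as a ring. For instance (illustrative):

/* With ctx->max_sz == 4 and *idx == 3, pop() returns ctx->buf + 3 and
 * leaves *idx == 0, since 3 + 1 wraps past max_sz. */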
void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
int i;
for (i = 0; i < ctx->max_sz; i++)
vpx_free_frame_buffer(&ctx->buf[i].img);
for (i = 0; i < ctx->max_sz; i++) vpx_free_frame_buffer(&ctx->buf[i].img);
free(ctx->buf);
}
free(ctx);
}
}
struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
unsigned int height,
unsigned int subsampling_x,
@@ -69,20 +64,18 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
unsigned int i;
ctx->max_sz = depth;
ctx->buf = calloc(depth, sizeof(*ctx->buf));
if (!ctx->buf)
goto bail;
if (!ctx->buf) goto bail;
for (i = 0; i < depth; i++)
if (vpx_alloc_frame_buffer(&ctx->buf[i].img,
width, height, subsampling_x, subsampling_y,
if (vpx_alloc_frame_buffer(
&ctx->buf[i].img, width, height, subsampling_x, subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS,
legacy_byte_alignment))
VP9_ENC_BORDER_IN_PIXELS, legacy_byte_alignment))
goto bail;
}
return ctx;
bail:
vp9_lookahead_destroy(ctx);
return NULL;
}
@@ -109,8 +102,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int subsampling_y = src->subsampling_y;
int larger_dimensions, new_dimensions;
if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz)
return 1;
if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
ctx->sz++;
buf = pop(ctx, &ctx->write_idx);
@@ -118,8 +110,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
height != buf->img.y_crop_height ||
uv_width != buf->img.uv_crop_width ||
uv_height != buf->img.uv_crop_height;
larger_dimensions = width > buf->img.y_width ||
height > buf->img.y_height ||
larger_dimensions = width > buf->img.y_width || height > buf->img.y_height ||
uv_width > buf->img.uv_width ||
uv_height > buf->img.uv_height;
assert(!larger_dimensions || new_dimensions);
@@ -139,27 +130,22 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
while (1) {
// Find the first active macroblock in this row.
for (; col < mb_cols; ++col) {
if (active_map[col])
break;
if (active_map[col]) break;
}
// No more active macroblock in this row.
if (col == mb_cols)
break;
if (col == mb_cols) break;
// Find the end of active region in this row.
active_end = col;
for (; active_end < mb_cols; ++active_end) {
if (!active_map[active_end])
break;
if (!active_map[active_end]) break;
}
// Only copy this active region.
      vp9_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4,
                                          16, (active_end - col) << 4);
// Start again from the end of this active region.
col = active_end;
@@ -172,14 +158,13 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
if (larger_dimensions) {
YV12_BUFFER_CONFIG new_img;
memset(&new_img, 0, sizeof(new_img));
      if (vpx_alloc_frame_buffer(&new_img, width, height, subsampling_x,
                                 subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
                                 VP9_ENC_BORDER_IN_PIXELS, 0))
        return 1;
vpx_free_frame_buffer(&buf->img);
buf->img = new_img;
} else if (new_dimensions) {
@@ -202,7 +187,6 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
return 0;
}
struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx,
int drain) {
struct lookahead_entry *buf = NULL;
@@ -214,7 +198,6 @@ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx,
return buf;
}
struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
int index) {
struct lookahead_entry *buf = NULL;
@@ -223,16 +206,14 @@ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
// Forward peek
if (index < ctx->sz) {
index += ctx->read_idx;
      if (index >= ctx->max_sz) index -= ctx->max_sz;
buf = ctx->buf + index;
}
} else if (index < 0) {
// Backward peek
if (-index <= MAX_PRE_FRAMES) {
index += ctx->read_idx;
      if (index < 0) index += ctx->max_sz;
buf = ctx->buf + index;
}
}
@@ -240,6 +221,4 @@ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
return buf;
}
unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; }

View File

@@ -26,10 +26,10 @@ extern "C" {
#define MAX_LAG_BUFFERS 25
struct lookahead_entry {
  YV12_BUFFER_CONFIG img;
  int64_t ts_start;
  int64_t ts_end;
  unsigned int flags;
};
// The max of past frames we want to keep in the queue.
@@ -57,12 +57,10 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
#endif
unsigned int depth);
/**\brief Destroys the lookahead stage
*/
void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
/**\brief Enqueue a source buffer
*
* This function will copy the source image into a new framebuffer with
@@ -85,7 +83,6 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
#endif
unsigned int flags);
/**\brief Get the next source buffer to encode
*
*
@@ -96,9 +93,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
* \retval NULL, if drain set and queue is empty
* \retval NULL, if drain not set and queue not of the configured depth
*/
struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, int drain);
/**\brief Get a future source buffer to encode
*
@@ -110,7 +105,6 @@ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx,
struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
int index);
/**\brief Get the number of frames currently in the lookahead queue
*
* \param[in] ctx Pointer to the lookahead context

View File

@@ -22,11 +22,8 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
                                              MV *dst_mv, int mb_row,
                                              int mb_col) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -64,9 +61,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
        cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
        0);
}
xd->mi[0]->mode = NEWMV;
@@ -109,10 +105,10 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
// based search as well.
if (ref_mv->row != 0 || ref_mv->col != 0) {
unsigned int tmp_err;
    MV zero_ref_mv = { 0, 0 }, tmp_mv;
    tmp_err =
        do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, mb_row, mb_col);
if (tmp_err < err) {
dst_mv->as_mv = tmp_mv;
err = tmp_err;
@@ -137,7 +133,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
return err;
}
static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
  MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
PREDICTION_MODE best_mode = -1, mode;
unsigned int best_err = INT_MAX;
@@ -148,38 +144,30 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
unsigned int err;
xd->mi[0]->mode = mode;
    vp9_predict_intra_block(xd, 2, TX_16X16, mode, x->plane[0].src.buf,
                            x->plane[0].src.stride, xd->plane[0].dst.buf,
                            xd->plane[0].dst.stride, 0, 0, 0);
err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
// find best
if (err < best_err) {
      best_err = err;
best_mode = mode;
}
}
  if (pbest_mode) *pbest_mode = best_mode;
return best_err;
}
static void update_mbgraph_mb_stats(VP9_COMP *cpi, MBGRAPH_MB_STATS *stats,
                                    YV12_BUFFER_CONFIG *buf, int mb_y_offset,
                                    YV12_BUFFER_CONFIG *golden_ref,
                                    const MV *prev_golden_ref_mv,
                                    YV12_BUFFER_CONFIG *alt_ref, int mb_row,
                                    int mb_col) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
int intra_error;
@@ -193,10 +181,8 @@ static void update_mbgraph_mb_stats
xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
// do intra 16x16 prediction
  intra_error = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode);
  if (intra_error <= 0) intra_error = 1;
stats->ref[INTRA_FRAME].err = intra_error;
// Golden frame MV search, if it exists and is different than last frame
@@ -204,10 +190,9 @@ static void update_mbgraph_mb_stats
int g_motion_error;
xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
xd->plane[0].pre[0].stride = golden_ref->y_stride;
    g_motion_error =
        do_16x16_motion_search(cpi, prev_golden_ref_mv,
                               &stats->ref[GOLDEN_FRAME].m.mv, mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
stats->ref[GOLDEN_FRAME].err = INT_MAX;
@@ -220,8 +205,8 @@ static void update_mbgraph_mb_stats
int a_motion_error;
xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
xd->plane[0].pre[0].stride = alt_ref->y_stride;
    a_motion_error =
        do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv);
stats->ref[ALTREF_FRAME].err = a_motion_error;
} else {
@@ -241,20 +226,20 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
int mb_col, mb_row, offset = 0;
int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
  MV gld_top_mv = { 0, 0 };
MODE_INFO mi_local;
MODE_INFO mi_above, mi_left;
vp9_zero(mi_local);
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
  x->mv_row_min = -BORDER_MV_PIXELS_B16;
  x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
// Signal to vp9_predict_intra_block() that above is not available
xd->above_mi = NULL;
  xd->plane[0].dst.stride = buf->y_stride;
  xd->plane[0].pre[0].stride = buf->y_stride;
xd->plane[1].dst.stride = buf->uv_stride;
xd->mi[0] = &mi_local;
mi_local.sb_type = BLOCK_16X16;
@@ -263,23 +248,22 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
MV gld_left_mv = gld_top_mv;
    int mb_y_in_offset = mb_y_offset;
int arf_y_in_offset = arf_y_offset;
int gld_y_in_offset = gld_y_offset;
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
    x->mv_col_min = -BORDER_MV_PIXELS_B16;
    x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
// Signal to vp9_predict_intra_block() that left is not available
xd->left_mi = NULL;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
      update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref,
                              &gld_left_mv, alt_ref, mb_row, mb_col);
gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
if (mb_col == 0) {
gld_top_mv = gld_left_mv;
@@ -287,23 +271,22 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
// Signal to vp9_predict_intra_block() that left is available
xd->left_mi = &mi_left;
      mb_y_in_offset += 16;
      gld_y_in_offset += 16;
      arf_y_in_offset += 16;
      x->mv_col_min -= 16;
      x->mv_col_max -= 16;
}
// Signal to vp9_predict_intra_block() that above is available
xd->above_mi = &mi_above;
    mb_y_offset += buf->y_stride * 16;
    gld_y_offset += golden_ref->y_stride * 16;
    if (alt_ref) arf_y_offset += alt_ref->y_stride * 16;
    x->mv_row_min -= 16;
    x->mv_row_max -= 16;
    offset += cm->mb_cols;
}
}
@@ -317,9 +300,9 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
int *arf_not_zz;
  CHECK_MEM_ERROR(
      cm, arf_not_zz,
      vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1));
// We are not interested in results beyond the alt ref itself.
if (n_frames > cpi->rc.frames_till_gf_update_due)
@@ -335,12 +318,11 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
int altref_err = mb_stats->ref[ALTREF_FRAME].err;
      int intra_err = mb_stats->ref[INTRA_FRAME].err;
int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
// Test for altref vs intra and gf and that its mv was 0,0.
      if (altref_err > 1000 || altref_err > intra_err ||
          altref_err > golden_err) {
arf_not_zz[offset + mb_col]++;
}
@@ -395,11 +377,9 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
// we need to look ahead beyond where the ARF transitions into
// being a GF - so exit if we don't look ahead beyond that
  if (n_frames <= cpi->rc.frames_till_gf_update_due) return;
  if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS;
cpi->mbgraph_n_frames = n_frames;
for (i = 0; i < n_frames; i++) {
@@ -418,8 +398,8 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
assert(q_cur != NULL);
    update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, golden_ref,
                               cpi->Source);
}
vpx_clear_system_state();

View File

@@ -25,9 +25,7 @@ typedef struct {
} ref[MAX_REF_FRAMES];
} MBGRAPH_MB_STATS;
typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
struct VP9_COMP;

File diff suppressed because it is too large

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_MCOMP_H_
#define VP9_ENCODER_VP9_MCOMP_H_
@@ -26,67 +25,56 @@ extern "C" {
// Enable the use of motion vector in range [-1023, 1023].
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
// Maximum size of the first step in full pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
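A quick sanity check on these bounds (assuming MAX_MVSEARCH_STEPS is 11, which is what the [-1023, 1023] comment above implies):

// MAX_FULL_PEL_VAL = (1 << (11 - 1)) - 1 = 1023  -> search range [-1023, 1023]
// MAX_FIRST_STEP   = (1 << (11 - 1))     = 1024  -> largest initial step size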
// Allowed motion vector pixel distance outside image border
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
typedef struct search_site_config {
// motion search sites
  MV ss_mv[8 * MAX_MVSEARCH_STEPS];        // Motion vector
  intptr_t ss_os[8 * MAX_MVSEARCH_STEPS];  // Offset
int searches_per_step;
int total_steps;
} search_site_config;
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
void vp9_init3smotion_compensation(search_site_config *cfg, int stride);
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv);
int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
                    int *mvcost[2], int weight);
// Utility to compute variance + MV rate cost for a given MV
int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
                       const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
                       int use_mvcost);
int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
                          const MV *center_mv, const uint8_t *second_pred,
                          const vp9_variance_fn_ptr_t *vfp, int use_mvcost);
struct VP9_COMP;
struct SPEED_FEATURES;
int vp9_init_search_range(int size);
int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv,
                            int sad_per_bit, int distance,
                            const struct vp9_variance_vtable *fn_ptr,
                            const struct mv *center_mv);
// Perform integral projection based motion estimation.
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
                                           MACROBLOCK *x, BLOCK_SIZE bsize,
                                           int mi_row, int mi_col);
typedef uint32_t(fractional_mv_step_fp)(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h);
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
@@ -94,39 +82,31 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore;
extern fractional_mv_step_fp vp9_skip_sub_pixel_tree;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv,
                                    int sad_per_bit, int distance,
                                    const vp9_variance_fn_ptr_t *fn_ptr,
                                    const MV *center_mv, MV *best_mv);
typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x, MV *ref_mv,
                                        int sad_per_bit, int distance,
                                        const vp9_variance_fn_ptr_t *fn_ptr,
                                        const MV *center_mv);
typedef int (*vp9_diamond_search_fn_t)(
    const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv,
    int search_param, int sad_per_bit, int *num00,
    const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv);
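These typedefs let the encoder pick a search strategy at runtime through a function pointer. A self-contained analogue of that dispatch pattern (deliberately simplified types, not the real libvpx structures):

#include <stdio.h>

typedef int (*search_fn_t)(int start, int range);  // stand-in for the typedefs above

static int coarse_search(int start, int range) { return start + range; }
static int fine_search(int start, int range) { return start + range / 4; }

int main(void) {
  search_fn_t search = coarse_search;  // chosen once, e.g. from speed features
  printf("%d\n", search(10, 8));       // 18
  search = fine_search;                // swapped without touching call sites
  printf("%d\n", search(10, 8));       // 12
  return 0;
}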
int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
                             int search_range,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv, const uint8_t *second_pred);
struct VP9_COMP;
int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                          MV *mvp_full, int step_param, int error_per_bit,
                          int *cost_list, const MV *ref_mv, MV *tmp_mv,
                          int var_max, int rd);
#ifdef __cplusplus

View File

@@ -21,9 +21,7 @@
#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_encoder.h"
void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) {
ne->enabled = 0;
ne->level = kLowLow;
ne->value = 0;
@@ -40,24 +38,18 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
}
static int enable_noise_estimation(VP9_COMP *const cpi) {
// Enable noise estimation if denoising is on.
#if CONFIG_VP9_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0) return 1;
#endif
// Only allow noise estimate under certain encoding mode.
// Enabled for 1 pass CBR, speed >=5, and if resolution is same as original.
// Not enabled for SVC mode and screen_content_mode.
// Not enabled for low resolutions.
  if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
      cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.speed >= 5 &&
      cpi->resize_state == ORIG && cpi->resize_pending == 0 && !cpi->use_svc &&
      cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->common.width >= 640 &&
cpi->common.height >= 480)
return 1;
else
@@ -65,8 +57,8 @@ static int enable_noise_estimation(VP9_COMP *const cpi) {
}
#if CONFIG_VP9_TEMPORAL_DENOISING
static void copy_frame(YV12_BUFFER_CONFIG *const dest,
                       const YV12_BUFFER_CONFIG *const src) {
int r;
const uint8_t *srcbuf = src->y_buffer;
uint8_t *destbuf = dest->y_buffer;
@@ -110,18 +102,15 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Estimate is between current source and last source.
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
#if CONFIG_VP9_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0) last_source = &cpi->denoiser.last_source;
#endif
ne->enabled = enable_noise_estimation(cpi);
  if (!ne->enabled || cm->current_video_frame % frame_period != 0 ||
      last_source == NULL || ne->last_w != cm->width ||
      ne->last_h != cm->height) {
#if CONFIG_VP9_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity > 0)
      copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif
if (last_source != NULL) {
ne->last_w = cm->width;
@@ -140,8 +129,8 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
int num_samples = 0;
uint64_t avg_est = 0;
int bsize = BLOCK_16X16;
    static const unsigned char const_source[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                                    0, 0, 0, 0, 0, 0, 0, 0 };
// Loop over sub-sample of 16x16 blocks of frame, and for blocks that have
// been encoded as zero/small mv at least x consecutive frames, compute
// the variance to update estimate of noise in the source.
@@ -167,8 +156,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
// 16x16 blocks, 1/4 sample of frame.
        if (mi_row % 4 == 0 && mi_col % 4 == 0 && mi_row < cm->mi_rows - 1 &&
            mi_col < cm->mi_cols - 1) {
int bl_index = mi_row * cm->mi_cols + mi_col;
int bl_index1 = bl_index + 1;
@@ -178,20 +166,16 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// been encoded as zero/low motion x (= thresh_consec_zeromv) frames
// in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all
// 4 sub-blocks for 16x16 block. Also, avoid skin blocks.
          int consec_zeromv =
              VPXMIN(cpi->consec_zero_mv[bl_index],
                     VPXMIN(cpi->consec_zero_mv[bl_index1],
                            VPXMIN(cpi->consec_zero_mv[bl_index2],
                                   cpi->consec_zero_mv[bl_index3])));
int is_skin = 0;
if (cpi->use_skin_detection) {
            is_skin =
                vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
                                       src_uvstride, bsize, consec_zeromv, 0);
}
if (frame_low_motion &&
cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
@@ -201,19 +185,15 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
!is_skin) {
// Compute variance.
unsigned int sse;
            unsigned int variance = cpi->fn_ptr[bsize].vf(
                src_y, src_ystride, last_src_y, last_src_ystride, &sse);
// Only consider this block as valid for noise measurement if the
// average term (sse - variance = N * avg^{2}, N = 16X16) of the
// temporal residual is small (avoid effects from lighting change).
if ((sse - variance) < thresh_sum_diff) {
unsigned int sse2;
              const unsigned int spatial_variance = cpi->fn_ptr[bsize].vf(
                  src_y, src_ystride, const_source, 0, &sse2);
// Avoid blocks with high brightness and high spatial variance.
if ((sse2 - spatial_variance) < thresh_sum_spatial &&
spatial_variance < thresh_spatial_var) {

View File

@@ -23,12 +23,7 @@
extern "C" {
#endif
typedef enum noise_level { kLowLow, kLow, kMedium, kHigh } NOISE_LEVEL;
typedef struct noise_estimate {
int enabled;
@@ -43,9 +38,7 @@ typedef struct noise_estimate {
struct VP9_COMP;
void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height);
NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne);

View File

@@ -33,10 +33,9 @@ static int get_max_filter_level(const VP9_COMP *cpi) {
}
}
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
                                VP9_COMP *const cpi, int filt_level,
                                int partial_frame) {
VP9_COMMON *const cm = &cpi->common;
int64_t filt_err;
@@ -44,8 +43,8 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
if (cpi->num_workers > 1)
vp9_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
                             filt_level, 1, partial_frame, cpi->workers,
                             cpi->num_workers, &cpi->lf_row_sync);
else
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
1, partial_frame);
@@ -78,8 +77,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
// Start the search at the previous frame filter level unless it is now out of
// range.
  int filt_mid = clamp(lf->last_filt_level, min_filter_level, max_filter_level);
int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
// Sum squared error at each filter level
int64_t ss_err[MAX_LOOP_FILTER + 1];
@@ -105,8 +103,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
bias = (bias * cpi->twopass.section_intra_rating) / 20;
// yx, bias less for large block size
  if (cm->tx_mode != ONLY_4X4) bias >>= 1;
if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
@@ -117,8 +114,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
// filter value.
if ((ss_err[filt_low] - bias) < best_err) {
// Was it actually better than the previous best?
      if (ss_err[filt_low] < best_err) best_err = ss_err[filt_low];
filt_best = filt_low;
}
@@ -154,17 +150,16 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
  lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
    lf->filter_level = 0;
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = get_max_filter_level(cpi);
const int q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
#if CONFIG_VP9_HIGHBITDEPTH
int filt_guess;
switch (cm->bit_depth) {
@@ -178,18 +173,18 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
assert(0 &&
"bit_depth should be VPX_BITS_8, VPX_BITS_10 "
"or VPX_BITS_12");
return;
}
#else
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
#endif // CONFIG_VP9_HIGHBITDEPTH
    if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
} else {
    lf->filter_level =
        search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
}
}
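A plausible reading of the fixed-point constants in the 8-bit branch above: 20723 / 2^18 ~= 0.0791 ~= 0.316206 / 4 and 1015158 / 2^18 ~= 3.87252, so the multiply-add and shift appears to evaluate the quoted linear fit with a rescaled q. A small check of one example value (q = 120 is arbitrary):

#include <stdio.h>

// Round-to-nearest right shift, as defined in vpx_dsp.
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

int main(void) {
  const int q = 120;  // example AC quantizer value
  const int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
  printf("filt_guess = %d\n", filt_guess);  // prints 13 for q = 120
  return 0;
}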

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_PICKLPF_H_
#define VP9_ENCODER_VP9_PICKLPF_H_

File diff suppressed because it is too large

View File

@@ -20,15 +20,12 @@ extern "C" {
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
                         int mi_row, int mi_col, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
                                int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx);
#ifdef __cplusplus

View File

@@ -21,13 +21,12 @@
#include "vp9/encoder/vp9_rd.h"
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       int skip_block, const int16_t *zbin_ptr,
                       const int16_t *round_ptr, const int16_t *quant_ptr,
                       const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                       tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                       uint16_t *eob_ptr, const int16_t *scan,
                       const int16_t *iscan) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
@@ -53,27 +52,21 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
      if (tmp) eob = i;
}
}
*eob_ptr = eob + 1;
}
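The (tmp ^ coeff_sign) - coeff_sign idiom in the loop above re-applies a sign to a magnitude without branching; coeff_sign is 0 or -1 from an arithmetic right shift. A minimal demonstration (assuming 32-bit int and arithmetic shift of negatives, as the C reference code does):

#include <assert.h>

int main(void) {
  const int coeff = -37;
  const int sign = coeff >> 31;               // 0 for non-negative, -1 for negative
  const int abs_val = (coeff ^ sign) - sign;  // 37
  assert(abs_val == 37);
  assert(((abs_val ^ sign) - sign) == coeff);  // sign re-applied: -37 again
  return 0;
}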
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
                              int skip_block, const int16_t *zbin_ptr,
                              const int16_t *round_ptr,
                              const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
int i;
int eob = -1;
// TODO(jingning) Decide the need of these arguments after the
@@ -97,8 +90,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 16);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
      if (abs_qcoeff) eob = i;
}
}
*eob_ptr = eob + 1;
@@ -108,13 +100,11 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
// TODO(jingning) Refactor this file and combine functions with similar
// operations.
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             int skip_block, const int16_t *zbin_ptr,
                             const int16_t *round_ptr, const int16_t *quant_ptr,
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
int i, eob = -1;
(void)zbin_ptr;
@@ -140,25 +130,19 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
}
      if (tmp) eob = i;
}
}
*eob_ptr = eob + 1;
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_quantize_fp_32x32_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -176,15 +160,14 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
        const int64_t tmp =
            abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
        abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 15);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
}
      if (abs_qcoeff) eob = i;
}
}
*eob_ptr = eob + 1;
@@ -199,30 +182,26 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block), 16, x->skip_block,
                          p->zbin, p->round, p->quant, p->quant_shift,
                          BLOCK_OFFSET(p->qcoeff, block),
                          BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant,
                          &p->eobs[block], scan, iscan);
return;
}
#endif
  vpx_quantize_b(BLOCK_OFFSET(p->coeff, block), 16, x->skip_block, p->zbin,
                 p->round, p->quant, p->quant_shift,
                 BLOCK_OFFSET(p->qcoeff, block),
                 BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant, &p->eobs[block],
                 scan, iscan);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
unsigned t;
int l, m;
t = d;
  for (l = 0; t > 1; l++) t >>= 1;
m = 1 + (1 << (16 + l)) / d;
*quant = (int16_t)(m - (1 << 16));
*shift = 1 << (16 - l);
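invert_quant() replaces division by the quantizer d with a multiply and shift: for d = 32 (an arbitrary example) it yields l = 5, m = 1 + (1 << 21) / 32 = 65537, hence quant = 1 and shift = 2048. A sketch of the two-stage multiply used in place of the division:

#include <assert.h>

int main(void) {
  const int quant = 1, shift = 2048;  // invert_quant() output for d = 32
  const int x = 1000;
  // Same structure as the quantizer's multiply/shift pipeline:
  const int q = ((((x * quant) >> 16) + x) * shift) >> 16;
  assert(q == x / 32);  // both evaluate to 31
  return 0;
}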
@@ -232,18 +211,15 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
const int quant = vp9_dc_quant(q, 0, bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
switch (bit_depth) {
    case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
    case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
    case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
}
#else
  (void)bit_depth;
return q == 0 ? 64 : (quant < 148 ? 84 : 80);
#endif
}
@@ -259,8 +235,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
for (i = 0; i < 2; ++i) {
int qrounding_factor_fp = i == 0 ? 48 : 42;
    if (q == 0) qrounding_factor_fp = 64;
// y
quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth)
@@ -275,8 +250,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
// uv
quant = i == 0 ? vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth)
: vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
      invert_quant(&quants->uv_quant[q][i], &quants->uv_quant_shift[q][i],
                   quant);
quants->uv_quant_fp[q][i] = (1 << 16) / quant;
quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
@@ -363,14 +338,11 @@ void vp9_set_quantizer(VP9_COMMON *cm, int q) {
// Table that converts 0-63 Q-range values passed in outside to the Qindex
// range used internally.
static const int quantizer_to_qindex[] = {
  0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
  52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
  104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
  156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
  208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
};
int vp9_quantizer_to_qindex(int quantizer) {
@@ -381,8 +353,7 @@ int vp9_qindex_to_quantizer(int qindex) {
int quantizer;
for (quantizer = 0; quantizer < 64; ++quantizer)
    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
return 63;
}
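The table maps the external 0-63 quantizer scale onto the internal 0-255 qindex scale (entries step by 4 through 244, then 249 and 255), and vp9_qindex_to_quantizer() inverts it by scanning for the first entry at or above the given qindex. A worked round trip:

// quantizer_to_qindex[10] == 40, so:
//   vp9_quantizer_to_qindex(10) -> 40
//   vp9_qindex_to_quantizer(40) -> 10  (40 is the first entry >= 40)
//   vp9_qindex_to_quantizer(41) -> 11  (44 is the first entry >= 41)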

File diff suppressed because it is too large

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_RATECTRL_H_
#define VP9_ENCODER_VP9_RATECTRL_H_
@@ -23,12 +22,12 @@ extern "C" {
#endif
// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
#define FIXED_GF_INTERVAL 8  // Used in some testing modes only
#define ONEHALFONLY_RESIZE 0
typedef enum {
INTER_NORMAL = 0,
@@ -54,36 +53,32 @@ typedef enum {
UP_ORIG = -2, // From 1/2 or 3/4 to orig.
} RESIZE_ACTION;
typedef enum { ORIG = 0, THREE_QUARTER = 1, ONE_HALF = 2 } RESIZE_STATE;
// Frame dimensions multiplier wrt the native frame size, in 1/16ths,
// specified for the scale-up case.
// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is
// intended to match the capabilities of the normative scaling filters,
// giving precedence to the up-scaling accuracy.
static const int frame_scale_factor[FRAME_SCALE_STEPS] = { 16, 24 };
// Multiplier of the target rate to be used as threshold for triggering scaling.
static const double rate_thresh_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
// Scale dependent Rate Correction Factor multipliers. Compensates for the
// greater number of bits per pixel generated in down-scaled frames.
static const double rcf_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
typedef struct {
// Rate targetting variables
  int base_frame_target;  // A baseline frame target before adjustment
                          // for previous under or over shoot.
  int this_frame_target;  // Actual frame target after rc adjustment.
int projected_frame_size;
int sb64_target_rate;
  int last_q[FRAME_TYPES];  // Separate values for Intra/Inter
  int last_boosted_qindex;  // Last boosted GF/KF/ARF q
  int last_kf_qindex;       // Q index of the last key frame coded.
int gfu_boost;
int last_boost;
@@ -179,8 +174,7 @@ void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass,
RATE_CONTROL *rc);
int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
                           double correction_factor, vpx_bit_depth_t bit_depth);
double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
@@ -241,8 +235,7 @@ void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
int *frame_over_shoot_limit);
// Picks q and q bounds given the target for bits
int vp9_rc_pick_q_and_bounds(const struct VP9_COMP *cpi, int *bottom_index,
                             int *top_index);
// Estimates q to achieve a target bits per frame

View File

@@ -40,7 +40,7 @@
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"
#define RD_THRESH_POW 1.25
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
@@ -98,8 +98,7 @@ static void fill_token_costs(vp9_coeff_cost *c,
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
vpx_prob probs[ENTROPY_NODES];
vp9_model_to_full_probs(p[t][i][j][k][l], probs);
          vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
vp9_coef_tree);
assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
@@ -142,28 +141,19 @@ void vp9_init_me_luts(void) {
#endif
}
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144 };
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
int64_t rdmult = 0;
switch (cpi->common.bit_depth) {
    case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
@@ -179,8 +169,7 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
}
  if (rdmult < 1) rdmult = 1;
return (int)rdmult;
}
@@ -188,21 +177,15 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
double q;
#if CONFIG_VP9_HIGHBITDEPTH
switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    case VPX_BITS_12: q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break;
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
}
#else
  (void)bit_depth;
q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(debargha): Adjust the function below.
@@ -240,7 +223,8 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
const int qindex =
clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
@@ -251,10 +235,9 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
if (bsize >= BLOCK_8X8) {
for (i = 0; i < MAX_MODES; ++i)
rd->threshes[segment_id][bsize][i] =
rd->thresh_mult[i] < thresh_max
? rd->thresh_mult[i] * t / 4
: INT_MAX;
rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
? rd->thresh_mult[i] * t / 4
: INT_MAX;
} else {
for (i = 0; i < MAX_REFS; ++i)
rd->threshes[segment_id][bsize][i] =
@@ -281,7 +264,9 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_error_per_bit(x, rd->RDMULT);
x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;
set_block_thresholds(cm, rd);
set_partition_probs(cm, xd);
@@ -335,19 +320,15 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
};
// Normalized distortion:
@@ -358,34 +339,29 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
};
const int tmp = (xsq_q10 >> 2) + 8;
const int k = get_msb(tmp) - 3;
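One reading of the index arithmetic above: tmp = (xsq_q10 >> 2) + 8 and k = get_msb(tmp) - 3 select which band of eight grid points in xsq_iq_q10[] contains xsq_q10, so rate_tab_q10[] and dist_tab_q10[] can be interpolated linearly within that band. A worked example under that reading:

// For xsq_q10 = 256: tmp = (256 >> 2) + 8 = 72, get_msb(72) = 6, so k = 3.
// Band 3 holds the eight grid points 224, 256, ..., 448 (spacing 32 q10
// units; each band doubles the spacing of the previous one), and the rate
// and distortion tables are interpolated inside that band.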
@@ -455,15 +431,12 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
    default: assert(0 && "Invalid transform size."); break;
}
}
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
int i;
int zero_seen = 0;
int best_index = 0;
@@ -473,9 +446,9 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
int near_same_nearest;
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES +
      (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
MV pred_mv[3];
pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
@@ -483,25 +456,22 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
pred_mv[2] = x->pred_mv[ref_frame];
assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
// Get the sad for each candidate reference mv.
for (i = 0; i < num_mv_refs; ++i) {
const MV *this_mv = &pred_mv[i];
int fp_row, fp_col;
    if (i == 1 && near_same_nearest) continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);
    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
// Find sad for current vector.
this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
ref_y_ptr, ref_y_stride);
@@ -520,8 +490,7 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
void vp9_setup_pred_block(const MACROBLOCKD *xd,
struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
const struct scale_factors *scale,
const struct scale_factors *scale_uv) {
int i;
@@ -534,21 +503,21 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd,
for (i = 0; i < MAX_MB_PLANE; ++i) {
setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
                     xd->plane[i].subsampling_y);
}
}
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
const int bw = b_width_log2_lookup[plane_bsize];
const int y = 4 * (raster_block >> bw);
const int x = 4 * (raster_block & ((1 << bw) - 1));
return y * stride + x;
}
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}
@@ -558,16 +527,16 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
const VP9_COMMON *const cm = &cpi->common;
const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
             : NULL;
}
int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
const MODE_INFO *const mi = xd->mi[0];
const int ctx = get_pred_context_switchable_interp(xd);
return SWITCHABLE_INTERP_RATE_FACTOR *
         cpi->switchable_interp_costs[ctx][mi->interp_filter];
}
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
@@ -616,7 +585,7 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
rd->thresh_mult[THR_H_PRED] += 2000;
rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
rd->thresh_mult[THR_D135_PRED] += 2500;
rd->thresh_mult[THR_D117_PRED] += 2500;
rd->thresh_mult[THR_D153_PRED] += 2500;
@@ -625,9 +594,10 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
}
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
RD_OPT *const rd = &cpi->rd;
const int idx = cpi->oxcf.mode == BEST;
memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
@@ -659,12 +629,9 @@ int vp9_get_intra_cost_penalty(int qindex, int qdelta,
const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
switch (bit_depth) {
    case VPX_BITS_8: return 20 * q;
    case VPX_BITS_10: return 5 * q;
    case VPX_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
@@ -673,4 +640,3 @@ int vp9_get_intra_cost_penalty(int qindex, int qdelta,
return 20 * q;
#endif // CONFIG_VP9_HIGHBITDEPTH
}

View File

@@ -23,23 +23,23 @@
extern "C" {
#endif
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) \
(ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
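RDCOST folds a rate term, scaled by the Lagrange multiplier RM, and a distortion term, scaled by 2^DM, into one comparable cost. A worked instance with arbitrary example values (assuming VP9_PROB_COST_SHIFT is 9):

// RM = 4096, DM = 3, R = 1536, D = 700:
//   rate term = ROUND_POWER_OF_TWO(1536 * 4096, 9) = 12288
//   dist term = 700 << 3                           = 5600
//   RDCOST(4096, 3, 1536, 700)                     = 17888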
#define QIDX_SKIP_THRESH 115
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
#define INVALID_MV 0x80008000
#define MAX_MODES 30
#define MAX_REFS 6
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
@@ -135,17 +135,16 @@ void vp9_initialize_rd_consts(struct VP9_COMP *cpi);
void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex);
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate, int64_t *dist);
int vp9_get_switchable_rate(const struct VP9_COMP *cpi,
const MACROBLOCKD *const xd);
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride);
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base);
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
int ref_frame);
@@ -161,12 +160,12 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
                               int best_mode_index);
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
int thresh_fact) {
  return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
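In rd_less_than_thresh(), thresh_fact acts as a q5 multiplier on thresh (32 is 1.0x; RD_THRESH_MAX_FACT, 64, caps it at 2.0x). A minimal sketch with hypothetical numbers:

#include <stdint.h>
#include <stdio.h>

static int less(int64_t best_rd, int thresh, int fact) {
  return best_rd < ((int64_t)thresh * fact >> 5);
}

int main(void) {
  printf("%d\n", less(1000, 900, 32)); /* 0: threshold is 900 * 1.0 = 900  */
  printf("%d\n", less(1000, 900, 48)); /* 1: threshold is 900 * 1.5 = 1350 */
  return 0;
}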
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
@@ -174,14 +173,12 @@ static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
x->errorperbit += (x->errorperbit == 0);
}
void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size);
void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);
void vp9_setup_pred_block(const MACROBLOCKD *xd,
struct buf_2d dst[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col,
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
const struct scale_factors *scale,
const struct scale_factors *scale_uv);

File diff suppressed because it is too large

View File

@@ -31,19 +31,14 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x,
void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
struct macroblock *x, int mi_row, int mi_col,
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
void vp9_rd_pick_inter_mode_sb_seg_skip(struct VP9_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
void vp9_rd_pick_inter_mode_sb_seg_skip(
struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
int vp9_internal_image_edge(struct VP9_COMP *cpi);
int vp9_active_h_edge(struct VP9_COMP *cpi, int mi_row, int mi_step);
@@ -52,10 +47,9 @@ int vp9_active_edge_sb(struct VP9_COMP *cpi, int mi_row, int mi_col);
void vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
struct macroblock *x, int mi_row, int mi_col,
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
#ifdef __cplusplus

View File

@@ -23,198 +23,118 @@
#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_resize.h"
#define FILTER_BITS 7
#define FILTER_BITS 7
#define INTERP_TAPS 8
#define SUBPEL_BITS 5
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
#define INTERP_PRECISION_BITS 32
#define INTERP_TAPS 8
#define SUBPEL_BITS 5
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
#define INTERP_PRECISION_BITS 32
typedef int16_t interp_kernel[INTERP_TAPS];
// Filters for interpolation (0.5-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
{-3, 0, 35, 64, 35, 0, -3, 0},
{-3, -1, 34, 64, 36, 1, -3, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 31, 63, 39, 2, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
{-2, -2, 28, 63, 42, 3, -4, 0},
{-2, -3, 27, 63, 43, 4, -4, 0},
{-2, -3, 25, 62, 45, 5, -4, 0},
{-2, -3, 24, 62, 46, 5, -4, 0},
{-2, -3, 23, 61, 47, 6, -4, 0},
{-2, -3, 21, 60, 49, 7, -4, 0},
{-1, -4, 20, 60, 50, 8, -4, -1},
{-1, -4, 19, 59, 51, 9, -4, -1},
{-1, -4, 17, 58, 52, 10, -4, 0},
{-1, -4, 16, 57, 53, 12, -4, -1},
{-1, -4, 15, 56, 54, 13, -4, -1},
{-1, -4, 14, 55, 55, 14, -4, -1},
{-1, -4, 13, 54, 56, 15, -4, -1},
{-1, -4, 12, 53, 57, 16, -4, -1},
{0, -4, 10, 52, 58, 17, -4, -1},
{-1, -4, 9, 51, 59, 19, -4, -1},
{-1, -4, 8, 50, 60, 20, -4, -1},
{0, -4, 7, 49, 60, 21, -3, -2},
{0, -4, 6, 47, 61, 23, -3, -2},
{0, -4, 5, 46, 62, 24, -3, -2},
{0, -4, 5, 45, 62, 25, -3, -2},
{0, -4, 4, 43, 63, 27, -3, -2},
{0, -4, 3, 42, 63, 28, -2, -2},
{0, -3, 2, 41, 63, 29, -2, -2},
{0, -3, 2, 39, 63, 31, -2, -2},
{0, -3, 1, 38, 64, 32, -1, -3},
{0, -3, 1, 36, 64, 34, -1, -3}
{ -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, -1, 34, 64, 36, 1, -3, 0 },
{ -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 31, 63, 39, 2, -3, 0 },
{ -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 28, 63, 42, 3, -4, 0 },
{ -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 },
{ -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 },
{ -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 50, 8, -4, -1 },
{ -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 17, 58, 52, 10, -4, 0 },
{ -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 56, 54, 13, -4, -1 },
{ -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 54, 56, 15, -4, -1 },
{ -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 10, 52, 58, 17, -4, -1 },
{ -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 8, 50, 60, 20, -4, -1 },
{ 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 },
{ 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 },
{ 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 42, 63, 28, -2, -2 },
{ 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 39, 63, 31, -2, -2 },
{ 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 36, 64, 34, -1, -3 }
};
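Each 8-tap kernel in this table (and in the band tables that follow) sums to 128, i.e. 1 << FILTER_BITS, giving unity gain at DC. A small in-file check sketch:

/* Sketch: verify unity DC gain for one kernel row. */
static int kernel_sum_ok(const interp_kernel k) {
  int i, s = 0;
  for (i = 0; i < INTERP_TAPS; ++i) s += k[i];
  return s == 128;  /* 1 << FILTER_BITS */
}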
// Filters for interpolation (0.625-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
{-1, -8, 33, 80, 33, -8, -1, 0},
{-1, -8, 30, 80, 35, -8, -1, 1},
{-1, -8, 28, 80, 37, -7, -2, 1},
{0, -8, 26, 79, 39, -7, -2, 1},
{0, -8, 24, 79, 41, -7, -2, 1},
{0, -8, 22, 78, 43, -6, -2, 1},
{0, -8, 20, 78, 45, -5, -3, 1},
{0, -8, 18, 77, 48, -5, -3, 1},
{0, -8, 16, 76, 50, -4, -3, 1},
{0, -8, 15, 75, 52, -3, -4, 1},
{0, -7, 13, 74, 54, -3, -4, 1},
{0, -7, 11, 73, 56, -2, -4, 1},
{0, -7, 10, 71, 58, -1, -4, 1},
{1, -7, 8, 70, 60, 0, -5, 1},
{1, -6, 6, 68, 62, 1, -5, 1},
{1, -6, 5, 67, 63, 2, -5, 1},
{1, -6, 4, 65, 65, 4, -6, 1},
{1, -5, 2, 63, 67, 5, -6, 1},
{1, -5, 1, 62, 68, 6, -6, 1},
{1, -5, 0, 60, 70, 8, -7, 1},
{1, -4, -1, 58, 71, 10, -7, 0},
{1, -4, -2, 56, 73, 11, -7, 0},
{1, -4, -3, 54, 74, 13, -7, 0},
{1, -4, -3, 52, 75, 15, -8, 0},
{1, -3, -4, 50, 76, 16, -8, 0},
{1, -3, -5, 48, 77, 18, -8, 0},
{1, -3, -5, 45, 78, 20, -8, 0},
{1, -2, -6, 43, 78, 22, -8, 0},
{1, -2, -7, 41, 79, 24, -8, 0},
{1, -2, -7, 39, 79, 26, -8, 0},
{1, -2, -7, 37, 80, 28, -8, -1},
{1, -1, -8, 35, 80, 30, -8, -1},
{ -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 30, 80, 35, -8, -1, 1 },
{ -1, -8, 28, 80, 37, -7, -2, 1 }, { 0, -8, 26, 79, 39, -7, -2, 1 },
{ 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 22, 78, 43, -6, -2, 1 },
{ 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 18, 77, 48, -5, -3, 1 },
{ 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 75, 52, -3, -4, 1 },
{ 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 11, 73, 56, -2, -4, 1 },
{ 0, -7, 10, 71, 58, -1, -4, 1 }, { 1, -7, 8, 70, 60, 0, -5, 1 },
{ 1, -6, 6, 68, 62, 1, -5, 1 }, { 1, -6, 5, 67, 63, 2, -5, 1 },
{ 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -5, 2, 63, 67, 5, -6, 1 },
{ 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 0, 60, 70, 8, -7, 1 },
{ 1, -4, -1, 58, 71, 10, -7, 0 }, { 1, -4, -2, 56, 73, 11, -7, 0 },
{ 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 52, 75, 15, -8, 0 },
{ 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 48, 77, 18, -8, 0 },
{ 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 43, 78, 22, -8, 0 },
{ 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 39, 79, 26, -8, 0 },
{ 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -1, -8, 35, 80, 30, -8, -1 },
};
// Filters for interpolation (0.75-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
{2, -11, 25, 96, 25, -11, 2, 0},
{2, -11, 22, 96, 28, -11, 2, 0},
{2, -10, 19, 95, 31, -11, 2, 0},
{2, -10, 17, 95, 34, -12, 2, 0},
{2, -9, 14, 94, 37, -12, 2, 0},
{2, -8, 12, 93, 40, -12, 1, 0},
{2, -8, 9, 92, 43, -12, 1, 1},
{2, -7, 7, 91, 46, -12, 1, 0},
{2, -7, 5, 90, 49, -12, 1, 0},
{2, -6, 3, 88, 52, -12, 0, 1},
{2, -5, 1, 86, 55, -12, 0, 1},
{2, -5, -1, 84, 58, -11, 0, 1},
{2, -4, -2, 82, 61, -11, -1, 1},
{2, -4, -4, 80, 64, -10, -1, 1},
{1, -3, -5, 77, 67, -9, -1, 1},
{1, -3, -6, 75, 70, -8, -2, 1},
{1, -2, -7, 72, 72, -7, -2, 1},
{1, -2, -8, 70, 75, -6, -3, 1},
{1, -1, -9, 67, 77, -5, -3, 1},
{1, -1, -10, 64, 80, -4, -4, 2},
{1, -1, -11, 61, 82, -2, -4, 2},
{1, 0, -11, 58, 84, -1, -5, 2},
{1, 0, -12, 55, 86, 1, -5, 2},
{1, 0, -12, 52, 88, 3, -6, 2},
{0, 1, -12, 49, 90, 5, -7, 2},
{0, 1, -12, 46, 91, 7, -7, 2},
{1, 1, -12, 43, 92, 9, -8, 2},
{0, 1, -12, 40, 93, 12, -8, 2},
{0, 2, -12, 37, 94, 14, -9, 2},
{0, 2, -12, 34, 95, 17, -10, 2},
{0, 2, -11, 31, 95, 19, -10, 2},
{0, 2, -11, 28, 96, 22, -11, 2}
{ 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 22, 96, 28, -11, 2, 0 },
{ 2, -10, 19, 95, 31, -11, 2, 0 }, { 2, -10, 17, 95, 34, -12, 2, 0 },
{ 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 12, 93, 40, -12, 1, 0 },
{ 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -7, 7, 91, 46, -12, 1, 0 },
{ 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 3, 88, 52, -12, 0, 1 },
{ 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, -1, 84, 58, -11, 0, 1 },
{ 2, -4, -2, 82, 61, -11, -1, 1 }, { 2, -4, -4, 80, 64, -10, -1, 1 },
{ 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 75, 70, -8, -2, 1 },
{ 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 70, 75, -6, -3, 1 },
{ 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 64, 80, -4, -4, 2 },
{ 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 58, 84, -1, -5, 2 },
{ 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 52, 88, 3, -6, 2 },
{ 0, 1, -12, 49, 90, 5, -7, 2 }, { 0, 1, -12, 46, 91, 7, -7, 2 },
{ 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 40, 93, 12, -8, 2 },
{ 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 34, 95, 17, -10, 2 },
{ 0, 2, -11, 31, 95, 19, -10, 2 }, { 0, 2, -11, 28, 96, 22, -11, 2 }
};
// Filters for interpolation (0.875-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
{3, -8, 13, 112, 13, -8, 3, 0},
{3, -7, 10, 112, 17, -9, 3, -1},
{2, -6, 7, 111, 21, -9, 3, -1},
{2, -5, 4, 111, 24, -10, 3, -1},
{2, -4, 1, 110, 28, -11, 3, -1},
{1, -3, -1, 108, 32, -12, 4, -1},
{1, -2, -3, 106, 36, -13, 4, -1},
{1, -1, -6, 105, 40, -14, 4, -1},
{1, -1, -7, 102, 44, -14, 4, -1},
{1, 0, -9, 100, 48, -15, 4, -1},
{1, 1, -11, 97, 53, -16, 4, -1},
{0, 1, -12, 95, 57, -16, 4, -1},
{0, 2, -13, 91, 61, -16, 4, -1},
{0, 2, -14, 88, 65, -16, 4, -1},
{0, 3, -15, 84, 69, -17, 4, 0},
{0, 3, -16, 81, 73, -16, 3, 0},
{0, 3, -16, 77, 77, -16, 3, 0},
{0, 3, -16, 73, 81, -16, 3, 0},
{0, 4, -17, 69, 84, -15, 3, 0},
{-1, 4, -16, 65, 88, -14, 2, 0},
{-1, 4, -16, 61, 91, -13, 2, 0},
{-1, 4, -16, 57, 95, -12, 1, 0},
{-1, 4, -16, 53, 97, -11, 1, 1},
{-1, 4, -15, 48, 100, -9, 0, 1},
{-1, 4, -14, 44, 102, -7, -1, 1},
{-1, 4, -14, 40, 105, -6, -1, 1},
{-1, 4, -13, 36, 106, -3, -2, 1},
{-1, 4, -12, 32, 108, -1, -3, 1},
{-1, 3, -11, 28, 110, 1, -4, 2},
{-1, 3, -10, 24, 111, 4, -5, 2},
{-1, 3, -9, 21, 111, 7, -6, 2},
{-1, 3, -9, 17, 112, 10, -7, 3}
{ 3, -8, 13, 112, 13, -8, 3, 0 }, { 3, -7, 10, 112, 17, -9, 3, -1 },
{ 2, -6, 7, 111, 21, -9, 3, -1 }, { 2, -5, 4, 111, 24, -10, 3, -1 },
{ 2, -4, 1, 110, 28, -11, 3, -1 }, { 1, -3, -1, 108, 32, -12, 4, -1 },
{ 1, -2, -3, 106, 36, -13, 4, -1 }, { 1, -1, -6, 105, 40, -14, 4, -1 },
{ 1, -1, -7, 102, 44, -14, 4, -1 }, { 1, 0, -9, 100, 48, -15, 4, -1 },
{ 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -12, 95, 57, -16, 4, -1 },
{ 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 88, 65, -16, 4, -1 },
{ 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 81, 73, -16, 3, 0 },
{ 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 73, 81, -16, 3, 0 },
{ 0, 4, -17, 69, 84, -15, 3, 0 }, { -1, 4, -16, 65, 88, -14, 2, 0 },
{ -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 57, 95, -12, 1, 0 },
{ -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 48, 100, -9, 0, 1 },
{ -1, 4, -14, 44, 102, -7, -1, 1 }, { -1, 4, -14, 40, 105, -6, -1, 1 },
{ -1, 4, -13, 36, 106, -3, -2, 1 }, { -1, 4, -12, 32, 108, -1, -3, 1 },
{ -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -10, 24, 111, 4, -5, 2 },
{ -1, 3, -9, 21, 111, 7, -6, 2 }, { -1, 3, -9, 17, 112, 10, -7, 3 }
};
// Filters for interpolation (full-band) - no filtering for integer pixels
static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
{0, 0, 0, 128, 0, 0, 0, 0},
{0, 1, -3, 128, 3, -1, 0, 0},
{-1, 2, -6, 127, 7, -2, 1, 0},
{-1, 3, -9, 126, 12, -4, 1, 0},
{-1, 4, -12, 125, 16, -5, 1, 0},
{-1, 4, -14, 123, 20, -6, 2, 0},
{-1, 5, -15, 120, 25, -8, 2, 0},
{-1, 5, -17, 118, 30, -9, 3, -1},
{-1, 6, -18, 114, 35, -10, 3, -1},
{-1, 6, -19, 111, 41, -12, 3, -1},
{-1, 6, -20, 107, 46, -13, 4, -1},
{-1, 6, -21, 103, 52, -14, 4, -1},
{-1, 6, -21, 99, 57, -16, 5, -1},
{-1, 6, -21, 94, 63, -17, 5, -1},
{-1, 6, -20, 89, 68, -18, 5, -1},
{-1, 6, -20, 84, 73, -19, 6, -1},
{-1, 6, -20, 79, 79, -20, 6, -1},
{-1, 6, -19, 73, 84, -20, 6, -1},
{-1, 5, -18, 68, 89, -20, 6, -1},
{-1, 5, -17, 63, 94, -21, 6, -1},
{-1, 5, -16, 57, 99, -21, 6, -1},
{-1, 4, -14, 52, 103, -21, 6, -1},
{-1, 4, -13, 46, 107, -20, 6, -1},
{-1, 3, -12, 41, 111, -19, 6, -1},
{-1, 3, -10, 35, 114, -18, 6, -1},
{-1, 3, -9, 30, 118, -17, 5, -1},
{0, 2, -8, 25, 120, -15, 5, -1},
{0, 2, -6, 20, 123, -14, 4, -1},
{0, 1, -5, 16, 125, -12, 4, -1},
{0, 1, -4, 12, 126, -9, 3, -1},
{0, 1, -2, 7, 127, -6, 2, -1},
{0, 0, -1, 3, 128, -3, 1, 0}
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -3, 128, 3, -1, 0, 0 },
{ -1, 2, -6, 127, 7, -2, 1, 0 }, { -1, 3, -9, 126, 12, -4, 1, 0 },
{ -1, 4, -12, 125, 16, -5, 1, 0 }, { -1, 4, -14, 123, 20, -6, 2, 0 },
{ -1, 5, -15, 120, 25, -8, 2, 0 }, { -1, 5, -17, 118, 30, -9, 3, -1 },
{ -1, 6, -18, 114, 35, -10, 3, -1 }, { -1, 6, -19, 111, 41, -12, 3, -1 },
{ -1, 6, -20, 107, 46, -13, 4, -1 }, { -1, 6, -21, 103, 52, -14, 4, -1 },
{ -1, 6, -21, 99, 57, -16, 5, -1 }, { -1, 6, -21, 94, 63, -17, 5, -1 },
{ -1, 6, -20, 89, 68, -18, 5, -1 }, { -1, 6, -20, 84, 73, -19, 6, -1 },
{ -1, 6, -20, 79, 79, -20, 6, -1 }, { -1, 6, -19, 73, 84, -20, 6, -1 },
{ -1, 5, -18, 68, 89, -20, 6, -1 }, { -1, 5, -17, 63, 94, -21, 6, -1 },
{ -1, 5, -16, 57, 99, -21, 6, -1 }, { -1, 4, -14, 52, 103, -21, 6, -1 },
{ -1, 4, -13, 46, 107, -20, 6, -1 }, { -1, 3, -12, 41, 111, -19, 6, -1 },
{ -1, 3, -10, 35, 114, -18, 6, -1 }, { -1, 3, -9, 30, 118, -17, 5, -1 },
{ 0, 2, -8, 25, 120, -15, 5, -1 }, { 0, 2, -6, 20, 123, -14, 4, -1 },
{ 0, 1, -5, 16, 125, -12, 4, -1 }, { 0, 1, -4, 12, 126, -9, 3, -1 },
{ 0, 1, -2, 7, 127, -6, 2, -1 }, { 0, 0, -1, 3, 128, -3, 1, 0 }
};
// Filters for factor of 2 downsampling.
static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1};
static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3};
static const int16_t vp9_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
static const int16_t vp9_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
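Expanded to their full symmetric forms, both half filters also sum to 128: the even variant mirrors every tap, the odd variant mirrors all taps but the center one, matching the accumulation loops in down2_symeven()/down2_symodd() below. A check sketch:

#include <stdio.h>

int main(void) {
  static const int even_half[4] = { 56, 12, -3, -1 };
  static const int odd_half[4] = { 64, 35, 0, -3 };
  int i, even_sum = 0, odd_sum = odd_half[0];
  for (i = 0; i < 4; ++i) even_sum += 2 * even_half[i];  /* all mirrored */
  for (i = 1; i < 4; ++i) odd_sum += 2 * odd_half[i];    /* center once */
  printf("%d %d\n", even_sum, odd_sum);                  /* 128 128 */
  return 0;
}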
static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
int outlength16 = outlength * 16;
@@ -232,11 +152,14 @@ static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
static void interpolate(const uint8_t *const input, int inlength,
uint8_t *output, int outlength) {
const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) /
outlength;
const int64_t offset = inlength > outlength ?
(((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
-(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
const int64_t delta =
(((uint64_t)inlength << 32) + outlength / 2) / outlength;
const int64_t offset =
inlength > outlength
? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
outlength
: -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
outlength;
uint8_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
int64_t y;
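The delta/offset pair sets up a 32.32 fixed-point walk over source positions: for output pixel x the source position is delta * x + offset, whose high bits give the integer pel and whose next SUBPEL_BITS select the kernel. A sketch; the sub-pel extraction shift by INTERP_PRECISION_BITS - SUBPEL_BITS is an assumption here, mirroring how this file uses the position elsewhere:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int inlength = 64, outlength = 48;  /* hypothetical downscale */
  const int64_t delta =
      (((uint64_t)inlength << 32) + outlength / 2) / outlength;
  const int64_t offset =  /* inlength > outlength branch from above */
      (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength;
  int x;
  for (x = 0; x < 4; ++x) {
    const int64_t y = delta * x + offset;
    const int int_pel = (int)(y >> 32);
    const int sub_pel = (int)(y >> (32 - 5)) & 31;  /* SUBPEL_BITS = 5 */
    printf("out %d -> src %d + %d/32\n", x, int_pel, sub_pel);
  }
  return 0;
}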
@@ -253,8 +176,8 @@ static void interpolate(const uint8_t *const input, int inlength,
x1 = x;
x = outlength - 1;
y = delta * x + offset;
while ((y >> INTERP_PRECISION_BITS) +
(int64_t)(INTERP_TAPS / 2) >= inlength) {
while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
inlength) {
x--;
y -= delta;
}
@@ -268,8 +191,8 @@ static void interpolate(const uint8_t *const input, int inlength,
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k) {
const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
sum += filter[k] * input[(pk < 0 ? 0 :
(pk >= inlength ? inlength - 1 : pk))];
sum += filter[k] *
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
}
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
@@ -282,9 +205,9 @@ static void interpolate(const uint8_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
0 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0
? 0
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
// Middle part.
@@ -306,9 +229,9 @@ static void interpolate(const uint8_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
inlength ? inlength - 1 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength
? inlength - 1
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
}
@@ -332,7 +255,7 @@ static void down2_symeven(const uint8_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@@ -362,7 +285,7 @@ static void down2_symeven(const uint8_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[i - j] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@@ -388,7 +311,7 @@ static void down2_symodd(const uint8_t *const input, int length,
for (j = 1; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@@ -417,7 +340,7 @@ static void down2_symodd(const uint8_t *const input, int length,
int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
for (j = 1; j < filter_len_half; ++j) {
sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel(sum);
@@ -427,8 +350,7 @@ static void down2_symodd(const uint8_t *const input, int length,
static int get_down2_length(int length, int steps) {
int s;
for (s = 0; s < steps; ++s)
length = (length + 1) >> 1;
for (s = 0; s < steps; ++s) length = (length + 1) >> 1;
return length;
}
@@ -442,11 +364,8 @@ static int get_down2_steps(int in_length, int out_length) {
return steps;
}
static void resize_multistep(const uint8_t *const input,
int length,
uint8_t *output,
int olength,
uint8_t *otmp) {
static void resize_multistep(const uint8_t *const input, int length,
uint8_t *output, int olength, uint8_t *otmp) {
int steps;
if (length == olength) {
memcpy(output, input, sizeof(output[0]) * length);
@@ -501,37 +420,31 @@ static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
}
}
void vp9_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
void vp9_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride) {
int i;
uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
(width < height ? height : width));
uint8_t *tmpbuf =
(uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width));
uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
if (intbuf == NULL || tmpbuf == NULL ||
arrbuf == NULL || arrbuf2 == NULL)
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
assert(width > 0);
assert(height > 0);
assert(width2 > 0);
assert(height2 > 0);
for (i = 0; i < height; ++i)
resize_multistep(input + in_stride * i, width,
intbuf + width2 * i, width2, tmpbuf);
resize_multistep(input + in_stride * i, width, intbuf + width2 * i, width2,
tmpbuf);
for (i = 0; i < width2; ++i) {
fill_col_to_arr(intbuf + i, width2, height, arrbuf);
resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf);
fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
}
Error:
Error:
free(intbuf);
free(tmpbuf);
free(arrbuf);
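vp9_resize_plane() is separable: each source row is resized into a width2-by-height intermediate buffer, then each intermediate column is gathered, resized, and scattered into the output. A usage sketch with hypothetical sizes and contiguous strides:

#include <stdint.h>
#include "vp9/encoder/vp9_resize.h"

static void shrink_example(void) {
  static uint8_t src[64 * 64], dst[48 * 48];
  vp9_resize_plane(src, /*height=*/64, /*width=*/64, /*in_stride=*/64,
                   dst, /*height2=*/48, /*width2=*/48, /*out_stride=*/48);
}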
@@ -543,9 +456,12 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
uint16_t *output, int outlength, int bd) {
const int64_t delta =
(((uint64_t)inlength << 32) + outlength / 2) / outlength;
const int64_t offset = inlength > outlength ?
(((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
-(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
const int64_t offset =
inlength > outlength
? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
outlength
: -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
outlength;
uint16_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
int64_t y;
@@ -562,8 +478,8 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
x1 = x;
x = outlength - 1;
y = delta * x + offset;
while ((y >> INTERP_PRECISION_BITS) +
(int64_t)(INTERP_TAPS / 2) >= inlength) {
while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
inlength) {
x--;
y -= delta;
}
@@ -578,7 +494,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
for (k = 0; k < INTERP_TAPS; ++k) {
const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
sum += filter[k] *
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
}
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
@@ -591,9 +507,9 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] *
input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
0 : int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0
? 0
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// Middle part.
@@ -615,9 +531,9 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
filter = interp_filters[sub_pel];
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
inlength ? inlength - 1 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength
? inlength - 1
: int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
}
@@ -641,7 +557,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@@ -671,7 +587,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
for (j = 0; j < filter_len_half; ++j) {
sum += (input[i - j] +
input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@@ -680,7 +596,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
}
static void highbd_down2_symodd(const uint16_t *const input, int length,
uint16_t *output, int bd) {
uint16_t *output, int bd) {
// Actual filter len = 2 * filter_len_half - 1.
static const int16_t *filter = vp9_down2_symodd_half_filter;
const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2;
@@ -697,7 +613,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
for (j = 1; j < filter_len_half; ++j) {
sum += (input[(i - j < 0 ? 0 : i - j)] +
input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@@ -726,7 +642,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
for (j = 1; j < filter_len_half; ++j) {
sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
filter[j];
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_highbd(sum, bd);
@@ -734,12 +650,9 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
}
}
static void highbd_resize_multistep(const uint16_t *const input,
int length,
uint16_t *output,
int olength,
uint16_t *otmp,
int bd) {
static void highbd_resize_multistep(const uint16_t *const input, int length,
uint16_t *output, int olength,
uint16_t *otmp, int bd) {
int steps;
if (length == olength) {
memcpy(output, input, sizeof(output[0]) * length);
@@ -796,23 +709,16 @@ static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
}
}
void vp9_highbd_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
int out_stride,
int bd) {
void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2,
int width2, int out_stride, int bd) {
int i;
uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
(width < height ? height : width));
uint16_t *tmpbuf =
(uint16_t *)malloc(sizeof(uint16_t) * (width < height ? height : width));
uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height);
uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2);
if (intbuf == NULL || tmpbuf == NULL ||
arrbuf == NULL || arrbuf2 == NULL)
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
for (i = 0; i < height; ++i) {
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
@@ -820,13 +726,12 @@ void vp9_highbd_resize_plane(const uint8_t *const input,
}
for (i = 0; i < width2; ++i) {
highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf,
bd);
highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd);
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
arrbuf2);
}
Error:
Error:
free(intbuf);
free(tmpbuf);
free(arrbuf);
@@ -834,96 +739,82 @@ void vp9_highbd_resize_plane(const uint8_t *const input,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_resize_frame420(const uint8_t *const y,
int y_stride,
void vp9_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth) {
vp9_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride);
vp9_resize_plane(u, height / 2, width / 2, uv_stride,
ou, oheight / 2, owidth / 2, ouv_stride);
vp9_resize_plane(v, height / 2, width / 2, uv_stride,
ov, oheight / 2, owidth / 2, ouv_stride);
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
vp9_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
vp9_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
owidth / 2, ouv_stride);
vp9_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
owidth / 2, ouv_stride);
}
void vp9_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth) {
vp9_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride);
vp9_resize_plane(u, height, width / 2, uv_stride,
ou, oheight, owidth / 2, ouv_stride);
vp9_resize_plane(v, height, width / 2, uv_stride,
ov, oheight, owidth / 2, ouv_stride);
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
vp9_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
vp9_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
ouv_stride);
vp9_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
ouv_stride);
}
void vp9_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth) {
vp9_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride);
vp9_resize_plane(u, height, width, uv_stride,
ou, oheight, owidth, ouv_stride);
vp9_resize_plane(v, height, width, uv_stride,
ov, oheight, owidth, ouv_stride);
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
vp9_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
vp9_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
ouv_stride);
vp9_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
ouv_stride);
}
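The three frame wrappers differ only in chroma geometry: 4:2:0 halves both chroma dimensions, 4:2:2 halves width only, 4:4:4 neither. A usage sketch for 4:2:0 with hypothetical buffers and packed strides:

#include <stdint.h>
#include "vp9/encoder/vp9_resize.h"

static void resize_420_example(const uint8_t *y, const uint8_t *u,
                               const uint8_t *v, uint8_t *oy, uint8_t *ou,
                               uint8_t *ov) {
  /* 640x480 luma in, 480x360 out; chroma planes are half size each way. */
  vp9_resize_frame420(y, /*y_stride=*/640, u, v, /*uv_stride=*/320,
                      /*height=*/480, /*width=*/640, oy, /*oy_stride=*/480,
                      ou, ov, /*ouv_stride=*/240, /*oheight=*/360,
                      /*owidth=*/480);
}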
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_resize_frame420(const uint8_t *const y,
int y_stride,
void vp9_highbd_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth, int bd) {
vp9_highbd_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride, bd);
vp9_highbd_resize_plane(u, height / 2, width / 2, uv_stride,
ou, oheight / 2, owidth / 2, ouv_stride, bd);
vp9_highbd_resize_plane(v, height / 2, width / 2, uv_stride,
ov, oheight / 2, owidth / 2, ouv_stride, bd);
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
vp9_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
oy_stride, bd);
vp9_highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
owidth / 2, ouv_stride, bd);
vp9_highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
owidth / 2, ouv_stride, bd);
}
void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth, int bd) {
vp9_highbd_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride, bd);
vp9_highbd_resize_plane(u, height, width / 2, uv_stride,
ou, oheight, owidth / 2, ouv_stride, bd);
vp9_highbd_resize_plane(v, height, width / 2, uv_stride,
ov, oheight, owidth / 2, ouv_stride, bd);
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
vp9_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
oy_stride, bd);
vp9_highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight,
owidth / 2, ouv_stride, bd);
vp9_highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight,
owidth / 2, ouv_stride, bd);
}
void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride,
int height, int width,
uint8_t *oy, int oy_stride,
uint8_t *ou, uint8_t *ov, int ouv_stride,
int oheight, int owidth, int bd) {
vp9_highbd_resize_plane(y, height, width, y_stride,
oy, oheight, owidth, oy_stride, bd);
vp9_highbd_resize_plane(u, height, width, uv_stride,
ou, oheight, owidth, ouv_stride, bd);
vp9_highbd_resize_plane(v, height, width, uv_stride,
ov, oheight, owidth, ouv_stride, bd);
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
vp9_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
oy_stride, bd);
vp9_highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
ouv_stride, bd);
vp9_highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
ouv_stride, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH

View File

@@ -18,116 +18,51 @@
extern "C" {
#endif
void vp9_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
void vp9_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride);
void vp9_resize_frame420(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth);
void vp9_resize_frame422(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth);
void vp9_resize_frame444(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth);
void vp9_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
void vp9_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
void vp9_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_resize_plane(const uint8_t *const input,
int height,
int width,
int in_stride,
uint8_t *output,
int height2,
int width2,
int out_stride,
int bd);
void vp9_highbd_resize_frame420(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth,
int bd);
void vp9_highbd_resize_frame422(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth,
int bd);
void vp9_highbd_resize_frame444(const uint8_t *const y,
int y_stride,
const uint8_t *const u,
const uint8_t *const v,
int uv_stride,
int height,
int width,
uint8_t *oy,
int oy_stride,
uint8_t *ou,
uint8_t *ov,
int ouv_stride,
int oheight,
int owidth,
int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2,
int width2, int out_stride, int bd);
void vp9_highbd_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_ENCODER_VP9_RESIZE_H_
#endif // VP9_ENCODER_VP9_RESIZE_H_

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits.h>
#include "vpx_mem/vpx_mem.h"
@@ -31,8 +30,7 @@ void vp9_disable_segmentation(struct segmentation *seg) {
seg->update_data = 0;
}
void vp9_set_segment_data(struct segmentation *seg,
signed char *feature_data,
void vp9_set_segment_data(struct segmentation *seg, signed char *feature_data,
unsigned char abs_delta) {
seg->abs_delta = abs_delta;
@@ -75,13 +73,11 @@ static int cost_segmap(int *segcounts, vpx_prob *probs) {
const int c4567 = c45 + c67;
// Cost the top node of the tree
int cost = c0123 * vp9_cost_zero(probs[0]) +
c4567 * vp9_cost_one(probs[0]);
int cost = c0123 * vp9_cost_zero(probs[0]) + c4567 * vp9_cost_one(probs[0]);
// Cost subsequent levels
if (c0123 > 0) {
cost += c01 * vp9_cost_zero(probs[1]) +
c23 * vp9_cost_one(probs[1]);
cost += c01 * vp9_cost_zero(probs[1]) + c23 * vp9_cost_one(probs[1]);
if (c01 > 0)
cost += segcounts[0] * vp9_cost_zero(probs[3]) +
@@ -92,8 +88,7 @@ static int cost_segmap(int *segcounts, vpx_prob *probs) {
}
if (c4567 > 0) {
cost += c45 * vp9_cost_zero(probs[2]) +
c67 * vp9_cost_one(probs[2]);
cost += c45 * vp9_cost_zero(probs[2]) + c67 * vp9_cost_one(probs[2]);
if (c45 > 0)
cost += segcounts[4] * vp9_cost_zero(probs[5]) +
@@ -110,12 +105,11 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *tile, MODE_INFO **mi,
int *no_pred_segcounts,
int (*temporal_predictor_count)[2],
int *t_unpred_seg_counts,
int bw, int bh, int mi_row, int mi_col) {
int *t_unpred_seg_counts, int bw, int bh, int mi_row,
int mi_col) {
int segment_id;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
xd->mi = mi;
segment_id = xd->mi[0]->segment_id;
@@ -129,8 +123,8 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
if (cm->frame_type != KEY_FRAME) {
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
// Test to see if the segment id matches the predicted value.
const int pred_segment_id = get_segment_id(cm, cm->last_frame_seg_map,
bsize, mi_row, mi_col);
const int pred_segment_id =
get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col);
const int pred_flag = pred_segment_id == segment_id;
const int pred_context = vp9_get_pred_context_seg_id(xd);
@@ -140,8 +134,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
temporal_predictor_count[pred_context][pred_flag]++;
// Update the "unpredicted" segment count
if (!pred_flag)
t_unpred_seg_counts[segment_id]++;
if (!pred_flag) t_unpred_seg_counts[segment_id]++;
}
}
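For intuition about the vp9_cost_zero()/vp9_cost_one() weights used by cost_segmap() above: a rough floating-point model, not the libvpx fixed-point tables, is that coding a bit whose zero-probability is p/256 costs about -log2(p/256) bits, stored scaled by 512 (q9):

#include <math.h>
#include <stdio.h>

int main(void) {
  int p;
  for (p = 32; p <= 224; p += 64)
    printf("p=%3d  ~%4.0f (q9 bits to code a zero)\n", p,
           -log2(p / 256.0) * 512.0);
  return 0;
}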
@@ -149,15 +142,13 @@ static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *tile, MODE_INFO **mi,
int *no_pred_segcounts,
int (*temporal_predictor_count)[2],
int *t_unpred_seg_counts,
int mi_row, int mi_col,
int *t_unpred_seg_counts, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
const int mis = cm->mi_stride;
int bw, bh;
const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
bw = num_8x8_blocks_wide_lookup[mi[0]->sb_type];
bh = num_8x8_blocks_high_lookup[mi[0]->sb_type];
@@ -174,9 +165,9 @@ static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
} else if (bw < bs && bh == bs) {
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
count_segs(cm, xd, tile, mi + hbs,
no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
hbs, bs, mi_row, mi_col + hbs);
count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
mi_col + hbs);
} else {
const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
int n;
@@ -187,9 +178,8 @@ static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
const int mi_dc = hbs * (n & 1);
const int mi_dr = hbs * (n >> 1);
count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc],
no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts,
count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row + mi_dr, mi_col + mi_dc, subsize);
}
}
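The PARTITION_SPLIT recursion above indexes its four quadrants with bit tricks on n: column offset hbs * (n & 1), row offset hbs * (n >> 1). A sketch enumerating the offsets:

#include <stdio.h>

int main(void) {
  const int hbs = 4;  /* hypothetical: 64x64 block is 8 mi units, hbs = 4 */
  int n;
  for (n = 0; n < 4; ++n)
    printf("quadrant %d: row + %d, col + %d\n", n, hbs * (n >> 1),
           hbs * (n & 1));
  return 0;
}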
@@ -230,8 +220,8 @@ void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) {
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += 8, mi += 8)
count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row, mi_col, BLOCK_64X64);
temporal_predictor_count, t_unpred_seg_counts, mi_row,
mi_col, BLOCK_64X64);
}
}

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_SEGMENTATION_H_
#define VP9_ENCODER_VP9_SEGMENTATION_H_
@@ -22,11 +21,9 @@ extern "C" {
void vp9_enable_segmentation(struct segmentation *seg);
void vp9_disable_segmentation(struct segmentation *seg);
void vp9_disable_segfeature(struct segmentation *seg,
int segment_id,
void vp9_disable_segfeature(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id);
void vp9_clear_segdata(struct segmentation *seg,
int segment_id,
void vp9_clear_segdata(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id);
// The values given for each segment can be either deltas (from the default

View File

@@ -18,11 +18,14 @@
#define MODEL_MODE 1
// Fixed-point skin color model parameters.
static const int skin_mean[5][2] = {
{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}};
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000,
800000}; // q18
static const int skin_mean[5][2] = { { 7463, 9614 },
{ 6400, 10240 },
{ 7040, 10240 },
{ 8320, 9280 },
{ 6800, 9614 } };
static const int skin_inv_cov[4] = { 4107, 1663, 1663, 2157 }; // q16
static const int skin_threshold[6] = { 1570636, 1400000, 800000,
800000, 800000, 800000 }; // q18
// Thresholds on luminance.
static const int y_low = 40;
@@ -41,10 +44,9 @@ static int evaluate_skin_color_difference(int cb, int cr, int idx) {
const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
const int skin_diff = skin_inv_cov[0] * cb_diff_q2 +
skin_inv_cov[1] * cbcr_diff_q2 +
skin_inv_cov[2] * cbcr_diff_q2 +
skin_inv_cov[3] * cr_diff_q2;
const int skin_diff =
skin_inv_cov[0] * cb_diff_q2 + skin_inv_cov[1] * cbcr_diff_q2 +
skin_inv_cov[2] * cbcr_diff_q2 + skin_inv_cov[3] * cr_diff_q2;
return skin_diff;
}
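evaluate_skin_color_difference() is a fixed-point quadratic form: the chroma offsets from a cluster mean, weighted by the q16 inverse covariance, are later compared against a q18 per-cluster threshold. A minimal sketch mirroring the expression above; the q2 diffs are hypothetical inputs (the real code derives them from cb/cr and skin_mean[idx]):

static int skin_diff_sketch(int cb_diff_q2, int cbcr_diff_q2, int cr_diff_q2) {
  return skin_inv_cov[0] * cb_diff_q2 + skin_inv_cov[1] * cbcr_diff_q2 +
         skin_inv_cov[2] * cbcr_diff_q2 + skin_inv_cov[3] * cr_diff_q2;
}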
@@ -58,20 +60,18 @@ int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr,
} else {
int i = 0;
// Exit on grey.
if (cb == 128 && cr == 128)
return 0;
if (cb == 128 && cr == 128) return 0;
// Exit on very strong cb.
if (cb > 150 && cr < 110)
return 0;
if (cb > 150 && cr < 110) return 0;
for (; i < 5; i++) {
int skin_color_diff = evaluate_skin_color_difference(cb, cr, i);
if (skin_color_diff < skin_threshold[i + 1]) {
if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
return 0;
else if (motion == 0 &&
skin_color_diff > (skin_threshold[i + 1] >> 1))
return 0;
else
if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
return 0;
else if (motion == 0 &&
skin_color_diff > (skin_threshold[i + 1] >> 1))
return 0;
else
return 1;
}
      // Exit if difference is much larger than the threshold.
@@ -100,13 +100,11 @@ int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
const uint8_t ysource = y[y_height_shift * stride + y_width_shift];
const uint8_t usource = u[uv_height_shift * strideuv + uv_width_shift];
const uint8_t vsource = v[uv_height_shift * strideuv + uv_width_shift];
if (consec_zeromv > 25 && curr_motion_magn == 0)
motion = 0;
if (consec_zeromv > 25 && curr_motion_magn == 0) motion = 0;
return vp9_skin_pixel(ysource, usource, vsource, motion);
}
}
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
@@ -129,11 +127,11 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
int mode_filter = 0;
YV12_BUFFER_CONFIG skinmap;
memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG));
if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment)) {
vpx_free_frame_buffer(&skinmap);
return;
if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height, cm->subsampling_x,
cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS,
cm->byte_alignment)) {
vpx_free_frame_buffer(&skinmap);
return;
}
memset(skinmap.buffer_alloc, 128, skinmap.frame_size);
y = skinmap.y_buffer;
@@ -153,11 +151,11 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
uint8_t usource2 = src_u[(uvpos + 1) * src_uvstride + uvpos];
uint8_t vsource2 = src_v[(uvpos + 1) * src_uvstride + uvpos];
uint8_t ysource3 = src_y[ypos * src_ystride + (ypos + 1)];
uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)];
uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)];
uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)];
uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)];
uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)];
uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2;
usource = (usource + usource2 + usource3 + usource4) >> 2;
vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2;
@@ -172,16 +170,15 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
if (y_bsize == 8)
consec_zeromv = cpi->consec_zero_mv[bl_index];
else
consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
VPXMIN(cpi->consec_zero_mv[bl_index1],
VPXMIN(cpi->consec_zero_mv[bl_index2],
cpi->consec_zero_mv[bl_index3])));
if (y_bsize == 16)
block_size = BLOCK_16X16;
is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
src_uvstride, block_size,
consec_zeromv,
0);
consec_zeromv =
VPXMIN(cpi->consec_zero_mv[bl_index],
VPXMIN(cpi->consec_zero_mv[bl_index1],
VPXMIN(cpi->consec_zero_mv[bl_index2],
cpi->consec_zero_mv[bl_index3])));
if (y_bsize == 16) block_size = BLOCK_16X16;
is_skin =
vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
src_uvstride, block_size, consec_zeromv, 0);
}
for (i = 0; i < y_bsize; i++) {
for (j = 0; j < y_bsize; j++) {

View File

@@ -16,21 +16,23 @@
#include "vpx_dsp/vpx_dsp_common.h"
// Mesh search patterns for various speed settings
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] =
{{64, 4}, {28, 2}, {15, 1}, {7, 1}};
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
{ 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
};
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1]
[MAX_MESH_STEP] =
{{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
{{64, 8}, {14, 2}, {7, 1}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
static MESH_PATTERN
good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
};
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] =
{50, 25, 15, 5, 1, 1};
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
50, 25, 15, 5, 1, 1
};
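Reading {range, interval} as the half-width of the search window and the grid spacing (an assumption here; the search itself lives in the motion-search code, not this file), the candidate count per mesh step is easy to sketch:

typedef struct { int range, interval; } MESH_PATTERN_SKETCH;

static int count_mesh_points(MESH_PATTERN_SKETCH p) {
  int n = 0, r, c;
  for (r = -p.range; r <= p.range; r += p.interval)
    for (c = -p.range; c <= p.range; c += p.interval) ++n;
  return n;  /* e.g. {64, 4} visits 33 * 33 = 1089 candidates */
}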
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
@@ -67,8 +69,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
if (speed >= 1) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->partition_search_breakout_dist_thr = (1 << 23);
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
@@ -78,8 +80,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
if (speed >= 2) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
sf->partition_search_breakout_dist_thr = (1 << 24);
sf->partition_search_breakout_rate_thr = 120;
@@ -147,7 +149,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
}
sf->use_square_only_threshold = BLOCK_4X4;
sf->less_rectangular_check = 1;
sf->less_rectangular_check = 1;
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
@@ -167,17 +169,17 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
}
if (speed >= 2) {
sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
: USE_LARGESTALL;
sf->tx_size_search_method =
frame_is_boosted(cpi) ? USE_FULL_RD : USE_LARGESTALL;
// Reference masking is not supported in dynamic scaling mode.
sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0;
sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 :
FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->mode_search_skip_flags =
(cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
@@ -186,8 +188,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
if (speed >= 3) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
: USE_LARGESTALL;
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 1;
@@ -234,13 +236,14 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
}
static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf, int speed) {
SPEED_FEATURES *sf,
int speed) {
VP9_COMMON *const cm = &cpi->common;
if (speed >= 1) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
}
@@ -248,8 +251,8 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
if (speed >= 2) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
}
@@ -264,13 +267,13 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
}
if (speed >= 7) {
sf->encode_breakout_thresh = (VPXMIN(cm->width, cm->height) >= 720) ?
800 : 300;
sf->encode_breakout_thresh =
(VPXMIN(cm->width, cm->height) >= 720) ? 800 : 300;
}
}
static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
int speed, vp9e_tune_content content) {
static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
vp9e_tune_content content) {
VP9_COMMON *const cm = &cpi->common;
const int is_keyframe = cm->frame_type == KEY_FRAME;
const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
@@ -285,8 +288,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->quant_coeff_opt = 0;
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
: USE_LARGESTALL;
sf->tx_size_search_method =
frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
sf->use_rd_breakout = 1;
@@ -300,11 +303,11 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
}
if (speed >= 2) {
sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 :
FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->mode_search_skip_flags =
(cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
// Reference masking only enabled for 1 spatial layer, and if none of the
@@ -315,15 +318,14 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
(cpi->external_resize == 1 ||
cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) {
MV_REFERENCE_FRAME ref_frame;
static const int flag_list[4] =
{0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
const struct scale_factors *const scale_fac =
&cm->frame_refs[ref_frame - 1].sf;
if (vp9_is_scaled(scale_fac))
sf->reference_masking = 0;
if (vp9_is_scaled(scale_fac)) sf->reference_masking = 0;
}
}
}
@@ -360,8 +362,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->use_fast_coef_costing = 0;
sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
sf->adjust_partitioning_from_last_frame =
cm->last_frame_type != cm->frame_type || (0 ==
(frames_since_key + 1) % sf->last_partitioning_redo_frequency);
cm->last_frame_type != cm->frame_type ||
(0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
sf->mv.subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
@@ -381,11 +383,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
if (speed >= 5) {
sf->use_quant_fp = !is_keyframe;
sf->auto_min_max_partition_size = is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX
: STRICT_NEIGHBORING_MIN_MAX;
sf->auto_min_max_partition_size =
is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->default_max_partition_size = BLOCK_32X32;
sf->default_min_partition_size = BLOCK_8X8;
sf->force_frame_boost = is_keyframe ||
sf->force_frame_boost =
is_keyframe ||
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
@@ -404,8 +407,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
sf->simple_model_rd_from_var = 1;
if (cpi->oxcf.rc_mode == VPX_VBR)
sf->mv.search_method = NSTEP;
if (cpi->oxcf.rc_mode == VPX_VBR) sf->mv.search_method = NSTEP;
if (!is_keyframe) {
int i;
@@ -562,8 +564,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->schedule_mode_search = 0;
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
@@ -614,8 +615,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[speed][i].interval;
}
@@ -623,8 +623,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (oxcf->pass == 1)
sf->optimize_coefficients = 0;
if (oxcf->pass == 1) sf->optimize_coefficients = 0;
// No recode for 1 pass.
if (oxcf->pass == 0) {


@@ -18,17 +18,14 @@ extern "C" {
#endif
enum {
INTRA_ALL = (1 << DC_PRED) |
(1 << V_PRED) | (1 << H_PRED) |
(1 << D45_PRED) | (1 << D135_PRED) |
(1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) |
(1 << TM_PRED),
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) |
(1 << H_PRED)
INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
(1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) |
(1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED),
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
INTRA_DC_TM_H_V =
(1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED)
};
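
The values above are bitmasks over the intra prediction modes: a mode is permitted when its bit is set, so INTRA_DC_H_V, for example, allows only DC_PRED, V_PRED and H_PRED. A minimal usage sketch (the helper is illustrative, not part of this header):

static int intra_mode_allowed(int mask, PREDICTION_MODE mode) {
  return (mask >> mode) & 1; /* 1 if the mode's bit is set in the mask */
}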
enum {
@@ -42,20 +39,15 @@ enum {
};
enum {
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) |
(1 << THR_COMP_LA) |
(1 << THR_ALTR) |
(1 << THR_GOLD) |
(1 << THR_LAST),
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
(1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),
DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) |
(1 << THR_COMP_LA) |
(1 << THR_ALTR) |
(1 << THR_GOLD)
LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
(1 << THR_ALTR) | (1 << THR_GOLD)
};
typedef enum {


@@ -15,22 +15,20 @@
#include "vp9/encoder/vp9_subexp.h"
static const uint8_t update_bits[255] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 0,
};
#define MIN_DELP_BITS 5
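
The update_bits[] table above caches the bit cost of signaling each probability delta. Its band structure (16 entries at 5 bits, 16 at 6, 32 at 8, 65 at 10 and the rest at 11, with a trailing 0 that appears to mark the unused maximum delta) is consistent with a term-subexp code that spends a 1- to 3-bit prefix plus a 4-, 4- or 5-bit literal, ending in a near-uniform range of 191 values where 2^8 - 191 = 65 codewords get the short length. A hedged reconstruction, not library code:

static int term_subexp_bits(int delta) {
  if (delta < 16) return 1 + 4; /* prefix 0,   4-bit literal */
  if (delta < 32) return 2 + 4; /* prefix 10,  4-bit literal */
  if (delta < 64) return 3 + 5; /* prefix 110, 5-bit literal */
  /* prefix 111, then a near-uniform code over the remaining 191 values:
   * the first 65 take 7 bits, the rest take 8. */
  return 3 + ((delta - 64 < 65) ? 7 : 8);
}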
@@ -48,23 +46,23 @@ static int remap_prob(int v, int m) {
static const uint8_t map_table[MAX_PROB - 1] = {
// generated by:
// map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM);
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102,
103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171,
172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, 197, 198, 199,
200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, 213,
214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102,
103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171,
172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, 197, 198, 199,
200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, 213,
214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
};
v--;
m--;
@@ -115,9 +113,8 @@ void vp9_write_prob_diff_update(vpx_writer *w, vpx_prob newp, vpx_prob oldp) {
encode_term_subexp(w, delp);
}
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
vpx_prob oldp, vpx_prob *bestp,
vpx_prob upd) {
int vp9_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp,
vpx_prob *bestp, vpx_prob upd) {
const int old_b = cost_branch256(ct, oldp);
int bestsavings = 0;
vpx_prob newp, bestnewp = oldp;
@@ -141,8 +138,7 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct,
int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
const vpx_prob oldp,
vpx_prob *bestp,
vpx_prob upd,
vpx_prob *bestp, vpx_prob upd,
int stepsize) {
int i, old_b, new_b, update_b, savings, bestsavings;
int newp;
@@ -185,8 +181,8 @@ void vp9_cond_prob_diff_update(vpx_writer *w, vpx_prob *oldp,
const unsigned int ct[2]) {
const vpx_prob upd = DIFF_UPDATE_PROB;
vpx_prob newp = get_binary_prob(ct[0], ct[1]);
const int savings = vp9_prob_diff_update_savings_search(ct, *oldp, &newp,
upd);
const int savings =
vp9_prob_diff_update_savings_search(ct, *oldp, &newp, upd);
assert(newp >= 1);
if (savings > 0) {
vpx_write(w, 1, upd);
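
The map_table[] above interleaves two sequences: the values 0-19 occupy every 13th slot starting at index 6, and the remaining slots count upward from 20. That spacing is consistent with the split_index()/MODULUS_PARAM generator named in the comment, with a modulus of 13. A quick self-check against the table as printed (illustrative, not library code):

#include <assert.h>
#include <stdint.h>

static void check_map_table(const uint8_t map_table[254]) {
  int next_small = 0, next_large = 20, i;
  for (i = 0; i < 254; ++i) {
    if (i % 13 == 6)
      assert(map_table[i] == next_small++); /* 0..19 at 6, 19, 32, ... */
    else
      assert(map_table[i] == next_large++); /* 20..253 everywhere else */
  }
}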


@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_SUBEXP_H_
#define VP9_ENCODER_VP9_SUBEXP_H_
@@ -20,21 +19,18 @@ extern "C" {
struct vpx_writer;
void vp9_write_prob_diff_update(struct vpx_writer *w,
vpx_prob newp, vpx_prob oldp);
void vp9_write_prob_diff_update(struct vpx_writer *w, vpx_prob newp,
vpx_prob oldp);
void vp9_cond_prob_diff_update(struct vpx_writer *w, vpx_prob *oldp,
const unsigned int ct[2]);
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
vpx_prob oldp, vpx_prob *bestp,
vpx_prob upd);
int vp9_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp,
vpx_prob *bestp, vpx_prob upd);
int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
const vpx_prob oldp,
vpx_prob *bestp,
vpx_prob upd,
vpx_prob *bestp, vpx_prob upd,
int stepsize);
#ifdef __cplusplus


@@ -16,7 +16,7 @@
#include "vp9/encoder/vp9_extend.h"
#include "vpx_dsp/vpx_dsp_common.h"
#define SMALL_FRAME_WIDTH 32
#define SMALL_FRAME_WIDTH 32
#define SMALL_FRAME_HEIGHT 16
void vp9_init_layer_context(VP9_COMP *const cpi) {
@@ -36,8 +36,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->scaled_temp_is_alloc = 0;
svc->scaled_one_half = 0;
svc->current_superframe = 0;
for (i = 0; i < REF_FRAMES; ++i)
svc->ref_frame_index[i] = -1;
for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
cpi->svc.ext_frame_flags[sl] = 0;
cpi->svc.ext_lst_fb_idx[sl] = 0;
@@ -46,16 +45,14 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
}
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT,
cpi->common.subsampling_x,
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH,
SMALL_FRAME_HEIGHT, cpi->common.subsampling_x,
cpi->common.subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cpi->common.use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS,
cpi->common.byte_alignment,
NULL, NULL, NULL))
cpi->common.byte_alignment, NULL, NULL, NULL))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate empty frame for multiple frame "
"contexts");
@@ -97,10 +94,10 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
lrc->avg_frame_qindex[KEY_FRAME] =
(oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
lrc->avg_frame_qindex[INTER_FRAME] =
(oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
if (oxcf->ss_enable_auto_arf[sl])
lc->alt_ref_idx = alt_ref_idx++;
else
@@ -108,16 +105,15 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lc->gold_ref_idx = INVALID_IDX;
}
lrc->buffer_level = oxcf->starting_buffer_level_ms *
lc->target_bandwidth / 1000;
lrc->buffer_level =
oxcf->starting_buffer_level_ms * lc->target_bandwidth / 1000;
lrc->bits_off_target = lrc->buffer_level;
// Initialize the cyclic refresh parameters. If spatial layers are used
// (i.e., ss_number_layers > 1), these need to be updated per spatial
// layer.
// Cyclic refresh is only applied on base temporal layer.
if (oxcf->ss_number_layers > 1 &&
tl == 0) {
if (oxcf->ss_number_layers > 1 && tl == 0) {
size_t last_coded_q_map_size;
size_t consec_zero_mv_size;
VP9_COMMON *const cm = &cpi->common;
@@ -125,8 +121,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
CHECK_MEM_ERROR(cm, lc->map,
vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
memset(lc->map, 0, mi_rows * mi_cols);
last_coded_q_map_size = mi_rows * mi_cols *
sizeof(*lc->last_coded_q_map);
last_coded_q_map_size =
mi_rows * mi_cols * sizeof(*lc->last_coded_q_map);
CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
vpx_malloc(last_coded_q_map_size));
assert(MAXQ <= 255);
@@ -140,8 +136,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
}
// Still have extra buffer for base layer golden frame
if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR)
&& alt_ref_idx < REF_FRAMES)
if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) &&
alt_ref_idx < REF_FRAMES)
svc->layer_context[0].gold_ref_idx = alt_ref_idx;
}
@@ -162,11 +158,12 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
oxcf->layer_target_bitrate[layer];
}
layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ?
0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers);
spatial_layer_target =
svc->layer_context[layer].target_bandwidth =
oxcf->layer_target_bitrate[layer];
layer = LAYER_IDS_TO_IDX(
sl,
((oxcf->ts_number_layers - 1) < 0 ? 0 : (oxcf->ts_number_layers - 1)),
oxcf->ts_number_layers);
spatial_layer_target = svc->layer_context[layer].target_bandwidth =
oxcf->layer_target_bitrate[layer];
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
LAYER_CONTEXT *const lc =
@@ -214,8 +211,8 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
(int64_t)(rc->optimal_buffer_level * bitrate_alloc);
lrc->maximum_buffer_size =
(int64_t)(rc->maximum_buffer_size * bitrate_alloc);
lrc->bits_off_target = VPXMIN(lrc->bits_off_target,
lrc->maximum_buffer_size);
lrc->bits_off_target =
VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size);
// Update framerate-related quantities.
if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
@@ -235,12 +232,12 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) {
if (is_one_pass_cbr_svc(cpi))
return &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id];
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id];
else
return (cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ?
&cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
&cpi->svc.layer_context[cpi->svc.spatial_layer_id];
return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR)
? &cpi->svc.layer_context[cpi->svc.temporal_layer_id]
: &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
}
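
Both branches of get_layer_context() index the flat layer_context[] array, in which the temporal layers of each spatial layer are stored contiguously, the same layout the LAYER_IDS_TO_IDX macro encodes elsewhere in this file. Spelled out (illustration only):

/* With 2 spatial x 3 temporal layers the contexts are laid out as
 *   [s0t0, s0t1, s0t2, s1t0, s1t1, s1t2]
 * so (sl, tl) maps to sl * number_temporal_layers + tl, e.g. (1, 2) -> 5. */
static int layer_idx(int sl, int tl, int num_temporal_layers) {
  return sl * num_temporal_layers + tl;
}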
void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
@@ -250,7 +247,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
RATE_CONTROL *const lrc = &lc->rc;
// Index into spatial+temporal arrays.
const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers +
svc->temporal_layer_id;
svc->temporal_layer_id;
const int tl = svc->temporal_layer_id;
lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
@@ -277,10 +274,11 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) {
lc->framerate = framerate;
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->min_frame_bandwidth = (int)(lrc->avg_frame_bandwidth *
oxcf->two_pass_vbrmin_section / 100);
lrc->min_frame_bandwidth =
(int)(lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100);
lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth *
oxcf->two_pass_vbrmax_section) / 100);
oxcf->two_pass_vbrmax_section) /
100);
vp9_rc_set_gf_interval_range(cpi, lrc);
}
@@ -309,8 +307,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
// For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
// for the base temporal layer.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cpi->svc.number_spatial_layers > 1 &&
cpi->svc.temporal_layer_id == 0) {
cpi->svc.number_spatial_layers > 1 && cpi->svc.temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
signed char *temp = cr->map;
uint8_t *temp2 = cr->last_coded_q_map;
@@ -337,8 +334,7 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
// For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
// for the base temporal layer.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cpi->svc.number_spatial_layers > 1 &&
cpi->svc.temporal_layer_id == 0) {
cpi->svc.number_spatial_layers > 1 && cpi->svc.temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
signed char *temp = lc->map;
uint8_t *temp2 = lc->last_coded_q_map;
@@ -380,20 +376,19 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
}
int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
return is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id > 0 &&
return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 &&
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id].is_key_frame;
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id]
.is_key_frame;
}
static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den,
int *width_out, int *height_out) {
const int num, const int den, int *width_out,
int *height_out) {
int w, h;
if (width_out == NULL || height_out == NULL || den == 0)
return;
if (width_out == NULL || height_out == NULL || den == 0) return;
w = width_org * num / den;
h = height_org * num / den;
@@ -415,10 +410,13 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
frame_num_within_temporal_struct =
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4;
cpi->svc.number_temporal_layers]
.current_video_frame_in_layer %
4;
temporal_id = cpi->svc.temporal_layer_id =
(frame_num_within_temporal_struct & 1) ? 2 :
(frame_num_within_temporal_struct >> 1);
(frame_num_within_temporal_struct & 1)
? 2
: (frame_num_within_temporal_struct >> 1);
cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
cpi->ext_refresh_alt_ref_frame = 0;
if (!temporal_id) {
@@ -465,7 +463,7 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer
cpi->ext_refresh_frame_flags_pending = 1;
if (!spatial_id)
cpi->ref_frame_flags = VP9_LAST_FLAG;
cpi->ref_frame_flags = VP9_LAST_FLAG;
else
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
} else if (!spatial_id) {
@@ -486,7 +484,7 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
cpi->lst_fb_idx = spatial_id - 1;
cpi->gld_fb_idx = spatial_id;
} else {
cpi->gld_fb_idx = spatial_id - 1;
cpi->gld_fb_idx = spatial_id - 1;
}
} else {
cpi->gld_fb_idx = 0;
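
The temporal_id computation above implements the four-frame 0-2-1-2 cadence: within each group of four, odd positions go to layer 2 and even positions to position >> 1, so layer 0 fires every fourth frame, layer 1 at half rate, and layer 2 on the odd frames. As a table (illustration only):

/* frame_num_within_temporal_struct % 4 : 0  1  2  3
 * temporal_id                          : 0  2  1  2 */
static const int tl_for_mode3[4] = { 0, 2, 1, 2 };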
@@ -515,9 +513,11 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
temporal_id = cpi->svc.temporal_layer_id =
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1;
cpi->svc.number_temporal_layers]
.current_video_frame_in_layer &
1;
cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
cpi->ext_refresh_alt_ref_frame = 0;
cpi->ext_refresh_alt_ref_frame = 0;
if (!temporal_id) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_last_frame = 1;
@@ -548,7 +548,7 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
cpi->lst_fb_idx = spatial_id - 1;
cpi->gld_fb_idx = spatial_id;
} else {
cpi->gld_fb_idx = spatial_id - 1;
cpi->gld_fb_idx = spatial_id - 1;
}
} else {
cpi->gld_fb_idx = 0;
@@ -567,8 +567,8 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
VP9_COMP *const cpi) {
int spatial_id;
spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
cpi->ext_refresh_last_frame =
cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0;
cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
cpi->ext_refresh_alt_ref_frame = 0;
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_last_frame = 1;
if (!spatial_id) {
@@ -586,7 +586,7 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
cpi->lst_fb_idx = spatial_id - 1;
cpi->gld_fb_idx = spatial_id;
} else {
cpi->gld_fb_idx = spatial_id - 1;
cpi->gld_fb_idx = spatial_id - 1;
}
} else {
cpi->gld_fb_idx = 0;
@@ -596,20 +596,19 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
if (cpi->svc.number_spatial_layers > 1)
cpi->svc.use_base_mv = 1;
if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1;
cpi->svc.force_zero_mode_spatial_ref = 1;
if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_0101) {
VP9E_TEMPORAL_LAYERING_MODE_0101) {
set_flags_and_fb_idx_for_temporal_mode2(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
// In the BYPASS/flexible mode, the encoder is relying on the application
// to specify, for each spatial layer, the flags and buffer indices for the
// layering.
@@ -633,7 +632,7 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
cpi->svc.rc_drop_superframe = 0;
lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id];
// Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS,
@@ -641,12 +640,12 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
RATE_CONTROL *const lrc = &lc->rc;
lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q);
lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
}
get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den,
&width, &height);
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);
if (vp9_set_size_literal(cpi, width, height) != 0)
return VPX_CODEC_INVALID_PARAM;
@@ -677,8 +676,8 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
if (cpi->svc.spatial_layer_id == 0)
cpi->gld_fb_idx = (lc->gold_ref_idx >= 0) ?
lc->gold_ref_idx : cpi->lst_fb_idx;
cpi->gld_fb_idx =
(lc->gold_ref_idx >= 0) ? lc->gold_ref_idx : cpi->lst_fb_idx;
else
cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1;
@@ -692,8 +691,7 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
} else {
if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]) {
cpi->alt_fb_idx = lc->alt_ref_idx;
if (!lc->has_alt_frame)
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
if (!lc->has_alt_frame) cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
} else {
// Find a proper alt_fb_idx for layers that don't have alt ref frame
if (cpi->svc.spatial_layer_id == 0) {
@@ -714,8 +712,8 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
}
get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den,
&width, &height);
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);
// Workaround for multiple frame contexts. In some frames we can't use prev_mi
// since its previous frame could be changed during decoding time. The idea is
@@ -740,11 +738,10 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
cpi->common.show_frame = 0;
cpi->ref_frame_flags = 0;
cpi->common.frame_type = INTER_FRAME;
cpi->lst_fb_idx =
cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX;
cpi->lst_fb_idx = cpi->gld_fb_idx = cpi->alt_fb_idx =
SMALL_FRAME_FB_IDX;
if (cpi->svc.encode_intra_empty_frame != 0)
cpi->common.intra_only = 1;
if (cpi->svc.encode_intra_empty_frame != 0) cpi->common.intra_only = 1;
width = SMALL_FRAME_WIDTH;
height = SMALL_FRAME_HEIGHT;
@@ -794,12 +791,9 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
if (lc->map)
vpx_free(lc->map);
if (lc->last_coded_q_map)
vpx_free(lc->last_coded_q_map);
if (lc->consec_zero_mv)
vpx_free(lc->consec_zero_mv);
if (lc->map) vpx_free(lc->map);
if (lc->last_coded_q_map) vpx_free(lc->last_coded_q_map);
if (lc->consec_zero_mv) vpx_free(lc->consec_zero_mv);
}
}
}
@@ -820,9 +814,9 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
} else if (svc->temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
} else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) {
set_flags_and_fb_idx_for_temporal_mode2(cpi);
set_flags_and_fb_idx_for_temporal_mode2(cpi);
}
vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);


@@ -35,7 +35,7 @@ typedef struct {
int is_key_frame;
int frames_from_key_frame;
FRAME_TYPE last_frame_type;
struct lookahead_entry *alt_ref_source;
struct lookahead_entry *alt_ref_source;
int alt_ref_idx;
int gold_ref_idx;
int has_alt_frame;
@@ -60,11 +60,7 @@ typedef struct {
int rc_drop_superframe;
// Workaround for multiple frame contexts
enum {
ENCODED = 0,
ENCODING,
NEED_TO_ENCODE
}encode_empty_frame_state;
enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
struct lookahead_entry empty_frame;
int encode_intra_empty_frame;


@@ -31,18 +31,10 @@
static int fixed_divide[512];
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
uint8_t *u_mb_ptr,
uint8_t *v_mb_ptr,
int stride,
int uv_block_width,
int uv_block_height,
int mv_row,
int mv_col,
uint8_t *pred,
struct scale_factors *scale,
int x, int y) {
static void temporal_filter_predictors_mb_c(
MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
uint8_t *pred, struct scale_factors *scale, int x, int y) {
const int which_mv = 0;
const MV mv = { mv_row, mv_col };
const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];
@@ -59,74 +51,46 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
&mv,
scale,
16, 16,
which_mv,
kernel, MV_PRECISION_Q3, x, y, xd->bd);
vp9_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale,
16, 16, which_mv, kernel, MV_PRECISION_Q3,
x, y, xd->bd);
vp9_highbd_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
kernel, mv_precision_uv, x, y, xd->bd);
vp9_highbd_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256],
uv_block_width, &mv, scale, uv_block_width,
uv_block_height, which_mv, kernel,
mv_precision_uv, x, y, xd->bd);
vp9_highbd_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
kernel, mv_precision_uv, x, y, xd->bd);
vp9_highbd_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512],
uv_block_width, &mv, scale, uv_block_width,
uv_block_height, which_mv, kernel,
mv_precision_uv, x, y, xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
(void)xd;
vp9_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
&mv,
scale,
16, 16,
which_mv,
kernel, MV_PRECISION_Q3, x, y);
vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
which_mv, kernel, MV_PRECISION_Q3, x, y);
vp9_build_inter_predictor(u_mb_ptr, uv_stride,
&pred[256], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
kernel, mv_precision_uv, x, y);
vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
&mv, scale, uv_block_width, uv_block_height,
which_mv, kernel, mv_precision_uv, x, y);
vp9_build_inter_predictor(v_mb_ptr, uv_stride,
&pred[512], uv_block_width,
&mv,
scale,
uv_block_width, uv_block_height,
which_mv,
kernel, mv_precision_uv, x, y);
vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
&mv, scale, uv_block_width, uv_block_height,
which_mv, kernel, mv_precision_uv, x, y);
}
void vp9_temporal_filter_init(void) {
int i;
fixed_divide[0] = 0;
for (i = 1; i < 512; ++i)
fixed_divide[i] = 0x80000 / i;
for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}
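
fixed_divide[] caches the fixed-point reciprocal 0x80000 / i = 2^19 / i, which lets the filter's per-pixel normalization replace an integer division by the accumulated weight count with a multiply and a shift. A hedged sketch of the intended use; the real call sites fall outside the hunks shown here:

static unsigned int fixed_div_round(unsigned int value, unsigned int count) {
  /* (value + count/2) * (2^19 / count) >> 19, i.e. value / count rounded */
  return ((value + (count >> 1)) * fixed_divide[count]) >> 19;
}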
void vp9_temporal_filter_apply_c(uint8_t *frame1,
unsigned int stride,
uint8_t *frame2,
unsigned int block_width,
unsigned int block_height,
int strength,
int filter_weight,
unsigned int *accumulator,
void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
uint8_t *frame2, unsigned int block_width,
unsigned int block_height, int strength,
int filter_weight, unsigned int *accumulator,
uint16_t *count) {
unsigned int i, j, k;
int modifier;
@@ -146,10 +110,10 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
int row = (int)i + idy;
int col = (int)j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
int diff = frame1[byte + idy * (int)stride + idx] -
frame2[idy * (int)block_width + idx];
frame2[idy * (int)block_width + idx];
diff_sse[index] = diff * diff;
++index;
}
@@ -159,19 +123,17 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
assert(index > 0);
modifier = 0;
for (idx = 0; idx < 9; ++idx)
modifier += diff_sse[idx];
for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
modifier *= 3;
modifier /= index;
++frame2;
modifier += rounding;
modifier += rounding;
modifier >>= strength;
if (modifier > 16)
modifier = 16;
if (modifier > 16) modifier = 16;
modifier = 16 - modifier;
modifier *= filter_weight;
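
Condensed, the per-pixel weight computed above is: average the 3x3 neighborhood SSE (scaled by 3), round and shift by strength, saturate at 16, then invert so that larger differences blend less. A standalone restatement (illustrative; it assumes the rounding term 1 << (strength - 1) set up in the function preamble outside these hunks):

static int pixel_filter_weight(int sse_sum, int sse_count, int strength,
                               int filter_weight) {
  int modifier = sse_sum * 3 / sse_count; /* scaled neighborhood average */
  modifier += strength > 0 ? 1 << (strength - 1) : 0;
  modifier >>= strength;
  if (modifier > 16) modifier = 16;       /* saturate                    */
  return (16 - modifier) * filter_weight; /* big diff => small weight    */
}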
@@ -187,15 +149,10 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
unsigned int stride,
uint8_t *frame2_8,
unsigned int block_width,
unsigned int block_height,
int strength,
int filter_weight,
unsigned int *accumulator,
uint16_t *count) {
void vp9_highbd_temporal_filter_apply_c(
uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
unsigned int block_width, unsigned int block_height, int strength,
int filter_weight, unsigned int *accumulator, uint16_t *count) {
uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
unsigned int i, j, k;
@@ -214,10 +171,10 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
int row = (int)i + idy;
int col = (int)j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
int diff = frame1[byte + idy * (int)stride + idx] -
frame2[idy * (int)block_width + idx];
frame2[idy * (int)block_width + idx];
diff_sse[index] = diff * diff;
++index;
}
@@ -226,8 +183,7 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
assert(index > 0);
modifier = 0;
for (idx = 0; idx < 9; ++idx)
modifier += diff_sse[idx];
for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
modifier *= 3;
modifier /= index;
@@ -236,8 +192,7 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
modifier += rounding;
modifier >>= strength;
if (modifier > 16)
modifier = 16;
if (modifier > 16) modifier = 16;
modifier = 16 - modifier;
modifier *= filter_weight;
@@ -268,7 +223,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
uint32_t sse;
int cost_list[5];
MV best_ref_mv1 = {0, 0};
MV best_ref_mv1 = { 0, 0 };
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv;
@@ -295,15 +250,11 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
mv_sf->search_method = old_search_method;
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(x, ref_mv,
&best_ref_mv1,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
NULL, NULL,
&distortion, &sse, NULL, 0, 0);
bestsme = cpi->find_fractional_mv_step(
x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0,
mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL,
&distortion, &sse, NULL, 0, 0);
// Restore input state
x->plane[0].src = src;
@@ -314,8 +265,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
static void temporal_filter_iterate_c(VP9_COMP *cpi,
YV12_BUFFER_CONFIG **frames,
int frame_count,
int alt_ref_index,
int frame_count, int alt_ref_index,
int strength,
struct scale_factors *scale) {
int byte;
@@ -332,17 +282,17 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
uint8_t *predictor;
#else
DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
#endif
const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
// Save input state
uint8_t* input_buffer[MAX_MB_PLANE];
uint8_t *input_buffer[MAX_MB_PLANE];
int i;
#if CONFIG_VP9_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -352,8 +302,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
}
#endif
for (i = 0; i < MAX_MB_PLANE; i++)
input_buffer[i] = mbd->plane[i].pre[0].buf;
for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
for (mb_row = 0; mb_row < mb_rows; mb_row++) {
// Source frames are extended to 16 pixels. This is different than
@@ -368,8 +317,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
// To keep the mv in play for both Y and UV planes the max that it
// can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
cpi->td.mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
cpi->td.mb.mv_row_max = ((mb_rows - 1 - mb_row) * 16)
+ (17 - 2 * VP9_INTERP_EXTEND);
cpi->td.mb.mv_row_max =
((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND);
for (mb_col = 0; mb_col < mb_cols; mb_col++) {
int i, j, k;
@@ -379,15 +328,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
cpi->td.mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
cpi->td.mb.mv_col_max = ((mb_cols - 1 - mb_col) * 16)
+ (17 - 2 * VP9_INTERP_EXTEND);
cpi->td.mb.mv_col_max =
((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND);
for (frame = 0; frame < frame_count; frame++) {
const int thresh_low = 10000;
const int thresh_low = 10000;
const int thresh_high = 20000;
if (frames[frame] == NULL)
continue;
if (frames[frame] == NULL) continue;
mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;
@@ -396,86 +344,69 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
filter_weight = 2;
} else {
// Find best match in this frame by MC
int err = temporal_filter_find_matching_mb_c(cpi,
frames[alt_ref_index]->y_buffer + mb_y_offset,
frames[frame]->y_buffer + mb_y_offset,
frames[frame]->y_stride);
int err = temporal_filter_find_matching_mb_c(
cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride);
// Assign higher weight to matching MB if its error
// score is lower. If not applying MC default behavior
// is to weight all MBs equal.
filter_weight = err < thresh_low
? 2 : err < thresh_high ? 1 : 0;
filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
}
if (filter_weight != 0) {
// Construct the predictors
temporal_filter_predictors_mb_c(mbd,
frames[frame]->y_buffer + mb_y_offset,
temporal_filter_predictors_mb_c(
mbd, frames[frame]->y_buffer + mb_y_offset,
frames[frame]->u_buffer + mb_uv_offset,
frames[frame]->v_buffer + mb_uv_offset,
frames[frame]->y_stride,
mb_uv_width, mb_uv_height,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
predictor, scale,
frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
mb_uv_width, mb_uv_height, mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale,
mb_col * 16, mb_row * 16);
#if CONFIG_VP9_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset,
f->y_stride,
predictor, 16, 16, adj_strength,
filter_weight,
accumulator, count);
vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height,
adj_strength, filter_weight,
accumulator + 256, count + 256);
vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height,
adj_strength, filter_weight,
accumulator + 512, count + 512);
vp9_highbd_temporal_filter_apply_c(
f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
adj_strength, filter_weight, accumulator, count);
vp9_highbd_temporal_filter_apply_c(
f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height, adj_strength, filter_weight,
accumulator + 256, count + 256);
vp9_highbd_temporal_filter_apply_c(
f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height, adj_strength, filter_weight,
accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
predictor, 16, 16, strength,
filter_weight, accumulator, count);
vp9_temporal_filter_apply_c(
f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height, strength, filter_weight,
accumulator + 256, count + 256);
vp9_temporal_filter_apply_c(
f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height, strength, filter_weight,
accumulator + 512, count + 512);
}
#else
// Apply the filter (YUV)
// TODO(jingning): Need SIMD optimization for this.
vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
predictor, 16, 16, strength,
filter_weight, accumulator, count);
vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
predictor + 256, mb_uv_width,
mb_uv_height, strength, filter_weight,
accumulator + 256, count + 256);
vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
predictor + 512, mb_uv_width,
mb_uv_height, strength, filter_weight,
accumulator + 512, count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
@@ -630,13 +561,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
}
// Restore input state
for (i = 0; i < MAX_MB_PLANE; i++)
mbd->plane[i].pre[0].buf = input_buffer[i];
for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}
// Apply buffer limits and context specific adjustments to arnr filter.
static void adjust_arnr_filter(VP9_COMP *cpi,
int distance, int group_boost,
static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
int *arnr_frames, int *arnr_strength) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const int frames_after_arf =
@@ -647,8 +576,7 @@ static void adjust_arnr_filter(VP9_COMP *cpi,
// Context dependent two pass adjustment to strength.
if (oxcf->pass == 2) {
base_strength =
oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment;
base_strength = oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment;
// Clip to allowed range.
base_strength = VPXMIN(6, VPXMAX(0, base_strength));
} else {
@@ -656,34 +584,30 @@ static void adjust_arnr_filter(VP9_COMP *cpi,
}
// Define the forward and backwards filter limits for this arnr group.
if (frames_fwd > frames_after_arf)
frames_fwd = frames_after_arf;
if (frames_fwd > distance)
frames_fwd = distance;
if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
if (frames_fwd > distance) frames_fwd = distance;
frames_bwd = frames_fwd;
// For even length filter there is one more frame backward
// than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
if (frames_bwd < distance)
frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
// Set the baseline active filter size.
frames = frames_bwd + 1 + frames_fwd;
// Adjust the strength based on active max q.
if (cpi->common.current_video_frame > 1)
q = ((int)vp9_convert_qindex_to_q(
cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth));
q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
cpi->common.bit_depth));
else
q = ((int)vp9_convert_qindex_to_q(
cpi->rc.avg_frame_qindex[KEY_FRAME], cpi->common.bit_depth));
q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
cpi->common.bit_depth));
if (q > 16) {
strength = base_strength;
} else {
strength = base_strength - ((16 - q) / 2);
if (strength < 0)
strength = 0;
if (strength < 0) strength = 0;
}
// Adjust number of frames in filter and strength based on gf boost level.
@@ -719,7 +643,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
int frames_to_blur_backward;
int frames_to_blur_forward;
struct scale_factors sf;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = {NULL};
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
// Apply context specific adjustments to the arnr filter parameters.
adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
@@ -730,8 +654,8 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
// Setup frame pointers, NULL indicates frame not included in filter.
for (frame = 0; frame < frames_to_blur; ++frame) {
const int which_buffer = start_frame - frame;
struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
which_buffer);
struct lookahead_entry *buf =
vp9_lookahead_peek(cpi->lookahead, which_buffer);
frames[frames_to_blur - 1 - frame] = &buf->img;
}
@@ -744,16 +668,13 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
int frame_used = 0;
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(
&sf,
get_frame_new_buffer(cm)->y_crop_width,
&sf, get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
cm->use_highbitdepth);
get_frame_new_buffer(cm)->y_crop_height, cm->use_highbitdepth);
#else
vp9_setup_scale_factors_for_frame(
&sf,
get_frame_new_buffer(cm)->y_crop_width,
&sf, get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height);
@@ -763,14 +684,13 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
if (cm->mi_cols * MI_SIZE != frames[frame]->y_width ||
cm->mi_rows * MI_SIZE != frames[frame]->y_height) {
if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used],
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
cm->width, cm->height, cm->subsampling_x,
cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS,
cm->byte_alignment,
NULL, NULL, NULL)) {
cm->byte_alignment, NULL, NULL, NULL)) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate alt_ref_buffer");
}
@@ -783,20 +703,16 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
} else {
// ARF is produced at the native frame size and resized when coded.
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
cm->use_highbitdepth);
vp9_setup_scale_factors_for_frame(
&sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
frames[0]->y_crop_width, frames[0]->y_crop_height,
cm->use_highbitdepth);
#else
vp9_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height);
vp9_setup_scale_factors_for_frame(
&sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
frames[0]->y_crop_width, frames[0]->y_crop_height);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}


@@ -24,62 +24,54 @@
#include "vp9/encoder/vp9_tokenize.h"
static const TOKENVALUE dct_cat_lt_10_value_tokens[] = {
{9, 63}, {9, 61}, {9, 59}, {9, 57}, {9, 55}, {9, 53}, {9, 51}, {9, 49},
{9, 47}, {9, 45}, {9, 43}, {9, 41}, {9, 39}, {9, 37}, {9, 35}, {9, 33},
{9, 31}, {9, 29}, {9, 27}, {9, 25}, {9, 23}, {9, 21}, {9, 19}, {9, 17},
{9, 15}, {9, 13}, {9, 11}, {9, 9}, {9, 7}, {9, 5}, {9, 3}, {9, 1},
{8, 31}, {8, 29}, {8, 27}, {8, 25}, {8, 23}, {8, 21},
{8, 19}, {8, 17}, {8, 15}, {8, 13}, {8, 11}, {8, 9},
{8, 7}, {8, 5}, {8, 3}, {8, 1},
{7, 15}, {7, 13}, {7, 11}, {7, 9}, {7, 7}, {7, 5}, {7, 3}, {7, 1},
{6, 7}, {6, 5}, {6, 3}, {6, 1}, {5, 3}, {5, 1},
{4, 1}, {3, 1}, {2, 1}, {1, 1}, {0, 0},
{1, 0}, {2, 0}, {3, 0}, {4, 0},
{5, 0}, {5, 2}, {6, 0}, {6, 2}, {6, 4}, {6, 6},
{7, 0}, {7, 2}, {7, 4}, {7, 6}, {7, 8}, {7, 10}, {7, 12}, {7, 14},
{8, 0}, {8, 2}, {8, 4}, {8, 6}, {8, 8}, {8, 10}, {8, 12},
{8, 14}, {8, 16}, {8, 18}, {8, 20}, {8, 22}, {8, 24},
{8, 26}, {8, 28}, {8, 30}, {9, 0}, {9, 2},
{9, 4}, {9, 6}, {9, 8}, {9, 10}, {9, 12}, {9, 14}, {9, 16},
{9, 18}, {9, 20}, {9, 22}, {9, 24}, {9, 26}, {9, 28},
{9, 30}, {9, 32}, {9, 34}, {9, 36}, {9, 38}, {9, 40},
{9, 42}, {9, 44}, {9, 46}, {9, 48}, {9, 50}, {9, 52},
{9, 54}, {9, 56}, {9, 58}, {9, 60}, {9, 62}
{ 9, 63 }, { 9, 61 }, { 9, 59 }, { 9, 57 }, { 9, 55 }, { 9, 53 }, { 9, 51 },
{ 9, 49 }, { 9, 47 }, { 9, 45 }, { 9, 43 }, { 9, 41 }, { 9, 39 }, { 9, 37 },
{ 9, 35 }, { 9, 33 }, { 9, 31 }, { 9, 29 }, { 9, 27 }, { 9, 25 }, { 9, 23 },
{ 9, 21 }, { 9, 19 }, { 9, 17 }, { 9, 15 }, { 9, 13 }, { 9, 11 }, { 9, 9 },
{ 9, 7 }, { 9, 5 }, { 9, 3 }, { 9, 1 }, { 8, 31 }, { 8, 29 }, { 8, 27 },
{ 8, 25 }, { 8, 23 }, { 8, 21 }, { 8, 19 }, { 8, 17 }, { 8, 15 }, { 8, 13 },
{ 8, 11 }, { 8, 9 }, { 8, 7 }, { 8, 5 }, { 8, 3 }, { 8, 1 }, { 7, 15 },
{ 7, 13 }, { 7, 11 }, { 7, 9 }, { 7, 7 }, { 7, 5 }, { 7, 3 }, { 7, 1 },
{ 6, 7 }, { 6, 5 }, { 6, 3 }, { 6, 1 }, { 5, 3 }, { 5, 1 }, { 4, 1 },
{ 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
{ 4, 0 }, { 5, 0 }, { 5, 2 }, { 6, 0 }, { 6, 2 }, { 6, 4 }, { 6, 6 },
{ 7, 0 }, { 7, 2 }, { 7, 4 }, { 7, 6 }, { 7, 8 }, { 7, 10 }, { 7, 12 },
{ 7, 14 }, { 8, 0 }, { 8, 2 }, { 8, 4 }, { 8, 6 }, { 8, 8 }, { 8, 10 },
{ 8, 12 }, { 8, 14 }, { 8, 16 }, { 8, 18 }, { 8, 20 }, { 8, 22 }, { 8, 24 },
{ 8, 26 }, { 8, 28 }, { 8, 30 }, { 9, 0 }, { 9, 2 }, { 9, 4 }, { 9, 6 },
{ 9, 8 }, { 9, 10 }, { 9, 12 }, { 9, 14 }, { 9, 16 }, { 9, 18 }, { 9, 20 },
{ 9, 22 }, { 9, 24 }, { 9, 26 }, { 9, 28 }, { 9, 30 }, { 9, 32 }, { 9, 34 },
{ 9, 36 }, { 9, 38 }, { 9, 40 }, { 9, 42 }, { 9, 44 }, { 9, 46 }, { 9, 48 },
{ 9, 50 }, { 9, 52 }, { 9, 54 }, { 9, 56 }, { 9, 58 }, { 9, 60 }, { 9, 62 }
};
const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
/ 2;
const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens =
dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) /
2;
// The corresponding costs of the extrabits for the tokens in the above table
// are stored in the table below. The values are obtained from looking up the
// entry for the specified extrabits in the table corresponding to the token
// (as defined in cost element vp9_extra_bits)
// e.g. {9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1]
static const int dct_cat_lt_10_value_cost[] = {
3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531,
3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190,
3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894,
2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553,
3197, 3116, 3058, 2977, 2881, 2800,
2742, 2661, 2615, 2534, 2476, 2395,
2299, 2218, 2160, 2079,
2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652,
1893, 1696, 1453, 1256, 1229, 864,
512, 512, 512, 512, 0,
512, 512, 512, 512,
864, 1229, 1256, 1453, 1696, 1893,
1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566,
2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615,
2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197,
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795,
2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432,
3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773,
3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, 3432, 3409, 3363, 3340, 3282,
3259, 3213, 3190, 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, 2795, 2772,
2726, 2703, 2645, 2622, 2576, 2553, 3197, 3116, 3058, 2977, 2881, 2800, 2742,
2661, 2615, 2534, 2476, 2395, 2299, 2218, 2160, 2079, 2566, 2427, 2334, 2195,
2023, 1884, 1791, 1652, 1893, 1696, 1453, 1256, 1229, 864, 512, 512, 512,
512, 0, 512, 512, 512, 512, 864, 1229, 1256, 1453, 1696, 1893, 1652,
1791, 1884, 2023, 2195, 2334, 2427, 2566, 2079, 2160, 2218, 2299, 2395, 2476,
2534, 2615, 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, 2553, 2576, 2622,
2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, 3531, 3554, 3600, 3623, 3681,
3704, 3750, 3773,
};
const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost +
(sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost))
/ 2;
const int *vp9_dct_cat_lt_10_value_cost =
dct_cat_lt_10_value_cost +
(sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) / 2;
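
Both exported pointers are biased to the midpoint of their backing arrays, the { 0, 0 } token and the 0 cost, so a small signed coefficient value, negative or positive, indexes its token and extra-bits cost directly. For instance (illustration only; the token names are the usual ENTROPY_TOKENS reading of the first field):

/* vp9_dct_cat_lt_10_value_tokens[ 0] == { 0, 0 }  ZERO_TOKEN
 * vp9_dct_cat_lt_10_value_tokens[ 1] == { 1, 0 }  ONE_TOKEN, positive sign
 * vp9_dct_cat_lt_10_value_tokens[-1] == { 1, 1 }  ONE_TOKEN, negative sign
 * vp9_dct_cat_lt_10_value_cost[v]    == the matching extra-bits cost.     */
static TOKENVALUE small_value_token(int v) { /* v in the cat-lt-10 range */
  return vp9_dct_cat_lt_10_value_tokens[v];
}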
// Array indices are identical to previously-existing CONTEXT_NODE indices
/* clang-format off */
const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
-EOB_TOKEN, 2, // 0 = EOB
-ZERO_TOKEN, 4, // 1 = ZERO
@@ -93,226 +85,229 @@ const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
-CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 9 = CAT_THREE
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE
};
/* clang-format on */
static const int16_t zero_cost[] = {0};
static const int16_t sign_cost[1] = {512};
static const int16_t cat1_cost[1 << 1] = {864, 1229};
static const int16_t cat2_cost[1 << 2] = {1256, 1453, 1696, 1893};
static const int16_t cat3_cost[1 << 3] = {1652, 1791, 1884, 2023,
2195, 2334, 2427, 2566};
static const int16_t cat4_cost[1 << 4] = {2079, 2160, 2218, 2299, 2395, 2476,
2534, 2615, 2661, 2742, 2800, 2881,
2977, 3058, 3116, 3197};
static const int16_t zero_cost[] = { 0 };
static const int16_t sign_cost[1] = { 512 };
static const int16_t cat1_cost[1 << 1] = { 864, 1229 };
static const int16_t cat2_cost[1 << 2] = { 1256, 1453, 1696, 1893 };
static const int16_t cat3_cost[1 << 3] = { 1652, 1791, 1884, 2023,
2195, 2334, 2427, 2566 };
static const int16_t cat4_cost[1 << 4] = { 2079, 2160, 2218, 2299, 2395, 2476,
2534, 2615, 2661, 2742, 2800, 2881,
2977, 3058, 3116, 3197 };
static const int16_t cat5_cost[1 << 5] = {
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963,
2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363,
3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773};
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963,
2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363,
3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773
};
const int16_t vp9_cat6_low_cost[256] = {
3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552,
3574, 3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763,
3810, 3822, 3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008,
4030, 4042, 4053, 4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204,
4266, 4278, 4289, 4301, 4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440,
4462, 4474, 4485, 4497, 4253, 4265, 4276, 4288, 4310, 4322, 4333, 4345,
4392, 4404, 4415, 4427, 4449, 4461, 4472, 4484, 4546, 4558, 4569, 4581,
4603, 4615, 4626, 4638, 4685, 4697, 4708, 4720, 4742, 4754, 4765, 4777,
4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940, 4987, 4999, 5010, 5022,
5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198, 5210, 5221, 5233,
5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000, 5011, 5023,
5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207, 5219,
5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455,
5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675,
5722, 5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911,
5933, 5945, 5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107,
5863, 5875, 5886, 5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037,
6059, 6071, 6082, 6094, 6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248,
6295, 6307, 6318, 6330, 6352, 6364, 6375, 6387, 6458, 6470, 6481, 6493,
6515, 6527, 6538, 6550, 6597, 6609, 6620, 6632, 6654, 6666, 6677, 6689,
6751, 6763, 6774, 6786, 6808, 6820, 6831, 6843, 6890, 6902, 6913, 6925,
6947, 6959, 6970, 6982};
3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552, 3574,
3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763, 3810, 3822,
3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008, 4030, 4042, 4053,
4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204, 4266, 4278, 4289, 4301,
4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440, 4462, 4474, 4485, 4497, 4253,
4265, 4276, 4288, 4310, 4322, 4333, 4345, 4392, 4404, 4415, 4427, 4449, 4461,
4472, 4484, 4546, 4558, 4569, 4581, 4603, 4615, 4626, 4638, 4685, 4697, 4708,
4720, 4742, 4754, 4765, 4777, 4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940,
4987, 4999, 5010, 5022, 5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198,
5210, 5221, 5233, 5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000,
5011, 5023, 5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207,
5219, 5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455,
5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675, 5722,
5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911, 5933, 5945,
5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107, 5863, 5875, 5886,
5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037, 6059, 6071, 6082, 6094,
6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248, 6295, 6307, 6318, 6330, 6352,
6364, 6375, 6387, 6458, 6470, 6481, 6493, 6515, 6527, 6538, 6550, 6597, 6609,
6620, 6632, 6654, 6666, 6677, 6689, 6751, 6763, 6774, 6786, 6808, 6820, 6831,
6843, 6890, 6902, 6913, 6925, 6947, 6959, 6970, 6982
};
const int vp9_cat6_high_cost[64] = {
88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305,
8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889,
9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666,
5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046,
10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943,
15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684};
88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305,
8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889,
9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666,
5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046,
10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943,
15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684
};
#if CONFIG_VP9_HIGHBITDEPTH
const int vp9_cat6_high10_high_cost[256] = {
94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311,
8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895,
9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672,
5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052,
10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949,
15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350,
6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404,
12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566,
17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921,
17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989,
7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464,
15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506,
13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560,
19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503,
13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858,
14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238,
18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557,
19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599,
18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074,
24237, 24713, 26876};
94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311,
8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895,
9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672,
5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052,
10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949,
15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350,
6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404,
12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566,
17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921,
17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989,
7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464,
15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506,
13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825,
12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205,
11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560,
19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503,
13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858,
14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238,
18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557,
19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599,
18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074,
24237, 24713, 26876
};
const int vp9_cat6_high12_high_cost[1024] = {
100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317,
8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901,
9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678,
5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058,
10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955,
15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356,
6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927,
17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995,
7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470,
15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512,
13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566,
19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509,
13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244,
18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080,
24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151,
14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626,
22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286,
10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018,
17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535,
21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759,
19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276,
25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253,
9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988,
16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994,
13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729,
17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985,
16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243,
24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141,
23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882,
12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596,
20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449,
10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503,
16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020,
21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181,
15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017,
21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397,
19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752,
27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602,
18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173,
28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334,
20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689,
24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851,
29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628,
25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430,
26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905,
35068};
100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317,
8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901,
9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678,
5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058,
10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955,
15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356,
6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927,
17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995,
7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470,
15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512,
13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831,
12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211,
11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566,
19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509,
13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244,
18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080,
24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410,
12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151,
14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626,
22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286,
10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018,
17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535,
21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759,
19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276,
25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253,
9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988,
16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994,
13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349,
13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729,
17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985,
16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027,
14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081,
20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243,
24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864,
14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666,
14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141,
23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605,
18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502,
20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882,
12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596,
20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180,
21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449,
10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924,
17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503,
16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665,
21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020,
21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181,
15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017,
21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397,
19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752,
27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602,
18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957,
18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698,
22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173,
28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334,
20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689,
24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851,
29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628,
25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430,
26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905,
35068
};
#endif
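// The table sizes track the CAT6 extra-bit budget per bit depth: 14 bits at
// 8-bit depth leave 6 high bits (64 entries) after the 8 low bits covered by
// vp9_cat6_low_cost; 10-bit depth spends 16 extra bits (256 entries) and
// 12-bit depth 18 (1024 entries).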
const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = {
{0, 0, 0, zero_cost}, // ZERO_TOKEN
{0, 0, 1, sign_cost}, // ONE_TOKEN
{0, 0, 2, sign_cost}, // TWO_TOKEN
{0, 0, 3, sign_cost}, // THREE_TOKEN
{0, 0, 4, sign_cost}, // FOUR_TOKEN
{vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN
{vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN
{vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN
{vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CATEGORY4_TOKEN
{vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CATEGORY5_TOKEN
{vp9_cat6_prob, 14, CAT6_MIN_VAL, 0}, // CATEGORY6_TOKEN
{0, 0, 0, zero_cost} // EOB_TOKEN
{ 0, 0, 0, zero_cost }, // ZERO_TOKEN
{ 0, 0, 1, sign_cost }, // ONE_TOKEN
{ 0, 0, 2, sign_cost }, // TWO_TOKEN
{ 0, 0, 3, sign_cost }, // THREE_TOKEN
{ 0, 0, 4, sign_cost }, // FOUR_TOKEN
{ vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CATEGORY1_TOKEN
{ vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CATEGORY2_TOKEN
{ vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CATEGORY3_TOKEN
{ vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CATEGORY4_TOKEN
{ vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CATEGORY5_TOKEN
{ vp9_cat6_prob, 14, CAT6_MIN_VAL, 0 }, // CATEGORY6_TOKEN
{ 0, 0, 0, zero_cost } // EOB_TOKEN
};
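// Each vp9_extra_bit entry is, in order: the probability table for the extra
// bits, the number of extra bits, the minimum value the category represents,
// and a precomputed cost table. CATEGORY6_TOKEN's cost entry is 0 because its
// 14 extra bits are costed at run time via the cat6 low/high tables above.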
#if CONFIG_VP9_HIGHBITDEPTH
const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = {
{0, 0, 0, zero_cost}, // ZERO
{0, 0, 1, sign_cost}, // ONE
{0, 0, 2, sign_cost}, // TWO
{0, 0, 3, sign_cost}, // THREE
{0, 0, 4, sign_cost}, // FOUR
{vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
{vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
{vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
{vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
{vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
{vp9_cat6_prob_high12 + 2, 16, CAT6_MIN_VAL, 0}, // CAT6
{0, 0, 0, zero_cost} // EOB
{ 0, 0, 0, zero_cost }, // ZERO
{ 0, 0, 1, sign_cost }, // ONE
{ 0, 0, 2, sign_cost }, // TWO
{ 0, 0, 3, sign_cost }, // THREE
{ 0, 0, 4, sign_cost }, // FOUR
{ vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1
{ vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2
{ vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3
{ vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4
{ vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5
{ vp9_cat6_prob_high12 + 2, 16, CAT6_MIN_VAL, 0 }, // CAT6
{ 0, 0, 0, zero_cost } // EOB
};
const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = {
{0, 0, 0, zero_cost}, // ZERO
{0, 0, 1, sign_cost}, // ONE
{0, 0, 2, sign_cost}, // TWO
{0, 0, 3, sign_cost}, // THREE
{0, 0, 4, sign_cost}, // FOUR
{vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
{vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
{vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
{vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
{vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
{vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
{0, 0, 0, zero_cost} // EOB
{ 0, 0, 0, zero_cost }, // ZERO
{ 0, 0, 1, sign_cost }, // ONE
{ 0, 0, 2, sign_cost }, // TWO
{ 0, 0, 3, sign_cost }, // THREE
{ 0, 0, 4, sign_cost }, // FOUR
{ vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1
{ vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2
{ vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3
{ vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4
{ vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5
{ vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0 }, // CAT6
{ 0, 0, 0, zero_cost } // EOB
};
#endif
const struct vp9_token vp9_coef_encodings[ENTROPY_TOKENS] = {
{2, 2}, {6, 3}, {28, 5}, {58, 6}, {59, 6}, {60, 6}, {61, 6}, {124, 7},
{125, 7}, {126, 7}, {127, 7}, {0, 1}
{ 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 },
{ 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 }
};
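// Each { value, len } pair is the prefix code written for the matching token:
// EOB_TOKEN is the single bit 0, ZERO_TOKEN the two bits 10 (value 2), and so
// on down the coefficient token tree.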
struct tokenize_b_args {
VP9_COMP *cpi;
ThreadData *td;
@@ -320,16 +315,15 @@ struct tokenize_b_args {
};
static void set_entropy_context_b(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct tokenize_b_args *const args = arg;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0,
col, row);
vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, col, row);
}
static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
@@ -344,8 +338,7 @@ static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
static INLINE void add_token_no_extra(TOKENEXTRA **t,
const vpx_prob *context_tree,
int16_t token,
unsigned int *counts) {
int16_t token, unsigned int *counts) {
(*t)->context_tree = context_tree;
(*t)->token = token;
(*t)++;
@@ -353,9 +346,8 @@ static INLINE void add_token_no_extra(TOKENEXTRA **t,
}
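// add_token_no_extra() is the fast path for tokens that carry no extra-bit
// payload (e.g. ZERO_TOKEN, EOB_TOKEN): it writes only the context tree and
// token fields, skipping the extra store that add_token() performs.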
static void tokenize_b(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct tokenize_b_args *const args = arg;
VP9_COMP *cpi = args->cpi;
ThreadData *const td = args->td;
MACROBLOCK *const x = &td->mb;
@@ -367,7 +359,7 @@ static void tokenize_b(int plane, int block, int row, int col,
MODE_INFO *mi = xd->mi[0];
int pt; /* near block/prev token context index */
int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
TOKENEXTRA *t = *tp; /* store tokens starting here */
int eob = p->eobs[block];
const PLANE_TYPE type = get_plane_type(plane);
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -408,8 +400,7 @@ static void tokenize_b(int plane, int block, int row, int col,
vp9_get_token_extra(v, &token, &extra);
add_token(&t, coef_probs[band[c]][pt], token, extra,
counts[band[c]][pt]);
add_token(&t, coef_probs[band[c]][pt], token, extra, counts[band[c]][pt]);
token_cache[scan[c]] = vp9_pt_energy_class[token];
++c;
@@ -432,8 +423,7 @@ struct is_skippable_args {
};
static void is_skippable(int plane, int block, int row, int col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *argv) {
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) {
struct is_skippable_args *args = argv;
(void)plane;
(void)plane_bsize;
@@ -447,7 +437,7 @@ static void is_skippable(int plane, int block, int row, int col,
// vp9_foreach_transform_block() and simplify is_skippable().
int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
int result = 1;
struct is_skippable_args args = {x->plane[plane].eobs, &result};
struct is_skippable_args args = { x->plane[plane].eobs, &result };
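  // result starts at 1 and the is_skippable() visitor clears it (through
  // args) once any transform block in the plane has a nonzero eob, so the
  // plane is skippable only when every block quantized to zero.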
vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable,
&args);
return result;
@@ -458,36 +448,35 @@ static void has_high_freq_coeff(int plane, int block, int row, int col,
void *argv) {
struct is_skippable_args *args = argv;
int eobs = (tx_size == TX_4X4) ? 3 : 10;
(void) plane;
(void) plane_bsize;
(void) row;
(void) col;
(void)plane;
(void)plane_bsize;
(void)row;
(void)col;
*(args->skippable) |= (args->eobs[block] > eobs);
}
int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
int result = 0;
struct is_skippable_args args = {x->plane[plane].eobs, &result};
struct is_skippable_args args = { x->plane[plane].eobs, &result };
vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane,
has_high_freq_coeff, &args);
return result;
}
void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int dry_run, int seg_skip, BLOCK_SIZE bsize) {
void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int dry_run,
int seg_skip, BLOCK_SIZE bsize) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
const int ctx = vp9_get_skip_context(xd);
struct tokenize_b_args arg = {cpi, td, t};
struct tokenize_b_args arg = { cpi, td, t };
if (seg_skip) {
assert(mi->skip);
}
if (mi->skip) {
if (!dry_run && !seg_skip)
++td->counts->skip[ctx][1];
if (!dry_run && !seg_skip) ++td->counts->skip[ctx][1];
reset_skip_context(xd, bsize);
return;
}


@@ -20,15 +20,14 @@
extern "C" {
#endif
#define EOSB_TOKEN 127 // Not signalled, encoder only
#define EOSB_TOKEN 127 // Not signalled, encoder only
#if CONFIG_VP9_HIGHBITDEPTH
typedef int32_t EXTRABIT;
typedef int32_t EXTRABIT;
#else
typedef int16_t EXTRABIT;
typedef int16_t EXTRABIT;
#endif
typedef struct {
int16_t token;
EXTRABIT extra;
@@ -84,19 +83,19 @@ static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits,
const int *cat6_high_table) {
if (token != CATEGORY6_TOKEN)
return vp9_extra_bits[token].cost[extrabits >> 1];
return vp9_cat6_low_cost[(extrabits >> 1) & 0xff]
+ cat6_high_table[extrabits >> 9];
return vp9_cat6_low_cost[(extrabits >> 1) & 0xff] +
cat6_high_table[extrabits >> 9];
}
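// Cost-splitting sketch, assuming the low bit of extrabits is the sign (as
// the >> 1 indexing implies): extrabits == 0x305 has magnitude 0x182, giving
// low-table index 0x182 & 0xff == 0x82 and high-table index 0x305 >> 9 == 1;
// the two table entries sum to the full CAT6 extra-bit cost.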
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE const int* vp9_get_high_cost_table(int bit_depth) {
static INLINE const int *vp9_get_high_cost_table(int bit_depth) {
return bit_depth == 8 ? vp9_cat6_high_cost
: (bit_depth == 10 ? vp9_cat6_high10_high_cost :
vp9_cat6_high12_high_cost);
: (bit_depth == 10 ? vp9_cat6_high10_high_cost
: vp9_cat6_high12_high_cost);
}
#else
static INLINE const int* vp9_get_high_cost_table(int bit_depth) {
(void) bit_depth;
static INLINE const int *vp9_get_high_cost_table(int bit_depth) {
(void)bit_depth;
return vp9_cat6_high_cost;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -114,8 +113,7 @@ static INLINE void vp9_get_token_extra(int v, int16_t *token, EXTRABIT *extra) {
*extra = vp9_dct_cat_lt_10_value_tokens[v].extra;
}
static INLINE int16_t vp9_get_token(int v) {
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL)
return 10;
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) return 10;
return vp9_dct_cat_lt_10_value_tokens[v].token;
}
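// Sketch: CAT6_MIN_VAL is 67 here, so vp9_get_token(-100) and
// vp9_get_token(80) both return 10 (CATEGORY6_TOKEN), while smaller
// magnitudes come from the precomputed vp9_dct_cat_lt_10_value_tokens table.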


@@ -18,15 +18,15 @@ extern "C" {
#endif
void vp9_tree_probs_from_distribution(vpx_tree tree,
unsigned int branch_ct[ /* n - 1 */ ][2],
const unsigned int num_events[ /* n */ ]);
unsigned int branch_ct[/* n - 1 */][2],
const unsigned int num_events[/* n */]);
struct vp9_token {
int value;
int len;
};
void vp9_tokens_from_tree(struct vp9_token*, const vpx_tree_index *);
void vp9_tokens_from_tree(struct vp9_token *, const vpx_tree_index *);
static INLINE void vp9_write_tree(vpx_writer *w, const vpx_tree_index *tree,
const vpx_prob *probs, int bits, int len,


@@ -78,8 +78,8 @@ static void fdct4_sse2(__m128i *in) {
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[4], v[4];
u[0]=_mm_unpacklo_epi16(in[0], in[1]);
u[1]=_mm_unpacklo_epi16(in[3], in[2]);
u[0] = _mm_unpacklo_epi16(in[0], in[1]);
u[1] = _mm_unpacklo_epi16(in[3], in[2]);
v[0] = _mm_add_epi16(u[0], u[1]);
v[1] = _mm_sub_epi16(u[0], u[1]);
@@ -151,14 +151,12 @@ static void fadst4_sse2(__m128i *in) {
transpose_4x4(in);
}
void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in[4];
switch (tx_type) {
case DCT_DCT:
vpx_fdct4x4_sse2(input, output, stride);
break;
case DCT_DCT: vpx_fdct4x4_sse2(input, output, stride); break;
case ADST_DCT:
load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
@@ -177,21 +175,18 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output,
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
__m128i zero;
int pass;
// Constants
@@ -208,14 +203,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
// Load input
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i *in[8];
int index = 0;
@@ -469,9 +464,9 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
// Setup global values
{
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}
{
@@ -503,15 +498,15 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@@ -524,8 +519,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
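      // (nzero_coeff0/1 are all-ones, i.e. -1, exactly where a coefficient
      // is nonzero, so subtracting them increments those scan indices by
      // one.)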
@@ -568,14 +563,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@@ -588,8 +583,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -615,10 +610,10 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
}
} else {
do {
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
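    // (Per the pointer setup earlier in this function, n_coeffs runs from
    // -block_size up to 0 while the coefficient pointers are pre-offset past
    // the block, so the loop condition doubles as the bounds check.)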
@@ -628,14 +623,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
// load 8x8 array
static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
int stride) {
in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
in[0] = _mm_slli_epi16(in[0], 2);
in[1] = _mm_slli_epi16(in[1], 2);
@@ -930,14 +925,14 @@ static void fadst8_sse2(__m128i *in) {
__m128i in0, in1, in2, in3, in4, in5, in6, in7;
// properly aligned for butterfly input
in0 = in[7];
in1 = in[0];
in2 = in[5];
in3 = in[2];
in4 = in[3];
in5 = in[4];
in6 = in[1];
in7 = in[6];
in0 = in[7];
in1 = in[0];
in2 = in[5];
in3 = in[2];
in4 = in[3];
in5 = in[4];
in6 = in[1];
in7 = in[6];
// column transformation
// stage 1
@@ -1135,14 +1130,12 @@ static void fadst8_sse2(__m128i *in) {
array_transpose_8x8(in, in);
}
void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in[8];
switch (tx_type) {
case DCT_DCT:
vpx_fdct8x8_sse2(input, output, stride);
break;
case DCT_DCT: vpx_fdct8x8_sse2(input, output, stride); break;
case ADST_DCT:
load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
@@ -1164,13 +1157,11 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output,
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
static INLINE void load_buffer_16x16(const int16_t* input, __m128i *in0,
static INLINE void load_buffer_16x16(const int16_t *input, __m128i *in0,
__m128i *in1, int stride) {
// load first 8 columns
load_buffer_8x8(input, in0, stride);
@@ -1530,13 +1521,13 @@ static void fdct16_8col(__m128i *in) {
v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
in[1] = _mm_packs_epi32(v[0], v[1]);
in[9] = _mm_packs_epi32(v[2], v[3]);
in[5] = _mm_packs_epi32(v[4], v[5]);
in[1] = _mm_packs_epi32(v[0], v[1]);
in[9] = _mm_packs_epi32(v[2], v[3]);
in[5] = _mm_packs_epi32(v[4], v[5]);
in[13] = _mm_packs_epi32(v[6], v[7]);
in[3] = _mm_packs_epi32(v[8], v[9]);
in[3] = _mm_packs_epi32(v[8], v[9]);
in[11] = _mm_packs_epi32(v[10], v[11]);
in[7] = _mm_packs_epi32(v[12], v[13]);
in[7] = _mm_packs_epi32(v[12], v[13]);
in[15] = _mm_packs_epi32(v[14], v[15]);
}
@@ -2022,14 +2013,12 @@ static void fadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
}
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in0[16], in1[16];
switch (tx_type) {
case DCT_DCT:
vpx_fdct16x16_sse2(input, output, stride);
break;
case DCT_DCT: vpx_fdct16x16_sse2(input, output, stride); break;
case ADST_DCT:
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
@@ -2051,8 +2040,6 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output,
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}


@@ -15,16 +15,12 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
void vp9_fdct8x8_quant_ssse3(
const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
__m128i zero;
int pass;
// Constants
@@ -42,14 +38,14 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
// Load input
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
__m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
__m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
__m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
__m128i *in[8];
int index = 0;
@@ -298,9 +294,9 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
// Setup global values
{
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}
{
@@ -332,15 +328,15 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@@ -353,8 +349,8 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -388,7 +384,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
if (nzflag) {
qcoeff0 = _mm_adds_epi16(qcoeff0, round);
@@ -402,20 +398,20 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
} else {
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
}
}
@@ -429,8 +425,8 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -456,10 +452,10 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
}
} else {
do {
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;


@@ -37,17 +37,11 @@ static INLINE int sum_diff_16x1(__m128i acc_diff) {
}
// Denoise a 16x1 vector.
static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
const uint8_t *mc_running_avg_y,
uint8_t *running_avg_y,
const __m128i *k_0,
const __m128i *k_4,
const __m128i *k_8,
const __m128i *k_16,
const __m128i *l3,
const __m128i *l32,
const __m128i *l21,
__m128i acc_diff) {
static INLINE __m128i vp9_denoiser_16x1_sse2(
const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y,
const __m128i *k_0, const __m128i *k_4, const __m128i *k_8,
const __m128i *k_16, const __m128i *l3, const __m128i *l32,
const __m128i *l21, __m128i acc_diff) {
// Calculate differences
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
const __m128i v_mc_running_avg_y =
@@ -69,7 +63,7 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
__m128i adj2 = _mm_and_si128(mask2, *l32);
const __m128i adj1 = _mm_and_si128(mask1, *l21);
const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
__m128i adj, padj, nadj;
__m128i adj, padj, nadj;
// Combine the adjustments and get absolute adjustments.
adj2 = _mm_add_epi8(adj2, adj1);
@@ -95,9 +89,8 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
// Denoise a 16x1 vector with a weaker filter.
static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
const uint8_t *sig, const uint8_t *mc_running_avg_y,
uint8_t *running_avg_y, const __m128i k_0,
const __m128i k_delta, __m128i acc_diff) {
const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y,
const __m128i k_0, const __m128i k_delta, __m128i acc_diff) {
__m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
// Calculate differences.
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
@@ -108,8 +101,7 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
// Obtain the sign. FF if diff is negative.
const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
// Clamp absolute difference to delta to get the adjustment.
const __m128i adj =
_mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
// Restore the sign and get positive and negative adjustments.
__m128i padj, nadj;
padj = _mm_andnot_si128(diff_sign, adj);
@@ -126,14 +118,17 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
}
// Denoise 8x8 and 8x16 blocks.
static int vp9_denoiser_NxM_sse2_small(
const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) {
static int vp9_denoiser_NxM_sse2_small(const uint8_t *sig, int sig_stride,
const uint8_t *mc_running_avg_y,
int mc_avg_y_stride,
uint8_t *running_avg_y, int avg_y_stride,
int increase_denoising, BLOCK_SIZE bs,
int motion_magnitude, int width) {
int sum_diff_thresh, r, sum_diff = 0;
const int shift_inc = (increase_denoising &&
motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
1 : 0;
const int shift_inc =
(increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
? 1
: 0;
uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
__m128i acc_diff = _mm_setzero_si128();
const __m128i k_0 = _mm_setzero_si128();
@@ -153,15 +148,13 @@ static int vp9_denoiser_NxM_sse2_small(
memcpy(sig_buffer[r], sig, width);
memcpy(sig_buffer[r] + width, sig + sig_stride, width);
memcpy(mc_running_buffer[r], mc_running_avg_y, width);
memcpy(mc_running_buffer[r] + width,
mc_running_avg_y + mc_avg_y_stride, width);
memcpy(mc_running_buffer[r] + width, mc_running_avg_y + mc_avg_y_stride,
width);
memcpy(running_buffer[r], running_avg_y, width);
memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width);
acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
mc_running_buffer[r],
running_buffer[r],
&k_0, &k_4, &k_8, &k_16,
&l3, &l32, &l21, acc_diff);
acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r],
running_buffer[r], &k_0, &k_4, &k_8,
&k_16, &l3, &l32, &l21, acc_diff);
memcpy(running_avg_y, running_buffer[r], width);
memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
// Update pointers for next iteration.
@@ -184,19 +177,19 @@ static int vp9_denoiser_NxM_sse2_small(
// The delta is set by the excess of absolute pixel diff over the
// threshold.
const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
num_pels_log2_lookup[bs]) + 1;
const int delta =
((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1;
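      // Worked example: if |sum_diff| exceeds the threshold by 96 on an 8x8
      // block (num_pels_log2_lookup[BLOCK_8X8] == 6), delta = (96 >> 6) + 1
      // == 2.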
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
running_avg_y -= avg_y_stride * (b_height << 1);
for (r = 0; r < b_height; ++r) {
acc_diff = vp9_denoiser_adj_16x1_sse2(
sig_buffer[r], mc_running_buffer[r], running_buffer[r],
k_0, k_delta, acc_diff);
sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_0,
k_delta, acc_diff);
memcpy(running_avg_y, running_buffer[r], width);
memcpy(running_avg_y + avg_y_stride,
running_buffer[r] + width, width);
memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width,
width);
// Update pointers for next iteration.
running_avg_y += (avg_y_stride << 1);
}
@@ -216,14 +209,14 @@ static int vp9_denoiser_NxM_sse2_small(
static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
const uint8_t *mc_running_avg_y,
int mc_avg_y_stride,
uint8_t *running_avg_y,
int avg_y_stride,
uint8_t *running_avg_y, int avg_y_stride,
int increase_denoising, BLOCK_SIZE bs,
int motion_magnitude) {
int sum_diff_thresh, r, c, sum_diff = 0;
const int shift_inc = (increase_denoising &&
motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
1 : 0;
const int shift_inc =
(increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
? 1
: 0;
__m128i acc_diff[4][4];
const __m128i k_0 = _mm_setzero_si128();
const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
@@ -248,9 +241,9 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
for (r = 0; r < b_height; ++r) {
for (c = 0; c < b_width_shift4; ++c) {
acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
&k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]);
acc_diff[c][r >> 4] = vp9_denoiser_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, &k_0, &k_4, &k_8, &k_16, &l3,
&l32, &l21, acc_diff[c][r >> 4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
@@ -259,7 +252,7 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
for (c = 0; c < b_width_shift4; ++c) {
sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
}
}
@@ -272,8 +265,8 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
{
sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
if (abs(sum_diff) > sum_diff_thresh) {
const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
num_pels_log2_lookup[bs]) + 1;
const int delta =
((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1;
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
@@ -284,9 +277,9 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
sum_diff = 0;
for (r = 0; r < b_height; ++r) {
for (c = 0; c < b_width_shift4; ++c) {
acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, k_0,
k_delta, acc_diff[c][r>>4]);
acc_diff[c][r >> 4] =
vp9_denoiser_adj_16x1_sse2(sig, mc_running_avg_y, running_avg_y,
k_0, k_delta, acc_diff[c][r >> 4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
@@ -295,7 +288,7 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
for (c = 0; c < b_width_shift4; ++c) {
sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
}
}
sig = sig - b_width + sig_stride;
@@ -314,27 +307,21 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
}
int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
const uint8_t *mc_avg,
int mc_avg_stride,
const uint8_t *mc_avg, int mc_avg_stride,
uint8_t *avg, int avg_stride,
int increase_denoising,
BLOCK_SIZE bs,
int increase_denoising, BLOCK_SIZE bs,
int motion_magnitude) {
// Rank by frequency of the block type to have an early termination.
if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 ||
bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 ||
bs == BLOCK_32X64 || bs == BLOCK_64X32) {
return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
mc_avg, mc_avg_stride,
avg, avg_stride,
increase_denoising,
bs, motion_magnitude);
return vp9_denoiser_NxM_sse2_big(sig, sig_stride, mc_avg, mc_avg_stride,
avg, avg_stride, increase_denoising, bs,
motion_magnitude);
} else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) {
return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
mc_avg, mc_avg_stride,
avg, avg_stride,
increase_denoising,
bs, motion_magnitude, 8);
return vp9_denoiser_NxM_sse2_small(sig, sig_stride, mc_avg, mc_avg_stride,
avg, avg_stride, increase_denoising, bs,
motion_magnitude, 8);
} else {
return COPY_BLOCK;
}
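// Dispatch sketch: blocks at least 16 pixels wide stream straight through the
// 16x1 kernel in the _big path; 8-wide blocks are packed two rows at a time
// into 16-byte scratch buffers by the _small path; anything else is left for
// a plain copy.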


@@ -9,7 +9,7 @@
*/
#if defined(_MSC_VER)
# include <intrin.h>
#include <intrin.h>
#endif
#include <emmintrin.h>
#include <smmintrin.h>
@@ -19,11 +19,11 @@
#include "vpx_ports/mem.h"
#ifdef __GNUC__
# define LIKELY(v) __builtin_expect(v, 1)
# define UNLIKELY(v) __builtin_expect(v, 0)
#define LIKELY(v) __builtin_expect(v, 1)
#define UNLIKELY(v) __builtin_expect(v, 0)
#else
# define LIKELY(v) (v)
# define UNLIKELY(v) (v)
#define LIKELY(v) (v)
#define UNLIKELY(v) (v)
#endif
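// Illustrative use: if (UNLIKELY(ptr == NULL)) return; -- __builtin_expect
// steers GCC's block layout toward the expected path; on other compilers the
// macros reduce to the bare condition.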
static INLINE int_mv pack_int_mv(int16_t row, int16_t col) {
@@ -40,19 +40,19 @@ static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
return mv.as_int == 0 ? 0 : 1;
}
static INLINE int mv_cost(const int_mv mv,
const int *joint_cost, int *const comp_cost[2]) {
return joint_cost[get_mv_joint(mv)] +
comp_cost[0][mv.as_mv.row] + comp_cost[1][mv.as_mv.col];
static INLINE int mv_cost(const int_mv mv, const int *joint_cost,
int *const comp_cost[2]) {
return joint_cost[get_mv_joint(mv)] + comp_cost[0][mv.as_mv.row] +
comp_cost[1][mv.as_mv.col];
}
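// Note: get_mv_joint() above only distinguishes a zero vector (joint type 0)
// from a nonzero one (type 1), a simplification of the four MV_JOINT_* types
// used elsewhere; the row and column components are still costed
// independently.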
static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
int sad_per_bit) {
const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,
mv.as_mv.col - ref->col);
return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost,
x->nmvsadcost) *
sad_per_bit, VP9_PROB_COST_SHIFT);
const int_mv diff =
pack_int_mv(mv.as_mv.row - ref->row, mv.as_mv.col - ref->col);
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
VP9_PROB_COST_SHIFT);
}
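// mv_cost() returns a rate carrying VP9_PROB_COST_SHIFT (9) fractional bits,
// so multiplying by sad_per_bit and rounding-shifting by the same amount
// lands the cost in the same units as the SAD error term.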
/*****************************************************************************
@@ -71,10 +71,9 @@ static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
* which does not rely on these properties. *
*****************************************************************************/
int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
const search_site_config *cfg, MV *ref_mv,
MV *best_mv, int search_param, int sad_per_bit,
int *num00, const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv) {
const int_mv maxmv = pack_int_mv(x->mv_row_max, x->mv_col_max);
const __m128i v_max_mv_w = _mm_set1_epi32(maxmv.as_int);
@@ -91,12 +90,12 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param];
const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param];
const intptr_t *ss_os = &cfg->ss_os[cfg->searches_per_step * search_param];
const int tot_steps = cfg->total_steps - search_param;
const int_mv fcenter_mv = pack_int_mv(center_mv->row >> 3,
center_mv->col >> 3);
const int_mv fcenter_mv =
pack_int_mv(center_mv->row >> 3, center_mv->col >> 3);
const __m128i vfcmv = _mm_set1_epi32(fcenter_mv.as_int);
const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row);
@@ -109,8 +108,8 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
const int what_stride = x->plane[0].src.stride;
const int in_what_stride = x->e_mbd.plane[0].pre[0].stride;
const uint8_t *const what = x->plane[0].src.buf;
const uint8_t *const in_what = x->e_mbd.plane[0].pre[0].buf +
ref_row * in_what_stride + ref_col;
const uint8_t *const in_what =
x->e_mbd.plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
// Work out the start point for the search
const uint8_t *best_address = in_what;
@@ -181,10 +180,9 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
__m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
__m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
// Set the ones falling outside to zero
v_bo10_q = _mm_and_si128(v_bo10_q,
_mm_cvtepi32_epi64(v_inside_d));
v_bo32_q = _mm_and_si128(v_bo32_q,
_mm_unpackhi_epi32(v_inside_d, v_inside_d));
v_bo10_q = _mm_and_si128(v_bo10_q, _mm_cvtepi32_epi64(v_inside_d));
v_bo32_q =
_mm_and_si128(v_bo32_q, _mm_unpackhi_epi32(v_inside_d, v_inside_d));
// Compute the candidate addresses
v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
@@ -195,9 +193,8 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
#endif
}
fn_ptr->sdx4df(what, what_stride,
(const uint8_t **)&v_blocka[0], in_what_stride,
(uint32_t*)&v_sad_d);
fn_ptr->sdx4df(what, what_stride, (const uint8_t **)&v_blocka[0],
in_what_stride, (uint32_t *)&v_sad_d);
// Look up the component cost of the residual motion vector
{
@@ -226,11 +223,10 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
// Now add in the joint cost
{
const __m128i v_sel_d = _mm_cmpeq_epi32(v_diff_mv_w,
_mm_setzero_si128());
const __m128i v_joint_cost_d = _mm_blendv_epi8(v_joint_cost_1_d,
v_joint_cost_0_d,
v_sel_d);
const __m128i v_sel_d =
_mm_cmpeq_epi32(v_diff_mv_w, _mm_setzero_si128());
const __m128i v_joint_cost_d =
_mm_blendv_epi8(v_joint_cost_1_d, v_joint_cost_0_d, v_sel_d);
v_cost_d = _mm_add_epi32(v_cost_d, v_joint_cost_d);
}

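mvsad_err_cost above weights the table-driven motion-vector cost by sad_per_bit and rounds the product with ROUND_POWER_OF_TWO. A worked sketch of that arithmetic, assuming the libvpx definitions ROUND_POWER_OF_TWO(v, n) = (v + (1 << (n - 1))) >> n and VP9_PROB_COST_SHIFT = 9; the sample inputs are made up:

/* Round-to-nearest scaling as used by mvsad_err_cost above. */
#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
#define PROB_COST_SHIFT 9 /* assumed value of VP9_PROB_COST_SHIFT */

int main(void) {
  const unsigned mv_cost = 1234;   /* joint + row + col cost (made up) */
  const unsigned sad_per_bit = 40; /* rate multiplier (made up) */
  /* (1234 * 40 + 256) >> 9 = 49616 >> 9 = 96 */
  printf("%u\n", ROUND_POWER_OF_TWO(mv_cost * sad_per_bit, PROB_COST_SHIFT));
  return 0;
}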

@@ -13,10 +13,8 @@
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
int64_t vp9_block_error_avx2(const int16_t *coeff,
const int16_t *dqcoeff,
intptr_t block_size,
int64_t *ssz) {
int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
__m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
__m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
__m256i sse_reg_64hi, ssz_reg_64hi;
@@ -29,7 +27,7 @@ int64_t vp9_block_error_avx2(const int16_t *coeff,
sse_reg = _mm256_set1_epi16(0);
ssz_reg = _mm256_set1_epi16(0);
for (i = 0 ; i < block_size ; i+= 16) {
for (i = 0; i < block_size; i += 16) {
// load 32 bytes from coeff and dqcoeff
coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
@@ -66,8 +64,8 @@ int64_t vp9_block_error_avx2(const int16_t *coeff,
_mm256_extractf128_si256(ssz_reg, 1));
// store the results
_mm_storel_epi64((__m128i*)(&sse), sse_reg128);
_mm_storel_epi64((__m128i *)(&sse), sse_reg128);
_mm_storel_epi64((__m128i*)(ssz), ssz_reg128);
_mm_storel_epi64((__m128i *)(ssz), ssz_reg128);
return sse;
}

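The AVX2 kernel above folds two accumulations into one pass: the squared error between coeff and dqcoeff, and the squared source coefficients reported through *ssz. A scalar sketch of the same computation, written from the loop above rather than copied from the C reference:

/* Scalar equivalent of vp9_block_error_avx2's accumulation. */
#include <stdint.h>

int64_t block_error_scalar(const int16_t *coeff, const int16_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
  int64_t sse = 0, sqc = 0;
  intptr_t i;
  for (i = 0; i < block_size; ++i) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    sse += diff * diff;                  /* reconstruction error */
    sqc += (int64_t)coeff[i] * coeff[i]; /* source energy */
  }
  *ssz = sqc;
  return sse;
}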

@@ -19,23 +19,22 @@ extern void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
static void downsample_2_to_1_ssse3(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
int w, int h) {
uint8_t *dst, ptrdiff_t dst_stride, int w,
int h) {
const __m128i mask = _mm_set1_epi16(0x00FF);
const int max_width = w & ~15;
int y;
for (y = 0; y < h; ++y) {
int x;
for (x = 0; x < max_width; x += 16) {
const __m128i a = _mm_loadu_si128((const __m128i *)(src + x * 2 + 0));
const __m128i a = _mm_loadu_si128((const __m128i *)(src + x * 2 + 0));
const __m128i b = _mm_loadu_si128((const __m128i *)(src + x * 2 + 16));
const __m128i a_and = _mm_and_si128(a, mask);
const __m128i b_and = _mm_and_si128(b, mask);
const __m128i c = _mm_packus_epi16(a_and, b_and);
_mm_storeu_si128((__m128i *)(dst + x), c);
}
for (; x < w; ++x)
dst[x] = src[x * 2];
for (; x < w; ++x) dst[x] = src[x * 2];
src += src_stride * 2;
dst += dst_stride;
}
@@ -47,9 +46,8 @@ static INLINE __m128i filter(const __m128i *const a, const __m128i *const b,
const __m128i *const g, const __m128i *const h) {
const __m128i coeffs_ab =
_mm_set_epi8(6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1);
const __m128i coeffs_cd =
_mm_set_epi8(78, -19, 78, -19, 78, -19, 78, -19, 78, -19, 78, -19,
78, -19, 78, -19);
const __m128i coeffs_cd = _mm_set_epi8(78, -19, 78, -19, 78, -19, 78, -19, 78,
-19, 78, -19, 78, -19, 78, -19);
const __m128i const64_x16 = _mm_set1_epi16(64);
const __m128i ab = _mm_unpacklo_epi8(*a, *b);
const __m128i cd = _mm_unpacklo_epi8(*c, *d);
@@ -88,8 +86,8 @@ static void eight_tap_row_ssse3(const uint8_t *src, uint8_t *dst, int w) {
}
static void upsample_1_to_2_ssse3(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
int dst_w, int dst_h) {
uint8_t *dst, ptrdiff_t dst_stride, int dst_w,
int dst_h) {
dst_w /= 2;
dst_h /= 2;
{
@@ -116,7 +114,7 @@ static void upsample_1_to_2_ssse3(const uint8_t *src, ptrdiff_t src_stride,
int x;
eight_tap_row_ssse3(src + src_stride * 4 - 3, tmp7, dst_w);
for (x = 0; x < max_width; x += 8) {
const __m128i A = _mm_loadl_epi64((const __m128i *)(src + x));
const __m128i A = _mm_loadl_epi64((const __m128i *)(src + x));
const __m128i B = _mm_loadl_epi64((const __m128i *)(tmp3 + x));
const __m128i AB = _mm_unpacklo_epi8(A, B);
__m128i C, D, CD;
@@ -179,23 +177,23 @@ void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
const int dst_uv_h = dst_h / 2;
if (dst_w * 2 == src_w && dst_h * 2 == src_h) {
downsample_2_to_1_ssse3(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride, dst_w, dst_h);
downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
downsample_2_to_1_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, dst_w, dst_h);
downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, dst_uv_w, dst_uv_h);
downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, dst_uv_w, dst_uv_h);
vpx_extend_frame_borders(dst);
} else if (dst_w == src_w * 2 && dst_h == src_h * 2) {
// The upsample() supports widths up to 1920 * 2. If greater, fall back
// to vp9_scale_and_extend_frame_c().
if (dst_w/2 <= 1920) {
upsample_1_to_2_ssse3(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride, dst_w, dst_h);
upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
if (dst_w / 2 <= 1920) {
upsample_1_to_2_ssse3(src->y_buffer, src->y_stride, dst->y_buffer,
dst->y_stride, dst_w, dst_h);
upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride, dst->u_buffer,
dst->uv_stride, dst_uv_w, dst_uv_h);
upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride, dst->v_buffer,
dst->uv_stride, dst_uv_w, dst_uv_h);
vpx_extend_frame_borders(dst);
} else {
vp9_scale_and_extend_frame_c(src, dst);

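downsample_2_to_1_ssse3 above keeps every other pixel of every other row: the mask/pack pair extracts the even bytes of 32 input bytes into 16 output bytes, and the scalar tail handles widths that are not a multiple of 16. A scalar sketch of the whole routine:

/* Scalar equivalent of the 2:1 downsample above: each output pixel is the
 * even-indexed source pixel, and source rows advance two at a time. */
#include <stddef.h>
#include <stdint.h>

void downsample_2_to_1_scalar(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride, int w,
                              int h) {
  int x, y;
  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) dst[x] = src[x * 2];
    src += src_stride * 2;
    dst += dst_stride;
  }
}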

@@ -23,41 +23,41 @@ int64_t vp9_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
const int shift = 2 * (bps - 8);
const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i+=8) {
for (i = 0; i < block_size; i += 8) {
// Load the data into xmm registers
__m128i mm_coeff = _mm_load_si128((__m128i*) (coeff + i));
__m128i mm_coeff2 = _mm_load_si128((__m128i*) (coeff + i + 4));
__m128i mm_dqcoeff = _mm_load_si128((__m128i*) (dqcoeff + i));
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i*) (dqcoeff + i + 4));
__m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
__m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
__m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
// Check if any values require more than 15 bit
max = _mm_set1_epi32(0x3fff);
min = _mm_set1_epi32(0xffffc000);
cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
_mm_cmplt_epi32(mm_coeff, min));
_mm_cmplt_epi32(mm_coeff, min));
cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
_mm_cmplt_epi32(mm_coeff2, min));
_mm_cmplt_epi32(mm_coeff2, min));
cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
_mm_cmplt_epi32(mm_dqcoeff, min));
_mm_cmplt_epi32(mm_dqcoeff, min));
cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
_mm_cmplt_epi32(mm_dqcoeff2, min));
test = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(cmp0, cmp1),
_mm_or_si128(cmp2, cmp3)));
_mm_cmplt_epi32(mm_dqcoeff2, min));
test = _mm_movemask_epi8(
_mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
if (!test) {
__m128i mm_diff, error_sse2, sqcoeff_sse2;;
__m128i mm_diff, error_sse2, sqcoeff_sse2;
mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
_mm_storeu_si128((__m128i*)temp, error_sse2);
_mm_storeu_si128((__m128i *)temp, error_sse2);
error = error + temp[0] + temp[1] + temp[2] + temp[3];
_mm_storeu_si128((__m128i*)temp, sqcoeff_sse2);
_mm_storeu_si128((__m128i *)temp, sqcoeff_sse2);
sqcoeff += temp[0] + temp[1] + temp[2] + temp[3];
} else {
for (j = 0; j < 8; j++) {
const int64_t diff = coeff[i + j] - dqcoeff[i + j];
error += diff * diff;
error += diff * diff;
sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
}
}

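The min/max comparisons above gate the fast path: when every value fits in a signed 15-bit range, the packed int16 products summed pairwise by _mm_madd_epi16 cannot overflow an int32 accumulator; otherwise the 64-bit scalar fallback runs. A sketch of that range test in scalar form, illustrative rather than the library's code:

/* 0x3fff = 16383 and 0xffffc000 = -16384 bound the signed 15-bit range;
 * two products of such values summed by _mm_madd_epi16 stay below 2^31. */
#include <stdint.h>
#include <stdio.h>

int fits_15_bits(int32_t v) { return v >= -0x4000 && v <= 0x3fff; }

int main(void) {
  printf("%d %d\n", fits_15_bits(16383), fits_15_bits(-20000)); /* 1 0 */
  return 0;
}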

@@ -14,14 +14,13 @@
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
void vp9_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
__m128i zero;
__m128i thr;
int16_t nzflag;
@@ -44,9 +43,9 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
// Setup global values
{
round = _mm_load_si128((const __m128i*)round_ptr);
quant = _mm_load_si128((const __m128i*)quant_ptr);
dequant = _mm_load_si128((const __m128i*)dequant_ptr);
round = _mm_load_si128((const __m128i *)round_ptr);
quant = _mm_load_si128((const __m128i *)quant_ptr);
dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}
{
@@ -54,8 +53,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
__m128i qcoeff0, qcoeff1;
__m128i qtmp0, qtmp1;
// Do DC and first 15 AC
coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -78,15 +77,15 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
}
{
@@ -99,8 +98,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -121,8 +120,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
__m128i qcoeff0, qcoeff1;
__m128i qtmp0, qtmp1;
coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -133,7 +132,7 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
_mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
if (nzflag) {
qcoeff0 = _mm_adds_epi16(qcoeff0, round);
@@ -147,20 +146,20 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
} else {
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
}
}
@@ -174,8 +173,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -200,10 +199,10 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
}
} else {
do {
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
_mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
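The fast-path quantizer above computes, per coefficient, tmp = ((|coeff| + round) * quant) >> 16, restores the sign with the xor/subtract trick, and dequantizes with a low 16-bit multiply. A scalar sketch of one coefficient, following the vector steps (the vector code uses a saturating add for |coeff| + round, elided here); names are illustrative:

/* Scalar sketch of the per-coefficient math in vp9_quantize_fp_sse2. */
#include <stdint.h>

void quantize_fp_one(int16_t coeff, int16_t round, int16_t quant,
                     int16_t dequant, int16_t *qcoeff, int16_t *dqcoeff) {
  const int16_t sign = coeff >> 15;                /* 0 or -1 */
  const int32_t abs_coeff = (coeff ^ sign) - sign; /* |coeff| */
  const int32_t tmp = ((abs_coeff + round) * quant) >> 16;
  *qcoeff = (int16_t)((tmp ^ sign) - sign);
  *dqcoeff = (int16_t)(*qcoeff * dequant);
}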