Apply fast motion search to golden reference frame
This commit enables the RTC coding mode to run integral-projection-based motion search for the golden reference frame. It improves speed -6 compression performance by 1.1% on average, with gains of 3.46% for jimred_vga, 6.46% for tacomascmvvga, and 0.5% for the vidyo clips. Encoding at speed -6 becomes about 6% slower.
Change-Id: I0fe402ad2edf0149d0349ad304ab9b2abdf0c804
This commit is contained in:
@@ -784,15 +784,43 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
continue;
|
||||
|
||||
if (this_mode == NEWMV) {
|
||||
if (ref_frame > LAST_FRAME)
|
||||
continue;
|
||||
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
|
||||
best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
|
||||
continue;
|
||||
if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
|
||||
&frame_mv[NEWMV][ref_frame],
|
||||
&rate_mv, best_rdc.rdcost))
|
||||
|
||||
if (ref_frame > LAST_FRAME) {
|
||||
int tmp_sad;
|
||||
int dis, cost_list[5];
|
||||
|
||||
if (bsize < BLOCK_16X16)
|
||||
continue;
|
||||
|
||||
tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
|
||||
if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
|
||||
continue;
|
||||
|
||||
frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
|
||||
rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
|
||||
&mbmi->ref_mvs[ref_frame][0].as_mv,
|
||||
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
|
||||
frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
|
||||
frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
|
||||
|
||||
cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
|
||||
&mbmi->ref_mvs[ref_frame][0].as_mv,
|
||||
cpi->common.allow_high_precision_mv,
|
||||
x->errorperbit,
|
||||
&cpi->fn_ptr[bsize],
|
||||
cpi->sf.mv.subpel_force_stop,
|
||||
cpi->sf.mv.subpel_iters_per_step,
|
||||
cond_cost_list(cpi, cost_list),
|
||||
x->nmvjointcost, x->mvcost, &dis,
|
||||
&x->pred_sse[ref_frame], NULL, 0, 0);
|
||||
} else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
|
||||
&frame_mv[NEWMV][ref_frame],
|
||||
&rate_mv, best_rdc.rdcost)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (this_mode != NEARESTMV &&
|
||||
@@ -817,7 +845,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
}
|
||||
|
||||
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
|
||||
pred_filter_search &&
|
||||
pred_filter_search && (ref_frame == LAST_FRAME) &&
|
||||
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
|
||||
(mbmi->mv[0].as_mv.col & 0x07) != 0)) {
|
||||
int pf_rate[3];
|
||||
|
||||
@@ -61,7 +61,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
|
||||
const int ref_stride, const int height) {
|
||||
int idx;
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
__m128i src_line = _mm_load_si128((const __m128i *)ref);
|
||||
__m128i src_line = _mm_loadu_si128((const __m128i *)ref);
|
||||
__m128i s0 = _mm_unpacklo_epi8(src_line, zero);
|
||||
__m128i s1 = _mm_unpackhi_epi8(src_line, zero);
|
||||
__m128i t0, t1;
|
||||
@@ -69,14 +69,14 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
|
||||
ref += ref_stride;
|
||||
|
||||
for (idx = 1; idx < height_1; idx += 2) {
|
||||
src_line = _mm_load_si128((const __m128i *)ref);
|
||||
src_line = _mm_loadu_si128((const __m128i *)ref);
|
||||
t0 = _mm_unpacklo_epi8(src_line, zero);
|
||||
t1 = _mm_unpackhi_epi8(src_line, zero);
|
||||
s0 = _mm_adds_epu16(s0, t0);
|
||||
s1 = _mm_adds_epu16(s1, t1);
|
||||
ref += ref_stride;
|
||||
|
||||
src_line = _mm_load_si128((const __m128i *)ref);
|
||||
src_line = _mm_loadu_si128((const __m128i *)ref);
|
||||
t0 = _mm_unpacklo_epi8(src_line, zero);
|
||||
t1 = _mm_unpackhi_epi8(src_line, zero);
|
||||
s0 = _mm_adds_epu16(s0, t0);
|
||||
@@ -84,7 +84,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
|
||||
ref += ref_stride;
|
||||
}
|
||||
|
||||
src_line = _mm_load_si128((const __m128i *)ref);
|
||||
src_line = _mm_loadu_si128((const __m128i *)ref);
|
||||
t0 = _mm_unpacklo_epi8(src_line, zero);
|
||||
t1 = _mm_unpackhi_epi8(src_line, zero);
|
||||
s0 = _mm_adds_epu16(s0, t0);
|
||||
@@ -101,9 +101,9 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
|
||||
s1 = _mm_srai_epi16(s1, 3);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)hbuf, s0);
|
||||
_mm_storeu_si128((__m128i *)hbuf, s0);
|
||||
hbuf += 8;
|
||||
_mm_store_si128((__m128i *)hbuf, s1);
|
||||
_mm_storeu_si128((__m128i *)hbuf, s1);
|
||||
}
|
||||
|
||||
int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
|
||||
|
||||
Reference in New Issue
Block a user