Bench Class For More Robust Speed Tests
To make speed testing more robust, the AbstractBench runs the desired code multiple times and reports the median run time with mean absolute deviation around the median. To use the AbstractBench, simply add it as a parent to your test class, and implement the run() method (with the code you want to benchmark). Sample output for VP9QuantizeTest: [ BENCH ] Bypass calculations 4x4 165.8 ms ( ±1.0 ms ) [ BENCH ] Full calculations 4x4 165.8 ms ( ±0.9 ms ) [ BENCH ] Bypass calculations 8x8 129.7 ms ( ±0.9 ms ) [ BENCH ] Full calculations 8x8 130.3 ms ( ±1.4 ms ) [ BENCH ] Bypass calculations 16x16 110.3 ms ( ±1.4 ms ) [ BENCH ] Full calculations 16x16 110.1 ms ( ±0.9 ms ) Change-Id: I1dd649754cb8c4c621eee2728198ea6a555f38b3
This commit is contained in:
38
test/bench.cc
Normal file
38
test/bench.cc
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "test/bench.h"
|
||||||
|
#include "vpx_ports/vpx_timer.h"
|
||||||
|
|
||||||
|
void AbstractBench::runNTimes(int n) {
|
||||||
|
for (int r = 0; r < VPX_BENCH_ROBUST_ITER; r++) {
|
||||||
|
vpx_usec_timer timer;
|
||||||
|
vpx_usec_timer_start(&timer);
|
||||||
|
for (int j = 0; j < n; ++j) {
|
||||||
|
run();
|
||||||
|
}
|
||||||
|
vpx_usec_timer_mark(&timer);
|
||||||
|
times[r] = static_cast<int>(vpx_usec_timer_elapsed(&timer));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints one "[BENCH]" summary line for the batch times currently held in
// times[]: the median batch time followed by the mean absolute deviation
// around that median, both divided by 1000 and labelled "ms".
//
// title: caller-supplied label printed after the bracketed tag.
//
// Side effect: times[] is sorted in place.
void AbstractBench::printMedian(const char *title) {
  std::sort(times, times + VPX_BENCH_ROBUST_ITER);
  // With the array sorted, the middle slot is the median (the iteration
  // count is odd, so there is a single middle element).
  const int med = times[VPX_BENCH_ROBUST_ITER >> 1];
  int sad = 0;  // Sum of absolute deviations from the median.
  for (int t = 0; t < VPX_BENCH_ROBUST_ITER; t++) {
    // The absolute value is spelled out instead of calling abs(): this file
    // includes neither <stdlib.h> nor <cstdlib>, so abs() would only be
    // declared if another header happened to pull it in.
    const int deviation = times[t] - med;
    sad += (deviation < 0) ? -deviation : deviation;
  }
  printf("[%10s] %s %.1f ms ( ±%.1f ms )\n", "BENCH ", title, med / 1000.0,
         sad / (VPX_BENCH_ROBUST_ITER * 1000.0));
}
|
||||||
30
test/bench.h
Normal file
30
test/bench.h
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TEST_BENCH_H_
|
||||||
|
#define TEST_BENCH_H_
|
||||||
|
|
||||||
|
// Number of iterations used to compute median run time.
#define VPX_BENCH_ROBUST_ITER 15

// Base class for benchmarks that report a median run time.
//
// Usage: add AbstractBench as a parent of the test class, implement run()
// with the code to measure, call runNTimes(n) to collect
// VPX_BENCH_ROBUST_ITER timed batches of n calls each, then call
// printMedian(title) to print the summary line.
class AbstractBench {
 public:
  // Zero-fills the timing slots so that printMedian() called before
  // runNTimes() reports zeros instead of reading indeterminate values.
  AbstractBench() : times() {}

  // The class is used as a polymorphic base (run() is virtual), so the
  // destructor is virtual to keep destruction through a base pointer
  // well defined.
  virtual ~AbstractBench() {}

  // Times VPX_BENCH_ROBUST_ITER batches of n calls to run().
  void runNTimes(int n);

  // Prints the median batch time and the mean absolute deviation around it,
  // prefixed with title.
  void printMedian(const char *title);

 protected:
  // Implement this method and put the code to benchmark in it.
  virtual void run() = 0;

 private:
  // One elapsed-time sample per timed batch, filled in by runNTimes().
  int times[VPX_BENCH_ROBUST_ITER];
};
|
||||||
|
|
||||||
|
#endif // TEST_BENCH_H_
|
||||||
@@ -1,4 +1,6 @@
|
|||||||
LIBVPX_TEST_SRCS-yes += acm_random.h
|
LIBVPX_TEST_SRCS-yes += acm_random.h
|
||||||
|
LIBVPX_TEST_SRCS-yes += bench.h
|
||||||
|
LIBVPX_TEST_SRCS-yes += bench.cc
|
||||||
LIBVPX_TEST_SRCS-yes += buffer.h
|
LIBVPX_TEST_SRCS-yes += buffer.h
|
||||||
LIBVPX_TEST_SRCS-yes += clear_system_state.h
|
LIBVPX_TEST_SRCS-yes += clear_system_state.h
|
||||||
LIBVPX_TEST_SRCS-yes += codec_factory.h
|
LIBVPX_TEST_SRCS-yes += codec_factory.h
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
#include "./vpx_config.h"
|
#include "./vpx_config.h"
|
||||||
#include "./vpx_dsp_rtcd.h"
|
#include "./vpx_dsp_rtcd.h"
|
||||||
#include "test/acm_random.h"
|
#include "test/acm_random.h"
|
||||||
|
#include "test/bench.h"
|
||||||
#include "test/buffer.h"
|
#include "test/buffer.h"
|
||||||
#include "test/clear_system_state.h"
|
#include "test/clear_system_state.h"
|
||||||
#include "test/register_state_check.h"
|
#include "test/register_state_check.h"
|
||||||
@@ -67,10 +68,13 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
|
|||||||
scan, iscan);
|
scan, iscan);
|
||||||
}
|
}
|
||||||
|
|
||||||
class VP9QuantizeBase {
|
class VP9QuantizeBase : public AbstractBench {
|
||||||
public:
|
public:
|
||||||
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
|
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
|
||||||
: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
|
: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
|
||||||
|
coeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
|
||||||
|
qcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
|
||||||
|
dqcoeff(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
|
||||||
max_value_ = (1 << bit_depth_) - 1;
|
max_value_ = (1 << bit_depth_) - 1;
|
||||||
zbin_ptr_ =
|
zbin_ptr_ =
|
||||||
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
|
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
|
||||||
@@ -86,6 +90,9 @@ class VP9QuantizeBase {
|
|||||||
vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
|
vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
|
||||||
dequant_ptr_ = reinterpret_cast<int16_t *>(
|
dequant_ptr_ = reinterpret_cast<int16_t *>(
|
||||||
vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
|
vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
|
||||||
|
|
||||||
|
r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
||||||
|
q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
||||||
}
|
}
|
||||||
|
|
||||||
~VP9QuantizeBase() {
|
~VP9QuantizeBase() {
|
||||||
@@ -118,6 +125,15 @@ class VP9QuantizeBase {
|
|||||||
int max_value_;
|
int max_value_;
|
||||||
const int max_size_;
|
const int max_size_;
|
||||||
const bool is_fp_;
|
const bool is_fp_;
|
||||||
|
Buffer<tran_low_t> coeff;
|
||||||
|
Buffer<tran_low_t> qcoeff;
|
||||||
|
Buffer<tran_low_t> dqcoeff;
|
||||||
|
int16_t *r_ptr;
|
||||||
|
int16_t *q_ptr;
|
||||||
|
int count;
|
||||||
|
int skip_block;
|
||||||
|
const scan_order *scan;
|
||||||
|
uint16_t eob;
|
||||||
};
|
};
|
||||||
|
|
||||||
class VP9QuantizeTest : public VP9QuantizeBase,
|
class VP9QuantizeTest : public VP9QuantizeBase,
|
||||||
@@ -128,10 +144,17 @@ class VP9QuantizeTest : public VP9QuantizeBase,
|
|||||||
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
|
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
void run();
|
||||||
const QuantizeFunc quantize_op_;
|
const QuantizeFunc quantize_op_;
|
||||||
const QuantizeFunc ref_quantize_op_;
|
const QuantizeFunc ref_quantize_op_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// AbstractBench hook: performs exactly one call to the quantizer under test
// (quantize_op_), forwarding the fixture's current buffers, helper tables,
// count, skip_block flag and scan order. The caller is expected to have set
// those members up before timing starts.
void VP9QuantizeTest::run() {
  quantize_op_(coeff.TopLeftPixel(),    // input coefficients
               count,                   //
               skip_block,              //
               zbin_ptr_,               //
               r_ptr,                   // round table chosen in the ctor
               q_ptr,                   // quant table chosen in the ctor
               quant_shift_ptr_,        //
               qcoeff.TopLeftPixel(),   // quantized output
               dqcoeff.TopLeftPixel(),  // dequantized output
               dequant_ptr_,            //
               &eob,                    // end-of-block output
               scan->scan,              //
               scan->iscan);
}
|
||||||
|
|
||||||
// This quantizer compares the AC coefficients to the quantization step size to
|
// This quantizer compares the AC coefficients to the quantization step size to
|
||||||
// determine if further multiplication operations are needed.
|
// determine if further multiplication operations are needed.
|
||||||
// Based on vp9_quantize_fp_sse2().
|
// Based on vp9_quantize_fp_sse2().
|
||||||
@@ -269,11 +292,8 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
|
|||||||
|
|
||||||
TEST_P(VP9QuantizeTest, OperationCheck) {
|
TEST_P(VP9QuantizeTest, OperationCheck) {
|
||||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
|
|
||||||
ASSERT_TRUE(coeff.Init());
|
ASSERT_TRUE(coeff.Init());
|
||||||
Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
|
||||||
ASSERT_TRUE(qcoeff.Init());
|
ASSERT_TRUE(qcoeff.Init());
|
||||||
Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
|
||||||
ASSERT_TRUE(dqcoeff.Init());
|
ASSERT_TRUE(dqcoeff.Init());
|
||||||
Buffer<tran_low_t> ref_qcoeff =
|
Buffer<tran_low_t> ref_qcoeff =
|
||||||
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
||||||
@@ -281,7 +301,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
|||||||
Buffer<tran_low_t> ref_dqcoeff =
|
Buffer<tran_low_t> ref_dqcoeff =
|
||||||
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
||||||
ASSERT_TRUE(ref_dqcoeff.Init());
|
ASSERT_TRUE(ref_dqcoeff.Init());
|
||||||
uint16_t eob, ref_eob;
|
uint16_t ref_eob = 0;
|
||||||
|
eob = 0;
|
||||||
|
|
||||||
for (int i = 0; i < number_of_iterations; ++i) {
|
for (int i = 0; i < number_of_iterations; ++i) {
|
||||||
// Test skip block for the first three iterations to catch all the different
|
// Test skip block for the first three iterations to catch all the different
|
||||||
@@ -294,23 +315,21 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
|||||||
sz = TX_32X32;
|
sz = TX_32X32;
|
||||||
}
|
}
|
||||||
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
|
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
|
||||||
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
scan = &vp9_scan_orders[sz][tx_type];
|
||||||
const int count = (4 << sz) * (4 << sz);
|
count = (4 << sz) * (4 << sz);
|
||||||
coeff.Set(&rnd, -max_value_, max_value_);
|
coeff.Set(&rnd, -max_value_, max_value_);
|
||||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||||
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
||||||
quant_fp_ptr_);
|
quant_fp_ptr_);
|
||||||
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
|
||||||
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
|
||||||
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
||||||
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
|
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
|
||||||
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
|
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
|
||||||
scan_order->scan, scan_order->iscan);
|
scan->scan, scan->iscan);
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(quantize_op_(
|
ASM_REGISTER_STATE_CHECK(quantize_op_(
|
||||||
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
|
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
|
||||||
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
|
dequant_ptr_, &eob, scan->scan, scan->iscan));
|
||||||
|
|
||||||
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
|
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
|
||||||
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
|
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
|
||||||
@@ -328,11 +347,8 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
|||||||
|
|
||||||
TEST_P(VP9QuantizeTest, EOBCheck) {
|
TEST_P(VP9QuantizeTest, EOBCheck) {
|
||||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
|
|
||||||
ASSERT_TRUE(coeff.Init());
|
ASSERT_TRUE(coeff.Init());
|
||||||
Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
|
||||||
ASSERT_TRUE(qcoeff.Init());
|
ASSERT_TRUE(qcoeff.Init());
|
||||||
Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
|
||||||
ASSERT_TRUE(dqcoeff.Init());
|
ASSERT_TRUE(dqcoeff.Init());
|
||||||
Buffer<tran_low_t> ref_qcoeff =
|
Buffer<tran_low_t> ref_qcoeff =
|
||||||
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
||||||
@@ -340,10 +356,12 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
|||||||
Buffer<tran_low_t> ref_dqcoeff =
|
Buffer<tran_low_t> ref_dqcoeff =
|
||||||
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
||||||
ASSERT_TRUE(ref_dqcoeff.Init());
|
ASSERT_TRUE(ref_dqcoeff.Init());
|
||||||
uint16_t eob, ref_eob;
|
uint16_t ref_eob = 0;
|
||||||
|
eob = 0;
|
||||||
|
const uint32_t max_index = max_size_ * max_size_ - 1;
|
||||||
|
|
||||||
for (int i = 0; i < number_of_iterations; ++i) {
|
for (int i = 0; i < number_of_iterations; ++i) {
|
||||||
const int skip_block = 0;
|
skip_block = 0;
|
||||||
TX_SIZE sz;
|
TX_SIZE sz;
|
||||||
if (max_size_ == 16) {
|
if (max_size_ == 16) {
|
||||||
sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
|
sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
|
||||||
@@ -351,28 +369,26 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
|||||||
sz = TX_32X32;
|
sz = TX_32X32;
|
||||||
}
|
}
|
||||||
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
|
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
|
||||||
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
scan = &vp9_scan_orders[sz][tx_type];
|
||||||
int count = (4 << sz) * (4 << sz);
|
count = (4 << sz) * (4 << sz);
|
||||||
// Two random entries
|
// Two random entries
|
||||||
coeff.Set(0);
|
coeff.Set(0);
|
||||||
coeff.TopLeftPixel()[rnd(count)] =
|
coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
|
||||||
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
|
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
|
||||||
coeff.TopLeftPixel()[rnd(count)] =
|
coeff.TopLeftPixel()[rnd.RandRange(count) & max_index] =
|
||||||
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
|
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
|
||||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||||
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
||||||
quant_fp_ptr_);
|
quant_fp_ptr_);
|
||||||
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
|
||||||
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
|
||||||
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
||||||
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
|
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
|
||||||
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
|
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
|
||||||
scan_order->scan, scan_order->iscan);
|
scan->scan, scan->iscan);
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(quantize_op_(
|
ASM_REGISTER_STATE_CHECK(quantize_op_(
|
||||||
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
|
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
|
||||||
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
|
dequant_ptr_, &eob, scan->scan, scan->iscan));
|
||||||
|
|
||||||
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
|
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
|
||||||
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
|
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
|
||||||
@@ -390,13 +406,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
|||||||
|
|
||||||
TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
||||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||||
Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
|
|
||||||
ASSERT_TRUE(coeff.Init());
|
ASSERT_TRUE(coeff.Init());
|
||||||
Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
|
||||||
ASSERT_TRUE(qcoeff.Init());
|
ASSERT_TRUE(qcoeff.Init());
|
||||||
Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
|
||||||
ASSERT_TRUE(dqcoeff.Init());
|
ASSERT_TRUE(dqcoeff.Init());
|
||||||
uint16_t eob;
|
|
||||||
TX_SIZE starting_sz, ending_sz;
|
TX_SIZE starting_sz, ending_sz;
|
||||||
|
|
||||||
if (max_size_ == 16) {
|
if (max_size_ == 16) {
|
||||||
@@ -410,18 +422,16 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
|||||||
for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
|
for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
|
||||||
// zbin > coeff, zbin < coeff.
|
// zbin > coeff, zbin < coeff.
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
const int skip_block = 0;
|
skip_block = 0;
|
||||||
// TX_TYPE defines the scan order. That is not relevant to the speed test.
|
// TX_TYPE defines the scan order. That is not relevant to the speed test.
|
||||||
// Pick the first one.
|
// Pick the first one.
|
||||||
const TX_TYPE tx_type = DCT_DCT;
|
const TX_TYPE tx_type = DCT_DCT;
|
||||||
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
count = (4 << sz) * (4 << sz);
|
||||||
const int count = (4 << sz) * (4 << sz);
|
scan = &vp9_scan_orders[sz][tx_type];
|
||||||
|
|
||||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||||
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
||||||
quant_fp_ptr_);
|
quant_fp_ptr_);
|
||||||
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
|
||||||
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
|
||||||
|
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
// When |coeff values| are less than zbin the results are 0.
|
// When |coeff values| are less than zbin the results are 0.
|
||||||
@@ -438,22 +448,15 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
|||||||
coeff.Set(&rnd, -500, 500);
|
coeff.Set(&rnd, -500, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
vpx_usec_timer timer;
|
runNTimes(10000000 / count);
|
||||||
vpx_usec_timer_start(&timer);
|
const char *type =
|
||||||
for (int j = 0; j < 100000000 / count; ++j) {
|
(i == 0) ? "Bypass calculations " : "Full calculations ";
|
||||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
char block_size[16];
|
||||||
q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
|
snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
|
||||||
dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
|
char title[100];
|
||||||
scan_order->scan, scan_order->iscan);
|
snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
|
||||||
}
|
printMedian(title);
|
||||||
vpx_usec_timer_mark(&timer);
|
|
||||||
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
|
|
||||||
if (i == 0) printf("Bypass calculations.\n");
|
|
||||||
if (i == 1) printf("Full calculations.\n");
|
|
||||||
printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
|
|
||||||
elapsed_time / 1000);
|
|
||||||
}
|
}
|
||||||
printf("\n");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user