The idea is to allow developing the NDK in the open, while keeping the platforms and samples under a private branch until the corresponding releases are open-sourced. Change-Id: Iee995fb6c4d3ee1387dea7486e599e079c9e4c6d
164 lines
4.9 KiB
C
164 lines
4.9 KiB
C
/*
|
|
* Copyright (C) 2010 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*/
|
|
#include <jni.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <cpu-features.h>
|
|
#include "helloneon-intrinsics.h"
|
|
|
|
/* Set DEBUG to 1 to route D() messages to the Android log under the
 * "helloneon" tag. With DEBUG at 0, D() expands to an empty statement and
 * its arguments are never evaluated.
 */
#define DEBUG 0

#if DEBUG
#include <android/log.h>
/* Standard C99 variadic macro form (rather than the GNU-only `x...`
 * spelling), matching the no-op definition below. */
#  define  D(...)  __android_log_print(ANDROID_LOG_INFO,"helloneon",__VA_ARGS__)
#else
#  define  D(...)  do {} while (0)
#endif
|
|
|
|
/* Return a timestamp in milliseconds, meant for measuring elapsed time as
 * the difference between two calls.
 *
 * The monotonic clock is used instead of the wall clock so that an
 * adjustment of the system time in the middle of a benchmark cannot
 * produce a negative or wildly wrong interval.
 *
 * Returns 0.0 if the clock cannot be read, rather than computing a value
 * from an uninitialized timespec.
 */
static double
now_ms(void)
{
    struct timespec res;

    if (clock_gettime(CLOCK_MONOTONIC, &res) != 0)
        return 0.0;

    return 1000.0*res.tv_sec + (double)res.tv_nsec/1e6;
}
|
|
|
|
|
|
/* Plain C reference implementation of the FIR filter.
 *
 * For each of the `width` output samples, multiplies `kernelSize` kernel
 * taps with the input samples of a window that starts kernelSize/2 samples
 * before the output position, accumulates the products in an int, then
 * adds 0x8000 and shifts right by 16 to produce the stored short.
 *
 * The window reaches before input[0] and past input[width-1], so the
 * caller must supply readable samples on both sides of `input`.
 */
static void
fir_filter_c(short *output, const short* input, const short* kernel, int width, int kernelSize)
{
    const int first_tap = -kernelSize/2;
    int out_idx = 0;

    while (out_idx < width) {
        const int base = out_idx + first_tap;  /* index of the window's first sample */
        int acc = 0;
        int tap;

        for (tap = 0; tap < kernelSize; tap++)
            acc += kernel[tap] * input[base + tap];

        /* round to nearest, then keep the upper 16 bits */
        output[out_idx] = (short)((acc + 0x8000) >> 16);
        out_idx++;
    }
}
|
|
|
|
/* Benchmark dimensions: number of kernel taps, number of output samples,
 * number of input samples (output size plus one kernel length of padding),
 * and how many times each filter implementation is run.
 */
#define  FIR_KERNEL_SIZE   32
#define  FIR_OUTPUT_SIZE   2560
#define  FIR_INPUT_SIZE    (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE)
#define  FIR_ITERATIONS    600

/* Filter taps: the same 16-entry sequence repeated twice.
 * The entry after each second 0x8c is 0x70; it used to be written `070`,
 * which C parses as octal (decimal 56) instead of the intended hex value.
 */
static const short  fir_kernel[FIR_KERNEL_SIZE] = {
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10,
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10 };

/* Output of the most recent filter run. */
static short        fir_output[FIR_OUTPUT_SIZE];
/* Backing storage for the input samples, padding included. */
static short        fir_input_0[FIR_INPUT_SIZE];
/* Input pointer handed to the filters: skips the first half-kernel of
 * padding so that the filter window may reach before it. */
static const short* fir_input = fir_input_0 + (FIR_KERNEL_SIZE/2);
/* Reference output against which a later run is compared. */
static short        fir_output_expected[FIR_OUTPUT_SIZE];
|
|
|
|
/* This is a trivial JNI example where we use a native method
|
|
* to return a new VM String. See the corresponding Java source
|
|
* file located at:
|
|
*
|
|
* apps/samples/hello-neon/project/src/com/example/neon/HelloNeon.java
|
|
*/
|
|
jstring
|
|
Java_com_example_neon_HelloNeon_stringFromJNI( JNIEnv* env,
|
|
jobject thiz )
|
|
{
|
|
char* str;
|
|
uint64_t features;
|
|
char buffer[512];
|
|
char tryNeon = 0;
|
|
double t0, t1, time_c, time_neon;
|
|
|
|
/* setup FIR input - whatever */
|
|
{
|
|
int nn;
|
|
for (nn = 0; nn < FIR_INPUT_SIZE; nn++) {
|
|
fir_input_0[nn] = (5*nn) & 255;
|
|
}
|
|
fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
|
|
}
|
|
|
|
/* Benchmark small FIR filter loop - C version */
|
|
t0 = now_ms();
|
|
{
|
|
int count = FIR_ITERATIONS;
|
|
for (; count > 0; count--) {
|
|
fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
|
|
}
|
|
}
|
|
t1 = now_ms();
|
|
time_c = t1 - t0;
|
|
|
|
asprintf(&str, "FIR Filter benchmark:\nC version : %g ms\n", time_c);
|
|
strlcpy(buffer, str, sizeof buffer);
|
|
free(str);
|
|
|
|
strlcat(buffer, "Neon version : ", sizeof buffer);
|
|
|
|
if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
|
|
strlcat(buffer, "Not an ARM CPU !\n", sizeof buffer);
|
|
goto EXIT;
|
|
}
|
|
|
|
features = android_getCpuFeatures();
|
|
if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) {
|
|
strlcat(buffer, "Not an ARMv7 CPU !\n", sizeof buffer);
|
|
goto EXIT;
|
|
}
|
|
|
|
/* HAVE_NEON is defined in Android.mk ! */
|
|
#ifdef HAVE_NEON
|
|
if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) {
|
|
strlcat(buffer, "CPU doesn't support NEON !\n", sizeof buffer);
|
|
goto EXIT;
|
|
}
|
|
|
|
/* Benchmark small FIR filter loop - Neon version */
|
|
t0 = now_ms();
|
|
{
|
|
int count = FIR_ITERATIONS;
|
|
for (; count > 0; count--) {
|
|
fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
|
|
}
|
|
}
|
|
t1 = now_ms();
|
|
time_neon = t1 - t0;
|
|
asprintf(&str, "%g ms (x%g faster)\n", time_neon, time_c / (time_neon < 1e-6 ? 1. : time_neon));
|
|
strlcat(buffer, str, sizeof buffer);
|
|
free(str);
|
|
|
|
/* check the result, just in case */
|
|
{
|
|
int nn, fails = 0;
|
|
for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) {
|
|
if (fir_output[nn] != fir_output_expected[nn]) {
|
|
if (++fails < 16)
|
|
D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]);
|
|
}
|
|
}
|
|
D("%d fails\n", fails);
|
|
}
|
|
#else /* !HAVE_NEON */
|
|
strlcat(buffer, "Program not compiled with ARMv7 support !\n", sizeof buffer);
|
|
#endif /* !HAVE_NEON */
|
|
EXIT:
|
|
return (*env)->NewStringUTF(env, buffer);
|
|
}
|