Add NEON support to the NDK build system + docs

You can now define LOCAL_ARM_NEON to 'true' in your Android.mk
to indicate that a whole module must be compiled with NEON support.

Alternatively, use the .neon suffix when listing source files in
LOCAL_SRC_FILES to indicate that they should be built with NEON
support. E.g.:

  LOCAL_SRC_FILES := foo.c.neon bar.c zoo.c.arm.neon

Note that .arm.neon is supported, but .neon.arm is NOT.

Also added documentation in docs/CPU-ARM-NEON.TXT

Another patch will provide one or more sample applications
to demonstrate all of this.
This commit is contained in:
David 'Digit' Turner
2010-02-09 12:25:56 -08:00
parent 3b712fda34
commit da4b8312a1
9 changed files with 440 additions and 41 deletions

View File

@@ -59,10 +59,10 @@ else
endif
#
# If LOCAL_ALLOW_UNDEFINED_SYMBOLS, the linker will allow the generation
# If LOCAL_ALLOW_UNDEFINED_SYMBOLS is not true, the linker will refuse the generation
# of a binary that uses undefined symbols.
#
ifeq ($(strip $(LOCAL_ALLOW_UNDEFINED_SYMBOLS)),)
ifneq ($(LOCAL_ALLOW_UNDEFINED_SYMBOLS),true)
LOCAL_LDFLAGS := $(LOCAL_LDFLAGS) $($(my)NO_UNDEFINED_LDFLAGS)
endif
@@ -75,9 +75,7 @@ endif
# We make the default 'thumb'
#
LOCAL_ARM_MODE := $(strip $(LOCAL_ARM_MODE))
ifeq ($(LOCAL_ARM_MODE),)
LOCAL_ARM_MODE := thumb
else
ifdef LOCAL_ARM_MODE
ifneq ($(words $(LOCAL_ARM_MODE)),1)
$(call __ndk_info, LOCAL_ARM_MODE in $(LOCAL_MAKEFILE) must be one word, not '$(LOCAL_ARM_MODE)')
$(call __ndk_error, Aborting)
@@ -89,12 +87,6 @@ else
)
endif
LOCAL_ARM_TEXT_arm = arm$(space)$(space)
LOCAL_ARM_TEXT_thumb = thumb
LOCAL_ARM_CFLAGS := $(TARGET_$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS)
LOCAL_ARM_TEXT := $(LOCAL_ARM_TEXT_$(LOCAL_ARM_MODE))
# As a special case, the original Android build system
# allows one to specify that certain source files can be
# forced to build in ARM mode by using a '.arm' suffix
@@ -105,36 +97,70 @@ LOCAL_ARM_TEXT := $(LOCAL_ARM_TEXT_$(LOCAL_ARM_MODE))
# to build source file $(LOCAL_PATH)/foo.c as ARM
#
# As a special extension, the NDK also supports the .neon extension suffix
# to indicate that a single file can be compiled with ARM NEON support
# We must support both foo.c.neon and foo.c.arm.neon here
#
# Build C source files into .o
# Also, if LOCAL_ARM_NEON is set to 'true', force Neon mode for all source
# files
#
ifeq ($(LOCAL_ARM_MODE),arm)
arm_sources := $(LOCAL_SRC_FILES)
else
arm_sources := $(filter %.arm,$(LOCAL_SRC_FILES))
thumb_sources := $(filter-out %.arm,$(LOCAL_SRC_FILES))
# Collect the sources that must be built with NEON support.
# Start with every file listed with the '.neon' suffix; the suffix itself is
# removed here, but a preceding '.arm' suffix (foo.c.arm.neon) is kept.
neon_sources := $(filter %.neon,$(LOCAL_SRC_FILES))
neon_sources := $(neon_sources:%.neon=%)

# LOCAL_ARM_NEON is optional, but when defined it must be exactly
# 'true' or 'false'; anything else aborts the build.
LOCAL_ARM_NEON := $(strip $(LOCAL_ARM_NEON))
ifdef LOCAL_ARM_NEON
  $(if $(filter-out true false,$(LOCAL_ARM_NEON)),\
    $(call __ndk_info,LOCAL_ARM_NEON must be defined either to 'true' or 'false' in $(LOCAL_MAKEFILE), not '$(LOCAL_ARM_NEON)')\
    $(call __ndk_error,Aborting) \
  )
endif

# LOCAL_ARM_NEON := true forces NEON for every source file of the module.
# NOTE: the substitution reference must be closed by exactly one ')';
# an extra one is appended verbatim to the last file name, which then no
# longer matches any real source file.
ifeq ($(LOCAL_ARM_NEON),true)
  neon_sources += $(LOCAL_SRC_FILES:%.neon=%)
endif
# First, build the 'thumb' sources
# If at least one source file requires NEON, make sure the target ABI can
# support it, then tag these files with 'neon'. Any '.arm' suffix is removed
# before tagging so that the tag is attached to the plain file name.
neon_sources := $(strip $(neon_sources))
ifdef neon_sources
  ifneq ($(TARGET_ARCH_ABI),armeabi-v7a)
    $(call __ndk_info,NEON support is only possible for armeabi-v7a ABI)
    $(call __ndk_info,Please add checks against TARGET_ARCH_ABI in $(LOCAL_MAKEFILE))
    $(call __ndk_error,Aborting)
  endif
  $(call tag-src-files,$(neon_sources:%.arm=%),neon)
endif
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:%.neon=%)
# strip the .arm suffix from LOCAL_SRC_FILES
# and tag the relevant sources with the 'arm' tag
#
LOCAL_ARM_MODE := thumb
arm_sources := $(filter %.arm,$(LOCAL_SRC_FILES))
arm_sources := $(arm_sources:%.arm=%)
thumb_sources := $(filter-out %.arm,$(LOCAL_SRC_FILES))
LOCAL_SRC_FILES := $(arm_sources) $(thumb_sources)
$(foreach src,$(filter %.c,$(thumb_sources)), $(call compile-c-source,$(src)))
$(foreach src,$(filter %.S,$(thumb_sources)), $(call compile-s-source,$(src)))
ifeq ($(LOCAL_ARM_MODE),arm)
arm_sources := $(LOCAL_SRC_FILES)
endif
ifeq ($(LOCAL_ARM_MODE),thumb)
arm_sources := $(empty)
endif
$(call tag-src-files,$(arm_sources),arm)
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(thumb_sources)),\
$(call compile-cpp-source,$(src)))
# Then, the 'ARM' ones
# Process all source file tags to determine toolchain-specific
# target compiler flags, and text.
#
LOCAL_ARM_MODE := arm
arm_sources := $(arm_sources:%.arm=%)
$(call TARGET-process-src-files-tags)
$(foreach src,$(filter %.c,$(arm_sources)), $(call compile-c-source,$(src)))
$(foreach src,$(filter %.S,$(arm_sources)), $(call compile-s-source,$(src)))
# only call dump-src-file-tags during debugging
#$(dump-src-file-tags)
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(arm_sources)),\
# Build the sources to object files
#
$(foreach src,$(filter %.c,$(LOCAL_SRC_FILES)), $(call compile-c-source,$(src)))
$(foreach src,$(filter %.S,$(LOCAL_SRC_FILES)), $(call compile-s-source,$(src)))
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(LOCAL_SRC_FILES)),\
$(call compile-cpp-source,$(src)))
#

View File

@@ -31,8 +31,10 @@ NDK_LOCAL_VARS := \
LOCAL_STATIC_WHOLE_LIBRARIES \
LOCAL_SHARED_LIBRARIES \
LOCAL_MAKEFILE \
LOCAL_NO_UNDEFINED_SYMBOLS \
LOCAL_ALLOW_UNDEFINED_SYMBOLS \
LOCAL_ARM_MODE \
LOCAL_ARM_NEON \
# Reset the source file tagging state of the previous module.
# The macro is named 'clear-all-src-tags'; calling an undefined macro name
# silently expands to nothing and would leave stale tags behind.
# It reads LOCAL_SRC_TAGS and LOCAL_SRC_FILES, so it must run before
# clear-vars (presumably LOCAL_SRC_FILES is part of NDK_LOCAL_VARS - confirm).
$(call clear-all-src-tags)
$(call clear-vars, $(NDK_LOCAL_VARS))

View File

@@ -250,6 +250,156 @@ all-makefiles-under = $(wildcard $1/*/Android.mk)
# -----------------------------------------------------------------------------
all-subdir-makefiles = $(call all-makefiles-under,$(call my-dir))
# =============================================================================
#
# Source file tagging support.
#
# Each source file listed in LOCAL_SRC_FILES can have any number of
# 'tags' associated to it. A tag name must not contain space, and its
# usage can vary.
#
# For example, the 'debug' tag is used for sources that must be built
# in debug mode, the 'arm' tag is used for sources that must be built
# using the 32-bit instruction set on ARM platforms, and 'neon' is used
# for sources that must be built with ARM Advanced SIMD (a.k.a. NEON)
# support.
#
# More tags might be introduced in the future.
#
# LOCAL_SRC_TAGS contains the list of all tags used (initially empty)
# LOCAL_SRC_FILES contains the list of all source files.
# LOCAL_SRC_TAG.<tagname> contains the set of source file names tagged
# with <tagname>
# LOCAL_SRC_FILES_TAGS.<filename> contains the set of tags for a given
# source file name
#
# Tags are processed by a toolchain-specific function (TARGET-process-src-files-tags)
# which will call various functions to compute source-file specific settings.
# These are currently stored as:
#
# LOCAL_SRC_FILES_TARGET_CFLAGS.<filename> contains the list of
# target-specific C compiler flags used to compile a given
# source file. This is set by the function TARGET-process-src-files-tags
# defined in the toolchain's setup.mk script.
#
# LOCAL_SRC_FILES_TEXT.<filename> contains the 'text' that will be
# displayed along the label of the build output line. For example
# 'thumb' or 'arm ' with ARM-based toolchains.
#
# =============================================================================
# -----------------------------------------------------------------------------
# Macro    : clear-all-src-tags
# Returns  : nothing useful; resets the tagging state. For each tag listed in
#            LOCAL_SRC_TAGS the matching LOCAL_SRC_TAG.<tag> is emptied, for
#            each file listed in LOCAL_SRC_FILES the per-file tags, target
#            cflags and text are emptied, then LOCAL_SRC_TAGS itself is reset.
# Usage    : $(clear-all-src-tags)
# -----------------------------------------------------------------------------
clear-all-src-tags = \
    $(foreach __t,$(LOCAL_SRC_TAGS),$(eval LOCAL_SRC_TAG.$(__t) := $(empty))) \
    $(foreach __f,$(LOCAL_SRC_FILES), \
        $(eval LOCAL_SRC_FILES_TAGS.$(__f) := $(empty)) \
        $(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__f) := $(empty)) \
        $(eval LOCAL_SRC_FILES_TEXT.$(__f) := $(empty)) \
    ) \
    $(eval LOCAL_SRC_TAGS := $(empty_set))
# -----------------------------------------------------------------------------
# Macro    : tag-src-files
# Arguments: 1: list of source files to tag
#            2: tag name (must not contain space)
# Usage    : $(call tag-src-files,<list-of-source-files>,<tagname>)
# Rationale: Add a tag to a list of source files. Three variables are updated:
#              LOCAL_SRC_TAGS                   gains <tagname>
#              LOCAL_SRC_TAG.<tagname>          gains the listed files
#              LOCAL_SRC_FILES_TAGS.<filename>  gains <tagname>, for each file
#            All three go through the set helpers (set_insert / set_union,
#            assumed to be the GMSL ones), so tagging the same file twice
#            with the same tag does not record the tag twice.
# -----------------------------------------------------------------------------
tag-src-files = \
    $(eval LOCAL_SRC_TAGS := $(call set_insert,$2,$(LOCAL_SRC_TAGS))) \
    $(eval LOCAL_SRC_TAG.$2 := $(call set_union,$1,$(LOCAL_SRC_TAG.$2))) \
    $(foreach __src,$1, \
        $(eval LOCAL_SRC_FILES_TAGS.$(__src) := $(call set_insert,$2,$(LOCAL_SRC_FILES_TAGS.$(__src)))) \
    )
# -----------------------------------------------------------------------------
# Macro    : get-src-files-with-tag
# Arguments: 1: tag name
# Usage    : $(call get-src-files-with-tag,<tagname>)
# Return   : The content of LOCAL_SRC_TAG.<tagname>, i.e. the source file
#            names that were tagged with <tagname> (empty if none were).
# -----------------------------------------------------------------------------
get-src-files-with-tag = $(LOCAL_SRC_TAG.$(1))
# -----------------------------------------------------------------------------
# Macro    : get-src-files-without-tag
# Arguments: 1: tag name
# Usage    : $(call get-src-files-without-tag,<tagname>)
# Return   : The entries of LOCAL_SRC_FILES that are not listed in
#            LOCAL_SRC_TAG.<tagname>, in their LOCAL_SRC_FILES order.
# -----------------------------------------------------------------------------
get-src-files-without-tag = \
    $(filter-out $(LOCAL_SRC_TAG.$(1)),$(LOCAL_SRC_FILES))
# -----------------------------------------------------------------------------
# Macro    : set-src-files-target-cflags
# Arguments: 1: list of source files
#            2: list of compiler flags
# Usage    : $(call set-src-files-target-cflags,<sources>,<flags>)
# Rationale: Assign <flags> to LOCAL_SRC_FILES_TARGET_CFLAGS.<source> for
#            every listed source, replacing any previous value. Meant to be
#            called from the toolchain-specific function that computes the
#            compiler flags of all source files.
# -----------------------------------------------------------------------------
set-src-files-target-cflags = \
    $(foreach __f,$(1),$(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__f) := $(2)))
# -----------------------------------------------------------------------------
# Macro    : add-src-files-target-cflags
# Arguments: 1: list of source files
#            2: list of compiler flags
# Usage    : $(call add-src-files-target-cflags,<sources>,<flags>)
# Rationale: Same as set-src-files-target-cflags, except that <flags> are
#            appended to LOCAL_SRC_FILES_TARGET_CFLAGS.<source> instead of
#            replacing its current value.
# -----------------------------------------------------------------------------
add-src-files-target-cflags = \
    $(foreach __f,$(1),$(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__f) += $(2)))
# -----------------------------------------------------------------------------
# Macro    : get-src-file-target-cflags
# Arguments: 1: single source file name
# Usage    : $(call get-src-file-target-cflags,<source>)
# Rationale: Return the content of LOCAL_SRC_FILES_TARGET_CFLAGS.<source>,
#            i.e. the target-specific compiler flags recorded for that file
#            by set-src-files-target-cflags / add-src-files-target-cflags.
#            The result is empty if nothing was recorded beforehand.
# -----------------------------------------------------------------------------
get-src-file-target-cflags = $(LOCAL_SRC_FILES_TARGET_CFLAGS.$(1))
# -----------------------------------------------------------------------------
# Macro    : set-src-files-text
# Arguments: 1: list of source files
#            2: text
# Usage    : $(call set-src-files-text,<sources>,<text>)
# Rationale: Assign <text> to LOCAL_SRC_FILES_TEXT.<source> for every listed
#            source, replacing any previous value. The text is a very short
#            string that complements the build label, e.g. 'thumb' or 'arm  '
#            for ARM-based toolchains. This function must be called by the
#            toolchain-specific function that processes all source files.
# -----------------------------------------------------------------------------
set-src-files-text = \
    $(foreach __f,$(1),$(eval LOCAL_SRC_FILES_TEXT.$(__f) := $(2)))
# -----------------------------------------------------------------------------
# Macro    : get-src-file-text
# Arguments: 1: single source file
# Usage    : $(call get-src-file-text,<source>)
# Rationale: Return the content of LOCAL_SRC_FILES_TEXT.<source>, i.e. the
#            'text' recorded for that file by set-src-files-text (empty if
#            none was recorded).
# -----------------------------------------------------------------------------
get-src-file-text = $(LOCAL_SRC_FILES_TEXT.$(1))
# -----------------------------------------------------------------------------
# Macro    : dump-src-file-tags
# Usage    : $(dump-src-file-tags)
# Rationale: This should only be called for debugging the source files
#            tagging system. It prints, through $(info): the list of tags,
#            the list of source files, the files of each tag, the tags of
#            each file, the files with and without the 'arm' tag, and the
#            target cflags and text recorded for each file.
# Note     : the last line of the definition must NOT end with a backslash,
#            otherwise the line following the macro is swallowed into it.
# -----------------------------------------------------------------------------
dump-src-file-tags = \
$(info LOCAL_SRC_TAGS := $(LOCAL_SRC_TAGS)) \
$(info LOCAL_SRC_FILES = $(LOCAL_SRC_FILES)) \
$(foreach __tag,$(LOCAL_SRC_TAGS),$(info LOCAL_SRC_TAG.$(__tag) = $(LOCAL_SRC_TAG.$(__tag)))) \
$(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TAGS.$(__src) = $(LOCAL_SRC_FILES_TAGS.$(__src)))) \
$(info WITH arm = $(call get-src-files-with-tag,arm)) \
$(info WITHOUT arm = $(call get-src-files-without-tag,arm)) \
$(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TARGET_CFLAGS.$(__src) = $(LOCAL_SRC_FILES_TARGET_CFLAGS.$(__src)))) \
$(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TEXT.$(__src) = $(LOCAL_SRC_FILES_TEXT.$(__src))))
# =============================================================================
#
@@ -366,10 +516,10 @@ $$(_OBJ): PRIVATE_SRC := $$(_SRC)
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
$$(_OBJ): PRIVATE_ARM_TEXT := $$(LOCAL_ARM_TEXT)
$$(_OBJ): PRIVATE_ARM_TEXT := $$(call get-src-file-text,$1)
$$(_OBJ): PRIVATE_CC := $$($$(my)CC)
$$(_OBJ): PRIVATE_CFLAGS := $$($$(my)CFLAGS) \
$$($$(my)$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS) \
$$(call get-src-file-target-cflags,$(1)) \
$$(LOCAL_C_INCLUDES:%=-I%) \
-I$$(LOCAL_PATH) \
$$(LOCAL_CFLAGS) \
@@ -424,10 +574,10 @@ $$(_OBJ): PRIVATE_SRC := $$(_SRC)
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
$$(_OBJ): PRIVATE_ARM_TEXT := $$(LOCAL_ARM_TEXT)
$$(_OBJ): PRIVATE_ARM_TEXT := $$(call get-src-file-text,$1)
$$(_OBJ): PRIVATE_CXX := $$($$(my)CXX)
$$(_OBJ): PRIVATE_CXXFLAGS := $$($$(my)CXXFLAGS) \
$$($$(my)$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS) \
$$(call get-src-file-target-cflags,$(1)) \
$$(LOCAL_C_INCLUDES:%=-I%) \
-I$$(LOCAL_PATH) \
$$(LOCAL_CFLAGS) \

View File

@@ -60,6 +60,34 @@ TARGET_thumb_debug_CFLAGS := $(TARGET_thumb_release_CFLAGS) \
-marm \
-fno-omit-frame-pointer
# This function will be called to determine the target CFLAGS used to build
# a C or Assembler source file, based on its tags.
#
# Sources are split along two axes: 'arm' tag or not (thumb), and 'debug'
# tag or not (release). Each of the four combinations receives the matching
# TARGET_<mode>_<build>_CFLAGS exactly once, then the build label text is
# set to 'arm  ' or 'thumb'.
#
# NOTE: ARM Advanced SIMD (a.k.a. NEON) is not supported with this toolchain.
#
TARGET-process-src-files-tags = \
$(eval __arm_sources := $(call get-src-files-with-tag,arm)) \
$(eval __thumb_sources := $(call get-src-files-without-tag,arm)) \
$(eval __debug_sources := $(call get-src-files-with-tag,debug)) \
$(eval __release_sources := $(call get-src-files-without-tag,debug)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__arm_sources),$(__release_sources)),\
    $(TARGET_arm_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__arm_sources),$(__debug_sources)),\
    $(TARGET_arm_debug_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__release_sources)),\
    $(TARGET_thumb_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__debug_sources)),\
    $(TARGET_thumb_debug_CFLAGS)) \
$(call set-src-files-text,$(__arm_sources),arm$(space)$(space)) \
$(call set-src-files-text,$(__thumb_sources),thumb)
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
TARGET_CFLAGS := $(TARGET_CFLAGS.common)

View File

@@ -49,6 +49,9 @@ else
TARGET_ARCH_LDFLAGS :=
endif
# Extra compiler flags appended for source files tagged 'neon'.
TARGET_CFLAGS.neon := -mfpu=neon
TARGET_arm_release_CFLAGS := -O2 \
-fomit-frame-pointer \
-fstrict-aliasing \
@@ -70,6 +73,35 @@ TARGET_thumb_debug_CFLAGS := $(TARGET_thumb_release_CFLAGS) \
-marm \
-fno-omit-frame-pointer
# This function will be called to determine the target CFLAGS used to build
# a C or Assembler source file, based on its tags.
#
# Sources are split along two axes: 'arm' tag or not (thumb), and 'debug'
# tag or not (release). Each of the four combinations receives the matching
# TARGET_<mode>_<build>_CFLAGS exactly once. Sources tagged 'neon' then get
# TARGET_CFLAGS.neon appended on top of these, which is why the 'add' step
# must come after all the 'set' steps. Finally the build label text is set
# to 'arm  ' or 'thumb'.
#
TARGET-process-src-files-tags = \
$(eval __arm_sources := $(call get-src-files-with-tag,arm)) \
$(eval __thumb_sources := $(call get-src-files-without-tag,arm)) \
$(eval __debug_sources := $(call get-src-files-with-tag,debug)) \
$(eval __release_sources := $(call get-src-files-without-tag,debug)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__arm_sources),$(__release_sources)),\
    $(TARGET_arm_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__arm_sources),$(__debug_sources)),\
    $(TARGET_arm_debug_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__release_sources)),\
    $(TARGET_thumb_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__debug_sources)),\
    $(TARGET_thumb_debug_CFLAGS)) \
$(call add-src-files-target-cflags,\
    $(call get-src-files-with-tag,neon),\
    $(TARGET_CFLAGS.neon)) \
$(call set-src-files-text,$(__arm_sources),arm$(space)$(space)) \
$(call set-src-files-text,$(__thumb_sources),thumb)
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
TARGET_CFLAGS := $(TARGET_CFLAGS.common) $(TARGET_ARCH_CFLAGS)

View File

@@ -441,3 +441,28 @@ LOCAL_ARM_MODE
NOTE: Setting APP_OPTIM to 'debug' in your Application.mk will also force
the generation of ARM binaries as well. This is due to bugs in the
toolchain debugger that don't deal too well with thumb code.
LOCAL_ARM_NEON
Defining this variable to 'true' allows the use of ARM Advanced SIMD
(a.k.a. NEON) GCC intrinsics in your C and C++ sources, as well as
NEON instructions in Assembly files.
You should only define it when targeting the 'armeabi-v7a' ABI that
corresponds to the ARMv7 instruction set. Note that not all ARMv7
based CPUs support the NEON instruction set extensions and that you
should perform runtime detection to be able to use this code at runtime
safely. To learn more about this, please read the documentation at
docs/CPU-ARM-NEON.TXT and docs/CPU-FEATURES.TXT.
Alternatively, you can also specify that only specific source files
may be compiled with NEON support by using the '.neon' suffix, as
in:
LOCAL_SRC_FILES = foo.c.neon bar.c zoo.c.arm.neon
In this example, 'foo.c' will be compiled in thumb+neon mode,
'bar.c' will be compiled in 'thumb' mode, and 'zoo.c' will be
compiled in 'arm+neon' mode.
Note that the '.neon' suffix must appear after the '.arm' suffix
if you use both (i.e. foo.c.arm.neon works, but not foo.c.neon.arm !)

View File

@@ -28,13 +28,23 @@ IMPORTANT CHANGES:
More details about ABIs are now available in docs/CPU-ARCH-ABIS.TXT
- Added a new sample static library, named "cpufeatures" to detect
CPU Features at runtime. For now, this can be used to detect the
availability of ARM Advanced SIMD (a.k.a. NEON) instruction support
at runtime in order to provide optimized code paths for specific
operations.
- A small static library named 'cpufeatures' is provided with source code
and can be used at runtime to determine the CPU features supported by the
target device. It should run on all Android platforms, starting from 1.5.
See docs/CPU-FEATURES.TXT for details.
For more information, see docs/CPU-FEATURES.TXT
- Support for the optional ARM Advanced SIMD (a.k.a. NEON) instruction set
extension through the use of the LOCAL_ARM_NEON variable in Android.mk, or
the '.neon' suffix when listing source files.
Neon is an *optional* instruction set extension, and not all Android ARMv7
devices will support it. You will need to use the 'cpufeatures' library to
determine if such code can be used at runtime, and provide alternate code
paths if this is not the case. This is similar to MMX/SSE/3DNow on x86
platforms.
For more information, see docs/CPU-ARM-NEON.TXT
- GCC 4.4.0 is now used by default by the NDK. It generates better code than
GCC 4.2.1, which was used in previous releases. However, the compiler's C++
@@ -87,6 +97,10 @@ OTHER FIXES & CHANGES:
- Actually use the awk version detected by host-setup.sh during the build.
- Only allow undefined symbols when LOCAL_ALLOW_UNDEFINED_SYMBOLS is set
to 'true', just like the documentation says it works. Also fix a typo
in CLEAR_VARS that prevented this variable from being cleared properly.
- Added --prebuilt-ndk=FILE option to build/tools/make-release.sh script to
package a new experimental NDK package archive from the current source tree
plus the toolchain binaries of an existing NDK release package. E.g.:

View File

@@ -104,6 +104,10 @@ Each supported ABI is identified by a unique name.
(Just like one typically does on x86 systems to check/use MMX/SSE2/etc...
specialized instructions).
You can check docs/CPU-FEATURES.TXT to see how to perform these runtime
checks, and docs/CPU-ARM-NEON.TXT to learn about the NDK's support for
building NEON-capable machine code too.
IMPORTANT NOTE: This ABI enforces that all double values are passed during
function calls in 'core' register pairs, instead of dedicated FP ones.
However, all internal computations can be performed with the FP registers

118
ndk/docs/CPU-ARM-NEON.TXT Normal file
View File

@@ -0,0 +1,118 @@
Android NDK & ARM NEON instruction set extension support
--------------------------------------------------------
Introduction:
-------------
Android NDK r3 added support for the new 'armeabi-v7a' ARM-based ABI
that allows native code to use two useful instruction set extensions:
- Thumb-2, which provides performance comparable to 32-bit ARM
instructions with similar compactness to Thumb-1
- VFPv3, which provides hardware FPU registers and computations,
to boost floating point performance significantly.
More specifically, by default 'armeabi-v7a' only supports
VFPv3-D16 which only uses/requires 16 hardware FPU 64-bit registers.
More information about this can be read in docs/CPU-ARCH-ABIS.TXT
The ARMv7 Architecture Reference Manual also defines another optional
instruction set extension known as "ARM Advanced SIMD", nick-named
"NEON". It provides:
- A set of interesting scalar/vector instructions and registers
(the latter are mapped to the same chip area as the FPU ones),
comparable to MMX/SSE/3DNow! in the x86 world.
- VFPv3-D32 as a requirement (i.e. 32 hardware FPU 64-bit registers,
instead of the minimum of 16).
Not all ARMv7-based Android devices will support NEON, but those that
do may benefit in significant ways from the scalar/vector instructions.
The NDK supports the compilation of modules or even specific source
files with support for NEON. What this means is that a specific compiler
flag will be used to enable the use of GCC ARM Neon intrinsics and
VFPv3-D32 at the same time. The intrinsics are described here:
http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
LOCAL_ARM_NEON:
---------------
Define LOCAL_ARM_NEON to 'true' in your module definition, and the NDK
will build all its source files with NEON support. This can be useful if
you want to build a static or shared library that specifically contains
NEON code paths.
Using the .neon suffix:
-----------------------
When listing source files in your LOCAL_SRC_FILES variable, you now have
the option of using the .neon suffix to indicate that you want the
corresponding source(s) to be built with NEON support. For example:
LOCAL_SRC_FILES := foo.c.neon bar.c
Will only build 'foo.c' with NEON support.
Note that the .neon suffix can be used with the .arm suffix too (used to
specify the 32-bit ARM instruction set for non-NEON instructions), but must
appear after it.
In other words, 'foo.c.arm.neon' works, but 'foo.c.neon.arm' does NOT.
Build Requirements:
------------------
NEON support only works when targeting the 'armeabi-v7a' ABI, otherwise the
NDK build scripts will complain and abort. It is important to use checks like
the following in your Android.mk:
# define a static library containing our NEON code
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
include $(CLEAR_VARS)
LOCAL_MODULE := mylib-neon
LOCAL_SRC_FILES := mylib-neon.c
LOCAL_ARM_NEON := true
include $(BUILD_STATIC_LIBRARY)
endif # TARGET_ARCH_ABI == armeabi-v7a
Runtime Detection:
------------------
As said previously, NOT ALL ARMv7-BASED ANDROID DEVICES WILL SUPPORT NEON !
It is thus crucial to perform runtime detection to know if the NEON-capable
machine code can be run on the target device.
To do that, use the 'cpufeatures' library that comes with this NDK. To learn
more about it, see docs/CPU-FEATURES.TXT.
You should explicitly check that android_getCpuFamily() returns
ANDROID_CPU_FAMILY_ARM, and that android_getCpuFeatures() returns a value
that has the ANDROID_CPU_ARM_FEATURE_NEON flag set, as in:
#include <cpu-features.h>
...
...
if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM &&
(android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0)
{
// use NEON-optimized routines
...
}
else
{
// use non-NEON fallback routines instead
...
}
...