Add NEON support to the NDK build system + docs
You can now define LOCAL_ARM_NEON to 'true' in your Android.mk to indicate that a whole module must be compiled with NEON support. Alternatively, use the .neon suffix when listing source files in LOCAL_SRC_FILES to indicate that they should be built with NEON support. E.g.: LOCAL_SRC_FILES := foo.c.neon bar.c zoo.c.arm.neon Note that .arm.neon is supported, but .neon.arm is NOT. Also added documentation in docs/CPU-ARM-NEON.TXT Another patch will provide one or more sample applications to demonstrate all of this.
This commit is contained in:
@@ -59,10 +59,10 @@ else
|
||||
endif
|
||||
|
||||
#
|
||||
# If LOCAL_ALLOW_UNDEFINED_SYMBOLS, the linker will allow the generation
|
||||
# If LOCAL_ALLOW_UNDEFINED_SYMBOLS is not true, the linker will not allow the generation
|
||||
# of a binary that uses undefined symbols.
|
||||
#
|
||||
ifeq ($(strip $(LOCAL_ALLOW_UNDEFINED_SYMBOLS)),)
|
||||
ifneq ($(LOCAL_ALLOW_UNDEFINED_SYMBOLS),true)
|
||||
LOCAL_LDFLAGS := $(LOCAL_LDFLAGS) $($(my)NO_UNDEFINED_LDFLAGS)
|
||||
endif
|
||||
|
||||
@@ -75,9 +75,7 @@ endif
|
||||
# We make the default 'thumb'
|
||||
#
|
||||
LOCAL_ARM_MODE := $(strip $(LOCAL_ARM_MODE))
|
||||
ifeq ($(LOCAL_ARM_MODE),)
|
||||
LOCAL_ARM_MODE := thumb
|
||||
else
|
||||
ifdef LOCAL_ARM_MODE
|
||||
ifneq ($(words $(LOCAL_ARM_MODE)),1)
|
||||
$(call __ndk_info, LOCAL_ARM_MODE in $(LOCAL_MAKEFILE) must be one word, not '$(LOCAL_ARM_MODE)')
|
||||
$(call __ndk_error, Aborting)
|
||||
@@ -89,12 +87,6 @@ else
|
||||
)
|
||||
endif
|
||||
|
||||
LOCAL_ARM_TEXT_arm = arm$(space)$(space)
|
||||
LOCAL_ARM_TEXT_thumb = thumb
|
||||
|
||||
LOCAL_ARM_CFLAGS := $(TARGET_$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS)
|
||||
LOCAL_ARM_TEXT := $(LOCAL_ARM_TEXT_$(LOCAL_ARM_MODE))
|
||||
|
||||
# As a special case, the original Android build system
|
||||
# allows one to specify that certain source files can be
|
||||
# forced to build in ARM mode by using a '.arm' suffix
|
||||
@@ -105,36 +97,70 @@ LOCAL_ARM_TEXT := $(LOCAL_ARM_TEXT_$(LOCAL_ARM_MODE))
|
||||
# to build source file $(LOCAL_PATH)/foo.c as ARM
|
||||
#
|
||||
|
||||
# As a special extension, the NDK also supports the .neon extension suffix
|
||||
# to indicate that a single file can be compiled with ARM NEON support
|
||||
# We must support both foo.c.neon and foo.c.arm.neon here
|
||||
#
|
||||
# Build C source files into .o
|
||||
# Also, if LOCAL_ARM_NEON is set to 'true', force Neon mode for all source
|
||||
# files
|
||||
#
|
||||
|
||||
ifeq ($(LOCAL_ARM_MODE),arm)
|
||||
arm_sources := $(LOCAL_SRC_FILES)
|
||||
else
|
||||
arm_sources := $(filter %.arm,$(LOCAL_SRC_FILES))
|
||||
thumb_sources := $(filter-out %.arm,$(LOCAL_SRC_FILES))
|
||||
neon_sources := $(filter %.neon,$(LOCAL_SRC_FILES))
|
||||
neon_sources := $(neon_sources:%.neon=%)
|
||||
|
||||
LOCAL_ARM_NEON := $(strip $(LOCAL_ARM_NEON))
|
||||
ifdef LOCAL_ARM_NEON
|
||||
$(if $(filter-out true false,$(LOCAL_ARM_NEON)),\
|
||||
$(call __ndk_info,LOCAL_ARM_NEON must be defined either to 'true' or 'false' in $(LOCAL_MAKEFILE), not '$(LOCAL_ARM_NEON)')\
|
||||
$(call __ndk_error,Aborting) \
|
||||
)
|
||||
endif
|
||||
ifeq ($(LOCAL_ARM_NEON),true)
|
||||
# LOCAL_ARM_NEON is 'true': every source file of the module is a NEON source.
# The substitution reference drops a trailing '.neon' suffix where present.
neon_sources += $(LOCAL_SRC_FILES:%.neon=%)
|
||||
endif
|
||||
|
||||
# First, build the 'thumb' sources
|
||||
neon_sources := $(strip $(neon_sources))
|
||||
ifdef neon_sources
|
||||
ifneq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
$(call __ndk_info,NEON support is only possible for armeabi-v7a ABI)
|
||||
$(call __ndk_info,Please add checks against TARGET_ARCH_ABI in $(LOCAL_MAKEFILE))
|
||||
$(call __ndk_error,Aborting)
|
||||
endif
|
||||
$(call tag-src-files,$(neon_sources:%.arm=%),neon)
|
||||
endif
|
||||
|
||||
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:%.neon=%)
|
||||
|
||||
# strip the .arm suffix from LOCAL_SRC_FILES
|
||||
# and tag the relevant sources with the 'arm' tag
|
||||
#
|
||||
LOCAL_ARM_MODE := thumb
|
||||
arm_sources := $(filter %.arm,$(LOCAL_SRC_FILES))
|
||||
arm_sources := $(arm_sources:%.arm=%)
|
||||
thumb_sources := $(filter-out %.arm,$(LOCAL_SRC_FILES))
|
||||
LOCAL_SRC_FILES := $(arm_sources) $(thumb_sources)
|
||||
|
||||
$(foreach src,$(filter %.c,$(thumb_sources)), $(call compile-c-source,$(src)))
|
||||
$(foreach src,$(filter %.S,$(thumb_sources)), $(call compile-s-source,$(src)))
|
||||
ifeq ($(LOCAL_ARM_MODE),arm)
|
||||
arm_sources := $(LOCAL_SRC_FILES)
|
||||
endif
|
||||
ifeq ($(LOCAL_ARM_MODE),thumb)
|
||||
arm_sources := $(empty)
|
||||
endif
|
||||
$(call tag-src-files,$(arm_sources),arm)
|
||||
|
||||
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(thumb_sources)),\
|
||||
$(call compile-cpp-source,$(src)))
|
||||
|
||||
# Then, the 'ARM' ones
|
||||
# Process all source file tags to determine toolchain-specific
|
||||
# target compiler flags, and text.
|
||||
#
|
||||
LOCAL_ARM_MODE := arm
|
||||
arm_sources := $(arm_sources:%.arm=%)
|
||||
$(call TARGET-process-src-files-tags)
|
||||
|
||||
$(foreach src,$(filter %.c,$(arm_sources)), $(call compile-c-source,$(src)))
|
||||
$(foreach src,$(filter %.S,$(arm_sources)), $(call compile-s-source,$(src)))
|
||||
# only call dump-src-file-tags during debugging
|
||||
#$(dump-src-file-tags)
|
||||
|
||||
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(arm_sources)),\
|
||||
# Build the sources to object files
|
||||
#
|
||||
$(foreach src,$(filter %.c,$(LOCAL_SRC_FILES)), $(call compile-c-source,$(src)))
|
||||
$(foreach src,$(filter %.S,$(LOCAL_SRC_FILES)), $(call compile-s-source,$(src)))
|
||||
|
||||
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(LOCAL_SRC_FILES)),\
|
||||
$(call compile-cpp-source,$(src)))
|
||||
|
||||
#
|
||||
|
||||
@@ -31,8 +31,10 @@ NDK_LOCAL_VARS := \
|
||||
LOCAL_STATIC_WHOLE_LIBRARIES \
|
||||
LOCAL_SHARED_LIBRARIES \
|
||||
LOCAL_MAKEFILE \
|
||||
LOCAL_NO_UNDEFINED_SYMBOLS \
|
||||
LOCAL_ALLOW_UNDEFINED_SYMBOLS \
|
||||
LOCAL_ARM_MODE \
|
||||
LOCAL_ARM_NEON \
|
||||
|
||||
# Reset all source-file tag data left over from the previous module.
# clear-all-src-tags iterates over LOCAL_SRC_TAGS and LOCAL_SRC_FILES, so it
# is invoked before clear-vars (which presumably resets those variables).
$(call clear-all-src-tags)
|
||||
$(call clear-vars, $(NDK_LOCAL_VARS))
|
||||
|
||||
|
||||
@@ -250,6 +250,156 @@ all-makefiles-under = $(wildcard $1/*/Android.mk)
|
||||
# -----------------------------------------------------------------------------
|
||||
all-subdir-makefiles = $(call all-makefiles-under,$(call my-dir))
|
||||
|
||||
# =============================================================================
|
||||
#
|
||||
# Source file tagging support.
|
||||
#
|
||||
# Each source file listed in LOCAL_SRC_FILES can have any number of
|
||||
# 'tags' associated to it. A tag name must not contain space, and its
|
||||
# usage can vary.
|
||||
#
|
||||
# For example, the 'debug' tag is used for sources that must be built
|
||||
# in debug mode, the 'arm' tag is used for sources that must be built
|
||||
# using the 32-bit instruction set on ARM platforms, and 'neon' is used
|
||||
# for sources that must be built with ARM Advanced SIMD (a.k.a. NEON)
|
||||
# support.
|
||||
#
|
||||
# More tags might be introduced in the future.
|
||||
#
|
||||
# LOCAL_SRC_TAGS contains the list of all tags used (initially empty)
|
||||
# LOCAL_SRC_FILES contains the list of all source files.
|
||||
# LOCAL_SRC_TAG.<tagname> contains the set of source file names tagged
|
||||
# with <tagname>
|
||||
# LOCAL_SRC_FILES_TAGS.<filename> contains the set of tags for a given
|
||||
# source file name
|
||||
#
|
||||
# Tags are processed by a toolchain-specific function (e.g. TARGET-process-src-files-tags)
|
||||
# which will call various functions to compute source-file specific settings.
|
||||
# These are currently stored as:
|
||||
#
|
||||
# LOCAL_SRC_FILES_TARGET_CFLAGS.<filename> contains the list of
|
||||
# target-specific C compiler flags used to compile a given
|
||||
# source file. This is set by the function TARGET-process-src-files-tags
|
||||
# defined in the toolchain's setup.mk script.
|
||||
#
|
||||
# LOCAL_SRC_FILES_TEXT.<filename> contains the 'text' that will be
|
||||
# displayed along the label of the build output line. For example
|
||||
# 'thumb' or 'arm ' with ARM-based toolchains.
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : clear-all-src-tags
# Returns  : nothing useful (whitespace only); evaluated for its side effects.
# Usage    : $(clear-all-src-tags)
# Rationale: Forget every tag and every piece of per-source data:
#              - LOCAL_SRC_TAG.<tag> for each tag in LOCAL_SRC_TAGS
#              - LOCAL_SRC_FILES_TAGS.<src>, LOCAL_SRC_FILES_TARGET_CFLAGS.<src>
#                and LOCAL_SRC_FILES_TEXT.<src> for each file in LOCAL_SRC_FILES
#              - LOCAL_SRC_TAGS itself
# -----------------------------------------------------------------------------
clear-all-src-tags = \
    $(foreach __t,$(LOCAL_SRC_TAGS),$(eval LOCAL_SRC_TAG.$(__t) := $(empty))) \
    $(foreach __f,$(LOCAL_SRC_FILES), \
        $(eval LOCAL_SRC_FILES_TEXT.$(__f) := $(empty)) \
        $(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__f) := $(empty)) \
        $(eval LOCAL_SRC_FILES_TAGS.$(__f) := $(empty)) \
    ) \
    $(eval LOCAL_SRC_TAGS := $(empty_set))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : tag-src-files
# Arguments: 1: list of source files to tag
#            2: tag name (must not contain space)
# Usage    : $(call tag-src-files,<list-of-source-files>,<tagname>)
# Rationale: Attach a tag to each file of a list. Three pieces of state are
#            updated: the per-file tag list LOCAL_SRC_FILES_TAGS.<src>, the
#            per-tag file set LOCAL_SRC_TAG.<tagname>, and the global set of
#            known tags LOCAL_SRC_TAGS.
# -----------------------------------------------------------------------------
tag-src-files = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TAGS.$(__file) += $(2))) \
    $(eval LOCAL_SRC_TAG.$(2) := $(call set_union,$(1),$(LOCAL_SRC_TAG.$(2)))) \
    $(eval LOCAL_SRC_TAGS := $(call set_insert,$(2),$(LOCAL_SRC_TAGS)))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : get-src-files-with-tag
# Arguments: 1: tag name
# Usage    : $(call get-src-files-with-tag,<tagname>)
# Return   : The contents of LOCAL_SRC_TAG.<tagname>, i.e. the source file
#            names that were tagged with <tagname> (empty if none were).
# -----------------------------------------------------------------------------
get-src-files-with-tag = $(LOCAL_SRC_TAG.$(1))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : get-src-files-without-tag
# Arguments: 1: tag name
# Usage    : $(call get-src-files-without-tag,<tagname>)
# Return   : Every entry of LOCAL_SRC_FILES that is not listed in
#            LOCAL_SRC_TAG.<tagname>, i.e. the files NOT tagged with <tagname>.
# -----------------------------------------------------------------------------
get-src-files-without-tag = \
    $(filter-out $(LOCAL_SRC_TAG.$(1)),$(LOCAL_SRC_FILES))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : set-src-files-target-cflags
# Arguments: 1: list of source files
#            2: list of compiler flags
# Usage    : $(call set-src-files-target-cflags,<sources>,<flags>)
# Rationale: Overwrite LOCAL_SRC_FILES_TARGET_CFLAGS.<src> with <flags> for
#            each listed source file, discarding any previous value. Meant to
#            be called from the toolchain-specific function that computes the
#            compiler flags of all source files.
# -----------------------------------------------------------------------------
set-src-files-target-cflags = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__file) := $(2)))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : add-src-files-target-cflags
# Arguments: 1: list of source files
#            2: list of compiler flags
# Usage    : $(call add-src-files-target-cflags,<sources>,<flags>)
# Rationale: Same as set-src-files-target-cflags, except that <flags> are
#            appended (+=) to LOCAL_SRC_FILES_TARGET_CFLAGS.<src> instead of
#            replacing its current value.
# -----------------------------------------------------------------------------
add-src-files-target-cflags = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__file) += $(2)))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : get-src-file-target-cflags
# Arguments: 1: single source file name
# Usage    : $(call get-src-file-target-cflags,<source>)
# Return   : The contents of LOCAL_SRC_FILES_TARGET_CFLAGS.<source>, i.e. the
#            target-specific compiler flags recorded for that file through
#            set-src-files-target-cflags or add-src-files-target-cflags.
#            Empty when nothing was recorded.
# -----------------------------------------------------------------------------
get-src-file-target-cflags = $(LOCAL_SRC_FILES_TARGET_CFLAGS.$(1))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : set-src-files-text
# Arguments: 1: list of source files
#            2: text
# Usage    : $(call set-src-files-text,<sources>,<text>)
# Rationale: Overwrite LOCAL_SRC_FILES_TEXT.<src> with <text> for each listed
#            source file. The text is a very short string shown next to the
#            build label, e.g. 'thumb' or 'arm  ' for ARM-based toolchains.
#            Meant to be called by the toolchain-specific function that
#            processes all source files.
# -----------------------------------------------------------------------------
set-src-files-text = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TEXT.$(__file) := $(2)))
|
||||
|
||||
# -----------------------------------------------------------------------------
# Macro    : get-src-file-text
# Arguments: 1: single source file
# Usage    : $(call get-src-file-text,<source>)
# Return   : The contents of LOCAL_SRC_FILES_TEXT.<source>, i.e. the 'text'
#            recorded for that file by set-src-files-text (empty if none).
# -----------------------------------------------------------------------------
get-src-file-text = $(LOCAL_SRC_FILES_TEXT.$(1))
|
||||
|
||||
# Debugging aid for the source file tagging system: prints, through $(info),
# the list of tags, the list of source files, the files recorded for each tag,
# the tags recorded for each file, the files with/without the 'arm' tag, and
# the target cflags and text recorded for each file.
# This should only be called for debugging the source files tagging system.
#
# The last line deliberately carries no trailing backslash, so that whatever
# follows this definition can never be absorbed into the macro.
dump-src-file-tags = \
$(info LOCAL_SRC_TAGS := $(LOCAL_SRC_TAGS)) \
$(info LOCAL_SRC_FILES = $(LOCAL_SRC_FILES)) \
$(foreach __tag,$(LOCAL_SRC_TAGS),$(info LOCAL_SRC_TAG.$(__tag) = $(LOCAL_SRC_TAG.$(__tag)))) \
$(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TAGS.$(__src) = $(LOCAL_SRC_FILES_TAGS.$(__src)))) \
$(info WITH arm = $(call get-src-files-with-tag,arm)) \
$(info WITHOUT arm = $(call get-src-files-without-tag,arm)) \
$(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TARGET_CFLAGS.$(__src) = $(LOCAL_SRC_FILES_TARGET_CFLAGS.$(__src)))) \
$(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TEXT.$(__src) = $(LOCAL_SRC_FILES_TEXT.$(__src))))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
#
|
||||
@@ -366,10 +516,10 @@ $$(_OBJ): PRIVATE_SRC := $$(_SRC)
|
||||
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
|
||||
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
|
||||
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
|
||||
$$(_OBJ): PRIVATE_ARM_TEXT := $$(LOCAL_ARM_TEXT)
|
||||
$$(_OBJ): PRIVATE_ARM_TEXT := $$(call get-src-file-text,$1)
|
||||
$$(_OBJ): PRIVATE_CC := $$($$(my)CC)
|
||||
$$(_OBJ): PRIVATE_CFLAGS := $$($$(my)CFLAGS) \
|
||||
$$($$(my)$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS) \
|
||||
$$(call get-src-file-target-cflags,$(1)) \
|
||||
$$(LOCAL_C_INCLUDES:%=-I%) \
|
||||
-I$$(LOCAL_PATH) \
|
||||
$$(LOCAL_CFLAGS) \
|
||||
@@ -424,10 +574,10 @@ $$(_OBJ): PRIVATE_SRC := $$(_SRC)
|
||||
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
|
||||
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
|
||||
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
|
||||
$$(_OBJ): PRIVATE_ARM_TEXT := $$(LOCAL_ARM_TEXT)
|
||||
$$(_OBJ): PRIVATE_ARM_TEXT := $$(call get-src-file-text,$1)
|
||||
$$(_OBJ): PRIVATE_CXX := $$($$(my)CXX)
|
||||
$$(_OBJ): PRIVATE_CXXFLAGS := $$($$(my)CXXFLAGS) \
|
||||
$$($$(my)$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS) \
|
||||
$$(call get-src-file-target-cflags,$(1)) \
|
||||
$$(LOCAL_C_INCLUDES:%=-I%) \
|
||||
-I$$(LOCAL_PATH) \
|
||||
$$(LOCAL_CFLAGS) \
|
||||
|
||||
@@ -60,6 +60,34 @@ TARGET_thumb_debug_CFLAGS := $(TARGET_thumb_release_CFLAGS) \
|
||||
-marm \
|
||||
-fno-omit-frame-pointer
|
||||
|
||||
# This function will be called to determine the target CFLAGS used to build
# a C or Assembler source file, based on its tags.
#
# Each source file falls into exactly one of four groups, selected by the
# presence of the 'arm' and 'debug' tags:
#
#   arm   + debug     -> TARGET_arm_debug_CFLAGS
#   arm   + no debug  -> TARGET_arm_release_CFLAGS
#   no arm + no debug -> TARGET_thumb_release_CFLAGS
#   no arm + debug    -> TARGET_thumb_debug_CFLAGS
#
# The build label text is then set to 'arm  ' for files tagged 'arm' and to
# 'thumb' for all the others.
#
# NOTE: ARM Advanced SIMD (a.k.a. NEON) is not supported with this toolchain.
#
TARGET-process-src-files-tags = \
$(eval __arm_sources := $(call get-src-files-with-tag,arm)) \
$(eval __thumb_sources := $(call get-src-files-without-tag,arm)) \
$(eval __debug_sources := $(call get-src-files-with-tag,debug)) \
$(eval __release_sources := $(call get-src-files-without-tag,debug)) \
$(call set-src-files-target-cflags, \
    $(call set_intersection,$(__arm_sources),$(__debug_sources)), \
    $(TARGET_arm_debug_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__arm_sources),$(__release_sources)),\
    $(TARGET_arm_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__release_sources)),\
    $(TARGET_thumb_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__debug_sources)),\
    $(TARGET_thumb_debug_CFLAGS)) \
$(call set-src-files-text,$(__arm_sources),arm$(space)$(space)) \
$(call set-src-files-text,$(__thumb_sources),thumb)
|
||||
|
||||
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
|
||||
TARGET_CFLAGS := $(TARGET_CFLAGS.common)
|
||||
|
||||
|
||||
@@ -49,6 +49,9 @@ else
|
||||
TARGET_ARCH_LDFLAGS :=
|
||||
endif
|
||||
|
||||
TARGET_CFLAGS.neon := \
|
||||
-mfpu=neon
|
||||
|
||||
TARGET_arm_release_CFLAGS := -O2 \
|
||||
-fomit-frame-pointer \
|
||||
-fstrict-aliasing \
|
||||
@@ -70,6 +73,35 @@ TARGET_thumb_debug_CFLAGS := $(TARGET_thumb_release_CFLAGS) \
|
||||
-marm \
|
||||
-fno-omit-frame-pointer
|
||||
|
||||
# This function will be called to determine the target CFLAGS used to build
# a C or Assembler source file, based on its tags.
#
# Each source file falls into exactly one of four groups, selected by the
# presence of the 'arm' and 'debug' tags:
#
#   arm   + debug     -> TARGET_arm_debug_CFLAGS
#   arm   + no debug  -> TARGET_arm_release_CFLAGS
#   no arm + no debug -> TARGET_thumb_release_CFLAGS
#   no arm + debug    -> TARGET_thumb_debug_CFLAGS
#
# Files tagged 'neon' additionally get TARGET_CFLAGS.neon appended; this must
# happen after the calls above, because those replace the per-file flags.
#
# The build label text is then set to 'arm  ' for files tagged 'arm' and to
# 'thumb' for all the others.
#
TARGET-process-src-files-tags = \
$(eval __arm_sources := $(call get-src-files-with-tag,arm)) \
$(eval __thumb_sources := $(call get-src-files-without-tag,arm)) \
$(eval __debug_sources := $(call get-src-files-with-tag,debug)) \
$(eval __release_sources := $(call get-src-files-without-tag,debug)) \
$(call set-src-files-target-cflags, \
    $(call set_intersection,$(__arm_sources),$(__debug_sources)), \
    $(TARGET_arm_debug_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__arm_sources),$(__release_sources)),\
    $(TARGET_arm_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__release_sources)),\
    $(TARGET_thumb_release_CFLAGS)) \
$(call set-src-files-target-cflags,\
    $(call set_intersection,$(__thumb_sources),$(__debug_sources)),\
    $(TARGET_thumb_debug_CFLAGS)) \
$(call add-src-files-target-cflags,\
    $(call get-src-files-with-tag,neon),\
    $(TARGET_CFLAGS.neon)) \
$(call set-src-files-text,$(__arm_sources),arm$(space)$(space)) \
$(call set-src-files-text,$(__thumb_sources),thumb)
|
||||
|
||||
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
|
||||
TARGET_CFLAGS := $(TARGET_CFLAGS.common) $(TARGET_ARCH_CFLAGS)
|
||||
|
||||
|
||||
@@ -441,3 +441,28 @@ LOCAL_ARM_MODE
|
||||
NOTE: Setting APP_OPTIM to 'debug' in your Application.mk will also force
|
||||
the generation of ARM binaries as well. This is due to bugs in the
|
||||
toolchain debugger that don't deal too well with thumb code.
|
||||
|
||||
LOCAL_ARM_NEON
|
||||
Defining this variable to 'true' allows the use of ARM Advanced SIMD
|
||||
(a.k.a. NEON) GCC intrinsics in your C and C++ sources, as well as
|
||||
NEON instructions in Assembly files.
|
||||
|
||||
You should only define it when targeting the 'armeabi-v7a' ABI that
|
||||
corresponds to the ARMv7 instruction set. Note that not all ARMv7
|
||||
based CPUs support the NEON instruction set extensions and that you
|
||||
should perform runtime detection to be able to use this code at runtime
|
||||
safely. To learn more about this, please read the documentation at
|
||||
docs/CPU-ARM-NEON.TXT and docs/CPU-FEATURES.TXT.
|
||||
|
||||
Alternatively, you can also specify that only specific source files
|
||||
may be compiled with NEON support by using the '.neon' suffix, as
|
||||
in:
|
||||
|
||||
LOCAL_SRC_FILES = foo.c.neon bar.c zoo.c.arm.neon
|
||||
|
||||
In this example, 'foo.c' will be compiled in thumb+neon mode,
|
||||
'bar.c' will be compiled in 'thumb' mode, and 'zoo.c' will be
|
||||
compiled in 'arm+neon' mode.
|
||||
|
||||
Note that the '.neon' suffix must appear after the '.arm' suffix
|
||||
if you use both (i.e. foo.c.arm.neon works, but not foo.c.neon.arm !)
|
||||
|
||||
@@ -28,13 +28,23 @@ IMPORTANT CHANGES:
|
||||
|
||||
More details about ABIs are now available in docs/CPU-ARCH-ABIS.TXT
|
||||
|
||||
- Added a new sample static library, named "cpufeatures" to detect
|
||||
CPU Features at runtime. For now, this can be used to detect the
|
||||
availability of ARM Advanced SIMD (a.k.a. NEON) instruction support
|
||||
at runtime in order to provide optimized code paths for specific
|
||||
operations.
|
||||
- A small static library named 'cpufeatures' is provided with source code
|
||||
and can be used at runtime to determine the CPU features supported by the
|
||||
target device. It should run on all Android platforms, starting from 1.5.
|
||||
|
||||
See docs/CPU-FEATURES.TXT for details.
|
||||
For more information, see docs/CPU-FEATURES.TXT
|
||||
|
||||
- Support for the optional ARM Advanced SIMD (a.k.a. NEON) instruction set
|
||||
extension through the use of the LOCAL_ARM_NEON variable in Android.mk, or
|
||||
the '.neon' suffix when listing source files.
|
||||
|
||||
Neon is an *optional* instruction set extension, and not all Android ARMv7
|
||||
devices will support it. You will need to use the 'cpufeatures' library to
|
||||
determine if such code can be used at runtime, and provide alternate code
|
||||
paths if this is not the case. This is similar to MMX/SSE/3DNow on x86
|
||||
platforms.
|
||||
|
||||
For more information, see docs/CPU-ARM-NEON.TXT
|
||||
|
||||
- GCC 4.4.0 is now used by default by the NDK. It generates better code than
|
||||
GCC 4.2.1, which was used in previous releases. However, the compiler's C++
|
||||
@@ -87,6 +97,10 @@ OTHER FIXES & CHANGES:
|
||||
|
||||
- Actually use the awk version detected by host-setup.sh during the build.
|
||||
|
||||
- Only allow undefined symbols when LOCAL_ALLOW_UNDEFINED_SYMBOLS is set
|
||||
to 'true', just like the documentation says it works. Also fix a typo
|
||||
in CLEAR_VARS that prevented this variable from being cleared properly.
|
||||
|
||||
- Added --prebuilt-ndk=FILE option to build/tools/make-release.sh script to
|
||||
package a new experimental NDK package archive from the current source tree
|
||||
plus the toolchain binaries of an existing NDK release package. E.g.:
|
||||
|
||||
@@ -104,6 +104,10 @@ Each supported ABI is identified by a unique name.
|
||||
(Just like one typically does on x86 systems to check/use MMX/SSE2/etc...
|
||||
specialized instructions).
|
||||
|
||||
You can check docs/CPU-FEATURES.TXT to see how to perform these runtime
|
||||
checks, and docs/CPU-ARM-NEON.TXT to learn about the NDK's support for
|
||||
building NEON-capable machine code too.
|
||||
|
||||
IMPORTANT NOTE: This ABI enforces that all double values are passed during
|
||||
function calls in 'core' register pairs, instead of dedicated FP ones.
|
||||
However, all internal computations can be performed with the FP registers
|
||||
|
||||
118
ndk/docs/CPU-ARM-NEON.TXT
Normal file
118
ndk/docs/CPU-ARM-NEON.TXT
Normal file
@@ -0,0 +1,118 @@
|
||||
Android NDK & ARM NEON instruction set extension support
|
||||
--------------------------------------------------------
|
||||
|
||||
Introduction:
|
||||
-------------
|
||||
|
||||
Android NDK r3 added support for the new 'armeabi-v7a' ARM-based ABI
|
||||
that allows native code to use two useful instruction set extensions:
|
||||
|
||||
- Thumb-2, which provides performance comparable to 32-bit ARM
|
||||
instructions with similar compactness to Thumb-1
|
||||
|
||||
- VFPv3, which provides hardware FPU registers and computations,
|
||||
to boost floating point performance significantly.
|
||||
|
||||
More specifically, by default 'armeabi-v7a' only supports
|
||||
VFPv3-D16 which only uses/requires 16 hardware FPU 64-bit registers.
|
||||
|
||||
More information about this can be read in docs/CPU-ARCH-ABIS.TXT
|
||||
|
||||
The ARMv7 Architecture Reference Manual also defines another optional
|
||||
instruction set extension known as "ARM Advanced SIMD", nick-named
|
||||
"NEON". It provides:
|
||||
|
||||
- A set of interesting scalar/vector instructions and registers
|
||||
(the latter are mapped to the same chip area as the FPU ones),
|
||||
comparable to MMX/SSE/3DNow! in the x86 world.
|
||||
|
||||
- VFPv3-D32 as a requirement (i.e. 32 hardware FPU 64-bit registers,
|
||||
instead of the minimum of 16).
|
||||
|
||||
Not all ARMv7-based Android devices will support NEON, but those that
|
||||
do may benefit in significant ways from the scalar/vector instructions.
|
||||
|
||||
The NDK supports the compilation of modules or even specific source
|
||||
files with support for NEON. What this means is that a specific compiler
|
||||
flag will be used to enable the use of GCC ARM Neon intrinsics and
|
||||
VFPv3-D32 at the same time. The intrinsics are described here:
|
||||
|
||||
http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
|
||||
|
||||
|
||||
LOCAL_ARM_NEON:
|
||||
---------------
|
||||
|
||||
Define LOCAL_ARM_NEON to 'true' in your module definition, and the NDK
|
||||
will build all its source files with NEON support. This can be useful if
|
||||
you want to build a static or shared library that specifically contains
|
||||
NEON code paths.
|
||||
|
||||
|
||||
Using the .neon suffix:
|
||||
-----------------------
|
||||
|
||||
When listing source files in your LOCAL_SRC_FILES variable, you now have
|
||||
the option of using the .neon suffix to indicate that you want the
|
||||
corresponding source(s) to be built with Neon support. For example:
|
||||
|
||||
LOCAL_SRC_FILES := foo.c.neon bar.c
|
||||
|
||||
Will only build 'foo.c' with NEON support.
|
||||
|
||||
Note that the .neon suffix can be used with the .arm suffix too (used to
|
||||
specify the 32-bit ARM instruction set for non-NEON instructions), but must
|
||||
appear after it.
|
||||
|
||||
In other words, 'foo.c.arm.neon' works, but 'foo.c.neon.arm' does NOT.
|
||||
|
||||
|
||||
Build Requirements:
|
||||
------------------
|
||||
|
||||
Neon support only works when targeting the 'armeabi-v7a' ABI, otherwise the
|
||||
NDK build scripts will complain and abort. It is important to use checks like
|
||||
the following in your Android.mk:
|
||||
|
||||
# define a static library containing our NEON code
|
||||
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := mylib-neon
|
||||
LOCAL_SRC_FILES := mylib-neon.c
|
||||
LOCAL_ARM_NEON := true
|
||||
include $(BUILD_STATIC_LIBRARY)
|
||||
endif # TARGET_ARCH_ABI == armeabi-v7a
|
||||
|
||||
|
||||
Runtime Detection:
|
||||
------------------
|
||||
|
||||
As said previously, NOT ALL ARMv7-BASED ANDROID DEVICES WILL SUPPORT NEON !
|
||||
It is thus crucial to perform runtime detection to know if the NEON-capable
|
||||
machine code can be run on the target device.
|
||||
|
||||
To do that, use the 'cpufeatures' library that comes with this NDK. To learn
|
||||
more about it, see docs/CPU-FEATURES.TXT.
|
||||
|
||||
You should explicitly check that android_getCpuFamily() returns
|
||||
ANDROID_CPU_FAMILY_ARM, and that android_getCpuFeatures() returns a value
|
||||
that has the ANDROID_CPU_ARM_FEATURE_NEON flag set, as in:
|
||||
|
||||
#include <cpu-features.h>
|
||||
|
||||
...
|
||||
...
|
||||
|
||||
if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM &&
|
||||
(android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0)
|
||||
{
|
||||
// use NEON-optimized routines
|
||||
...
|
||||
}
|
||||
else
|
||||
{
|
||||
// use non-NEON fallback routines instead
|
||||
...
|
||||
}
|
||||
|
||||
...
|
||||
Reference in New Issue
Block a user