Add NEON support to the NDK build system + docs
You can now define LOCAL_ARM_NEON to 'true' in your Android.mk to indicate that a whole module must be compiled with NEON support. Alternatively, use the .neon suffix when listing source files in LOCAL_SRC_FILES to indicate that they should be built with NEON support. E.g.: LOCAL_SRC_FILES := foo.c.neon bar.c zoo.c.arm.neon Note that .arm.neon is supported, but .neon.arm is NOT. Also added documentation in docs/CPU-ARM-NEON.TXT Another patch will provide one or more sample applications to demonstrate all of this.
This commit is contained in:
@@ -59,10 +59,10 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# If LOCAL_ALLOW_UNDEFINED_SYMBOLS, the linker will allow the generation
|
# If LOCAL_ALLOW_UNDEFINED_SYMBOLS is 'true', the linker will allow the generation
|
||||||
# of a binary that uses undefined symbols.
|
# of a binary that uses undefined symbols.
|
||||||
#
|
#
|
||||||
ifeq ($(strip $(LOCAL_ALLOW_UNDEFINED_SYMBOLS)),)
|
ifneq ($(LOCAL_ALLOW_UNDEFINED_SYMBOLS),true)
|
||||||
LOCAL_LDFLAGS := $(LOCAL_LDFLAGS) $($(my)NO_UNDEFINED_LDFLAGS)
|
LOCAL_LDFLAGS := $(LOCAL_LDFLAGS) $($(my)NO_UNDEFINED_LDFLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -75,9 +75,7 @@ endif
|
|||||||
# We make the default 'thumb'
|
# We make the default 'thumb'
|
||||||
#
|
#
|
||||||
LOCAL_ARM_MODE := $(strip $(LOCAL_ARM_MODE))
|
LOCAL_ARM_MODE := $(strip $(LOCAL_ARM_MODE))
|
||||||
ifeq ($(LOCAL_ARM_MODE),)
|
ifdef LOCAL_ARM_MODE
|
||||||
LOCAL_ARM_MODE := thumb
|
|
||||||
else
|
|
||||||
ifneq ($(words $(LOCAL_ARM_MODE)),1)
|
ifneq ($(words $(LOCAL_ARM_MODE)),1)
|
||||||
$(call __ndk_info, LOCAL_ARM_MODE in $(LOCAL_MAKEFILE) must be one word, not '$(LOCAL_ARM_MODE)')
|
$(call __ndk_info, LOCAL_ARM_MODE in $(LOCAL_MAKEFILE) must be one word, not '$(LOCAL_ARM_MODE)')
|
||||||
$(call __ndk_error, Aborting)
|
$(call __ndk_error, Aborting)
|
||||||
@@ -89,12 +87,6 @@ else
|
|||||||
)
|
)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
LOCAL_ARM_TEXT_arm = arm$(space)$(space)
|
|
||||||
LOCAL_ARM_TEXT_thumb = thumb
|
|
||||||
|
|
||||||
LOCAL_ARM_CFLAGS := $(TARGET_$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS)
|
|
||||||
LOCAL_ARM_TEXT := $(LOCAL_ARM_TEXT_$(LOCAL_ARM_MODE))
|
|
||||||
|
|
||||||
# As a special case, the original Android build system
|
# As a special case, the original Android build system
|
||||||
# allows one to specify that certain source files can be
|
# allows one to specify that certain source files can be
|
||||||
# forced to build in ARM mode by using a '.arm' suffix
|
# forced to build in ARM mode by using a '.arm' suffix
|
||||||
@@ -105,36 +97,70 @@ LOCAL_ARM_TEXT := $(LOCAL_ARM_TEXT_$(LOCAL_ARM_MODE))
|
|||||||
# to build source file $(LOCAL_PATH)/foo.c as ARM
|
# to build source file $(LOCAL_PATH)/foo.c as ARM
|
||||||
#
|
#
|
||||||
|
|
||||||
|
# As a special extension, the NDK also supports the .neon extension suffix
|
||||||
|
# to indicate that a single file can be compiled with ARM NEON support
|
||||||
|
# We must support both foo.c.neon and foo.c.arm.neon here
|
||||||
#
|
#
|
||||||
# Build C source files into .o
|
# Also, if LOCAL_ARM_NEON is set to 'true', force Neon mode for all source
|
||||||
|
# files
|
||||||
#
|
#
|
||||||
|
|
||||||
ifeq ($(LOCAL_ARM_MODE),arm)
|
neon_sources := $(filter %.neon,$(LOCAL_SRC_FILES))
|
||||||
arm_sources := $(LOCAL_SRC_FILES)
|
neon_sources := $(neon_sources:%.neon=%)
|
||||||
else
|
|
||||||
arm_sources := $(filter %.arm,$(LOCAL_SRC_FILES))
|
LOCAL_ARM_NEON := $(strip $(LOCAL_ARM_NEON))
|
||||||
thumb_sources := $(filter-out %.arm,$(LOCAL_SRC_FILES))
|
# When LOCAL_ARM_NEON is set, any word in it other than 'true' or 'false'
# is rejected: print a diagnostic naming the offending makefile, then abort.
ifdef LOCAL_ARM_NEON
  ifneq ($(filter-out true false,$(LOCAL_ARM_NEON)),)
    $(call __ndk_info,LOCAL_ARM_NEON must be defined either to 'true' or 'false' in $(LOCAL_MAKEFILE), not '$(LOCAL_ARM_NEON)')
    $(call __ndk_error,Aborting)
  endif
endif
|
||||||
|
ifeq ($(LOCAL_ARM_NEON),true)
|
||||||
|
# LOCAL_ARM_NEON is 'true': every file of the module becomes a NEON source.
# The substitution reference drops an optional '.neon' suffix from each name.
neon_sources += $(LOCAL_SRC_FILES:%.neon=%)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# First, build the 'thumb' sources
|
neon_sources := $(strip $(neon_sources))
|
||||||
|
# If at least one source file must be built with NEON support, make sure the
# target ABI is armeabi-v7a (abort with a diagnostic otherwise), then tag the
# NEON sources with 'neon'. A trailing '.arm' suffix is removed from the
# names before tagging, so foo.c.arm.neon tags 'foo.c'.
ifdef neon_sources
  ifneq ($(TARGET_ARCH_ABI),armeabi-v7a)
    $(call __ndk_info,NEON support is only possible for armeabi-v7a ABI)
    $(call __ndk_info,Please add checks against TARGET_ARCH_ABI in $(LOCAL_MAKEFILE))
    $(call __ndk_error,Aborting)
  endif
  $(call tag-src-files,$(neon_sources:%.arm=%),neon)
endif
|
||||||
|
|
||||||
|
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:%.neon=%)
|
||||||
|
|
||||||
|
# strip the .arm suffix from LOCAL_SRC_FILES
|
||||||
|
# and tag the relevant sources with the 'arm' tag
|
||||||
#
|
#
|
||||||
LOCAL_ARM_MODE := thumb
|
arm_sources := $(filter %.arm,$(LOCAL_SRC_FILES))
|
||||||
|
arm_sources := $(arm_sources:%.arm=%)
|
||||||
|
thumb_sources := $(filter-out %.arm,$(LOCAL_SRC_FILES))
|
||||||
|
LOCAL_SRC_FILES := $(arm_sources) $(thumb_sources)
|
||||||
|
|
||||||
$(foreach src,$(filter %.c,$(thumb_sources)), $(call compile-c-source,$(src)))
|
ifeq ($(LOCAL_ARM_MODE),arm)
|
||||||
$(foreach src,$(filter %.S,$(thumb_sources)), $(call compile-s-source,$(src)))
|
arm_sources := $(LOCAL_SRC_FILES)
|
||||||
|
endif
|
||||||
|
ifeq ($(LOCAL_ARM_MODE),thumb)
|
||||||
|
arm_sources := $(empty)
|
||||||
|
endif
|
||||||
|
$(call tag-src-files,$(arm_sources),arm)
|
||||||
|
|
||||||
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(thumb_sources)),\
|
# Process all source file tags to determine toolchain-specific
|
||||||
$(call compile-cpp-source,$(src)))
|
# target compiler flags, and text.
|
||||||
|
|
||||||
# Then, the 'ARM' ones
|
|
||||||
#
|
#
|
||||||
LOCAL_ARM_MODE := arm
|
$(call TARGET-process-src-files-tags)
|
||||||
arm_sources := $(arm_sources:%.arm=%)
|
|
||||||
|
|
||||||
$(foreach src,$(filter %.c,$(arm_sources)), $(call compile-c-source,$(src)))
|
# only call dump-src-file-tags during debugging
|
||||||
$(foreach src,$(filter %.S,$(arm_sources)), $(call compile-s-source,$(src)))
|
#$(dump-src-file-tags)
|
||||||
|
|
||||||
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(arm_sources)),\
|
# Build the sources to object files
|
||||||
|
#
|
||||||
|
$(foreach src,$(filter %.c,$(LOCAL_SRC_FILES)), $(call compile-c-source,$(src)))
|
||||||
|
$(foreach src,$(filter %.S,$(LOCAL_SRC_FILES)), $(call compile-s-source,$(src)))
|
||||||
|
|
||||||
|
$(foreach src,$(filter %$(LOCAL_CPP_EXTENSION),$(LOCAL_SRC_FILES)),\
|
||||||
$(call compile-cpp-source,$(src)))
|
$(call compile-cpp-source,$(src)))
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -31,8 +31,10 @@ NDK_LOCAL_VARS := \
|
|||||||
LOCAL_STATIC_WHOLE_LIBRARIES \
|
LOCAL_STATIC_WHOLE_LIBRARIES \
|
||||||
LOCAL_SHARED_LIBRARIES \
|
LOCAL_SHARED_LIBRARIES \
|
||||||
LOCAL_MAKEFILE \
|
LOCAL_MAKEFILE \
|
||||||
LOCAL_NO_UNDEFINED_SYMBOLS \
|
LOCAL_ALLOW_UNDEFINED_SYMBOLS \
|
||||||
LOCAL_ARM_MODE \
|
LOCAL_ARM_MODE \
|
||||||
|
LOCAL_ARM_NEON \
|
||||||
|
|
||||||
|
# Reset the source file tagging data. This must run before the LOCAL_XXX
# variables are cleared, because clear-all-src-tags walks LOCAL_SRC_FILES.
$(call clear-all-src-tags)
|
||||||
$(call clear-vars, $(NDK_LOCAL_VARS))
|
$(call clear-vars, $(NDK_LOCAL_VARS))
|
||||||
|
|
||||||
|
|||||||
@@ -250,6 +250,156 @@ all-makefiles-under = $(wildcard $1/*/Android.mk)
|
|||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
all-subdir-makefiles = $(call all-makefiles-under,$(call my-dir))
|
all-subdir-makefiles = $(call all-makefiles-under,$(call my-dir))
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
#
|
||||||
|
# Source file tagging support.
|
||||||
|
#
|
||||||
|
# Each source file listed in LOCAL_SRC_FILES can have any number of
|
||||||
|
# 'tags' associated to it. A tag name must not contain space, and its
|
||||||
|
# usage can vary.
|
||||||
|
#
|
||||||
|
# For example, the 'debug' tag is used for sources that must be built
|
||||||
|
# in debug mode, the 'arm' tag is used for sources that must be built
|
||||||
|
# using the 32-bit instruction set on ARM platforms, and 'neon' is used
|
||||||
|
# for sources that must be built with ARM Advanced SIMD (a.k.a. NEON)
|
||||||
|
# support.
|
||||||
|
#
|
||||||
|
# More tags might be introduced in the future.
|
||||||
|
#
|
||||||
|
# LOCAL_SRC_TAGS contains the list of all tags used (initially empty)
|
||||||
|
# LOCAL_SRC_FILES contains the list of all source files.
|
||||||
|
# LOCAL_SRC_TAG.<tagname> contains the set of source file names tagged
|
||||||
|
# with <tagname>
|
||||||
|
# LOCAL_SRC_FILES_TAGS.<filename> contains the set of tags for a given
|
||||||
|
# source file name
|
||||||
|
#
|
||||||
|
# Tags are processed by a toolchain-specific function (TARGET-process-src-files-tags)
|
||||||
|
# which will call various functions to compute source-file specific settings.
|
||||||
|
# These are currently stored as:
|
||||||
|
#
|
||||||
|
# LOCAL_SRC_FILES_TARGET_CFLAGS.<filename> contains the list of
|
||||||
|
# target-specific C compiler flags used to compile a given
|
||||||
|
# source file. This is set by the function TARGET-process-src-files-tags
|
||||||
|
# defined in the toolchain's setup.mk script.
|
||||||
|
#
|
||||||
|
# LOCAL_SRC_FILES_TEXT.<filename> contains the 'text' that will be
|
||||||
|
# displayed along the label of the build output line. For example
|
||||||
|
# 'thumb' or 'arm ' with ARM-based toolchains.
|
||||||
|
#
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : clear-all-src-tags
# Returns  : nothing useful; expanding it resets the source tagging state.
# Usage    : $(clear-all-src-tags)
# Rationale: For every file listed in LOCAL_SRC_FILES, empty its tag list,
#            its target cflags and its text. For every tag listed in
#            LOCAL_SRC_TAGS, empty the list of files carrying that tag.
#            LOCAL_SRC_TAGS itself is emptied last, since it drives the
#            per-tag loop.
# -----------------------------------------------------------------------------
clear-all-src-tags = \
    $(foreach __file,$(LOCAL_SRC_FILES), \
        $(eval LOCAL_SRC_FILES_TAGS.$(__file) := $(empty)) \
        $(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__file) := $(empty)) \
        $(eval LOCAL_SRC_FILES_TEXT.$(__file) := $(empty)) \
    ) \
    $(foreach __name,$(LOCAL_SRC_TAGS), \
        $(eval LOCAL_SRC_TAG.$(__name) := $(empty)) \
    ) \
    $(eval LOCAL_SRC_TAGS := $(empty_set))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : tag-src-files
# Arguments: 1: list of source files to tag
#            2: tag name (must not contain space)
# Usage    : $(call tag-src-files,<list-of-source-files>,<tagname>)
# Rationale: Attach <tagname> to each listed source file. Three pieces of
#            state are updated: the tag is inserted into LOCAL_SRC_TAGS, the
#            files are merged into LOCAL_SRC_TAG.<tagname>, and the tag is
#            appended to LOCAL_SRC_FILES_TAGS.<file> for every file.
#            set_insert / set_union are helpers defined outside this section
#            (presumably the GMSL set functions - confirm).
# -----------------------------------------------------------------------------
tag-src-files = \
    $(eval LOCAL_SRC_TAGS := $(call set_insert,$(2),$(LOCAL_SRC_TAGS))) \
    $(eval LOCAL_SRC_TAG.$(2) := $(call set_union,$(1),$(LOCAL_SRC_TAG.$(2)))) \
    $(foreach __file,$(1), \
        $(eval LOCAL_SRC_FILES_TAGS.$(__file) += $(2)) \
    )
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : get-src-files-with-tag
# Arguments: 1: tag name
# Usage    : $(call get-src-files-with-tag,<tagname>)
# Return   : The content of LOCAL_SRC_TAG.<tagname>, i.e. the source file
#            names recorded for <tagname> (empty if the tag was never used).
# -----------------------------------------------------------------------------
get-src-files-with-tag = $(LOCAL_SRC_TAG.$(1))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : get-src-files-without-tag
# Arguments: 1: tag name
# Usage    : $(call get-src-files-without-tag,<tagname>)
# Return   : LOCAL_SRC_FILES minus every name found in LOCAL_SRC_TAG.<tagname>,
#            i.e. the source files that do NOT carry <tagname>.
# -----------------------------------------------------------------------------
get-src-files-without-tag = $(filter-out $(LOCAL_SRC_TAG.$(1)),$(LOCAL_SRC_FILES))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : set-src-files-target-cflags
# Arguments: 1: list of source files
#            2: list of compiler flags
# Usage    : $(call set-src-files-target-cflags,<sources>,<flags>)
# Rationale: Overwrite LOCAL_SRC_FILES_TARGET_CFLAGS.<file> with <flags> for
#            each listed file; any flags stored earlier for that file are
#            discarded. Meant to be called by the toolchain-specific function
#            that computes the compiler flags of all source files.
# -----------------------------------------------------------------------------
set-src-files-target-cflags = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__file) := $(2)))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : add-src-files-target-cflags
# Arguments: 1: list of source files
#            2: list of compiler flags
# Usage    : $(call add-src-files-target-cflags,<sources>,<flags>)
# Rationale: Same as set-src-files-target-cflags, except that <flags> are
#            appended to LOCAL_SRC_FILES_TARGET_CFLAGS.<file> instead of
#            replacing what is already stored there.
# -----------------------------------------------------------------------------
add-src-files-target-cflags = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TARGET_CFLAGS.$(__file) += $(2)))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : get-src-file-target-cflags
# Arguments: 1: single source file name
# Usage    : $(call get-src-file-target-cflags,<source>)
# Return   : The content of LOCAL_SRC_FILES_TARGET_CFLAGS.<source>, i.e. the
#            target-specific compiler flags recorded for that file by
#            set-src-files-target-cflags / add-src-files-target-cflags
#            (empty if none were recorded).
# -----------------------------------------------------------------------------
get-src-file-target-cflags = $(LOCAL_SRC_FILES_TARGET_CFLAGS.$(1))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : set-src-files-text
# Arguments: 1: list of source files
#            2: text
# Usage    : $(call set-src-files-text,<sources>,<text>)
# Rationale: Overwrite LOCAL_SRC_FILES_TEXT.<file> with <text> for each listed
#            file. The text is a very short string shown next to the build
#            label, e.g. 'thumb' or 'arm  ' for ARM-based toolchains. Meant to
#            be called by the toolchain-specific function that processes all
#            source files.
# -----------------------------------------------------------------------------
set-src-files-text = \
    $(foreach __file,$(1),$(eval LOCAL_SRC_FILES_TEXT.$(__file) := $(2)))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
# Macro    : get-src-file-text
# Arguments: 1: single source file
# Usage    : $(call get-src-file-text,<source>)
# Return   : The content of LOCAL_SRC_FILES_TEXT.<source>, i.e. the 'text'
#            stored for that file by set-src-files-text (empty if none).
# -----------------------------------------------------------------------------
get-src-file-text = $(LOCAL_SRC_FILES_TEXT.$(1))
|
||||||
|
|
||||||
|
# This should only be called for debugging the source files tagging system.
#
# Prints, through $(info ...): the list of tags, the list of source files,
# the files recorded for each tag, the tags recorded for each file, the files
# with / without the 'arm' tag, and the target cflags and text of each file.
#
# NOTE: the last line must NOT end with a continuation backslash, otherwise
# the definition would silently absorb whatever line follows it.
dump-src-file-tags = \
    $(info LOCAL_SRC_TAGS := $(LOCAL_SRC_TAGS)) \
    $(info LOCAL_SRC_FILES = $(LOCAL_SRC_FILES)) \
    $(foreach __tag,$(LOCAL_SRC_TAGS),$(info LOCAL_SRC_TAG.$(__tag) = $(LOCAL_SRC_TAG.$(__tag)))) \
    $(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TAGS.$(__src) = $(LOCAL_SRC_FILES_TAGS.$(__src)))) \
    $(info WITH arm = $(call get-src-files-with-tag,arm)) \
    $(info WITHOUT arm = $(call get-src-files-without-tag,arm)) \
    $(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TARGET_CFLAGS.$(__src) = $(LOCAL_SRC_FILES_TARGET_CFLAGS.$(__src)))) \
    $(foreach __src,$(LOCAL_SRC_FILES),$(info LOCAL_SRC_FILES_TEXT.$(__src) = $(LOCAL_SRC_FILES_TEXT.$(__src))))
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
#
|
#
|
||||||
@@ -366,10 +516,10 @@ $$(_OBJ): PRIVATE_SRC := $$(_SRC)
|
|||||||
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
|
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
|
||||||
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
|
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
|
||||||
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
|
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
|
||||||
$$(_OBJ): PRIVATE_ARM_TEXT := $$(LOCAL_ARM_TEXT)
|
$$(_OBJ): PRIVATE_ARM_TEXT := $$(call get-src-file-text,$1)
|
||||||
$$(_OBJ): PRIVATE_CC := $$($$(my)CC)
|
$$(_OBJ): PRIVATE_CC := $$($$(my)CC)
|
||||||
$$(_OBJ): PRIVATE_CFLAGS := $$($$(my)CFLAGS) \
|
$$(_OBJ): PRIVATE_CFLAGS := $$($$(my)CFLAGS) \
|
||||||
$$($$(my)$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS) \
|
$$(call get-src-file-target-cflags,$(1)) \
|
||||||
$$(LOCAL_C_INCLUDES:%=-I%) \
|
$$(LOCAL_C_INCLUDES:%=-I%) \
|
||||||
-I$$(LOCAL_PATH) \
|
-I$$(LOCAL_PATH) \
|
||||||
$$(LOCAL_CFLAGS) \
|
$$(LOCAL_CFLAGS) \
|
||||||
@@ -424,10 +574,10 @@ $$(_OBJ): PRIVATE_SRC := $$(_SRC)
|
|||||||
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
|
$$(_OBJ): PRIVATE_OBJ := $$(_OBJ)
|
||||||
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
|
$$(_OBJ): PRIVATE_MODULE := $$(LOCAL_MODULE)
|
||||||
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
|
$$(_OBJ): PRIVATE_ARM_MODE := $$(LOCAL_ARM_MODE)
|
||||||
$$(_OBJ): PRIVATE_ARM_TEXT := $$(LOCAL_ARM_TEXT)
|
$$(_OBJ): PRIVATE_ARM_TEXT := $$(call get-src-file-text,$1)
|
||||||
$$(_OBJ): PRIVATE_CXX := $$($$(my)CXX)
|
$$(_OBJ): PRIVATE_CXX := $$($$(my)CXX)
|
||||||
$$(_OBJ): PRIVATE_CXXFLAGS := $$($$(my)CXXFLAGS) \
|
$$(_OBJ): PRIVATE_CXXFLAGS := $$($$(my)CXXFLAGS) \
|
||||||
$$($$(my)$(LOCAL_ARM_MODE)_$(LOCAL_BUILD_MODE)_CFLAGS) \
|
$$(call get-src-file-target-cflags,$(1)) \
|
||||||
$$(LOCAL_C_INCLUDES:%=-I%) \
|
$$(LOCAL_C_INCLUDES:%=-I%) \
|
||||||
-I$$(LOCAL_PATH) \
|
-I$$(LOCAL_PATH) \
|
||||||
$$(LOCAL_CFLAGS) \
|
$$(LOCAL_CFLAGS) \
|
||||||
|
|||||||
@@ -60,6 +60,34 @@ TARGET_thumb_debug_CFLAGS := $(TARGET_thumb_release_CFLAGS) \
|
|||||||
-marm \
|
-marm \
|
||||||
-fno-omit-frame-pointer
|
-fno-omit-frame-pointer
|
||||||
|
|
||||||
|
# This function will be called to determine the target CFLAGS used to build
# a C or Assembler source file, based on its tags.
#
# Source files are split along two axes: with / without the 'arm' tag
# (without means thumb) and with / without the 'debug' tag (without means
# release). Each of the four combinations receives the matching
# TARGET_<arm|thumb>_<debug|release>_CFLAGS, then the build text of each file
# is set to 'arm  ' or 'thumb'.
#
# NOTE: ARM Advanced SIMD (a.k.a. NEON) is not supported with this toolchain.
#
TARGET-process-src-files-tags = \
    $(eval __arm_sources := $(call get-src-files-with-tag,arm)) \
    $(eval __thumb_sources := $(call get-src-files-without-tag,arm)) \
    $(eval __debug_sources := $(call get-src-files-with-tag,debug)) \
    $(eval __release_sources := $(call get-src-files-without-tag,debug)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__arm_sources),$(__debug_sources)),\
        $(TARGET_arm_debug_CFLAGS)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__arm_sources),$(__release_sources)),\
        $(TARGET_arm_release_CFLAGS)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__thumb_sources),$(__debug_sources)),\
        $(TARGET_thumb_debug_CFLAGS)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__thumb_sources),$(__release_sources)),\
        $(TARGET_thumb_release_CFLAGS)) \
    $(call set-src-files-text,$(__arm_sources),arm$(space)$(space)) \
    $(call set-src-files-text,$(__thumb_sources),thumb)
|
||||||
|
|
||||||
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
|
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
|
||||||
TARGET_CFLAGS := $(TARGET_CFLAGS.common)
|
TARGET_CFLAGS := $(TARGET_CFLAGS.common)
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,9 @@ else
|
|||||||
TARGET_ARCH_LDFLAGS :=
|
TARGET_ARCH_LDFLAGS :=
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Extra compiler flag appended for source files tagged 'neon'.
TARGET_CFLAGS.neon := -mfpu=neon
|
||||||
|
|
||||||
TARGET_arm_release_CFLAGS := -O2 \
|
TARGET_arm_release_CFLAGS := -O2 \
|
||||||
-fomit-frame-pointer \
|
-fomit-frame-pointer \
|
||||||
-fstrict-aliasing \
|
-fstrict-aliasing \
|
||||||
@@ -70,6 +73,35 @@ TARGET_thumb_debug_CFLAGS := $(TARGET_thumb_release_CFLAGS) \
|
|||||||
-marm \
|
-marm \
|
||||||
-fno-omit-frame-pointer
|
-fno-omit-frame-pointer
|
||||||
|
|
||||||
|
# This function will be called to determine the target CFLAGS used to build
# a C or Assembler source file, based on its tags.
#
# Source files are split along two axes: with / without the 'arm' tag
# (without means thumb) and with / without the 'debug' tag (without means
# release). Each of the four combinations receives the matching
# TARGET_<arm|thumb>_<debug|release>_CFLAGS. Files tagged 'neon' then get
# TARGET_CFLAGS.neon appended; this must happen after the 'set' calls, which
# overwrite the per-file flags. Finally the build text of each file is set
# to 'arm  ' or 'thumb'.
#
TARGET-process-src-files-tags = \
    $(eval __arm_sources := $(call get-src-files-with-tag,arm)) \
    $(eval __thumb_sources := $(call get-src-files-without-tag,arm)) \
    $(eval __debug_sources := $(call get-src-files-with-tag,debug)) \
    $(eval __release_sources := $(call get-src-files-without-tag,debug)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__arm_sources),$(__debug_sources)),\
        $(TARGET_arm_debug_CFLAGS)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__arm_sources),$(__release_sources)),\
        $(TARGET_arm_release_CFLAGS)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__thumb_sources),$(__debug_sources)),\
        $(TARGET_thumb_debug_CFLAGS)) \
    $(call set-src-files-target-cflags,\
        $(call set_intersection,$(__thumb_sources),$(__release_sources)),\
        $(TARGET_thumb_release_CFLAGS)) \
    $(call add-src-files-target-cflags,\
        $(call get-src-files-with-tag,neon),\
        $(TARGET_CFLAGS.neon)) \
    $(call set-src-files-text,$(__arm_sources),arm$(space)$(space)) \
    $(call set-src-files-text,$(__thumb_sources),thumb)
|
||||||
|
|
||||||
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
|
TARGET_CC := $(TOOLCHAIN_PREFIX)gcc
|
||||||
TARGET_CFLAGS := $(TARGET_CFLAGS.common) $(TARGET_ARCH_CFLAGS)
|
TARGET_CFLAGS := $(TARGET_CFLAGS.common) $(TARGET_ARCH_CFLAGS)
|
||||||
|
|
||||||
|
|||||||
@@ -441,3 +441,28 @@ LOCAL_ARM_MODE
|
|||||||
NOTE: Setting APP_OPTIM to 'debug' in your Application.mk will also force
|
NOTE: Setting APP_OPTIM to 'debug' in your Application.mk will also force
|
||||||
the generation of ARM binaries as well. This is due to bugs in the
|
the generation of ARM binaries as well. This is due to bugs in the
|
||||||
toolchain debugger that don't deal too well with thumb code.
|
toolchain debugger that don't deal too well with thumb code.
|
||||||
|
|
||||||
|
LOCAL_ARM_NEON
|
||||||
|
Defining this variable to 'true' allows the use of ARM Advanced SIMD
|
||||||
|
(a.k.a. NEON) GCC intrinsics in your C and C++ sources, as well as
|
||||||
|
NEON instructions in Assembly files.
|
||||||
|
|
||||||
|
You should only define it when targeting the 'armeabi-v7a' ABI that
|
||||||
|
corresponds to the ARMv7 instruction set. Note that not all ARMv7
|
||||||
|
based CPUs support the NEON instruction set extensions and that you
|
||||||
|
should perform runtime detection to be able to use this code at runtime
|
||||||
|
safely. To learn more about this, please read the documentation at
|
||||||
|
docs/CPU-ARM-NEON.TXT and docs/CPU-FEATURES.TXT.
|
||||||
|
|
||||||
|
Alternatively, you can also specify that only specific source files
|
||||||
|
may be compiled with NEON support by using the '.neon' suffix, as
|
||||||
|
in:
|
||||||
|
|
||||||
|
LOCAL_SRC_FILES = foo.c.neon bar.c zoo.c.arm.neon
|
||||||
|
|
||||||
|
In this example, 'foo.c' will be compiled in thumb+neon mode,
|
||||||
|
'bar.c' will be compiled in 'thumb' mode, and 'zoo.c' will be
|
||||||
|
compiled in 'arm+neon' mode.
|
||||||
|
|
||||||
|
Note that the '.neon' suffix must appear after the '.arm' suffix
|
||||||
|
if you use both (i.e. foo.c.arm.neon works, but not foo.c.neon.arm !)
|
||||||
|
|||||||
@@ -28,13 +28,23 @@ IMPORTANT CHANGES:
|
|||||||
|
|
||||||
More details about ABIs is now available in docs/CPU-ARCH-ABIS.TXT
|
More details about ABIs is now available in docs/CPU-ARCH-ABIS.TXT
|
||||||
|
|
||||||
- Added a new sample static library, named "cpufeatures" to detect
|
- A small static library named 'cpufeatures' is provided with source code
|
||||||
CPU Features at runtime. For now, this can be used to detect the
|
and can be used at runtime to determine the CPU features supported by the
|
||||||
availability of ARM Advanced SIMD (a.k.a. NEON) instruction support
|
target device. It should run on all Android platforms, starting from 1.5.
|
||||||
at runtime in order to provide optimized code paths for specific
|
|
||||||
operations.
|
|
||||||
|
|
||||||
See docs/CPU-FEATURES.TXT for details.
|
For more information, see docs/CPU-FEATURES.TXT
|
||||||
|
|
||||||
|
- Support for the optional ARM Advanced SIMD (a.k.a. NEON) instruction set
|
||||||
|
extension through the use of the LOCAL_ARM_NEON variable in Android.mk, or
|
||||||
|
the '.neon' suffix when listing source files.
|
||||||
|
|
||||||
|
Neon is an *optional* instruction set extension, and not all Android ARMv7
|
||||||
|
devices will support it. You will need to use the 'cpufeatures' library to
|
||||||
|
determine if such code can be used at runtime, and provide alternate code
|
||||||
|
paths if this is not the case. This is similar to MMX/SSE/3DNow on x86
|
||||||
|
platforms.
|
||||||
|
|
||||||
|
For more information, see docs/CPU-ARM-NEON.TXT
|
||||||
|
|
||||||
- GCC 4.4.0 is now used by default by the NDK. It generates better code than
|
- GCC 4.4.0 is now used by default by the NDK. It generates better code than
|
||||||
GCC 4.2.1, which was used in previous releases. However, the compiler's C++
|
GCC 4.2.1, which was used in previous releases. However, the compiler's C++
|
||||||
@@ -87,6 +97,10 @@ OTHER FIXES & CHANGES:
|
|||||||
|
|
||||||
- Actually use the awk version detected by host-setup.sh during the build.
|
- Actually use the awk version detected by host-setup.sh during the build.
|
||||||
|
|
||||||
|
- Only allow undefined symbols when LOCAL_ALLOW_UNDEFINED_SYMBOLS is set
|
||||||
|
to 'true', just like the documentation says it works. Also fix a typo
|
||||||
|
in CLEAR_VARS that prevented this variable from being cleared properly.
|
||||||
|
|
||||||
- Added --prebuilt-ndk=FILE option to build/tools/make-release.sh script to
|
- Added --prebuilt-ndk=FILE option to build/tools/make-release.sh script to
|
||||||
package a new experimental NDK package archive from the current source tree
|
package a new experimental NDK package archive from the current source tree
|
||||||
plus the toolchain binaries of an existing NDK release package. E.g.:
|
plus the toolchain binaries of an existing NDK release package. E.g.:
|
||||||
|
|||||||
@@ -104,6 +104,10 @@ Each supported ABI is identified by a unique name.
|
|||||||
(Just like one typically does on x86 systems to check/use MMX/SSE2/etc...
|
(Just like one typically does on x86 systems to check/use MMX/SSE2/etc...
|
||||||
specialized instructions).
|
specialized instructions).
|
||||||
|
|
||||||
|
You can check docs/CPU-FEATURES.TXT to see how to perform these runtime
|
||||||
|
checks, and docs/CPU-ARM-NEON.TXT to learn about the NDK's support for
|
||||||
|
building NEON-capable machine code too.
|
||||||
|
|
||||||
IMPORTANT NOTE: This ABI enforces that all double values are passed during
|
IMPORTANT NOTE: This ABI enforces that all double values are passed during
|
||||||
function calls in 'core' register pairs, instead of dedicated FP ones.
|
function calls in 'core' register pairs, instead of dedicated FP ones.
|
||||||
However, all internal computations can be performed with the FP registers
|
However, all internal computations can be performed with the FP registers
|
||||||
|
|||||||
118
ndk/docs/CPU-ARM-NEON.TXT
Normal file
118
ndk/docs/CPU-ARM-NEON.TXT
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
Android NDK & ARM NEON instruction set extension support
|
||||||
|
--------------------------------------------------------
|
||||||
|
|
||||||
|
Introduction:
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Android NDK r3 added support for the new 'armeabi-v7a' ARM-based ABI
|
||||||
|
that allows native code to use two useful instruction set extensions:
|
||||||
|
|
||||||
|
- Thumb-2, which provides performance comparable to 32-bit ARM
|
||||||
|
instructions with similar compactness to Thumb-1
|
||||||
|
|
||||||
|
- VFPv3, which provides hardware FPU registers and computations,
|
||||||
|
to boost floating point performance significantly.
|
||||||
|
|
||||||
|
More specifically, by default 'armeabi-v7a' only supports
|
||||||
|
VFPv3-D16 which only uses/requires 16 hardware FPU 64-bit registers.
|
||||||
|
|
||||||
|
More information about this can be read in docs/CPU-ARCH-ABIS.TXT
|
||||||
|
|
||||||
|
The ARMv7 Architecture Reference Manual also defines another optional
|
||||||
|
instruction set extension known as "ARM Advanced SIMD", nick-named
|
||||||
|
"NEON". It provides:
|
||||||
|
|
||||||
|
- A set of interesting scalar/vector instructions and registers
|
||||||
|
(the latter are mapped to the same chip area as the FPU ones),
|
||||||
|
comparable to MMX/SSE/3DNow! in the x86 world.
|
||||||
|
|
||||||
|
- VFPv3-D32 as a requirement (i.e. 32 hardware FPU 64-bit registers,
|
||||||
|
instead of the minimum of 16).
|
||||||
|
|
||||||
|
Not all ARMv7-based Android devices will support NEON, but those that
|
||||||
|
do may benefit in significant ways from the scalar/vector instructions.
|
||||||
|
|
||||||
|
The NDK supports the compilation of modules or even specific source
|
||||||
|
files with support for NEON. What this means is that a specific compiler
|
||||||
|
flag will be used to enable the use of GCC ARM Neon intrinsics and
|
||||||
|
VFPv3-D32 at the same time. The intrinsics are described here:
|
||||||
|
|
||||||
|
http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
|
||||||
|
|
||||||
|
|
||||||
|
LOCAL_ARM_NEON:
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Define LOCAL_ARM_NEON to 'true' in your module definition, and the NDK
|
||||||
|
will build all its source files with NEON support. This can be useful if
|
||||||
|
you want to build a static or shared library that specifically contains
|
||||||
|
NEON code paths.
|
||||||
|
|
||||||
|
|
||||||
|
Using the .neon suffix:
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
When listing source files in your LOCAL_SRC_FILES variable, you now have
|
||||||
|
the option of using the .neon suffix to indicate that you want the
|
||||||
|
corresponding source(s) to be built with Neon support. For example:
|
||||||
|
|
||||||
|
LOCAL_SRC_FILES := foo.c.neon bar.c
|
||||||
|
|
||||||
|
Will only build 'foo.c' with NEON support.
|
||||||
|
|
||||||
|
Note that the .neon suffix can be used with the .arm suffix too (used to
|
||||||
|
specify the 32-bit ARM instruction set for non-NEON instructions), but must
|
||||||
|
appear after it.
|
||||||
|
|
||||||
|
In other words, 'foo.c.arm.neon' works, but 'foo.c.neon.arm' does NOT.
|
||||||
|
|
||||||
|
|
||||||
|
Build Requirements:
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Neon support only works when targeting the 'armeabi-v7a' ABI, otherwise the
|
||||||
|
NDK build scripts will complain and abort. It is important to use checks like
|
||||||
|
the following in your Android.mk:
|
||||||
|
|
||||||
|
# define a static library containing our NEON code
|
||||||
|
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||||
|
include $(CLEAR_VARS)
|
||||||
|
LOCAL_MODULE := mylib-neon
|
||||||
|
LOCAL_SRC_FILES := mylib-neon.c
|
||||||
|
LOCAL_ARM_NEON := true
|
||||||
|
include $(BUILD_STATIC_LIBRARY)
|
||||||
|
endif # TARGET_ARCH_ABI == armeabi-v7a
|
||||||
|
|
||||||
|
|
||||||
|
Runtime Detection:
|
||||||
|
------------------
|
||||||
|
|
||||||
|
As said previously, NOT ALL ARMv7-BASED ANDROID DEVICES WILL SUPPORT NEON !
|
||||||
|
It is thus crucial to perform runtime detection to know if the NEON-capable
|
||||||
|
machine code can be run on the target device.
|
||||||
|
|
||||||
|
To do that, use the 'cpufeatures' library that comes with this NDK. To learn
|
||||||
|
more about it, see docs/CPU-FEATURES.TXT.
|
||||||
|
|
||||||
|
You should explicitly check that android_getCpuFamily() returns
|
||||||
|
ANDROID_CPU_FAMILY_ARM, and that android_getCpuFeatures() returns a value
|
||||||
|
that has the ANDROID_CPU_ARM_FEATURE_NEON flag set, as in:
|
||||||
|
|
||||||
|
#include <cpu-features.h>
|
||||||
|
|
||||||
|
...
|
||||||
|
...
|
||||||
|
|
||||||
|
if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM &&
|
||||||
|
(android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0)
|
||||||
|
{
|
||||||
|
// use NEON-optimized routines
|
||||||
|
...
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// use non-NEON fallback routines instead
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
...
|
||||||
Reference in New Issue
Block a user