// ndk-samples/nn-samples/basic/src/main/cpp/simple_model.cpp

/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "simple_model.h"

#include <android/asset_manager_jni.h>
#include <android/log.h>
#include <android/sharedmem.h>
#include <sys/mman.h>
#include <string>
#include <unistd.h>

namespace {

// Create ANeuralNetworksMemory from an asset file.
//
// Note that, at API level 30 or earlier, the NNAPI drivers may not have the permission to
// access the asset file. To work around this issue, here we will:
// 1. Allocate a large-enough shared memory to hold the model data;
// 2. Copy the asset file to the shared memory;
// 3. Create the NNAPI memory with the file descriptor of the shared memory.
//
// Returns the newly created ANeuralNetworksMemory on success; the caller is responsible for
// releasing it with ANeuralNetworksMemory_free. Returns nullptr, after logging the reason, when
// the asset is null or empty or when any step above fails.
ANeuralNetworksMemory *createMemoryFromAsset(AAsset *asset) {
    if (asset == nullptr) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Cannot create NNAPI memory from a null asset");
        return nullptr;
    }

    // Allocate a large-enough shared memory to hold the model data.
    const off_t length = AAsset_getLength(asset);
    if (length <= 0) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Invalid asset length %lld", static_cast<long long>(length));
        return nullptr;
    }
    const size_t size = static_cast<size_t>(length);
    const int fd = ASharedMemory_create("model_data", size);
    if (fd < 0) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ASharedMemory_create failed with size %lld",
                            static_cast<long long>(length));
        return nullptr;
    }

    // Copy the asset file to the shared memory.
    // Note that mmap reports failure with MAP_FAILED, not with a null pointer.
    void *data = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (data == MAP_FAILED) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to map a shared memory");
        close(fd);
        return nullptr;
    }

    // AAsset_read may deliver fewer bytes than requested, so keep reading until the whole
    // asset has been copied, or until it reports end-of-file (0) or an error (< 0).
    size_t copied = 0;
    while (copied < size) {
        const int bytesRead = AAsset_read(asset, static_cast<char *>(data) + copied,
                                          size - copied);
        if (bytesRead <= 0) {
            break;
        }
        copied += static_cast<size_t>(bytesRead);
    }
    munmap(data, size);
    if (copied != size) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "AAsset_read copied only %zu of %zu bytes", copied, size);
        close(fd);
        return nullptr;
    }

    // Create the NNAPI memory with the file descriptor of the shared memory.
    ANeuralNetworksMemory *memory = nullptr;
    const int status = ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE, fd, 0,
                                                          &memory);

    // It is safe to close the file descriptor here because ANeuralNetworksMemory_createFromFd
    // will create a dup.
    close(fd);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksMemory_createFromFd failed for trained weights");
        return nullptr;
    }
    return memory;
}

}  // namespace

/**
 * SimpleModel Constructor.
 *
 * Initialize the member variables, including the shared memory objects.
 *
 * Failures are logged and leave the affected handles null (or the descriptors negative), so the
 * destructor can always run safely on a partially constructed object.
 *
 * @param asset The asset containing the trained data; its contents are copied into an
 *              ANeuralNetworksMemory by createMemoryFromAsset.
 */
SimpleModel::SimpleModel(AAsset *asset) :
        model_(nullptr),
        compilation_(nullptr),
        dimLength_(TENSOR_SIZE) {
    // Put every handle released by the destructor into a known state before anything can fail.
    // Without this, an early return below would leave some of them uninitialized.
    memoryModel_ = nullptr;
    memoryInput2_ = nullptr;
    memoryOutput_ = nullptr;
    inputTensor2Fd_ = -1;
    outputTensorFd_ = -1;

    tensorSize_ = dimLength_;
    inputTensor1_.resize(tensorSize_);
    const size_t tensorBytes = tensorSize_ * sizeof(float);

    // Create ANeuralNetworksMemory from a file containing the trained data.
    // createMemoryFromAsset logs its own failures and returns nullptr in that case.
    memoryModel_ = createMemoryFromAsset(asset);

    // Create ASharedMemory to hold the data for the second input tensor and the output tensor.
    inputTensor2Fd_ = ASharedMemory_create("input2", tensorBytes);
    if (inputTensor2Fd_ < 0) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ASharedMemory_create failed for Input2");
        return;
    }
    outputTensorFd_ = ASharedMemory_create("output", tensorBytes);
    if (outputTensorFd_ < 0) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ASharedMemory_create failed for Output");
        return;
    }

    // Create ANeuralNetworksMemory objects from the corresponding ASharedMemory objects.
    int status = ANeuralNetworksMemory_createFromFd(tensorBytes,
                                                    PROT_READ,
                                                    inputTensor2Fd_, 0,
                                                    &memoryInput2_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksMemory_createFromFd failed for Input2");
        return;
    }
    status = ANeuralNetworksMemory_createFromFd(tensorBytes,
                                                PROT_READ | PROT_WRITE,
                                                outputTensorFd_, 0,
                                                &memoryOutput_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksMemory_createFromFd failed for Output");
        return;
    }
}

/**
 * Create a graph that consists of three operations: two additions and a
 * multiplication.
 * The sums created by the additions are the inputs to the multiplication. In
 * essence, we are creating a graph that computes:
 *        (tensor0 + tensor1) * (tensor2 + tensor3).
 *
 * tensor0 ---+
 *            +--- ADD ---> intermediateOutput0 ---+
 * tensor1 ---+                                    |
 *                                                 +--- MUL---> output
 * tensor2 ---+                                    |
 *            +--- ADD ---> intermediateOutput1 ---+
 * tensor3 ---+
 *
 * Two of the four tensors, tensor0 and tensor2 being added are constants, defined in the
 * model. They represent the weights that would have been learned during a training process.
 *
 * The other two tensors, tensor1 and tensor3 will be inputs to the model. Their values will be
 * provided when we execute the model. These values can change from execution to execution.
 *
 * Besides the two input tensors, an optional fused activation function can
 * also be defined for ADD and MUL. In this example, we'll simply set it to NONE.
 *
 * The graph then has 10 operands:
 *  - 2 tensors that are inputs to the model. These are fed to the two
 *      ADD operations.
 *  - 2 constant tensors that are the other two inputs to the ADD operations.
 *  - 1 fuse activation operand reused for the ADD operations and the MUL operation.
 *  - 2 intermediate tensors, representing outputs of the ADD operations and inputs to the
 *      MUL operation.
 *  - 1 model output.
 *
 * @return true for success, false otherwise
 */
bool SimpleModel::CreateCompiledModel() {
    int32_t status;

    // Create the ANeuralNetworksModel handle.
    status = ANeuralNetworksModel_create(&model_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_create failed");
        return false;
    }

    uint32_t dimensions[] = {dimLength_};
    ANeuralNetworksOperandType float32TensorType{
            .type = ANEURALNETWORKS_TENSOR_FLOAT32,
            .dimensionCount = sizeof(dimensions) / sizeof(dimensions[0]),
            .dimensions = dimensions,
            .scale = 0.0f,
            .zeroPoint = 0,
    };
    ANeuralNetworksOperandType scalarInt32Type{
            .type = ANEURALNETWORKS_INT32,
            .dimensionCount = 0,
            .dimensions = nullptr,
            .scale = 0.0f,
            .zeroPoint = 0,
    };

    /**
     * Add operands and operations to construct the model.
     *
     * Operands are implicitly identified by the order in which they are added to the model,
     * starting from 0.
     *
     * These indexes are not returned by the model_addOperand call. The application must
     * manage these values. Here, we use opIdx to do the bookkeeping.
     */
    uint32_t opIdx = 0;

    // We first add the operand for the NONE activation function, and set its
    // value to ANEURALNETWORKS_FUSED_NONE.
    // This constant scalar operand will be used for all 3 operations.
    status = ANeuralNetworksModel_addOperand(model_, &scalarInt32Type);
    uint32_t fusedActivationFuncNone = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            fusedActivationFuncNone);
        return false;
    }

    FuseCode fusedActivationCodeValue = ANEURALNETWORKS_FUSED_NONE;
    status = ANeuralNetworksModel_setOperandValue(
            model_, fusedActivationFuncNone, &fusedActivationCodeValue,
            sizeof(fusedActivationCodeValue));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_setOperandValue failed for operand (%d)",
                            fusedActivationFuncNone);
        return false;
    }

    // Add operands for the tensors.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor0 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            tensor0);
        return false;
    }
    // tensor0 is a constant tensor that was established during training.
    // We read these values from the corresponding ANeuralNetworksMemory object.
    status = ANeuralNetworksModel_setOperandValueFromMemory(model_,
                                                            tensor0,
                                                            memoryModel_,
                                                            0,
                                                            tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_setOperandValueFromMemory failed for operand (%d)",
                            tensor0);
        return false;
    }

    // tensor1 is one of the user provided input tensors to the trained model.
    // Its value is determined pre-execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor1 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            tensor1);
        return false;
    }

    // tensor2 is a constant tensor that was established during training.
    // We read these values from the corresponding ANeuralNetworksMemory object.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor2 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            tensor2);
        return false;
    }
    status = ANeuralNetworksModel_setOperandValueFromMemory(
            model_, tensor2, memoryModel_, tensorSize_ * sizeof(float),
            tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_setOperandValueFromMemory failed for operand (%d)",
                            tensor2);
        return false;
    }

    // tensor3 is one of the user provided input tensors to the trained model.
    // Its value is determined pre-execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor3 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            tensor3);
        return false;
    }

    // intermediateOutput0 is the output of the first ADD operation.
    // Its value is computed during execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t intermediateOutput0 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            intermediateOutput0);
        return false;
    }

    // intermediateOutput1 is the output of the second ADD operation.
    // Its value is computed during execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t intermediateOutput1 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            intermediateOutput1);
        return false;
    }

    // multiplierOutput is the output of the MUL operation.
    // Its value will be computed during execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t multiplierOutput = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            multiplierOutput);
        return false;
    }

    // Add the first ADD operation.
    std::vector<uint32_t> add1InputOperands = {
            tensor0,
            tensor1,
            fusedActivationFuncNone,
    };
    status = ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_ADD,
                                               add1InputOperands.size(), add1InputOperands.data(),
                                               1, &intermediateOutput0);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperation failed for ADD_1");
        return false;
    }

    // Add the second ADD operation.
    // Note the fusedActivationFuncNone is used again.
    std::vector<uint32_t> add2InputOperands = {
            tensor2,
            tensor3,
            fusedActivationFuncNone,
    };
    status = ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_ADD,
                                               add2InputOperands.size(),add2InputOperands.data(),
                                               1, &intermediateOutput1);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperation failed for ADD_2");
        return false;
    }

    // Add the MUL operation.
    // Note that intermediateOutput0 and intermediateOutput1 are specified
    // as inputs to the operation.
    std::vector<uint32_t> mulInputOperands = {
            intermediateOutput0,
            intermediateOutput1,
            fusedActivationFuncNone};
    status = ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_MUL,
                                               mulInputOperands.size(),mulInputOperands.data(),
                                               1, &multiplierOutput);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperation failed for MUL");
        return false;
    }

    // Identify the input and output tensors to the model.
    // Inputs: {tensor1, tensor3}
    // Outputs: {multiplierOutput}
    std::vector<uint32_t> modelInputOperands = {
            tensor1, tensor3,
    };
    status = ANeuralNetworksModel_identifyInputsAndOutputs(model_,
                                                           modelInputOperands.size(),
                                                           modelInputOperands.data(),
                                                           1,
                                                           &multiplierOutput);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_identifyInputsAndOutputs failed");
        return false;
    }

    // Finish constructing the model.
    // The values of constant and intermediate operands cannot be altered after
    // the finish function is called.
    status = ANeuralNetworksModel_finish(model_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_finish failed");
        return false;
    }

    // Create the ANeuralNetworksCompilation object for the constructed model.
    status = ANeuralNetworksCompilation_create(model_, &compilation_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksCompilation_create failed");
        return false;
    }

    // Set the preference for the compilation, so that the runtime and drivers
    // can make better decisions.
    // Here we prefer to get the answer quickly, so we choose
    // ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.
    status = ANeuralNetworksCompilation_setPreference(compilation_,
                                                      ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksCompilation_setPreference failed");
        return false;
    }

    // Finish the compilation.
    status = ANeuralNetworksCompilation_finish(compilation_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksCompilation_finish failed");
        return false;
    }

    return true;
}

/**
 * Compute with the given input data.
 * @param modelInputs:
 *    inputValue1:   The values to fill tensor1
 *    inputValue2:   The values to fill tensor3
 * @return  computed result, or 0.0f if there is error.
 */
bool SimpleModel::Compute(float inputValue1, float inputValue2,
                          float *result) {
    if (!result) {
        return false;
    }

    // Create an ANeuralNetworksExecution object from the compiled model.
    // Note:
    //   1. All the input and output data are tied to the ANeuralNetworksExecution object.
    //   2. Multiple concurrent execution instances could be created from the same compiled model.
    // This sample only uses one execution of the compiled model.
    ANeuralNetworksExecution *execution;
    int32_t status = ANeuralNetworksExecution_create(compilation_, &execution);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_create failed");
        return false;
    }

    // Set all the elements of the first input tensor (tensor1) to the same value as inputValue1.
    // It's not a realistic example but it shows how to pass a small tensor
    // to an execution.
    std::fill(inputTensor1_.data(), inputTensor1_.data() + tensorSize_,
              inputValue1);

    // Tell the execution to associate inputTensor1 to the first of the two model inputs.
    // Note that the index "0" here means the first operand of the modelInput list
    // {tensor1, tensor3}, which means tensor1.
    status = ANeuralNetworksExecution_setInput(execution, 0, nullptr,
                                               inputTensor1_.data(),
                                               tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setInput failed for input1");
        return false;
    }

    // Set the values of the second input operand (tensor3) to be inputValue2.
    // In reality, the values in the shared memory region will be manipulated by
    // other modules or processes.
    float *inputTensor2Ptr = reinterpret_cast<float *>(mmap(nullptr, tensorSize_ * sizeof(float),
                                                            PROT_READ | PROT_WRITE, MAP_SHARED,
                                                            inputTensor2Fd_, 0));
    for (int i = 0; i < tensorSize_; i++) {
        *inputTensor2Ptr = inputValue2;
        inputTensor2Ptr++;
    }
    munmap(inputTensor2Ptr, tensorSize_ * sizeof(float));

    // ANeuralNetworksExecution_setInputFromMemory associates the operand with a shared memory
    // region to minimize the number of copies of raw data.
    // Note that the index "1" here means the second operand of the modelInput list
    // {tensor1, tensor3}, which means tensor3.
    status = ANeuralNetworksExecution_setInputFromMemory(execution, 1, nullptr,
                                                         memoryInput2_, 0,
                                                         tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setInputFromMemory failed for input2");
        return false;
    }

    // Set the output tensor that will be filled by executing the model.
    // We use shared memory here to minimize the copies needed for getting the output data.
    status = ANeuralNetworksExecution_setOutputFromMemory(execution, 0, nullptr,
                                                          memoryOutput_, 0,
                                                          tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setOutputFromMemory failed for output");
        return false;
    }

    // Start the execution of the model.
    // Note that the execution here is asynchronous, and an ANeuralNetworksEvent object will be
    // created to monitor the status of the execution.
    ANeuralNetworksEvent *event = nullptr;
    status = ANeuralNetworksExecution_startCompute(execution, &event);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_startCompute failed");
        return false;
    }

    // Wait until the completion of the execution. This could be done on a different
    // thread. By waiting immediately, we effectively make this a synchronous call.
    status = ANeuralNetworksEvent_wait(event);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksEvent_wait failed");
        return false;
    }

    ANeuralNetworksEvent_free(event);
    ANeuralNetworksExecution_free(execution);

    // Validate the results.
    const float goldenRef = (inputValue1 + 0.5f) * (inputValue2 + 0.5f);
    float *outputTensorPtr = reinterpret_cast<float *>(mmap(nullptr,
                                                            tensorSize_ * sizeof(float),
                                                            PROT_READ, MAP_SHARED,
                                                            outputTensorFd_, 0));
    for (int32_t idx = 0; idx < tensorSize_; idx++) {
        float delta = outputTensorPtr[idx] - goldenRef;
        delta = (delta < 0.0f) ? (-delta) : delta;
        if (delta > FLOAT_EPISILON) {
            __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                                "Output computation Error: output0(%f), delta(%f) @ idx(%d)",
                                outputTensorPtr[0], delta, idx);
        }
    }
    *result = outputTensorPtr[0];
    munmap(outputTensorPtr, tensorSize_ * sizeof(float));
    return result;
}

/**
 * SimpleModel Destructor.
 *
 * Tears down the NNAPI objects owned by this instance (the compilation, then the model, then
 * the three memory objects) and finally closes the two shared-memory file descriptors.
 */
SimpleModel::~SimpleModel() {
    // The compilation is released before the model it was created from.
    ANeuralNetworksCompilation_free(compilation_);
    ANeuralNetworksModel_free(model_);

    // Memory objects: trained data, second input, output.
    for (ANeuralNetworksMemory *memory : {memoryModel_, memoryInput2_, memoryOutput_}) {
        ANeuralNetworksMemory_free(memory);
    }

    // Shared-memory descriptors backing the second input and the output.
    for (const int fd : {inputTensor2Fd_, outputTensorFd_}) {
        close(fd);
    }
}