//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <Graph.hpp>
#include <Network.hpp>
#include <neon/NeonTensorHandle.hpp>
#include <neon/NeonTensorHandleFactory.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <test/GraphUtils.hpp>
#include <arm_compute/runtime/Allocator.h>
#include <backendsCommon/test/CommonTestUtils.hpp>
#include <boost/test/unit_test.hpp>
#include <armnn/utility/Assert.hpp>
BOOST_AUTO_TEST_SUITE(NeonTensorHandleTests)
using namespace armnn;
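// Checks that NeonTensorHandleFactory::GetCapabilities reports no PaddingRequired
// capability for an Input -> Softmax -> Output network: none of these layers needs
// padded tensors on the NEON backend.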
BOOST_AUTO_TEST_CASE(NeonTensorHandleGetCapabilitiesNoPadding)
{
std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
NeonTensorHandleFactory handleFactory(memoryManager);
INetworkPtr network(INetwork::Create());
// Add the layers
IConnectableLayer* input = network->AddInputLayer(0);
SoftmaxDescriptor descriptor;
descriptor.m_Beta = 1.0f;
IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
IConnectableLayer* output = network->AddOutputLayer(2);
// Establish connections
input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
// No padding required for input
std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
softmax,
CapabilityClass::PaddingRequired);
BOOST_TEST(capabilities.empty());
// No padding required for Softmax
capabilities = handleFactory.GetCapabilities(softmax, output, CapabilityClass::PaddingRequired);
BOOST_TEST(capabilities.empty());
// No padding required for output
capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
BOOST_TEST(capabilities.empty());
}
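// Checks that for an Input -> Pooling2d -> Output network only the Pooling2d layer
// reports the PaddingRequired capability.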
BOOST_AUTO_TEST_CASE(NeonTensorHandleGetCapabilitiesPadding)
{
std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
NeonTensorHandleFactory handleFactory(memoryManager);
INetworkPtr network(INetwork::Create());
// Add the layers
IConnectableLayer* input = network->AddInputLayer(0);
Pooling2dDescriptor descriptor;
IConnectableLayer* pooling = network->AddPooling2dLayer(descriptor);
IConnectableLayer* output = network->AddOutputLayer(2);
// Establish connections
input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
// No padding required for input
std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
pooling,
CapabilityClass::PaddingRequired);
BOOST_TEST(capabilities.empty());
// No padding required for output
capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
BOOST_TEST(capabilities.empty());
// Padding required for Pooling2d
capabilities = handleFactory.GetCapabilities(pooling, output, CapabilityClass::PaddingRequired);
BOOST_TEST(capabilities.size() == 1);
BOOST_TEST((capabilities[0].m_CapabilityClass == CapabilityClass::PaddingRequired));
BOOST_TEST(capabilities[0].m_Value);
}
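// Two Abs (ElementwiseUnary) layers feed a Concat along axis 2. Since Abs does not
// require padding, the optimizer is expected to back the concat inputs with
// sub-tensors (views of the concatenated output tensor).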
BOOST_AUTO_TEST_CASE(ConcatOnXorYSubTensorsNoPaddingRequiredTest)
{
armnn::INetworkPtr net(armnn::INetwork::Create());
// Set up tensor infos
const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
// Create the network
armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));
armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));
std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
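// Concatenate the two intermediate tensors along axis 2, giving the output shape {2, 3, 4, 2}.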
armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
const armnn::Graph& theGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
// Load graph into runtime
armnn::NetworkId networkIdentifier;
runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
// Now check how many of the concat layer's inputs are backed by sub-tensors.
// A tensor handle is a sub-tensor if it has a parent handle.
auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
{
if (subTensorHandle && subTensorHandle->GetParent())
{
return true;
}
return false;
};
for (auto&& layer : theGraph)
{
if(layer->GetType() == armnn::LayerType::Concat)
{
unsigned int numberOfSubTensors = 0;
for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
{
const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
{
++numberOfSubTensors;
}
}
// Sub-tensors should be used in this configuration, as no padding is required
ARMNN_ASSERT(numberOfSubTensors > 0);
}
}
}
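// Two Pooling2d layers with explicit padding feed a Concat along axis 2. Because the
// pooling layers require padded tensors, the optimizer must not use sub-tensors for
// the concat inputs.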
BOOST_AUTO_TEST_CASE(ConcatonXorYPaddingRequiredTest)
{
armnn::INetworkPtr net(armnn::INetwork::Create());
// Set up tensor infos
const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
armnn::Pooling2dDescriptor descriptor;
descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
descriptor.m_StrideX = descriptor.m_StrideY = 1;
descriptor.m_PadLeft = 1;
descriptor.m_PadRight = 1;
descriptor.m_PadTop = 1;
descriptor.m_PadBottom = 1;
descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
// Create the network
armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
armnn::IConnectableLayer* pooling2dLayer0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
pooling2dLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
input0Layer->GetOutputSlot(0).Connect(pooling2dLayer0->GetInputSlot(0));
armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
armnn::IConnectableLayer* pooling2dLayer1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
pooling2dLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
input1Layer->GetOutputSlot(0).Connect(pooling2dLayer1->GetInputSlot(0));
std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
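// Concatenate the two intermediate tensors along axis 2, giving the output shape {2, 3, 4, 2}.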
armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
pooling2dLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
pooling2dLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
const armnn::Graph& theGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
// Load graph into runtime
armnn::NetworkId networkIdentifier;
runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
// Now check how many of the concat layer's inputs are backed by sub-tensors
auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
{
if (subTensorHandle && subTensorHandle->GetParent())
{
return true;
}
return false;
};
unsigned int numberOfSubTensors = 0;
for (auto&& layer : theGraph)
{
if(layer->GetType() == armnn::LayerType::Concat)
{
for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
{
const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
{
++numberOfSubTensors;
}
}
}
}
// Sub-tensors should not be used in this configuration, as the pooling layers require padding
ARMNN_ASSERT(numberOfSubTensors == 0);
}
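// A Splitter along axis 2 feeds two Abs (ElementwiseUnary) layers. Since Abs does not
// require padding, the splitter outputs are expected to be backed by sub-tensors.
// The test also runs inference and checks the numerical results.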
BOOST_AUTO_TEST_CASE(SplitteronXorYNoPaddingRequiredTest)
{
using namespace armnn;
unsigned int splitAxis = 2;
unsigned int numSplit = 2;
const TensorShape& inputShape = { 2, 3, 4, 2 };
const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 2, 3, 2, 2 }, armnn::DataType::Float32);
const std::vector<TensorShape> outputShapes{{ 2, 3, 2, 2 },
{ 2, 3, 2, 2 }};
const float qScale = 1.0f;
const int32_t qOffset = 0;
// Creates structures for input & output.
std::vector<float> inputData{
1, 2,
3, 4,
5, 6,
7, 8,
9, 10,
11, 12,
13, 14,
15, 16,
17, 18,
19, 20,
21, 22,
23, 24,
25, 26,
27, 28,
29, 30,
31, 32,
33, 34,
35, 36,
37, 38,
39, 40,
41, 42,
43, 44,
45, 46,
47, 48
};
std::vector<float> expectedOutput0{
1, 2,
3, 4,
9, 10,
11, 12,
17, 18,
19, 20,
25, 26,
27, 28,
33, 34,
35, 36,
41, 42,
43, 44
};
std::vector<float> expectedOutput1{
5, 6,
7, 8,
13, 14,
15, 16,
21, 22,
23, 24,
29, 30,
31, 32,
37, 38,
39, 40,
45, 46,
47, 48
};
// Builds up the structure of the network.
INetworkPtr net(INetwork::Create());
TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
// Splitter
std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
// Add current input shape to splitterDimSizes
for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
{
splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
}
if (splitterDimSizes[splitAxis] % numSplit != 0)
{
throw ParseException("Number of splits must evenly divide the dimension");
}
splitterDimSizes[splitAxis] /= numSplit;
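// Describe the two views: each keeps the full size in every dimension except splitAxis,
// and is offset along splitAxis by its view index.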
SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
for (unsigned int g = 0; g < numSplit; ++g)
{
// Set the size of the views.
for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
{
splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
}
splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
}
IConnectableLayer* input = net->AddInputLayer(0, "input");
IConnectableLayer* elementWiseUnary0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
IConnectableLayer* elementWiseUnary1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_1");
IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
// Connections
Connect(input, splitter, inputTensorInfo, 0, 0);
Connect(splitter, elementWiseUnary0, intermediateInfo, 0, 0);
Connect(splitter, elementWiseUnary1, intermediateInfo, 1, 0);
std::vector<IConnectableLayer*> elementwiseUnaryLayers{elementWiseUnary0, elementWiseUnary1};
for (unsigned int i = 0; i < outputShapes.size(); ++i)
{
TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
IConnectableLayer* output = net->AddOutputLayer(boost::numeric_cast<LayerBindingId>(i));
Connect(elementwiseUnaryLayers[i], output, outputTensorInfo, 0, 0);
}
std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
const armnn::Graph& theGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
// Load graph into runtime
armnn::NetworkId networkIdentifier;
runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
// Now check how many of the elementwise-unary layers' inputs are backed by sub-tensors
auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
{
if (subTensorHandle && subTensorHandle->GetParent())
{
return true;
}
return false;
};
for (auto&& layer : theGraph)
{
if(layer->GetType() == armnn::LayerType::ElementwiseUnary)
{
unsigned int numberOfSubTensors = 0;
for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
{
const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
{
++numberOfSubTensors;
}
}
// Sub-tensors should be used in this configuration, as no padding is required
ARMNN_ASSERT(numberOfSubTensors > 0);
}
}
InputTensors inputTensors;
inputTensors.reserve(inputTensorData.size());
for (auto&& it : inputTensorData)
{
inputTensors.push_back({it.first,
ConstTensor(runtime->GetInputTensorInfo(networkIdentifier, it.first), it.second.data())});
}
OutputTensors outputTensors;
outputTensors.reserve(expectedOutputData.size());
std::map<int, std::vector<float>> outputStorage;
for (auto&& it : expectedOutputData)
{
std::vector<float> out(it.second.size());
outputStorage.emplace(it.first, out);
outputTensors.push_back({it.first,
Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
outputStorage.at(it.first).data())});
}
// Does the inference.
runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
// Checks the results.
float tolerance = 0.000001f;
for (auto&& it : expectedOutputData)
{
std::vector<float> out = outputStorage.at(it.first);
for (unsigned int i = 0; i < out.size(); ++i)
{
BOOST_CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
"Actual output: " << out[i] << ". Expected output:" << it.second[i]);
}
}
}
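// A Splitter along axis 2 feeds two Pooling2d layers that require padding, so the
// splitter outputs must not be backed by sub-tensors. The test also runs inference
// and checks the numerical results.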
BOOST_AUTO_TEST_CASE(SplitteronXorYPaddingRequiredTest)
{
using namespace armnn;
unsigned int splitAxis = 2;
unsigned int numSplit = 2;
const TensorShape& inputShape = { 1, 1, 4, 4 };
const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 1, 1, 2, 4 }, armnn::DataType::Float32);
const std::vector<TensorShape> outputShapes{{ 1, 1, 2, 4 },
{ 1, 1, 2, 4 }};
const float qScale = 1.0f;
const int32_t qOffset = 0;
// Creates structures for input & output.
std::vector<float> inputData{
9.0f, 27.0f, 18.0f, 36.0f,
18.0f, 9.0f, 18.0f, 9.0f,
27.0f, 18.0f, 9.0f, 27.0f,
9.0f, 27.0f, 9.0f, 18.0f,
};
std::vector<float> expectedOutput0{
7.0f, 11.0f, 13.0f, 9.0f,
7.0f, 11.0f, 13.0f, 9.0f
};
std::vector<float> expectedOutput1{
9.0f, 11.0f, 12.0f, 7.0f,
9.0f, 11.0f, 12.0f, 7.0f
};
// Builds up the structure of the network.
INetworkPtr net(INetwork::Create());
TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
// Pooling
armnn::Pooling2dDescriptor descriptor;
descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
descriptor.m_StrideX = descriptor.m_StrideY = 1;
descriptor.m_PadLeft = 1;
descriptor.m_PadRight = 1;
descriptor.m_PadTop = 1;
descriptor.m_PadBottom = 1;
descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
// Splitter
std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
// Add current input shape to splitterDimSizes
for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
{
splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
}
if (splitterDimSizes[splitAxis] % numSplit != 0)
{
throw ParseException("Number of splits must evenly divide the dimension");
}
splitterDimSizes[splitAxis] /= numSplit;
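// Describe the two views: each keeps the full size in every dimension except splitAxis,
// and is offset along splitAxis by its view index.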
SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
for (unsigned int g = 0; g < numSplit; ++g)
{
// Set the size of the views.
for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
{
splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
}
splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
}
IConnectableLayer* input = net->AddInputLayer(0, "input");
IConnectableLayer* pooling2d0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
IConnectableLayer* pooling2d1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
// Connections
Connect(input, splitter, inputTensorInfo, 0, 0);
Connect(splitter, pooling2d0, intermediateInfo, 0, 0);
Connect(splitter, pooling2d1, intermediateInfo, 1, 0);
std::vector<IConnectableLayer*> pooling2dLayers{pooling2d0, pooling2d1};
for (unsigned int i = 0; i < outputShapes.size(); ++i)
{
TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
IConnectableLayer* output = net->AddOutputLayer(boost::numeric_cast<LayerBindingId>(i));
Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
}
std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
const armnn::Graph& theGraph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
// Load graph into runtime
armnn::NetworkId networkIdentifier;
runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
// Now check how many of the pooling layers' inputs are backed by sub-tensors
auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
{
if (subTensorHandle && subTensorHandle->GetParent())
{
return true;
}
return false;
};
for (auto&& layer : theGraph)
{
if(layer->GetType() == armnn::LayerType::Pooling2d)
{
unsigned int numberOfSubTensors = 0;
for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
{
const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
{
++numberOfSubTensors;
}
}
// Sub-tensors should not be used in this configuration, as the pooling layers require padding
ARMNN_ASSERT(numberOfSubTensors == 0);
}
}
InputTensors inputTensors;
inputTensors.reserve(inputTensorData.size());
for (auto&& it : inputTensorData)
{
inputTensors.push_back({it.first,
ConstTensor(runtime->GetInputTensorInfo(networkIdentifier, it.first), it.second.data())});
}
OutputTensors outputTensors;
outputTensors.reserve(expectedOutputData.size());
std::map<int, std::vector<float>> outputStorage;
for (auto&& it : expectedOutputData)
{
std::vector<float> out(it.second.size());
outputStorage.emplace(it.first, out);
outputTensors.push_back({it.first,
Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
outputStorage.at(it.first).data())});
}
// Does the inference.
runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
// Checks the results.
float tolerance = 0.000001f;
for (auto&& it : expectedOutputData)
{
std::vector<float> out = outputStorage.at(it.first);
for (unsigned int i = 0; i < out.size(); ++i)
{
BOOST_CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
"Actual output: " << out[i] << ". Expected output:" << it.second[i]);
}
}
}
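// Creates a memory-managed tensor handle, maps it across two Acquire/Release cycles to
// confirm the buffer is readable and writable, and verifies that Import fails because
// importing is disabled for memory-managed handles.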
BOOST_AUTO_TEST_CASE(NeonTensorHandleFactoryMemoryManaged)
{
std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
std::make_unique<arm_compute::Allocator>(),
BaseMemoryManager::MemoryAffinity::Offset);
NeonTensorHandleFactory handleFactory(memoryManager);
TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
// Create a tensor handle with memory management enabled
auto handle = handleFactory.CreateTensorHandle(info, true);
handle->Manage();
handle->Allocate();
memoryManager->Acquire();
{
float* buffer = reinterpret_cast<float*>(handle->Map());
BOOST_CHECK(buffer != nullptr); // Yields a valid pointer
buffer[0] = 1.5f;
buffer[1] = 2.5f;
BOOST_CHECK(buffer[0] == 1.5f); // Memory is writable and readable
BOOST_CHECK(buffer[1] == 2.5f); // Memory is writable and readable
}
memoryManager->Release();
memoryManager->Acquire();
{
float* buffer = reinterpret_cast<float*>(handle->Map());
BOOST_CHECK(buffer != nullptr); // Yields a valid pointer
buffer[0] = 3.5f;
buffer[1] = 4.5f;
BOOST_CHECK(buffer[0] == 3.5f); // Memory is writable and readable
BOOST_CHECK(buffer[1] == 4.5f); // Memory is writable and readable
}
memoryManager->Release();
float testPtr[2] = { 2.5f, 5.5f };
// Cannot import as import is disabled
BOOST_CHECK(!handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
}
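// Creates a tensor handle with memory management disabled (import enabled), verifies
// that no backing buffer is allocated, then imports user-owned memory and checks that
// Map() returns the imported pointer and that it is readable and writable.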
BOOST_AUTO_TEST_CASE(NeonTensorHandleFactoryImport)
{
std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
std::make_unique<arm_compute::Allocator>(),
BaseMemoryManager::MemoryAffinity::Offset);
NeonTensorHandleFactory handleFactory(memoryManager);
TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);
// Create a tensor handle with memory management disabled (import enabled)
auto handle = handleFactory.CreateTensorHandle(info, false);
handle->Manage();
handle->Allocate();
memoryManager->Acquire();
// No buffer allocated when import is enabled
BOOST_CHECK((PolymorphicDowncast<NeonTensorHandle*>(handle.get()))->GetTensor().buffer() == nullptr);
float testPtr[2] = { 2.5f, 5.5f };
// Correctly import
BOOST_CHECK(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
float* buffer = reinterpret_cast<float*>(handle->Map());
BOOST_CHECK(buffer != nullptr); // Yields a valid pointer after import
BOOST_CHECK(buffer == testPtr); // buffer is pointing to testPtr
// Memory is writable and readable with correct value
BOOST_CHECK(buffer[0] == 2.5f);
BOOST_CHECK(buffer[1] == 5.5f);
buffer[0] = 3.5f;
buffer[1] = 10.0f;
BOOST_CHECK(buffer[0] == 3.5f);
BOOST_CHECK(buffer[1] == 10.0f);
memoryManager->Release();
}
BOOST_AUTO_TEST_SUITE_END()