//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "Optimization.hpp"
#include "NetworkUtils.hpp"
#include <armnn/Logging.hpp>
#include <armnnUtils/FloatingPointConverter.hpp>
#include <armnnUtils/Permute.hpp>
namespace armnn
{
namespace optimizations
{
class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
void Run(Graph& graph, InputSlot& connection) const
{
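        // OptimizeForConnection only invokes Run() on Constant -> Dequantize connections,
        // so the downcasts below are safe; the asserts document that contract.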
Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
Layer& child = connection.GetOwningLayer();
ARMNN_ASSERT(base.GetType() == LayerType::Constant);
ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);
ReplaceConstDequantisationLayer(graph,
PolymorphicDowncast<ConstantLayer*>(&base),
PolymorphicDowncast<DequantizeLayer*>(&child));
}
protected:
ConvertConstDequantisationLayersToConstLayersImpl() = default;
~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:
static void ReplaceConstDequantisationLayer(Graph&,
ConstantLayer* constantLayer,
DequantizeLayer* dequantizeLayer)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation looks for situations where a constant set of inputs is fed into a Dequantize
         * layer. In that case we don't want the overhead of dequantizing the values on every inference;
         * instead we dequantize them once and store them in a Constant layer to be reused every time,
         * as they will not change.
         */
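        // Gather the tensor infos involved: the constant's output, the (quantized) input
        // to the Dequantize layer, and its (float) output.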
TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
bool requiresPermute = false;
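        // Peek at the first consumer of the dequantize output to decide whether the
        // constant data holds convolution weights that need permuting for NCHW.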
auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
if (connection)
{
if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
{
                /**
                 * ArmNN does not currently support non-fixed weights or bias.
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices, so we permute it to OIHW when the DataLayout is NCHW.
                 */
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
"Convolution layer.";
auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
"Convolution layer and requires permute on weights. ";
requiresPermute = true;
}
}
}
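        // Remember how many consumers the constant layer already has; the connections moved
        // over from the dequantize layer are expected to be appended after this index.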
ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
<< "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
<< "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());
std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
if (constantInfo.GetDataType() == DataType::Float16 &&
inputDequantizeInfo.GetDataType() == DataType::Float16 &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
newValues.data());
}
else if (((constantInfo.GetDataType() == DataType::QAsymmS8
&& inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
|| (constantInfo.GetDataType() == DataType::QSymmS8
&& inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
inputDequantizeInfo.GetQuantizationScale(),
inputDequantizeInfo.GetQuantizationOffset(),
newValues.data());
}
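        // Note: any data type combination not handled above leaves newValues zero-initialised.

        // The constant layer will now carry the dequantized float values, so it takes over the
        // dequantize output's tensor info, marked as constant.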
TensorInfo newInfo = outputDequantizeInfo;
newInfo.SetConstant(true);
if (requiresPermute)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
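            // PermutationVector maps source dimension i to destination dimension mappings[i]:
            // O stays at 0, H moves 1 -> 2, W moves 2 -> 3, I moves 3 -> 1, giving OIHW.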
std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
newValues.data(), permutedValues.data(),
GetDataTypeSize(outputDequantizeInfo.GetDataType()));
ConstTensor newInput(newInfo, permutedValues);
constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
}
else
{
ConstTensor newInput(newInfo, newValues);
constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
}
        // Move the connections on the dequantize layer's output to the constant layer.
        // The dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the constant layer's output tensor info.
constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant());

        // Set isConstant to true in the input tensor infos of the layers the constant layer
        // is now connected to. Starting at numConnections skips the constant layer's original
        // connections and touches only those newly moved from the dequantize layer.
for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
{
auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
.GetConnectedOutputSlot()->GetTensorInfo();
info.SetConstant();
constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
.GetConnectedOutputSlot()->SetTensorInfo(info);
}
}
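    /**
     * Dequantizes a buffer of INT8 values using: real = scale * (quantized - offset).
     * For example, with scale 0.1 and offset 5, a quantized value of 25 becomes
     * (25 - 5) * 0.1 = 2.0.
     */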
static void ConvertInt8To32(const void* srcInt8Buffer,
size_t numElements,
const float scale,
const int32_t offset,
float* dstFloat32Buffer)
{
ARMNN_ASSERT(srcInt8Buffer != nullptr);
ARMNN_ASSERT(dstFloat32Buffer != nullptr);
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
for (size_t i = 0; i < numElements; ++i)
{
dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
}
}
};
using ConvertConstDequantisationLayersToConstLayers
= OptimizeForConnection<ConstantLayer,
DequantizeLayer,
ConvertConstDequantisationLayersToConstLayersImpl>;
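// A minimal usage sketch (assuming the standard Optimizer entry points, as used
// elsewhere in ArmNN's optimisation pipeline):
//
//     Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));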
} // namespace optimizations
} // namespace armnn