//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "Optimization.hpp"
#include "NetworkUtils.hpp"
#include <armnn/Logging.hpp>
#include <armnnUtils/FloatingPointConverter.hpp>
#include <armnnUtils/Permute.hpp>
namespace armnn
{
namespace optimizations
{
class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
void Run(Graph& graph, InputSlot& connection) const
{
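        // OptimizeForConnection only invokes Run() on Constant -> Dequantize connections,
        // so the downcasts below are safe; the asserts document that contract.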
Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
Layer& child = connection.GetOwningLayer();
ARMNN_ASSERT(base.GetType() == LayerType::Constant);
ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);
ReplaceConstDequantisationLayer(graph,
PolymorphicDowncast<ConstantLayer*>(&base),
PolymorphicDowncast<DequantizeLayer*>(&child));
}
protected:
ConvertConstDequantisationLayersToConstLayersImpl() = default;
~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:
static void ReplaceConstDequantisationLayer(Graph&,
ConstantLayer* constantLayer,
DequantizeLayer* dequantizeLayer)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation looks for situations where a constant set of inputs is fed into a Dequantize
         * layer. In that case we don't want the overhead of dequantizing the values on every inference;
         * instead we dequantize them once and store them in a Constant layer to be reused every time,
         * as they will not change.
         */
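        // Gather the tensor infos involved: the constant's output, the (quantized) input
        // to the Dequantize layer, and its (float) output.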
TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
bool requiresPermute = false;
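        // Peek at the first consumer of the dequantize output to decide whether the
        // constant data holds convolution weights that need permuting for NCHW.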
auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
if (connection)
{
if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
{
                /**
                 * ArmNN does not currently support non-fixed weights or bias.
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices, so we permute it to OIHW when the DataLayout is NCHW.
                 */
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
"Convolution layer.";
auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
"Convolution layer and requires permute on weights. ";
requiresPermute = true;
}
}
}
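        // Remember how many consumers the constant layer already has; the connections moved
        // over from the dequantize layer are expected to be appended after this index.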
ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
<< "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
<< "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());
std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
if (constantInfo.GetDataType() == DataType::Float16 &&
inputDequantizeInfo.GetDataType() == DataType::Float16 &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
newValues.data());
}
else if (((constantInfo.GetDataType() == DataType::QAsymmS8
&& inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
|| (constantInfo.GetDataType() == DataType::QSymmS8
&& inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
outputDequantizeInfo.GetDataType() == DataType::Float32)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
outputDequantizeInfo.GetNumElements(),
inputDequantizeInfo.GetQuantizationScale(),
inputDequantizeInfo.GetQuantizationOffset(),
newValues.data());
}
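        // Note: any data type combination not handled above leaves newValues zero-initialised.

        // The constant layer will now carry the dequantized float values, so it takes over the
        // dequantize output's tensor info, marked as constant.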
TensorInfo newInfo = outputDequantizeInfo;
newInfo.SetConstant(true);
if (requiresPermute)
{
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
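            // PermutationVector maps source dimension i to destination dimension mappings[i]:
            // O stays at 0, H moves 1 -> 2, W moves 2 -> 3, I moves 3 -> 1, giving OIHW.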
std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
newValues.data(), permutedValues.data(),
GetDataTypeSize(outputDequantizeInfo.GetDataType()));
ConstTensor newInput(newInfo, permutedValues);
constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
}
else
{
ConstTensor newInput(newInfo, newValues);
constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
}
        // Move the connections on the dequantize layer's output to the constant layer.
        // The dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the constant layer's output tensor info.
constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant());

        // Set isConstant to true in the input tensor infos of the layers the constant layer
        // is now connected to. Starting at numConnections skips the constant layer's original
        // connections and touches only those newly moved from the dequantize layer.
for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
{
auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
.GetConnectedOutputSlot()->GetTensorInfo();
info.SetConstant();
constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
.GetConnectedOutputSlot()->SetTensorInfo(info);
}
}
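    /**
     * Dequantizes a buffer of INT8 values using: real = scale * (quantized - offset).
     * For example, with scale 0.1 and offset 5, a quantized value of 25 becomes
     * (25 - 5) * 0.1 = 2.0.
     */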
static void ConvertInt8To32(const void* srcInt8Buffer,
size_t numElements,
const float scale,
const int32_t offset,
float* dstFloat32Buffer)
{
ARMNN_ASSERT(srcInt8Buffer != nullptr);
ARMNN_ASSERT(dstFloat32Buffer != nullptr);
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
for (size_t i = 0; i < numElements; ++i)
{
dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
}
}
};
using ConvertConstDequantisationLayersToConstLayers
= OptimizeForConnection<ConstantLayer,
DequantizeLayer,
ConvertConstDequantisationLayersToConstLayersImpl>;
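// A minimal usage sketch (assuming the standard Optimizer entry points, as used
// elsewhere in ArmNN's optimisation pipeline):
//
//     Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));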
} // namespace optimizations
} // namespace armnn