IVGCVSW-4483 Remove boost::polymorphic_downcast

 * replace boost::polymorphic_downcast with armnn::PolymorphicDowncast
 * remove the now-unnecessary boost cast includes (boost/cast.hpp,
   boost/polymorphic_cast.hpp, boost/polymorphic_pointer_cast.hpp)
 * include boost/numeric/conversion/cast.hpp directly in WorkloadUtils.cpp,
   which previously received it transitively through boost/cast.hpp
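
For reference, PolymorphicDowncast mirrors the semantics of
boost::polymorphic_downcast: the cast is verified with dynamic_cast in
debug builds and compiles down to a plain static_cast otherwise. A
minimal sketch of those semantics (the exact contents of
armnn/utility/PolymorphicDowncast.hpp may differ):

    #include <cassert>
    #include <type_traits>

    // Debug-checked downcast: assert that dynamic_cast and static_cast
    // agree on the target, then return the cheap static_cast. With
    // NDEBUG defined, only the static_cast remains.
    template <typename DestType, typename SourceType>
    DestType PolymorphicDowncast(SourceType* value)
    {
        static_assert(std::is_pointer<DestType>::value,
                      "PolymorphicDowncast only works with pointer types");
        assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
        return static_cast<DestType>(value);
    }

Call sites therefore change mechanically, e.g.
boost::polymorphic_downcast<IClTensorHandle*>(handle) becomes
PolymorphicDowncast<IClTensorHandle*>(handle).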

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ie603fb82860fe05fee547dc78073230cc62b2e1f
diff --git a/src/backends/aclCommon/BaseMemoryManager.cpp b/src/backends/aclCommon/BaseMemoryManager.cpp
index b43eaf8..aaadc94 100644
--- a/src/backends/aclCommon/BaseMemoryManager.cpp
+++ b/src/backends/aclCommon/BaseMemoryManager.cpp
@@ -10,7 +10,6 @@
 #include "arm_compute/runtime/OffsetLifetimeManager.h"
 #endif
 
-#include <boost/polymorphic_cast.hpp>
 
 namespace armnn
 {
diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
index 83cec2a..b14e148 100644
--- a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
+++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
@@ -6,6 +6,7 @@
 
 #include <test/CreateWorkload.hpp>
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 #include <reference/RefWorkloadFactory.hpp>
 #include <reference/RefTensorHandle.hpp>
@@ -93,8 +94,8 @@
     MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData();
     BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1);
-    auto inputHandle1  = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor1.m_Inputs[0]);
-    auto outputHandle1 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
+    auto inputHandle1  = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor1.m_Inputs[0]);
+    auto outputHandle1 = PolymorphicDowncast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
     BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
     BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(outputHandle1, {2, 3}));
 
@@ -102,8 +103,8 @@
     MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData();
     BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1);
-    auto inputHandle2  = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
-    auto outputHandle2 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor2.m_Outputs[0]);
+    auto inputHandle2  = PolymorphicDowncast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
+    auto outputHandle2 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor2.m_Outputs[0]);
     BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(inputHandle2, {2, 3}));
     BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
 }
diff --git a/src/backends/backendsCommon/MemCopyWorkload.cpp b/src/backends/backendsCommon/MemCopyWorkload.cpp
index 572c0fc..c1aa79c 100644
--- a/src/backends/backendsCommon/MemCopyWorkload.cpp
+++ b/src/backends/backendsCommon/MemCopyWorkload.cpp
@@ -8,7 +8,7 @@
 #include <backendsCommon/MemCopyWorkload.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cstring>
 
@@ -27,9 +27,9 @@
 
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast<SrcTensorHandleType*>(
+        SrcTensorHandleType* const srcTensorHandle = PolymorphicDowncast<SrcTensorHandleType*>(
             descriptor.m_Inputs[i]);
-        DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast<DstTensorHandleType*>(
+        DstTensorHandleType* const dstTensorHandle = PolymorphicDowncast<DstTensorHandleType*>(
             descriptor.m_Outputs[i]);
 
         tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index a7e8576..c55c70a 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -10,6 +10,7 @@
 #include <armnn/LayerSupport.hpp>
 #include <armnn/ILayerSupport.hpp>
 #include <armnn/BackendRegistry.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/WorkloadFactory.hpp>
 #include <armnn/backends/IBackendInternal.hpp>
@@ -18,7 +19,6 @@
 
 #include <backendsCommon/test/WorkloadTestUtils.hpp>
 
-#include <boost/cast.hpp>
 #include <boost/iterator/transform_iterator.hpp>
 
 #include <cstring>
@@ -49,7 +49,7 @@
 {
     Optional<std::string&> reason = outReasonIfUnsupported;
     bool result;
-    const Layer& layer = *(boost::polymorphic_downcast<const Layer*>(&connectableLayer));
+    const Layer& layer = *(PolymorphicDowncast<const Layer*>(&connectableLayer));
 
     auto const& backendRegistry = BackendRegistryInstance();
     if (!backendRegistry.IsBackendRegistered(backendId))
@@ -70,7 +70,7 @@
     {
         case LayerType::Activation:
         {
-            auto cLayer = boost::polymorphic_downcast<const ActivationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ActivationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsActivationSupported(
@@ -94,7 +94,7 @@
         }
         case LayerType::ArgMinMax:
         {
-            auto cLayer = boost::polymorphic_downcast<const ArgMinMaxLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ArgMinMaxLayer*>(&layer);
             const ArgMinMaxDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -108,7 +108,7 @@
         }
         case LayerType::BatchNormalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const BatchNormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const BatchNormalizationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             const TensorInfo& mean = cLayer->m_Mean->GetTensorInfo();
@@ -130,7 +130,7 @@
         {
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
-            auto cLayer = boost::polymorphic_downcast<const BatchToSpaceNdLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const BatchToSpaceNdLayer*>(&layer);
 
             result = layerSupportObject->IsBatchToSpaceNdSupported(OverrideDataType(input, dataType),
                                                                    OverrideDataType(output, dataType),
@@ -140,7 +140,7 @@
         }
         case LayerType::Comparison:
         {
-            auto cLayer = boost::polymorphic_downcast<const ComparisonLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ComparisonLayer*>(&layer);
 
             const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
@@ -189,7 +189,7 @@
         }
         case LayerType::Convolution2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const Convolution2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const Convolution2dLayer*>(&layer);
 
             const TensorInfo input  = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
                                                        dataType);
@@ -227,7 +227,7 @@
         }
         case LayerType::DepthToSpace:
         {
-            auto cLayer = boost::polymorphic_downcast<const DepthToSpaceLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const DepthToSpaceLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -240,7 +240,7 @@
         }
         case LayerType::DepthwiseConvolution2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const DepthwiseConvolution2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const DepthwiseConvolution2dLayer*>(&layer);
             const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
                                                        dataType);
             const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType);
@@ -277,7 +277,7 @@
         }
         case LayerType::DetectionPostProcess:
         {
-            auto cLayer = boost::polymorphic_downcast<const DetectionPostProcessLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const DetectionPostProcessLayer*>(&layer);
             const TensorInfo& boxEncodings = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& scores = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
             const TensorInfo& anchors = cLayer->m_Anchors->GetTensorInfo();
@@ -301,7 +301,7 @@
         }
         case LayerType::ElementwiseUnary:
         {
-            auto cLayer = boost::polymorphic_downcast<const ElementwiseUnaryLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ElementwiseUnaryLayer*>(&layer);
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -314,7 +314,7 @@
         }
         case LayerType::FakeQuantization:
         {
-            auto cLayer = boost::polymorphic_downcast<const FakeQuantizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const FakeQuantizationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             result = layerSupportObject->IsFakeQuantizationSupported(OverrideDataType(input, dataType),
                                                                      cLayer->GetParameters(),
@@ -332,7 +332,7 @@
         }
         case LayerType::FullyConnected:
         {
-            auto cLayer = boost::polymorphic_downcast<const FullyConnectedLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const FullyConnectedLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             ARMNN_ASSERT(cLayer->m_Weight.get() != nullptr);
@@ -414,7 +414,7 @@
         }
         case LayerType::InstanceNormalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const InstanceNormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const InstanceNormalizationLayer*>(&layer);
             const InstanceNormalizationDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -429,7 +429,7 @@
         }
         case LayerType::L2Normalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const L2NormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const L2NormalizationLayer*>(&layer);
             const L2NormalizationDescriptor& descriptor = cLayer->GetParameters();
 
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -444,7 +444,7 @@
         }
         case LayerType::LogSoftmax:
         {
-            auto cLayer = boost::polymorphic_downcast<const LogSoftmaxLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const LogSoftmaxLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -457,7 +457,7 @@
         }
         case LayerType::Lstm:
         {
-            auto cLayer = boost::polymorphic_downcast<const LstmLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const LstmLayer*>(&layer);
             const LstmDescriptor& descriptor = cLayer->GetParameters();
 
             // All inputs.
@@ -645,7 +645,7 @@
         }
         case LayerType::Concat:
         {
-            auto cLayer = boost::polymorphic_downcast<const ConcatLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ConcatLayer*>(&layer);
 
             // Get vector of all inputs.
             auto getTensorInfo = [&dataType](const InputSlot& slot)
@@ -685,7 +685,7 @@
         }
         case LayerType::Normalization:
         {
-            auto cLayer = boost::polymorphic_downcast<const NormalizationLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const NormalizationLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsNormalizationSupported(OverrideDataType(input, dataType),
@@ -702,7 +702,7 @@
         }
         case LayerType::Permute:
         {
-            auto cLayer = boost::polymorphic_downcast<const PermuteLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const PermuteLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsPermuteSupported(OverrideDataType(input, dataType),
@@ -713,7 +713,7 @@
         }
         case LayerType::Pad:
         {
-            auto cLayer = boost::polymorphic_downcast<const PadLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const PadLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsPadSupported(
@@ -725,7 +725,7 @@
         }
         case LayerType::Pooling2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const Pooling2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const Pooling2dLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsPooling2dSupported(OverrideDataType(input, dataType),
@@ -736,7 +736,7 @@
         }
         case LayerType::PreCompiled:
         {
-            auto cLayer = boost::polymorphic_downcast<const PreCompiledLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const PreCompiledLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             result = layerSupportObject->IsPreCompiledSupported(OverrideDataType(input, dataType),
                                                                 cLayer->GetParameters(),
@@ -752,7 +752,7 @@
         }
         case LayerType::QLstm:
         {
-            auto cLayer = boost::polymorphic_downcast<const QLstmLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const QLstmLayer*>(&layer);
             const QLstmDescriptor& descriptor = cLayer->GetParameters();
 
             // Inputs
@@ -840,7 +840,7 @@
         }
         case LayerType::QuantizedLstm:
         {
-            auto cLayer = boost::polymorphic_downcast<const QuantizedLstmLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const QuantizedLstmLayer*>(&layer);
 
             // Inputs
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -904,7 +904,7 @@
         }
         case LayerType::Reshape:
         {
-            auto cLayer = boost::polymorphic_downcast<const ReshapeLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ReshapeLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsReshapeSupported(OverrideDataType(input, dataType),
@@ -915,7 +915,7 @@
         }
         case LayerType::Resize:
         {
-            auto cLayer = boost::polymorphic_downcast<const ResizeLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const ResizeLayer*>(&layer);
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsResizeSupported(OverrideDataType(input, dataType),
@@ -926,7 +926,7 @@
         }
         case LayerType::Slice:
         {
-            auto cLayer = boost::polymorphic_downcast<const SliceLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SliceLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -939,7 +939,7 @@
         }
         case LayerType::Softmax:
         {
-            auto cLayer = boost::polymorphic_downcast<const SoftmaxLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SoftmaxLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsSoftmaxSupported(OverrideDataType(input, dataType),
@@ -950,7 +950,7 @@
         }
         case LayerType::SpaceToBatchNd:
         {
-            auto cLayer = boost::polymorphic_downcast<const SpaceToBatchNdLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SpaceToBatchNdLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsSpaceToBatchNdSupported(OverrideDataType(input, dataType),
@@ -961,7 +961,7 @@
         }
         case LayerType::SpaceToDepth:
         {
-            auto cLayer = boost::polymorphic_downcast<const SpaceToDepthLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SpaceToDepthLayer*>(&layer);
 
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -974,7 +974,7 @@
         }
         case LayerType::Splitter:
         {
-            auto cLayer = boost::polymorphic_downcast<const SplitterLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const SplitterLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
 
             // Get vector of all outputs.
@@ -996,7 +996,7 @@
         }
         case LayerType::Stack:
         {
-            auto cLayer = boost::polymorphic_downcast<const StackLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const StackLayer*>(&layer);
 
             // Get vector of all inputs.
             auto getTensorInfo = [&dataType](const InputSlot& slot)
@@ -1023,7 +1023,7 @@
         }
         case LayerType::StandIn:
         {
-            auto cLayer = boost::polymorphic_downcast<const StandInLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const StandInLayer*>(&layer);
 
             // Get vector of all inputs.
             auto getTensorInfoIn = [&dataType](const InputSlot& slot)
@@ -1064,7 +1064,7 @@
         }
         case LayerType::StridedSlice:
         {
-            auto cLayer = boost::polymorphic_downcast<const StridedSliceLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const StridedSliceLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsStridedSliceSupported(OverrideDataType(input, dataType),
@@ -1100,7 +1100,7 @@
         }
         case LayerType::Mean:
         {
-            auto cLayer = boost::polymorphic_downcast<const MeanLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const MeanLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsMeanSupported(
@@ -1134,7 +1134,7 @@
         }
         case LayerType::Transpose:
         {
-            auto cLayer = boost::polymorphic_downcast<const TransposeLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const TransposeLayer*>(&layer);
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
             result = layerSupportObject->IsTransposeSupported(OverrideDataType(input, dataType),
@@ -1145,7 +1145,7 @@
         }
         case LayerType::TransposeConvolution2d:
         {
-            auto cLayer = boost::polymorphic_downcast<const TransposeConvolution2dLayer*>(&layer);
+            auto cLayer = PolymorphicDowncast<const TransposeConvolution2dLayer*>(&layer);
 
             const TensorInfo input  = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(),
                                                        dataType);
@@ -1188,7 +1188,7 @@
                                         Optional<DataType> dataType,
                                         std::string& outReasonIfUnsupported)
 {
-    auto layer = boost::polymorphic_downcast<const Layer*>(&connectableLayer);
+    auto layer = PolymorphicDowncast<const Layer*>(&connectableLayer);
     return IsLayerSupported(layer->GetBackendId(), connectableLayer, dataType, outReasonIfUnsupported);
 }
 
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
index bd5e81e..37915cf 100644
--- a/src/backends/backendsCommon/WorkloadUtils.cpp
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -7,6 +7,8 @@
 
 #include <armnn/Utils.hpp>
 
+#include <boost/numeric/conversion/cast.hpp>
+
 namespace armnn
 {
 
diff --git a/src/backends/backendsCommon/WorkloadUtils.hpp b/src/backends/backendsCommon/WorkloadUtils.hpp
index a4da924..354362e 100644
--- a/src/backends/backendsCommon/WorkloadUtils.hpp
+++ b/src/backends/backendsCommon/WorkloadUtils.hpp
@@ -8,15 +8,13 @@
 #include "CpuTensorHandle.hpp"
 
 #include <armnn/backends/ITensorHandle.hpp>
-
 #include <armnn/Tensor.hpp>
-
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnUtils/Permute.hpp>
 
 #include <Half.hpp>
 #include <Profiling.hpp>
 
-#include <boost/cast.hpp>
 
 namespace armnn
 {
@@ -198,9 +196,9 @@
     for (unsigned int i = 0; i < numInputs; ++i)
     {
         SrcTensorHandleType* const srcTensorHandle =
-            boost::polymorphic_downcast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
+            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
         DstTensorHandleType* const dstTensorHandle =
-            boost::polymorphic_downcast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
+            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
 
         tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
     }
diff --git a/src/backends/backendsCommon/test/DynamicBackendTests.hpp b/src/backends/backendsCommon/test/DynamicBackendTests.hpp
index 1276776..6371e53 100644
--- a/src/backends/backendsCommon/test/DynamicBackendTests.hpp
+++ b/src/backends/backendsCommon/test/DynamicBackendTests.hpp
@@ -6,15 +6,12 @@
 #pragma once
 
 #include <armnn/BackendRegistry.hpp>
-#include <armnn/ILayerSupport.hpp>
-
 #include <armnn/backends/DynamicBackend.hpp>
-
-#include <backendsCommon/DynamicBackendUtils.hpp>
+#include <armnn/ILayerSupport.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
-
+#include <backendsCommon/DynamicBackendUtils.hpp>
 #include <reference/workloads/RefConvolution2dWorkload.hpp>
-
 #include <Runtime.hpp>
 
 #include <string>
@@ -1212,7 +1209,7 @@
     IRuntime::CreationOptions creationOptions;
     IRuntimePtr runtime = IRuntime::Create(creationOptions);
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.empty());
 
@@ -1253,7 +1250,7 @@
         BOOST_TEST((backendIds.find(expectedRegisteredbackendId) != backendIds.end()));
     }
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size() == expectedRegisteredbackendIds.size());
     for (const BackendId& expectedRegisteredbackendId : expectedRegisteredbackendIds)
@@ -1294,7 +1291,7 @@
         BOOST_TEST((backendIds.find(expectedRegisteredbackendId) != backendIds.end()));
     }
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size() == expectedRegisteredbackendIds.size());
     for (const BackendId& expectedRegisteredbackendId : expectedRegisteredbackendIds)
@@ -1323,7 +1320,7 @@
     const BackendRegistry& backendRegistry = BackendRegistryInstance();
     BOOST_TEST(backendRegistry.Size() == 0);
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.empty());
 }
@@ -1343,7 +1340,7 @@
     const BackendRegistry& backendRegistry = BackendRegistryInstance();
     BOOST_TEST(backendRegistry.Size() == 0);
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.empty());
 }
@@ -1382,7 +1379,7 @@
     BackendIdSet backendIds = backendRegistry.GetBackendIds();
     BOOST_TEST((backendIds.find("CpuRef") != backendIds.end()));
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size() == 1);
     BOOST_TEST((supportedBackendIds.find("CpuRef") != supportedBackendIds.end()));
@@ -1433,7 +1430,7 @@
     // Create a convolution workload with the dummy settings
     auto workload = referenceWorkloadFactory->CreateConvolution2d(convolution2dQueueDescriptor, workloadInfo);
     BOOST_TEST((workload != nullptr));
-    BOOST_TEST(workload.get() == boost::polymorphic_downcast<RefConvolution2dWorkload*>(workload.get()));
+    BOOST_TEST(workload.get() == PolymorphicDowncast<RefConvolution2dWorkload*>(workload.get()));
 }
 
 #endif
@@ -1453,7 +1450,7 @@
     BackendIdSet backendIds = backendRegistry.GetBackendIds();
     BOOST_TEST((backendIds.find("SampleDynamic") != backendIds.end()));
 
-    const DeviceSpec& deviceSpec = *boost::polymorphic_downcast<const DeviceSpec*>(&runtime->GetDeviceSpec());
+    const DeviceSpec& deviceSpec = *PolymorphicDowncast<const DeviceSpec*>(&runtime->GetDeviceSpec());
     BackendIdSet supportedBackendIds = deviceSpec.GetSupportedBackends();
     BOOST_TEST(supportedBackendIds.size()>= 1);
     BOOST_TEST((supportedBackendIds.find("SampleDynamic") != supportedBackendIds.end()));
diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
index 3aebe3e..c972b4b 100644
--- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp
@@ -3,16 +3,20 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include <boost/test/unit_test.hpp>
-#include <Graph.hpp>
-#include <SubgraphView.hpp>
-#include <SubgraphViewSelector.hpp>
-#include <armnn/backends/OptimizationViews.hpp>
-#include <Network.hpp>
 
 #include "CommonTestUtils.hpp"
 #include "MockBackend.hpp"
 
+#include <armnn/backends/OptimizationViews.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <Graph.hpp>
+#include <Network.hpp>
+#include <SubgraphView.hpp>
+#include <SubgraphViewSelector.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+
 using namespace armnn;
 
 void CheckLayers(Graph& graph)
@@ -208,7 +212,7 @@
     BOOST_CHECK(optNet);
 
     // Check the optimised graph
-    OptimizedNetwork* optNetObjPtr = boost::polymorphic_downcast<OptimizedNetwork*>(optNet.get());
+    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
     CheckLayers(optNetObjPtr->GetGraph());
 }
 
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index f612c37..bfe93bd 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -8,14 +8,13 @@
 
 #include <armnn/Logging.hpp>
 #include <armnn/utility/Assert.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/core/CL/OpenCL.h>
 #include <arm_compute/core/CL/CLKernelLibrary.h>
 #include <arm_compute/runtime/CL/CLScheduler.h>
 #include <arm_compute/runtime/CL/CLTunerTypes.h>
 
-#include <boost/polymorphic_cast.hpp>
-
 namespace armnn
 {
 
@@ -161,7 +160,7 @@
     bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
     if (useLegacyTunerAPI)
     {
-        auto clTunerParams = boost::polymorphic_downcast<ClTunedParameters*>(
+        auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>(
                                 options.m_GpuAccTunedParameters.get());
         tuner = &clTunerParams->m_Tuner;
 
diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp
index 9df3f1a..8af97f4 100644
--- a/src/backends/cl/ClTensorHandleFactory.cpp
+++ b/src/backends/cl/ClTensorHandleFactory.cpp
@@ -7,12 +7,12 @@
 #include "ClTensorHandleFactory.hpp"
 #include "ClTensorHandle.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <arm_compute/runtime/CL/CLTensor.h>
 #include <arm_compute/core/Coordinates.h>
 #include <arm_compute/runtime/CL/CLSubTensor.h>
 
-#include <boost/polymorphic_cast.hpp>
-
 
 namespace armnn
 {
@@ -42,7 +42,7 @@
     }
 
     return std::make_unique<ClSubTensorHandle>(
-            boost::polymorphic_downcast<IClTensorHandle *>(&parent), shape, coords);
+            PolymorphicDowncast<IClTensorHandle *>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index b1bd46c..b0d2fdf 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -10,6 +10,7 @@
 #include <armnn/Exceptions.hpp>
 #include <armnn/Utils.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/MakeWorkloadHelper.hpp>
@@ -24,7 +25,6 @@
 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
 #include <arm_compute/runtime/CL/CLScheduler.h>
 
-#include <boost/polymorphic_cast.hpp>
 #include <boost/format.hpp>
 
 namespace armnn
@@ -125,7 +125,7 @@
     }
 
     return std::make_unique<ClSubTensorHandle>(
-        boost::polymorphic_downcast<IClTensorHandle*>(&parent), shape, coords);
+        PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAbs(const AbsQueueDescriptor& descriptor,
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 92e7717..b09b26f 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -6,6 +6,7 @@
 #include "ClContextControlFixture.hpp"
 #include "ClWorkloadFactoryHelper.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
@@ -35,8 +36,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
     ActivationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
@@ -66,9 +67,9 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
     DescriptorType queueDescriptor = workload->GetData();
-    auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
@@ -159,8 +160,8 @@
 
     DescriptorType queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
@@ -184,8 +185,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
      switch (dataLayout)
     {
@@ -232,8 +233,8 @@
     auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
 
     ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
@@ -250,8 +251,8 @@
     auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
 
     ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
@@ -277,8 +278,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((inputHandle->GetShape() == inputShape));
     BOOST_TEST((outputHandle->GetShape() == outputShape));
 }
@@ -315,8 +316,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
@@ -343,8 +344,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
 }
@@ -376,8 +377,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
 }
@@ -404,8 +405,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
                                                                : std::initializer_list<unsigned int>({3, 1, 5, 5});
@@ -452,8 +453,8 @@
 
     // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST((inputHandle->GetShape() == inputShape));
     BOOST_TEST((outputHandle->GetShape() == outputShape));
@@ -497,9 +498,9 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
     PreluQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto alphaHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto alphaHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST((inputHandle->GetShape() == inputShape));
     BOOST_TEST((alphaHandle->GetShape() == alphaShape));
@@ -532,8 +533,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
@@ -565,8 +566,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
@@ -594,16 +595,16 @@
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
     SplitterQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
 
-    auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto outputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
 
-    auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    auto outputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
 
-    auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle0 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
 }
 
@@ -738,8 +739,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
                                                                : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
@@ -780,8 +781,8 @@
     auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
 
     LstmQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
 }
@@ -802,8 +803,8 @@
 
     auto queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     switch (dataLayout)
     {
@@ -859,8 +860,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
     MeanQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {  1, 3, 7, 4 }));
@@ -893,9 +894,9 @@
     auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
 
     ConcatQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle0  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle1  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle0  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle1  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
@@ -942,8 +943,8 @@
     auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
 
     SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
@@ -990,10 +991,10 @@
     StackQueueDescriptor queueDescriptor = workload->GetData();
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
+        auto inputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
         BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
     }
-    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
 }
 
@@ -1016,7 +1017,6 @@
 static void ClCreateQuantizedLstmWorkloadTest()
 {
     using namespace armnn::armcomputetensorutils;
-    using boost::polymorphic_downcast;
 
     Graph graph;
     ClWorkloadFactory factory =
@@ -1026,23 +1026,23 @@
 
     QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
 
-    IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
     BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
     BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
+    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
     BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 }
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 058c453..d020eeb 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "ClWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
 #include <cl/ClTensorHandle.hpp>
@@ -29,8 +31,8 @@
 {
     m_Data.ValidateInputsOutputs("ClAbsWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_AbsLayer.configure(&input, &output);
 }
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index cc6333f..9f83cd3 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -8,6 +8,7 @@
 #include "ClWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cl/ClTensorHandle.hpp>
 
@@ -29,8 +30,8 @@
 {
     m_Data.ValidateInputsOutputs("ClNegWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_NegLayer.configure(&input, &output);
 }
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index be68759..a305a4a 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -8,6 +8,7 @@
 #include "ClWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cl/ClTensorHandle.hpp>
 
@@ -29,8 +30,8 @@
 {
     m_Data.ValidateInputsOutputs("ClRsqrtWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_RsqrtLayer.configure(&input, &output);
 }
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index fa99e7f..5ea4c4c 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -8,6 +8,7 @@
 #include "ClWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <cl/ClTensorHandle.hpp>
 
@@ -36,8 +37,8 @@
 {
     m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input  = boost::polymorphic_downcast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = boost::polymorphic_downcast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index d541e4e..1acb5c6 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -10,7 +10,6 @@
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <cl/ClTensorHandle.hpp>
-#include <boost/polymorphic_pointer_cast.hpp>
 
 namespace armnn
 {
diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp
index 26b14af..a8b5b81 100644
--- a/src/backends/neon/NeonTensorHandleFactory.cpp
+++ b/src/backends/neon/NeonTensorHandleFactory.cpp
@@ -7,6 +7,7 @@
 #include "NeonTensorHandle.hpp"
 
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -36,7 +37,7 @@
     }
 
     return std::make_unique<NeonSubTensorHandle>(
-            boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords);
+            PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 47f7205..b3104b9 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -11,6 +11,7 @@
 
 #include <armnn/Utils.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/MakeWorkloadHelper.hpp>
@@ -69,7 +70,7 @@
     }
 
     return std::make_unique<NeonSubTensorHandle>(
-        boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords);
+        PolymorphicDowncast<IAclTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 3e1888c..447bad1 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -6,6 +6,7 @@
 #include "NeonWorkloadFactoryHelper.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
@@ -72,8 +73,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
     ActivationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
 }
@@ -103,9 +104,9 @@
     auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
 
     DescriptorType queueDescriptor = workload->GetData();
-    auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
@@ -201,8 +202,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
@@ -247,8 +248,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle,  TensorInfo(outputShape, DataType)));
 }
@@ -287,8 +288,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
@@ -322,8 +323,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
 }
@@ -351,8 +352,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
@@ -398,8 +399,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
 }
@@ -449,9 +450,9 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
     PreluQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto alphaHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
@@ -485,8 +486,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
 }
@@ -518,8 +519,8 @@
 
     auto queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     switch (dataLayout)
     {
@@ -565,8 +566,8 @@
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
 }
@@ -593,8 +594,8 @@
     auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
 
     SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
@@ -630,16 +631,16 @@
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
     SplitterQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
 
-    auto outputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
 
-    auto outputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
 
-    auto outputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
 }
 
@@ -743,8 +744,8 @@
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ?
                 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
@@ -788,8 +789,8 @@
 
     LstmQueueDescriptor queueDescriptor = workload->GetData();
 
-    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
 
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
@@ -811,9 +812,9 @@
     auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
 
     ConcatQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
 
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
@@ -871,10 +872,10 @@
     StackQueueDescriptor queueDescriptor = workload->GetData();
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
+        auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
         BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
     }
-    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
 }
 
@@ -898,8 +899,6 @@
 template <typename QuantizedLstmWorkloadType>
 static void NeonCreateQuantizedLstmWorkloadTest()
 {
-    using boost::polymorphic_downcast;
-
     Graph graph;
     NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
 
@@ -907,23 +906,23 @@
 
     QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
 
-    IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
     BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
     BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
+    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
     BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 
-    IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
 
-    IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
     BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
 }
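Every test hunk in this file follows the same shape: fetch an ITensorHandle* from the queue descriptor, downcast it to the backend-specific handle type, then assert on shape and data type. A self-contained sketch of that pattern follows; ITensorHandleStub and AclTensorHandleStub are hypothetical stand-ins, not Arm NN types.

    #include <cassert>
    #include <vector>

    struct ITensorHandleStub   { virtual ~ITensorHandleStub() = default; };    // stand-in for ITensorHandle
    struct AclTensorHandleStub : ITensorHandleStub { unsigned int shape[2] = {2, 2}; };

    template <typename DestType, typename SourceType>
    DestType PolymorphicDowncastSketch(SourceType* value)  // same sketch as above
    {
        assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
        return static_cast<DestType>(value);
    }

    int main()
    {
        AclTensorHandleStub concrete;
        std::vector<ITensorHandleStub*> inputs{&concrete};  // mimics queueDescriptor.m_Inputs

        // Downcast first, then inspect -- the pattern used by every test above.
        auto* inputHandle = PolymorphicDowncastSketch<AclTensorHandleStub*>(inputs[0]);
        assert(inputHandle->shape[0] == 2 && inputHandle->shape[1] == 2);
        return 0;
    }

Note also that the quantized-LSTM test above drops its using boost::polymorphic_downcast; declaration entirely: PolymorphicDowncast is a free function in the armnn namespace, which the test code already uses.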
diff --git a/src/backends/neon/workloads/NeonAbsWorkload.cpp b/src/backends/neon/workloads/NeonAbsWorkload.cpp
index 7f8ed5a..ea14ac3 100644
--- a/src/backends/neon/workloads/NeonAbsWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAbsWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -28,8 +27,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonAbsWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_AbsLayer.configure(&input, &output);
 }
diff --git a/src/backends/neon/workloads/NeonActivationWorkload.cpp b/src/backends/neon/workloads/NeonActivationWorkload.cpp
index 916d674..4b2169a 100644
--- a/src/backends/neon/workloads/NeonActivationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonActivationWorkload.cpp
@@ -5,7 +5,9 @@
 
 #include "NeonActivationWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
 
@@ -36,8 +38,8 @@
     const arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEActivationLayer>();
     layer->configure(&input, &output, activationLayerInfo);
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index a025c0b..cb0c8a4 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -7,6 +7,7 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
@@ -35,9 +36,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonAdditionWorkload", 2, 1);
 
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input2 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
     layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
index 0fa9d43..0fb819d 100644
--- a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
@@ -10,6 +10,7 @@
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <armnnUtils/TensorUtils.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h>
@@ -54,8 +55,8 @@
                                              const WorkloadInfo& info)
         : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
 {
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto numDims = info.m_InputTensorInfos[0].GetNumDimensions();
     auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, m_Data.m_Parameters.m_Axis);
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index cd931e3..ff777db 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -7,8 +7,9 @@
 
 #include "NeonWorkloadUtils.hpp"
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h>
 
@@ -53,8 +54,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonBatchNormalizationWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonConstantWorkload.cpp b/src/backends/neon/workloads/NeonConstantWorkload.cpp
index b9cb807..1cffbe1 100644
--- a/src/backends/neon/workloads/NeonConstantWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConstantWorkload.cpp
@@ -9,6 +9,7 @@
 #include <BFloat16.hpp>
 #include <Half.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <neon/NeonTensorHandle.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/Workload.hpp>
@@ -41,9 +42,9 @@
 
         ARMNN_ASSERT(data.m_LayerOutput != nullptr);
         arm_compute::ITensor& output =
-            boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor();
+            PolymorphicDowncast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor();
         arm_compute::DataType computeDataType =
-            boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType();
+            PolymorphicDowncast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType();
 
         switch (computeDataType)
         {
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 5d45642..144baec 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -5,8 +5,9 @@
 
 #include "NeonConvolution2dWorkload.hpp"
 
-#include <backendsCommon/CpuTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/workloads/NeonWorkloadUtils.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
@@ -65,8 +66,8 @@
 
     // todo: check tensor shapes match.
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
index 8b229a1..9ae82ff 100644
--- a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
@@ -10,6 +10,7 @@
 #include <arm_compute/runtime/NEON/functions/NEDequantizationLayer.h>
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonTensorHandle.hpp>
 
@@ -32,8 +33,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonDequantizeWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     std::unique_ptr<arm_compute::NEDequantizationLayer> layer(new arm_compute::NEDequantizationLayer());
     layer->configure(&input, &output);
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
index 2ed47e4..36f1cd9 100644
--- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -85,7 +84,7 @@
 
     auto AclTensorRef = [](ITensorHandle* tensor) -> arm_compute::ITensor&
         {
-            return boost::polymorphic_downcast<IAclTensorHandle*>(tensor)->GetTensor();
+            return PolymorphicDowncast<IAclTensorHandle*>(tensor)->GetTensor();
         };
 
     arm_compute::ITensor& boxEncodings  = AclTensorRef(m_Data.m_Inputs[0]);
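Rather than repeating the downcast per tensor, NeonDetectionPostProcessWorkload centralises it in the AclTensorRef lambda above. A short sketch of the same design choice, reusing the hypothetical stand-in types and downcast from the previous example:

    // Assumes ITensorHandleStub, AclTensorHandleStub and PolymorphicDowncastSketch
    // from the sketch above.
    int main()
    {
        AclTensorHandleStub h0, h1;
        ITensorHandleStub* inputs[] = {&h0, &h1};

        // One lambda owns the downcast; each tensor access stays a single line.
        auto AclTensorRef = [](ITensorHandleStub* tensor) -> AclTensorHandleStub&
        {
            return *PolymorphicDowncastSketch<AclTensorHandleStub*>(tensor);
        };

        AclTensorHandleStub& boxEncodings = AclTensorRef(inputs[0]);
        AclTensorHandleStub& scores       = AclTensorRef(inputs[1]);
        (void)boxEncodings; (void)scores;
        return 0;
    }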
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index 6fdb455..fc353f1 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -4,7 +4,9 @@
 //
 
 #include "NeonDivisionWorkload.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
@@ -29,9 +31,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonDivisionWorkload", 2, 1);
 
-    arm_compute::ITensor& input0 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_DivLayer.configure(&input0, &input1, &output);
 }
diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
index 5b4e909..c49df33 100644
--- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
@@ -7,9 +7,9 @@
 
 #include "NeonWorkloadUtils.hpp"
 
-#include <arm_compute/runtime/NEON/functions/NEFloor.h>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
-#include <boost/polymorphic_cast.hpp>
+#include <arm_compute/runtime/NEON/functions/NEFloor.h>
 
 namespace armnn
 {
@@ -19,8 +19,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEFloor>();
     layer->configure(&input, &output);
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 338c7eb..e808c60 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -8,6 +8,7 @@
 #include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
@@ -51,8 +52,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
index 9de6c82..d54607d 100644
--- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
@@ -8,6 +8,7 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h>
 
@@ -33,8 +34,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonMaximumWorkload.cpp b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
index c433d81..46d500b 100644
--- a/src/backends/neon/workloads/NeonMaximumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
@@ -5,6 +5,7 @@
 
 #include "NeonMaximumWorkload.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
@@ -29,9 +30,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonMaximumWorkload", 2, 1);
 
-    arm_compute::ITensor& input0 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_MaxLayer.configure(&input0, &input1, &output);
 }
diff --git a/src/backends/neon/workloads/NeonMinimumWorkload.cpp b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
index 2867a80..53e483a 100644
--- a/src/backends/neon/workloads/NeonMinimumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
@@ -4,7 +4,9 @@
 //
 
 #include "NeonMinimumWorkload.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
@@ -29,9 +31,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonMinimumWorkload", 2, 1);
 
-    arm_compute::ITensor& input0 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_MinLayer.configure(&input0, &input1, &output);
 }
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index 66fbedf..d813970 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
 
 namespace armnn
@@ -37,9 +39,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonMultiplicationWorkload", 2, 1);
 
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input2 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
diff --git a/src/backends/neon/workloads/NeonNegWorkload.cpp b/src/backends/neon/workloads/NeonNegWorkload.cpp
index afe0558..06c1467 100644
--- a/src/backends/neon/workloads/NeonNegWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNegWorkload.cpp
@@ -9,8 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-
-#include <boost/cast.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 namespace armnn
 {
@@ -28,8 +27,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonNegWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_NegLayer.configure(&input, &output);
 }
diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
index 8cb4ec9..77fc429 100644
--- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
@@ -8,6 +8,7 @@
 #include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h>
 
@@ -77,8 +78,8 @@
         throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality.");
     }
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
index 9934c29..968d5ce 100644
--- a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <neon/NeonTensorHandle.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
@@ -37,8 +39,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonPooling2dWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonPreluWorkload.cpp b/src/backends/neon/workloads/NeonPreluWorkload.cpp
index 107090e..8e6ea30 100644
--- a/src/backends/neon/workloads/NeonPreluWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPreluWorkload.cpp
@@ -5,7 +5,9 @@
 
 #include "NeonPreluWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEPReluLayer.h>
 
@@ -31,9 +33,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonPreluWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& alpha = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& alpha = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEPReluLayer>();
     layer->configure(&input, &alpha, &output);
diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.cpp b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
index 659bb94..8b11da7 100644
--- a/src/backends/neon/workloads/NeonReshapeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
@@ -7,9 +7,9 @@
 
 #include "NeonWorkloadUtils.hpp"
 
-#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
-#include <boost/polymorphic_cast.hpp>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
 
 namespace armnn
 {
@@ -29,8 +29,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonReshapeWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEReshapeLayer>();
     layer->configure(&input, &output);
diff --git a/src/backends/neon/workloads/NeonResizeWorkload.cpp b/src/backends/neon/workloads/NeonResizeWorkload.cpp
index e936ab7..9e3be26 100644
--- a/src/backends/neon/workloads/NeonResizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonResizeWorkload.cpp
@@ -9,7 +9,9 @@
 
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
+
 #include <neon/NeonTensorHandle.hpp>
 
 using namespace armnn::armcomputetensorutils;
@@ -45,8 +47,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonResizeWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
index b629283..44980df 100644
--- a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
+++ b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
@@ -9,8 +9,8 @@
 
 #include <aclCommon/ArmComputeTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
-#include <boost/cast.hpp>
 
 namespace armnn
 {
@@ -28,8 +28,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonRsqrtWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_RsqrtLayer.configure(&input, &output);
 }
diff --git a/src/backends/neon/workloads/NeonSliceWorkload.cpp b/src/backends/neon/workloads/NeonSliceWorkload.cpp
index 171edc6..32cc042 100644
--- a/src/backends/neon/workloads/NeonSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSliceWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
 #include <neon/NeonTensorHandle.hpp>
@@ -37,8 +39,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonSliceWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
index 152d19c..a4690a7 100644
--- a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
@@ -8,6 +8,8 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
 
 namespace armnn
@@ -20,8 +22,8 @@
     m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1);
 
     // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions.
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager);
     unsigned int aclAxis = ComputeSoftmaxAclAxis(m_Data.m_Parameters, info.m_InputTensorInfos[0]);
diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
index 15a7066..05d93b9 100644
--- a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
@@ -7,6 +7,7 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
 
@@ -20,8 +21,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     const auto outputQuantization = output.info()->quantization_info();
 
diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
index a4204b2..2982cd1 100644
--- a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
@@ -5,6 +5,8 @@
 
 #include "NeonSpaceToDepthWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <ResolveType.hpp>
 
 namespace armnn
@@ -33,12 +35,12 @@
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     input.info()->set_data_layout(aclDataLayout);
 
     int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
 
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
 
     m_Layer.reset(new arm_compute::NESpaceToDepthLayer());
diff --git a/src/backends/neon/workloads/NeonSplitterWorkload.cpp b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
index 224e97a..19fa7c6 100644
--- a/src/backends/neon/workloads/NeonSplitterWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
@@ -9,6 +9,7 @@
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonTensorHandle.hpp>
 
@@ -74,7 +75,7 @@
         return;
     }
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
 
     std::vector<arm_compute::ITensor *> aclOutputs;
     for (auto output : m_Data.m_Outputs)
diff --git a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
index 356c0ae..282005c 100644
--- a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
@@ -9,6 +9,7 @@
 #include <neon/NeonTensorHandle.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/WorkloadUtils.hpp>
 
 namespace armnn
@@ -50,8 +51,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonStridedSliceWorkload", 1, 1);
 
-    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::Coordinates starts;
     arm_compute::Coordinates ends;
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index f4b4707..ccc2bfe 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -7,6 +7,7 @@
 
 #include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
@@ -34,9 +35,9 @@
 {
     m_Data.ValidateInputsOutputs("NeonSubtractionWorkload", 2, 1);
 
-    arm_compute::ITensor& input1 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& input2 = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>();
     layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
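
Every Neon hunk above follows the same shape: downcast the generic ITensorHandle pointers to the ACL-aware handle type, then feed the underlying arm_compute tensors to an ACL layer. A condensed sketch of that pattern, assuming it is built inside the Arm NN tree with the same includes these files use; the free function ConfigureSubtraction is illustrative, not part of the codebase:

    #include <armnn/utility/PolymorphicDowncast.hpp>
    #include <backendsCommon/WorkloadData.hpp>
    #include <neon/NeonTensorHandle.hpp>

    #include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>

    // Illustrative helper: the downcast-then-configure pattern shared by the
    // Neon workload constructors in this patch.
    void ConfigureSubtraction(armnn::SubtractionQueueDescriptor& data,
                              arm_compute::NEArithmeticSubtraction& layer)
    {
        using armnn::IAclTensorHandle;
        using armnn::PolymorphicDowncast;

        arm_compute::ITensor& input1 =
            PolymorphicDowncast<IAclTensorHandle*>(data.m_Inputs[0])->GetTensor();
        arm_compute::ITensor& input2 =
            PolymorphicDowncast<IAclTensorHandle*>(data.m_Inputs[1])->GetTensor();
        arm_compute::ITensor& output =
            PolymorphicDowncast<IAclTensorHandle*>(data.m_Outputs[0])->GetTensor();

        // Same call as in NeonSubtractionWorkload: saturating subtraction.
        layer.configure(&input1, &input2, &output,
                        arm_compute::ConvertPolicy::SATURATE);
    }
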
diff --git a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
index ffca207..985f540 100644
--- a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
@@ -9,6 +9,7 @@
 #include <Profiling.hpp>
 
 #include <armnn/Types.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
@@ -60,8 +61,8 @@
 {
     m_Data.ValidateInputsOutputs("NeonTransposeConvolution2dWorkload", 1, 1);
 
-    arm_compute::ITensor& input  = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index b83d205..29bfbc0 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -5,6 +5,7 @@
 
 #include <test/CreateWorkload.hpp>
 
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <reference/RefTensorHandle.hpp>
 #include <reference/RefWorkloadFactory.hpp>
 #include <reference/workloads/RefWorkloads.hpp>
@@ -16,8 +17,8 @@
 void CheckInputOutput(std::unique_ptr<Workload> workload, const TensorInfo& inputInfo, const TensorInfo& outputInfo)
 {
     auto queueDescriptor = workload->GetData();
-    auto inputHandle  = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle  = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((inputHandle->GetTensorInfo() == inputInfo));
     BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo));
 }
@@ -29,9 +30,9 @@
                        const TensorInfo&         outputInfo)
 {
     auto queueDescriptor = workload->GetData();
-    auto inputHandle0     = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
-    auto inputHandle1     = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[1]);
-    auto outputHandle    = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto inputHandle0 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle1 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((inputHandle0->GetTensorInfo() == inputInfo0));
     BOOST_TEST((inputHandle1->GetTensorInfo() == inputInfo1));
     BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo));
@@ -538,16 +539,16 @@
 
     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
     SplitterQueueDescriptor queueDescriptor = workload->GetData();
-    auto inputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto inputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[0]);
     BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, DataType)));
 
-    auto outputHandle0 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle0 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, DataType)));
 
-    auto outputHandle1 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    auto outputHandle1 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[1]);
     BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType)));
 
-    auto outputHandle2 = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    auto outputHandle2 = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[2]);
     BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType)));
 }
 
@@ -910,7 +911,7 @@
 
     // Check output is as expected
     auto queueDescriptor = workload->GetData();
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, DataType)));
 }
 
@@ -950,7 +951,7 @@
 
     // Check output is as expected
     auto queueDescriptor = workload->GetData();
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, dataType)));
 }
 
@@ -1054,10 +1055,10 @@
     StackQueueDescriptor queueDescriptor = workload->GetData();
     for (unsigned int i = 0; i < numInputs; ++i)
     {
-        auto inputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Inputs[i]);
+        auto inputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Inputs[i]);
         BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo(inputShape, DataType)));
     }
-    auto outputHandle = boost::polymorphic_downcast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    auto outputHandle = PolymorphicDowncast<RefTensorHandle*>(queueDescriptor.m_Outputs[0]);
     BOOST_TEST((outputHandle->GetTensorInfo() == TensorInfo(outputShape, DataType)));
 }
 
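For the test helpers above, the value of the checked cast is that a workload handing back the wrong handle type fails loudly at the cast itself, rather than letting BOOST_TEST read through a mistyped pointer. A self-contained sketch with stand-in types (Base, RefHandleStub and ClHandleStub are illustrative, not Arm NN classes; PolymorphicDowncast is the same sketch as above):

    #include <cassert>

    struct Base { virtual ~Base() = default; };
    struct RefHandleStub : Base { int m_Info = 42; };
    struct ClHandleStub  : Base {};

    template<typename DestType, typename SourceType>
    DestType PolymorphicDowncast(SourceType* value)
    {
        assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
        return static_cast<DestType>(value);
    }

    int main()
    {
        ClHandleStub cl;
        Base* handle = &cl;
        // Wrong destination type: dynamic_cast yields nullptr while
        // static_cast does not, so the assert fires in a debug build
        // before anything is read through the bogus pointer.
        auto* ref = PolymorphicDowncast<RefHandleStub*>(handle);
        return ref == nullptr ? 1 : 0;  // never reached in a debug build
    }
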
diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp
index f1b3157..a36ed45 100644
--- a/src/backends/reference/workloads/RefWorkloadUtils.hpp
+++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp
@@ -9,12 +9,12 @@
 
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <reference/RefTensorHandle.hpp>
 
 #include <BFloat16.hpp>
 #include <Half.hpp>
-#include <boost/polymorphic_cast.hpp>
 
 namespace armnn
 {
@@ -27,7 +27,7 @@
 {
     // We know that reference workloads use RefTensorHandles for inputs and outputs
     const RefTensorHandle* refTensorHandle =
-        boost::polymorphic_downcast<const RefTensorHandle*>(tensorHandle);
+        PolymorphicDowncast<const RefTensorHandle*>(tensorHandle);
     return refTensorHandle->GetTensorInfo();
 }
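
One detail worth noting in this final hunk: the destination type is spelled with its const qualifier (const RefTensorHandle*), so the same cast works for const sources without any const_cast. A short sketch mirroring the GetTensorInfo pattern, again with stand-in types rather than the real Arm NN classes:

    #include <cassert>

    struct ITensorHandleStub { virtual ~ITensorHandleStub() = default; };
    struct RefHandleStub : ITensorHandleStub { int m_Value = 7; };

    template<typename DestType, typename SourceType>
    DestType PolymorphicDowncast(SourceType* value)
    {
        assert(dynamic_cast<DestType>(value) == static_cast<DestType>(value));
        return static_cast<DestType>(value);
    }

    // Const source pointer in, const destination pointer out: constness is
    // carried by the explicitly spelled DestType argument.
    int GetValue(const ITensorHandleStub* handle)
    {
        const RefHandleStub* ref =
            PolymorphicDowncast<const RefHandleStub*>(handle);
        return ref->m_Value;
    }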