IVGCVSW-2354 Caffe SqueezeNet through armnn Cl and Neon

 * Compute Softmax Acl axis for Cl and Neon
 * Add unittests for Softmax in 3D and 4D
 * Correct input and output layer names in CaffeSqueezeNet inference test

Change-Id: I2d369d9a2db19c40f2af3341039dd33f0c5637b1
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 01773db..b34e2dd 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -275,14 +275,21 @@
 // Activation
 ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest)
 
+// ReLu
+ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest)
+ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest)
+
+// Softmax
 ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f)
 ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f)
-
 ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f)
 ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f)
 
-ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest)
-ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest)
+ARMNN_AUTO_TEST_CASE(Simple3dSoftmaxBeta1, Simple3dSoftmaxTest, 1.0f)
+ARMNN_AUTO_TEST_CASE(Simple3dSoftmaxBeta1Uint8, Simple3dSoftmaxUint8Test, 1.0f)
+
+ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxBeta1, Simple4dSoftmaxTest, 1.0f)
+ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxBeta1Uint8, Simple4dSoftmaxUint8Test, 1.0f)
 
 // Splitter
 ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest)
diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
index d9c78bb..afc6135 100644
--- a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
@@ -7,6 +7,7 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <aclCommon/ArmComputeUtils.hpp>
 #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
 
 namespace armnn
@@ -22,8 +23,9 @@
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
+    unsigned int aclAxis = ComputeSoftmaxAclAxis(info.m_InputTensorInfos[0]);
     auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager);
-    layer->configure(&input, &output, m_Data.m_Parameters.m_Beta);
+    layer->configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
     m_SoftmaxLayer.reset(layer.release());
 }
 
diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
index f780589..7f295d6 100644
--- a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
@@ -4,9 +4,10 @@
 //
 
 #include "NeonSoftmaxUint8Workload.hpp"
-
 #include "NeonWorkloadUtils.hpp"
 
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
 
 namespace armnn
@@ -29,9 +30,10 @@
         throw InvalidArgumentException(
             "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
     }
+    unsigned int aclAxis = ComputeSoftmaxAclAxis(info.m_InputTensorInfos[0]);
 
     auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager);
-    layer->configure(&input, &output, descriptor.m_Parameters.m_Beta);
+    layer->configure(&input, &output, descriptor.m_Parameters.m_Beta, aclAxis);
     m_SoftmaxLayer.reset(layer.release());
 }