COMPMID-458 add direct convolution to benchmark

Change-Id: I475cb1e3c69831aa5574305e9188251846cee6ba
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/82008
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/tests/networks_new/AlexNetNetwork.h b/tests/networks_new/AlexNetNetwork.h
index 7e1a855..8c801f7 100644
--- a/tests/networks_new/AlexNetNetwork.h
+++ b/tests/networks_new/AlexNetNetwork.h
@@ -43,6 +43,7 @@
           typename Accessor,
           typename ActivationLayerFunction,
           typename ConvolutionLayerFunction,
+          typename DirectConvolutionLayerFunction,
           typename FullyConnectedLayerFunction,
           typename NormalizationLayerFunction,
           typename PoolingLayerFunction,
@@ -60,11 +61,104 @@
         // Initialize weights and biases
         if(!_reshaped_weights)
         {
-            init_weights();
+            w[0].allocator()->init(TensorInfo(TensorShape(11U, 11U, 3U, 96U), 1, _data_type, _fixed_point_position));
+            b[0].allocator()->init(TensorInfo(TensorShape(96U), 1, _data_type, _fixed_point_position));
+            w[1].allocator()->init(TensorInfo(TensorShape(5U, 5U, 48U, 256U), 1, _data_type, _fixed_point_position));
+            b[1].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position));
+            w[2].allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, _data_type, _fixed_point_position));
+            b[2].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position));
+            w[3].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, _data_type, _fixed_point_position));
+            b[3].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position));
+            w[4].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, _data_type, _fixed_point_position));
+            b[4].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position));
+            w[5].allocator()->init(TensorInfo(TensorShape(9216U, 4096U), 1, _data_type, _fixed_point_position));
+            b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
+            w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position));
+            b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
+            w[7].allocator()->init(TensorInfo(TensorShape(4096U, 1000U), 1, _data_type, _fixed_point_position));
+            b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position));
+
+            w21 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates()));
+            w22 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates(0, 0, 0, 128)));
+            b21 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates()));
+            b22 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates(128)));
+
+            w41 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates()));
+            w42 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192)));
+            b41 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates()));
+            b42 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates(192)));
+
+            w51 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates()));
+            w52 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128)));
+            b51 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates()));
+            b52 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates(128)));
         }
         else
         {
-            init_reshaped_weights();
+            const unsigned int data_type_size = 16 / arm_compute::data_size_from_type(_data_type);
+
+            // Create tensor for the reshaped weights
+            auto w21_tensor = std::unique_ptr<TensorType>(new TensorType());
+            auto w22_tensor = std::unique_ptr<TensorType>(new TensorType());
+
+            w[0].allocator()->init(TensorInfo(TensorShape(366U * data_type_size, 96U / data_type_size), 1, _data_type, _fixed_point_position));
+            w21_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
+            w22_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
+            w21 = std::move(w21_tensor);
+            w22 = std::move(w22_tensor);
+
+            // Configure the direct convolution's weights. Direct convolution doesn't need reshape weights
+            if(!_is_direct_conv)
+            {
+                auto w41_tensor = std::unique_ptr<TensorType>(new TensorType());
+                auto w42_tensor = std::unique_ptr<TensorType>(new TensorType());
+                auto w51_tensor = std::unique_ptr<TensorType>(new TensorType());
+                auto w52_tensor = std::unique_ptr<TensorType>(new TensorType());
+                w41_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position));
+                w42_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position));
+                w51_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
+                w52_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
+                w[2].allocator()->init(TensorInfo(TensorShape(2560U * data_type_size, 384U / data_type_size), 1, _data_type, _fixed_point_position));
+                w41 = std::move(w41_tensor);
+                w42 = std::move(w42_tensor);
+                w51 = std::move(w51_tensor);
+                w52 = std::move(w52_tensor);
+            }
+            else
+            {
+                w[2].allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, _data_type, _fixed_point_position));
+                b[2].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position));
+                w[3].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, _data_type, _fixed_point_position));
+                b[3].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position));
+                w[4].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, _data_type, _fixed_point_position));
+                b[4].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position));
+                w41 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates()));
+                w42 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192)));
+                b41 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates()));
+                b42 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates(192)));
+
+                w51 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates()));
+                w52 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128)));
+                b51 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates()));
+                b52 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates(128)));
+            }
+
+            b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
+            b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
+            b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position));
+
+            if(_batches > 1)
+            {
+                w[5].allocator()->init(TensorInfo(TensorShape(9216U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position));
+                w[6].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position));
+                w[7].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 1000U / data_type_size), 1, _data_type, _fixed_point_position));
+            }
+            else
+            {
+                w[5].allocator()->init(TensorInfo(TensorShape(4096U, 9216U), 1, _data_type, _fixed_point_position));
+                w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position));
+                w[7].allocator()->init(TensorInfo(TensorShape(1000U, 4096U), 1, _data_type, _fixed_point_position));
+            }
         }
     }
 
@@ -129,7 +223,7 @@
         norm2.configure(&act2_out, &norm2_out, NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f));
         pool2.configure(&norm2_out, &pool2_out, PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)));
         // Layer 3
-        TensorType *b2 = _reshaped_weights ? nullptr : &b[2];
+        TensorType *b2 = (_reshaped_weights && !_is_direct_conv) ? nullptr : &b[2];
         conv3.configure(&pool2_out, &w[2], b2, &conv3_out, PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U, 3U));
         act3.configure(&conv3_out, &act3_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
         // Layer 4
@@ -184,10 +278,21 @@
 
             dynamic_cast<TensorType *>(w21.get())->allocator()->allocate();
             dynamic_cast<TensorType *>(w22.get())->allocator()->allocate();
-            dynamic_cast<TensorType *>(w41.get())->allocator()->allocate();
-            dynamic_cast<TensorType *>(w42.get())->allocator()->allocate();
-            dynamic_cast<TensorType *>(w51.get())->allocator()->allocate();
-            dynamic_cast<TensorType *>(w52.get())->allocator()->allocate();
+            if(!_is_direct_conv)
+            {
+                dynamic_cast<TensorType *>(w41.get())->allocator()->allocate();
+                dynamic_cast<TensorType *>(w42.get())->allocator()->allocate();
+                dynamic_cast<TensorType *>(w51.get())->allocator()->allocate();
+                dynamic_cast<TensorType *>(w52.get())->allocator()->allocate();
+            }
+            else
+            {
+                b[2].allocator()->allocate();
+                b[3].allocator()->allocate();
+                b[4].allocator()->allocate();
+                w[3].allocator()->allocate();
+                w[4].allocator()->allocate();
+            }
         }
 
         conv1_out.allocator()->allocate();
@@ -239,10 +344,21 @@
 
             library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w21.get())), 9);
             library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w22.get())), 10);
-            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w41.get())), 11);
-            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w42.get())), 12);
-            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w51.get())), 13);
-            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w52.get())), 14);
+
+            if(!_is_direct_conv)
+            {
+                library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w41.get())), 11);
+                library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w42.get())), 12);
+                library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w51.get())), 13);
+                library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w52.get())), 14);
+            }
+            else
+            {
+                library->fill_tensor_uniform(Accessor(w[3]), 11);
+                library->fill_tensor_uniform(Accessor(b[3]), 12);
+                library->fill_tensor_uniform(Accessor(w[4]), 13);
+                library->fill_tensor_uniform(Accessor(b[4]), 14);
+            }
         }
     }
 
@@ -340,6 +456,15 @@
             b[5].allocator()->free();
             b[6].allocator()->free();
             b[7].allocator()->free();
+
+            if(_is_direct_conv)
+            {
+                w[3].allocator()->free();
+                w[4].allocator()->free();
+                b[2].allocator()->free();
+                b[3].allocator()->free();
+                b[4].allocator()->free();
+            }
         }
 
         w21.reset();
@@ -416,94 +541,39 @@
     }
 
 private:
-    void init_weights()
+    struct DirectConv
     {
-        w[0].allocator()->init(TensorInfo(TensorShape(11U, 11U, 3U, 96U), 1, _data_type, _fixed_point_position));
-        b[0].allocator()->init(TensorInfo(TensorShape(96U), 1, _data_type, _fixed_point_position));
-        w[1].allocator()->init(TensorInfo(TensorShape(5U, 5U, 48U, 256U), 1, _data_type, _fixed_point_position));
-        b[1].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position));
-        w[2].allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, _data_type, _fixed_point_position));
-        b[2].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position));
-        w[3].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, _data_type, _fixed_point_position));
-        b[3].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position));
-        w[4].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, _data_type, _fixed_point_position));
-        b[4].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position));
-        w[5].allocator()->init(TensorInfo(TensorShape(9216U, 4096U), 1, _data_type, _fixed_point_position));
-        b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
-        w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position));
-        b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
-        w[7].allocator()->init(TensorInfo(TensorShape(4096U, 1000U), 1, _data_type, _fixed_point_position));
-        b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position));
-
-        w21 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates()));
-        w22 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates(0, 0, 0, 128)));
-        b21 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates()));
-        b22 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates(128)));
-
-        w41 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates()));
-        w42 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192)));
-        b41 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates()));
-        b42 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates(192)));
-
-        w51 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates()));
-        w52 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128)));
-        b51 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates()));
-        b52 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates(128)));
-    }
-
-    void init_reshaped_weights()
-    {
-        const unsigned int data_type_size = 16 / arm_compute::data_size_from_type(_data_type);
-
-        // Create tensor for the reshaped weights
-        auto w21_tensor = std::unique_ptr<TensorType>(new TensorType());
-        auto w22_tensor = std::unique_ptr<TensorType>(new TensorType());
-        auto w41_tensor = std::unique_ptr<TensorType>(new TensorType());
-        auto w42_tensor = std::unique_ptr<TensorType>(new TensorType());
-        auto w51_tensor = std::unique_ptr<TensorType>(new TensorType());
-        auto w52_tensor = std::unique_ptr<TensorType>(new TensorType());
-
-        w[0].allocator()->init(TensorInfo(TensorShape(366U * data_type_size, 96U / data_type_size), 1, _data_type, _fixed_point_position));
-        w21_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
-        w22_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
-        w[2].allocator()->init(TensorInfo(TensorShape(2560U * data_type_size, 384U / data_type_size), 1, _data_type, _fixed_point_position));
-        w41_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position));
-        w42_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position));
-        w51_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
-        w52_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position));
-
-        w21 = std::move(w21_tensor);
-        w22 = std::move(w22_tensor);
-        w41 = std::move(w41_tensor);
-        w42 = std::move(w42_tensor);
-        w51 = std::move(w51_tensor);
-        w52 = std::move(w52_tensor);
-
-        b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
-        b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
-        b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position));
-
-        if(_batches > 1)
+        template <typename ConvolutionLayerFunction1 = ConvolutionLayerFunction, typename DirectConvolutionLayerFunction1 = DirectConvolutionLayerFunction>
+        typename std::enable_if < !std::is_same<ConvolutionLayerFunction1, DirectConvolutionLayerFunction1>::value, void >::type
+        configure(ITensorType *input, const ITensorType *weights, const ITensorType *biases, ITensorType *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo())
         {
-            w[5].allocator()->init(TensorInfo(TensorShape(9216U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position));
-            w[6].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position));
-            w[7].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 1000U / data_type_size), 1, _data_type, _fixed_point_position));
+            _func.configure(input, weights, biases, output, conv_info);
         }
-        else
+
+        template <typename ConvolutionLayerFunction1 = ConvolutionLayerFunction, typename DirectConvolutionLayerFunction1 = DirectConvolutionLayerFunction>
+        typename std::enable_if<std::is_same<ConvolutionLayerFunction1, DirectConvolutionLayerFunction1>::value, void>::type
+        configure(ITensorType *input, const ITensorType *weights, const ITensorType *biases, ITensorType *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo())
         {
-            w[5].allocator()->init(TensorInfo(TensorShape(4096U, 9216U), 1, _data_type, _fixed_point_position));
-            w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position));
-            w[7].allocator()->init(TensorInfo(TensorShape(1000U, 4096U), 1, _data_type, _fixed_point_position));
+            _func.configure(input, weights, biases, output, conv_info, weights_info);
         }
-    }
+
+        void run()
+        {
+            _func.run();
+        }
+
+        DirectConvolutionLayerFunction _func{};
+    };
 
     DataType     _data_type{ DataType::UNKNOWN };
     int          _fixed_point_position{ 0 };
     unsigned int _batches{ 0 };
     bool         _reshaped_weights{ false };
+    bool         _is_direct_conv{ !std::is_same<ConvolutionLayerFunction, DirectConvolutionLayerFunction>::value };
 
     ActivationLayerFunction     act1{}, act2{}, act3{}, act4{}, act5{}, act6{}, act7{};
-    ConvolutionLayerFunction    conv1{}, conv21{}, conv22{}, conv3{}, conv41{}, conv42{}, conv51{}, conv52{};
+    ConvolutionLayerFunction    conv1{}, conv21{}, conv22{};
+    DirectConv                  conv3{}, conv41{}, conv42{}, conv51{}, conv52{};
     FullyConnectedLayerFunction fc6{}, fc7{}, fc8{};
     NormalizationLayerFunction  norm1{}, norm2{};
     PoolingLayerFunction        pool1{}, pool2{}, pool5{};