COMPMID-1367: Enable NHWC in graph examples

Change-Id: Iabc54a3a1bdcd46a9a921cda39c7c85fef672b72
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141449
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index 67f4e3c..c41b0a8 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -60,7 +60,6 @@
 
         // Checks
         ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
-        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC, "Unsupported data layout!");
 
         // Print parameter values
         std::cout << common_params << std::endl;
@@ -71,12 +70,18 @@
         // Create a preprocessor object
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
 
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
+
         graph << common_params.target
               << common_params.fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), common_params.data_type),
-                            get_input_accessor(common_params, std::move(preprocessor), false))
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false))
               << ConvolutionLayer(3U, 3U, 32U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
               .set_name("Conv2d_1a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -89,7 +94,7 @@
               .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu")
               << ConvolutionLayer(3U, 3U, 32U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               .set_name("Conv2d_2a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -103,7 +108,7 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu")
 
               << ConvolutionLayer(3U, 3U, 64U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
               .set_name("Conv2d_2b_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -119,7 +124,7 @@
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool")
 
               << ConvolutionLayer(1U, 1U, 80U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               .set_name("Conv2d_3b_1x1/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -133,7 +138,7 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu")
 
               << ConvolutionLayer(3U, 3U, 192U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               .set_name("Conv2d_4a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -148,45 +153,45 @@
 
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool");
 
-        graph << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       32U)
               .set_name("Mixed_5b/concat");
-        graph << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       64U, true)
               .set_name("Mixed_5c/concat");
-        graph << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       64U)
               .set_name("Mixed_5d/concat");
 
-        graph << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
+        graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
 
-        graph << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6b", weights_layout, 192U, std::make_tuple(128U, 128U, 192U),
                                       std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U)
               .set_name("Mixed_6b/concat");
-        graph << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6c", weights_layout, 192U, std::make_tuple(160U, 160U, 192U),
                                       std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
               .set_name("Mixed_6c/concat");
-        graph << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6d", weights_layout, 192U, std::make_tuple(160U, 160U, 192U),
                                       std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
               .set_name("Mixed_6d/concat");
-        graph << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6e", weights_layout, 192U, std::make_tuple(192U, 192U, 192U),
                                       std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U)
               .set_name("Mixed_6e/concat");
 
-        graph << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U),
+        graph << get_inception_node_D(data_path, "Mixed_7a", weights_layout, std::make_tuple(192U, 320U),
                                       std::make_tuple(192U, 192U, 192U, 192U))
               .set_name("Mixed_7a/concat");
 
-        graph << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U),
+        graph << get_inception_node_E(data_path, "Mixed_7b", weights_layout, 320U, std::make_tuple(384U, 384U, 384U),
                                       std::make_tuple(448U, 384U, 384U, 384U), 192U)
               .set_name("Mixed_7b/concat");
-        graph << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U),
+        graph << get_inception_node_E(data_path, "Mixed_7c", weights_layout, 320U, std::make_tuple(384U, 384U, 384U),
                                       std::make_tuple(448U, 384U, 384U, 384U), 192U, true)
               .set_name("Mixed_7c/concat");
 
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool")
               << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path,
-                                                                      "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy"),
+                                                                      "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", weights_layout),
                                   get_weights_accessor(data_path,
                                                        "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"),
                                   PadStrideInfo(1, 1, 0, 0))
@@ -218,7 +223,7 @@
     Stream             graph;
 
 private:
-    BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int> b_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int> c_filters,
@@ -239,7 +244,7 @@
         SubStream i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -255,7 +260,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution")
@@ -269,7 +274,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu")
             << ConvolutionLayer(
                 5U, 5U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 2, 2))
             .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution")
@@ -285,7 +290,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -299,7 +304,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
@@ -313,7 +318,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<2>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution")
@@ -330,7 +335,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -346,7 +351,7 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
 
-    BranchLayer get_inception_node_B(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_B(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters)
     {
@@ -354,7 +359,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 3U, 3U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution")
@@ -370,7 +375,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -384,7 +389,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution")
@@ -398,7 +403,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<2>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution")
@@ -417,7 +422,7 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
 
-    BranchLayer get_inception_node_C(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_C(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
@@ -427,7 +432,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -443,7 +448,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -457,7 +462,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
@@ -471,7 +476,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<2>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
@@ -487,7 +492,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -501,7 +506,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution")
@@ -515,7 +520,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<2>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution")
@@ -529,7 +534,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<3>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution")
@@ -543,7 +548,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<4>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution")
@@ -560,7 +565,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -576,15 +581,15 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
 
-    BranchLayer get_inception_node_D(const std::string &data_path, std::string &&param_path,
-                                     std::tuple<unsigned int, unsigned int>      a_filters,
+    BranchLayer get_inception_node_D(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+                                     std::tuple<unsigned int, unsigned int> a_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters)
     {
         std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, std::get<0>(a_filters),
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -598,7 +603,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(a_filters),
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution")
@@ -614,7 +619,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -628,7 +633,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
@@ -642,7 +647,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<2>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
@@ -656,7 +661,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<3>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution")
@@ -675,7 +680,7 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
 
-    BranchLayer get_inception_node_E(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_E(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
@@ -693,7 +698,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -709,7 +714,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -725,7 +730,7 @@
         SubStream i_b1(i_b);
         i_b1 << ConvolutionLayer(
                  3U, 1U, std::get<1>(b_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 1, 0))
              .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution")
@@ -741,7 +746,7 @@
         SubStream i_b2(i_b);
         i_b2 << ConvolutionLayer(
                  1U, 3U, std::get<2>(b_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 0, 1))
              .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution")
@@ -760,7 +765,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -774,7 +779,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
@@ -790,7 +795,7 @@
         SubStream i_c1(i_c);
         i_c1 << ConvolutionLayer(
                  3U, 1U, std::get<2>(c_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 1, 0))
              .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution")
@@ -806,7 +811,7 @@
         SubStream i_c2(i_c);
         i_c2 << ConvolutionLayer(
                  1U, 3U, std::get<3>(c_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 0, 1))
              .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution")
@@ -826,7 +831,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")