COMPMID-1160 Turn Graph hints into heuristics

Change-Id: Id24c2f07c59d863f8e1af6a1afbf6a542b2b9954
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135142
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 9e6d919..5328662 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -53,13 +53,9 @@
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target    target_hint = set_target_hint(target);
-
-        const bool        is_neon              = (target_hint == Target::NEON);
-        ConvolutionMethod convolution_5x5_hint = is_neon ? ConvolutionMethod::GEMM : ConvolutionMethod::DIRECT;
-        ConvolutionMethod convolution_3x3_hint = ConvolutionMethod::DEFAULT;
-        FastMathHint      fast_math_hint       = FastMathHint::DISABLED;
+        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target       target_hint    = set_target_hint(target);
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
@@ -117,7 +113,6 @@
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool1")
               // Layer 2
-              << convolution_5x5_hint
               << ConvolutionLayer(
                   5U, 5U, 256U,
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"),
@@ -127,7 +122,6 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2")
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool2")
-              << convolution_3x3_hint
               // Layer 3
               << ConvolutionLayer(
                   3U, 3U, 384U,
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index 50dc024..40243bb 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -35,7 +35,7 @@
 /** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
  */
 class GraphMobilenetExample : public Example
 {
@@ -52,7 +52,6 @@
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
         const int                  target                     = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target                     target_hint                = set_target_hint(target);
-        ConvolutionMethod          convolution_hint           = ConvolutionMethod::GEMM;
         DepthwiseConvolutionMethod depthwise_convolution_hint = DepthwiseConvolutionMethod::OPTIMIZED_3x3;
         FastMathHint               fast_math_hint             = FastMathHint::DISABLED;
 
@@ -133,7 +132,6 @@
         }
 
         graph << target_hint
-              << convolution_hint
               << depthwise_convolution_hint
               << fast_math_hint
               << InputLayer(input_descriptor,
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index 72e7240..d70c56e 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -51,13 +51,9 @@
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int  target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target     target_hint = set_target_hint(target);
-        const bool is_opencl   = target_hint == Target::CL;
-
-        ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
-        ConvolutionMethod convolution3x3_hint       = ConvolutionMethod::DEFAULT;
-        FastMathHint      fast_math_hint            = FastMathHint::DISABLED;
+        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+        Target       target_hint    = set_target_hint(target);
+        FastMathHint fast_math_hint = FastMathHint::DISABLED;
 
         // Parse arguments
         if(argc < 2)
@@ -102,7 +98,6 @@
 
         graph << target_hint
               << fast_math_hint
-              << first_convolution3x3_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
               // Layer 1
@@ -113,7 +108,6 @@
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv1_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu")
-              << convolution3x3_hint
               // Layer 2
               << ConvolutionLayer(
                   3U, 3U, 64U,
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index b15c3f2..8a0ec6f 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -54,10 +54,6 @@
         const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
         Target       target_hint    = set_target_hint(target);
         FastMathHint fast_math_hint = FastMathHint::DISABLED;
-        const bool   is_opencl      = target_hint == Target::CL;
-
-        ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
-        ConvolutionMethod convolution3x3_hint       = ConvolutionMethod::DEFAULT;
 
         // Parse arguments
         if(argc < 2)
@@ -101,7 +97,6 @@
         }
 
         graph << target_hint
-              << first_convolution3x3_hint
               << fast_math_hint
               << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
                             get_input_accessor(image, std::move(preprocessor)))
@@ -113,7 +108,6 @@
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv1_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu")
-              << convolution3x3_hint
               << ConvolutionLayer(
                   3U, 3U, 64U,
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy"),
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 47a8d5f..a9a05b6 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -123,8 +123,42 @@
     ARM_COMPUTE_UNUSED(weights_info);
     ARM_COMPUTE_UNUSED(gpu_target);
 
+    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
     const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
 
+    /* Input spatial dims, kernel size, IFM/OFM, conv info*/
+    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo>;
+    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
+
+    const std::vector<ConfigurationMethod> known_configs =
+    {
+        // Alexnet
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::DIRECT),
+        // VGG16 / VGG19
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::DIRECT),
+        // Mobilenet 224
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM),
+        // Mobilenet 160
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM)
+    };
+
+    const auto find_config = [&](ConfigurationMethod c)
+    {
+        const ConvolutionConfiguration config = c.first;
+        const PadStrideInfo            info   = std::get<3>(config);
+
+        return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
+               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
+               && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride();
+    };
+
+    std::vector<ConfigurationMethod>::const_iterator found;
+    if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
+    {
+        return (*found).second;
+    }
+
     if(dilation != Size2D(1U, 1U) || (input->dimension(idx_c) < 16))
     {
         return ConvolutionMethod::GEMM;
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 7053c7e..96ac95f 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -108,6 +108,42 @@
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights);
     ARM_COMPUTE_UNUSED(weights_info);
 
+    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
+    const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
+
+    /* Input spatial dims, kernel size, IFM/OFM, conv info*/
+    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo>;
+    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
+
+    const std::vector<ConfigurationMethod> known_configs =
+    {
+        // Alexnet
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::GEMM),
+        // VGG16 / VGG19
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::GEMM),
+        // Mobilenet 224
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM),
+        // Mobilenet 160
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM)
+    };
+
+    const auto find_config = [&](ConfigurationMethod c)
+    {
+        const ConvolutionConfiguration config = c.first;
+        const PadStrideInfo            info   = std::get<3>(config);
+
+        return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
+               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
+               && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride();
+    };
+
+    std::vector<ConfigurationMethod>::const_iterator found;
+    if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
+    {
+        return (*found).second;
+    }
+
     if(dilation != Size2D(1U, 1U) || Scheduler::get().cpu_info().get_cpu_model() == CPUModel::A53
        || input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) <= 16)
     {