blob: dc98d72f4b85d4f3fc5e7f4b00293fd947ecd164 [file] [log] [blame]
Giorgio Arena232c4522022-03-03 10:09:01 +00001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
SiCong Li4e9f5682022-05-10 10:15:59 +010024#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
Giorgio Arena232c4522022-03-03 10:09:01 +000025
26#include "src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.h"
SiCong Lib63b1192022-01-28 18:24:39 +000027#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000028
29#include "src/core/utils/helpers/float_ops.h"
30#include "src/gpu/cl/kernels/ClElementwiseKernel.h"
31#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
32#include "tests/CL/CLAccessor.h"
33#include "tests/framework/Macros.h"
34#include "tests/framework/datasets/Datasets.h"
35#include "tests/validation/Validation.h"
Gunes Bayir16c56972022-03-28 21:32:33 +010036#include "tests/validation/reference/ConvolutionLayer.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000037#include "tests/validation/reference/ElementwiseOperations.h"
38#include "tests/validation/reference/GEMM.h"
Gunes Bayir16c56972022-03-28 21:32:33 +010039#include "tests/validation/reference/Permute.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000040
41#include "arm_compute/core/utils/misc/ShapeCalculator.h"
42#include "src/core/AccessWindowStatic.h"
43#include "src/core/helpers/AutoConfiguration.h"
44#include "src/core/helpers/WindowHelpers.h"
45
SiCong Lib63b1192022-01-28 18:24:39 +000046#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h"
47
Giorgio Arena232c4522022-03-03 10:09:01 +000048#include <chrono>
49
50using namespace arm_compute::experimental::dynamic_fusion;
SiCong Lib63b1192022-01-28 18:24:39 +000051using namespace arm_compute::test::validation::utils;
Giorgio Arena232c4522022-03-03 10:09:01 +000052
53namespace arm_compute
54{
55namespace test
56{
57namespace validation
58{
Giorgio Arena232c4522022-03-03 10:09:01 +000059TEST_SUITE(CL)
60TEST_SUITE(UNIT)
61TEST_SUITE(DYNAMIC_FUSION)
62TEST_SUITE(ClCompositeKernel)
63TEST_SUITE(Validate)
64
Gunes Bayir16c56972022-03-28 21:32:33 +010065TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
66{
67 /* Computation:
68 * out = add(addend, direct_conv2d(lhs, rhs, bias)) (non-broadcast)
69 */
70
71 ClCompositeKernel kernel{};
72 ClKernelBlueprint bp{};
73 ClKernelCode cl_code{};
74 ClExecutionDescriptor exec_desc{};
75 Status st{};
76
Michalis Spyroub1fcefd2022-06-15 19:02:28 +010077 const auto data_type = DataType::F32;
78 const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ };
79 const auto eltwise_info = ElementwiseDescriptor{ ArithmeticOperation::ADD };
Gunes Bayir16c56972022-03-28 21:32:33 +010080
81 const auto width = 7U;
82 const auto height = 6U;
83 const auto IFM = 5U;
84 const auto OFM = 4U;
85 const auto kernel_sz = 3U;
86
SiCong Lib63b1192022-01-28 18:24:39 +000087 const auto src_shape = TensorShape(IFM, width, height);
88 const auto wei_shape = TensorShape(IFM, kernel_sz, kernel_sz, OFM);
89 const auto bia_shape = TensorShape(OFM);
90 const auto addend_shape = TensorShape(1, 1);
91 const auto dst_shape = TensorShape(OFM, width, height);
Gunes Bayir16c56972022-03-28 21:32:33 +010092
SiCong Lib63b1192022-01-28 18:24:39 +000093 auto src_info = TensorInfo(src_shape, 1, data_type, DataLayout::NHWC);
94 auto wei_info = TensorInfo(wei_shape, 1, data_type, DataLayout::NHWC);
95 auto bia_info = TensorInfo(bia_shape, 1, data_type, DataLayout::NHWC);
96 auto addend_info = TensorInfo(addend_shape, 1, data_type, DataLayout::NHWC);
97 auto dst_info = TensorInfo(dst_shape, 1, data_type, DataLayout::NHWC);
Gunes Bayir16c56972022-03-28 21:32:33 +010098
99 const auto n0 = std::min(OFM, 4u);
100 const auto m0 = (OFM > 16) ? ((data_type == DataType::F32) ? 2U : 4U) : 1U;
101
SiCong Lib63b1192022-01-28 18:24:39 +0000102 const ClDirectConv2dKernelDescriptor direct_conv2d_desc{ conv_info };
Michalis Spyroub1fcefd2022-06-15 19:02:28 +0100103 const ClElementwiseKernelDescriptor eltwise_add_desc{ eltwise_info };
SiCong Lib63b1192022-01-28 18:24:39 +0000104 const TileDescriptor store_tile_info{ Size2D(n0, m0), Size2D(width, height), ClippingStrategy::TOP_LEFT };
Gunes Bayir16c56972022-03-28 21:32:33 +0100105
106 ArgumentID src_id{ g_arg_placeholder };
107 ArgumentID wei_id{ g_arg_placeholder };
108 ArgumentID bia_id{ g_arg_placeholder };
109 ArgumentID acc_id{ g_arg_placeholder };
SiCong Lib63b1192022-01-28 18:24:39 +0000110 ArgumentID acc_1_id{ g_arg_placeholder };
Gunes Bayir16c56972022-03-28 21:32:33 +0100111 ArgumentID addend_id{ g_arg_placeholder };
112 ArgumentID dst_id{ g_arg_placeholder };
113
SiCong Lib63b1192022-01-28 18:24:39 +0000114 st = add_tensor(bp, &src_info, src_id);
115 st = add_tensor(bp, &wei_info, wei_id);
116 st = add_tensor(bp, &bia_info, bia_id);
117 st = add_tensor(bp, &dst_info, acc_id);
118 st = add_tensor(bp, &dst_info, acc_1_id);
119 st = add_tensor(bp, &addend_info, addend_id);
120 st = add_tensor(bp, &dst_info, dst_id);
Gunes Bayir16c56972022-03-28 21:32:33 +0100121
SiCong Lib63b1192022-01-28 18:24:39 +0000122 st = add_kcomp_direct_conv2d(bp, direct_conv2d_desc, src_id, wei_id, bia_id, acc_id);
Michalis Spyroub1fcefd2022-06-15 19:02:28 +0100123 st = add_kcomp_eltwise_op(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);
SiCong Lib63b1192022-01-28 18:24:39 +0000124 st = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_1_id, dst_id);
Gunes Bayir16c56972022-03-28 21:32:33 +0100125
126 exec_desc.skip_sliding_window = true;
127
128 st = set_tile_info(bp, store_tile_info);
129 st = build(cl_code, ClCodeBuilderContext{ GpuInfo{ GPUTarget::G71 } }, bp);
130 st = tune_static(exec_desc, cl_code);
131
132 CLScheduler::get().default_reinit();
133 kernel.configure(CLKernelLibrary::get().get_compile_context(), cl_code);
134
135 // Construct tensors
136 CLTensor src{};
137 CLTensor wei{};
138 CLTensor bia{};
139 CLTensor addend{};
140 CLTensor dst{};
141
142 // Init tensors
143 src.allocator()->init(src_info);
144 wei.allocator()->init(wei_info);
145 bia.allocator()->init(bia_info);
146 addend.allocator()->init(dst_info);
147 dst.allocator()->init(dst_info);
148
149 // "Pack" tensors
SiCong Lib63b1192022-01-28 18:24:39 +0000150 ITensorPack tensors{ { src_id, &src },
Gunes Bayir16c56972022-03-28 21:32:33 +0100151 { wei_id, &wei },
152 { bia_id, &bia },
153 { addend_id, &addend },
SiCong Lib63b1192022-01-28 18:24:39 +0000154 { dst_id, &dst } };
Gunes Bayir16c56972022-03-28 21:32:33 +0100155
156 // Allocate and fill tensors
157 src.allocator()->allocate();
158 wei.allocator()->allocate();
159 bia.allocator()->allocate();
160 addend.allocator()->allocate();
161 dst.allocator()->allocate();
162
SiCong Lib63b1192022-01-28 18:24:39 +0000163 fill<float>(CLAccessor(src), 0, library.get());
164 fill<float>(CLAccessor(wei), 1, library.get());
165 fill<float>(CLAccessor(bia), 2, library.get());
166 fill<float>(CLAccessor(addend), 3, library.get());
Gunes Bayir16c56972022-03-28 21:32:33 +0100167
168 CLScheduler::get().enqueue_op(kernel, tensors, exec_desc, true);
169
170 // Create reference
171 SimpleTensor<float> ref_src_nhwc{ src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
172 SimpleTensor<float> ref_wei_nhwc{ wei_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
173 SimpleTensor<float> ref_bia_nhwc{ bia_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
SiCong Li5687e552022-08-17 17:09:05 +0100174 SimpleTensor<float> ref_addend_nhwc{ addend_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
Gunes Bayir16c56972022-03-28 21:32:33 +0100175
176 // Fill reference
SiCong Lib63b1192022-01-28 18:24:39 +0000177 fill<float>(ref_src_nhwc, 0, library.get());
178 fill<float>(ref_wei_nhwc, 1, library.get());
179 fill<float>(ref_bia_nhwc, 2, library.get());
180 fill<float>(ref_addend_nhwc, 3, library.get());
Gunes Bayir16c56972022-03-28 21:32:33 +0100181
182 auto ref_src = reference::permute(ref_src_nhwc, PermutationVector(1U, 2U, 0U));
183 auto ref_wei = reference::permute(ref_wei_nhwc, PermutationVector(1U, 2U, 0U));
184 auto ref_bia = reference::permute(ref_bia_nhwc, PermutationVector(1U, 2U, 0U));
185 auto ref_addend = reference::permute(ref_addend_nhwc, PermutationVector(1U, 2U, 0U));
186
187 TensorShape dst_shape_nchw{ dst_shape };
188 permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U));
189
190 const auto ref_dst = reference::arithmetic_operation(
191 ArithmeticOperation::ADD,
192 ref_addend,
SiCong Lib63b1192022-01-28 18:24:39 +0000193 reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw,
194 PadStrideInfo
195 {
196 static_cast<unsigned int>(conv_info.stride.x()),
197 static_cast<unsigned int>(conv_info.stride.y()),
198 static_cast<unsigned int>(conv_info.pad.left),
199 static_cast<unsigned int>(conv_info.pad.top) }),
200 data_type,
201 ConvertPolicy::SATURATE);
Gunes Bayir16c56972022-03-28 21:32:33 +0100202
203 RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
204 validate(CLAccessor(dst), ref_dst, tolerance_f32);
205}
206
Giorgio Arena232c4522022-03-03 10:09:01 +0000207TEST_SUITE_END() // Validate
Giorgio Arena232c4522022-03-03 10:09:01 +0000208TEST_SUITE_END() // ClCompositeKernel
209TEST_SUITE_END() // DYNAMIC_FUSION
210TEST_SUITE_END() // UNIT
211TEST_SUITE_END() // CL
212} // namespace validation
213} // namespace test
SiCong Li4e9f5682022-05-10 10:15:59 +0100214} // namespace arm_compute
215#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */