blob: 96a845c36e4818d3e989da9955c653a9c3e67e6b [file] [log] [blame]
/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
SiCong Li4e9f5682022-05-10 10:15:59 +010024#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
Giorgio Arena232c4522022-03-03 10:09:01 +000025
26#include "src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.h"
SiCong Lib63b1192022-01-28 18:24:39 +000027#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000028
29#include "src/core/utils/helpers/float_ops.h"
30#include "src/gpu/cl/kernels/ClElementwiseKernel.h"
31#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
32#include "tests/CL/CLAccessor.h"
33#include "tests/framework/Macros.h"
34#include "tests/framework/datasets/Datasets.h"
35#include "tests/validation/Validation.h"
Gunes Bayir16c56972022-03-28 21:32:33 +010036#include "tests/validation/reference/ConvolutionLayer.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000037#include "tests/validation/reference/ElementwiseOperations.h"
38#include "tests/validation/reference/GEMM.h"
Gunes Bayir16c56972022-03-28 21:32:33 +010039#include "tests/validation/reference/Permute.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000040
41#include "arm_compute/core/utils/misc/ShapeCalculator.h"
42#include "src/core/AccessWindowStatic.h"
43#include "src/core/helpers/AutoConfiguration.h"
44#include "src/core/helpers/WindowHelpers.h"
45
SiCong Lib63b1192022-01-28 18:24:39 +000046#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h"
47
Giorgio Arena232c4522022-03-03 10:09:01 +000048#include <chrono>
49
50using namespace arm_compute::experimental::dynamic_fusion;
SiCong Lib63b1192022-01-28 18:24:39 +000051using namespace arm_compute::test::validation::utils;
Giorgio Arena232c4522022-03-03 10:09:01 +000052
53namespace arm_compute
54{
55namespace test
56{
57namespace validation
58{
Giorgio Arena232c4522022-03-03 10:09:01 +000059TEST_SUITE(CL)
60TEST_SUITE(UNIT)
61TEST_SUITE(DYNAMIC_FUSION)
62TEST_SUITE(ClCompositeKernel)
63TEST_SUITE(Validate)
64
Gunes Bayir16c56972022-03-28 21:32:33 +010065TEST_CASE(MoveNet_SubGraph_1_DirectConv2d, framework::DatasetMode::ALL)
66{
67 /* Computation:
68 * out = add(addend, direct_conv2d(lhs, rhs, bias)) (non-broadcast)
69 */
70
71 ClCompositeKernel kernel{};
72 ClKernelBlueprint bp{};
73 ClKernelCode cl_code{};
74 ClExecutionDescriptor exec_desc{};
75 Status st{};
76
77 const auto data_type = DataType::F32;
SiCong Lib63b1192022-01-28 18:24:39 +000078 const auto conv_info = Conv2dDescriptor{ Padding2D{ 1U, 1U, 1U, 1U }, { 1U, 1U } /* stride */ };
Gunes Bayir16c56972022-03-28 21:32:33 +010079
80 const auto width = 7U;
81 const auto height = 6U;
82 const auto IFM = 5U;
83 const auto OFM = 4U;
84 const auto kernel_sz = 3U;
85
SiCong Lib63b1192022-01-28 18:24:39 +000086 const auto src_shape = TensorShape(IFM, width, height);
87 const auto wei_shape = TensorShape(IFM, kernel_sz, kernel_sz, OFM);
88 const auto bia_shape = TensorShape(OFM);
89 const auto addend_shape = TensorShape(1, 1);
90 const auto dst_shape = TensorShape(OFM, width, height);
Gunes Bayir16c56972022-03-28 21:32:33 +010091
SiCong Lib63b1192022-01-28 18:24:39 +000092 auto src_info = TensorInfo(src_shape, 1, data_type, DataLayout::NHWC);
93 auto wei_info = TensorInfo(wei_shape, 1, data_type, DataLayout::NHWC);
94 auto bia_info = TensorInfo(bia_shape, 1, data_type, DataLayout::NHWC);
95 auto addend_info = TensorInfo(addend_shape, 1, data_type, DataLayout::NHWC);
96 auto dst_info = TensorInfo(dst_shape, 1, data_type, DataLayout::NHWC);
Gunes Bayir16c56972022-03-28 21:32:33 +010097
98 const auto n0 = std::min(OFM, 4u);
99 const auto m0 = (OFM > 16) ? ((data_type == DataType::F32) ? 2U : 4U) : 1U;
100
SiCong Lib63b1192022-01-28 18:24:39 +0000101 const ClDirectConv2dKernelDescriptor direct_conv2d_desc{ conv_info };
102 const ClEltwiseAddKernelDescriptor eltwise_add_desc{};
103 const TileDescriptor store_tile_info{ Size2D(n0, m0), Size2D(width, height), ClippingStrategy::TOP_LEFT };
Gunes Bayir16c56972022-03-28 21:32:33 +0100104
105 ArgumentID src_id{ g_arg_placeholder };
106 ArgumentID wei_id{ g_arg_placeholder };
107 ArgumentID bia_id{ g_arg_placeholder };
108 ArgumentID acc_id{ g_arg_placeholder };
SiCong Lib63b1192022-01-28 18:24:39 +0000109 ArgumentID acc_1_id{ g_arg_placeholder };
Gunes Bayir16c56972022-03-28 21:32:33 +0100110 ArgumentID addend_id{ g_arg_placeholder };
111 ArgumentID dst_id{ g_arg_placeholder };
112
SiCong Lib63b1192022-01-28 18:24:39 +0000113 st = add_tensor(bp, &src_info, src_id);
114 st = add_tensor(bp, &wei_info, wei_id);
115 st = add_tensor(bp, &bia_info, bia_id);
116 st = add_tensor(bp, &dst_info, acc_id);
117 st = add_tensor(bp, &dst_info, acc_1_id);
118 st = add_tensor(bp, &addend_info, addend_id);
119 st = add_tensor(bp, &dst_info, dst_id);
Gunes Bayir16c56972022-03-28 21:32:33 +0100120
SiCong Lib63b1192022-01-28 18:24:39 +0000121 st = add_kcomp_direct_conv2d(bp, direct_conv2d_desc, src_id, wei_id, bia_id, acc_id);
122 st = add_kcomp_eltwise_add(bp, eltwise_add_desc, addend_id, acc_id, acc_1_id);
123 st = add_kcomp_store(bp, StoreType::TStoreIndirectWidthSelect, acc_1_id, dst_id);
Gunes Bayir16c56972022-03-28 21:32:33 +0100124
125 exec_desc.skip_sliding_window = true;
126
127 st = set_tile_info(bp, store_tile_info);
128 st = build(cl_code, ClCodeBuilderContext{ GpuInfo{ GPUTarget::G71 } }, bp);
129 st = tune_static(exec_desc, cl_code);
130
131 CLScheduler::get().default_reinit();
132 kernel.configure(CLKernelLibrary::get().get_compile_context(), cl_code);
133
134 // Construct tensors
135 CLTensor src{};
136 CLTensor wei{};
137 CLTensor bia{};
138 CLTensor addend{};
139 CLTensor dst{};
140
141 // Init tensors
142 src.allocator()->init(src_info);
143 wei.allocator()->init(wei_info);
144 bia.allocator()->init(bia_info);
145 addend.allocator()->init(dst_info);
146 dst.allocator()->init(dst_info);
147
148 // "Pack" tensors
SiCong Lib63b1192022-01-28 18:24:39 +0000149 ITensorPack tensors{ { src_id, &src },
Gunes Bayir16c56972022-03-28 21:32:33 +0100150 { wei_id, &wei },
151 { bia_id, &bia },
152 { addend_id, &addend },
SiCong Lib63b1192022-01-28 18:24:39 +0000153 { dst_id, &dst } };
Gunes Bayir16c56972022-03-28 21:32:33 +0100154
155 // Allocate and fill tensors
156 src.allocator()->allocate();
157 wei.allocator()->allocate();
158 bia.allocator()->allocate();
159 addend.allocator()->allocate();
160 dst.allocator()->allocate();
161
SiCong Lib63b1192022-01-28 18:24:39 +0000162 fill<float>(CLAccessor(src), 0, library.get());
163 fill<float>(CLAccessor(wei), 1, library.get());
164 fill<float>(CLAccessor(bia), 2, library.get());
165 fill<float>(CLAccessor(addend), 3, library.get());
Gunes Bayir16c56972022-03-28 21:32:33 +0100166
167 CLScheduler::get().enqueue_op(kernel, tensors, exec_desc, true);
168
169 // Create reference
170 SimpleTensor<float> ref_src_nhwc{ src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
171 SimpleTensor<float> ref_wei_nhwc{ wei_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
172 SimpleTensor<float> ref_bia_nhwc{ bia_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
173 SimpleTensor<float> ref_addend_nhwc{ dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
174
175 // Fill reference
SiCong Lib63b1192022-01-28 18:24:39 +0000176 fill<float>(ref_src_nhwc, 0, library.get());
177 fill<float>(ref_wei_nhwc, 1, library.get());
178 fill<float>(ref_bia_nhwc, 2, library.get());
179 fill<float>(ref_addend_nhwc, 3, library.get());
Gunes Bayir16c56972022-03-28 21:32:33 +0100180
181 auto ref_src = reference::permute(ref_src_nhwc, PermutationVector(1U, 2U, 0U));
182 auto ref_wei = reference::permute(ref_wei_nhwc, PermutationVector(1U, 2U, 0U));
183 auto ref_bia = reference::permute(ref_bia_nhwc, PermutationVector(1U, 2U, 0U));
184 auto ref_addend = reference::permute(ref_addend_nhwc, PermutationVector(1U, 2U, 0U));
185
186 TensorShape dst_shape_nchw{ dst_shape };
187 permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U));
188
189 const auto ref_dst = reference::arithmetic_operation(
190 ArithmeticOperation::ADD,
191 ref_addend,
SiCong Lib63b1192022-01-28 18:24:39 +0000192 reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw,
193 PadStrideInfo
194 {
195 static_cast<unsigned int>(conv_info.stride.x()),
196 static_cast<unsigned int>(conv_info.stride.y()),
197 static_cast<unsigned int>(conv_info.pad.left),
198 static_cast<unsigned int>(conv_info.pad.top) }),
199 data_type,
200 ConvertPolicy::SATURATE);
Gunes Bayir16c56972022-03-28 21:32:33 +0100201
202 RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
203 validate(CLAccessor(dst), ref_dst, tolerance_f32);
204}
205
Giorgio Arena232c4522022-03-03 10:09:01 +0000206TEST_SUITE_END() // Validate
Giorgio Arena232c4522022-03-03 10:09:01 +0000207TEST_SUITE_END() // ClCompositeKernel
208TEST_SUITE_END() // DYNAMIC_FUSION
209TEST_SUITE_END() // UNIT
210TEST_SUITE_END() // CL
211} // namespace validation
212} // namespace test
SiCong Li4e9f5682022-05-10 10:15:59 +0100213} // namespace arm_compute
214#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */