blob: 26d9e9999d8a77d21ae6ec67cba2e39cbe63e030 [file] [log] [blame]
/*
 * Copyright (c) 2018-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__
25#define __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__
26
27#include "../arm_gemm/utils.hpp"
28#include "arm_gemm.hpp"
29
30#include "../arm_gemm/mergeresults.hpp"
31#include "../arm_gemm/transform.hpp"
32
33#include "../arm_gemm/kernels/a32_sgemm_8x6.hpp"
34#include "../arm_gemm/kernels/a64_gemm_s8_12x8.hpp"
35#include "../arm_gemm/kernels/a64_gemm_s8_4x4.hpp"
36#include "../arm_gemm/kernels/a64_gemm_u8_12x8.hpp"
37#include "../arm_gemm/kernels/a64_gemm_u8_4x4.hpp"
38#include "../arm_gemm/kernels/a64_hgemm_24x8.hpp"
39#include "../arm_gemm/kernels/a64_sgemm_12x8.hpp"
Georgios Pinitas421405b2018-10-26 19:05:32 +010040#include "../arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp"
41#include "../arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp"
42#include "../arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp"
43#include "../arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp"
Anthony Barbier3d677cc2018-07-23 16:42:59 +010044
45namespace arm_compute
46{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000047namespace detail
Anthony Barbier3d677cc2018-07-23 16:42:59 +010048{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000049/** GEMM Interleaved Strategy interface */
50class IInterleavedStrategy
Anthony Barbier3d677cc2018-07-23 16:42:59 +010051{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000052public:
53 /** Virtual Destructor */
54 virtual ~IInterleavedStrategy() = default;
Georgios Pinitas1509e4b2019-01-28 10:01:50 +000055 /** Return output height of the interleaved strategy
56 *
57 * @return Output height of strategy
58 */
59 virtual unsigned int out_height() const = 0;
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000060 /** Instantiate and configure a prepareB Kernel
61 *
62 * @param[in] b Input tensor B.
63 * @param[in] transformed_b Reshaped tensor B.
64 * @param[in] params GM, N, K sizes.
65 * @param[in] ci CPUInfo to be used for kernel configuration.
66 *
67 * @return A wrapped specialized prepareB kernel
68 */
69 virtual std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b,
70 ITensor *transformed_b,
71 const INEGEMMWrapperKernel::Params &params,
72 const CPUInfo &ci) = 0;
73 /** Instantiate and configure a transformA Kernel
74 *
75 * @param[in] a Input tensor A.
76 * @param[in] transformed_a Reshaped tensor A.
77 * @param[in] block_walker Window representing the layout of the matrix's blocks.
78 * @param[in] params M, N, K sizes.
79 *
80 * @return A wrapped specialized transformA kernel
81 */
82 virtual std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a,
83 ITensor *transformed_a,
84 const Window &block_walker,
85 const INEGEMMWrapperKernel::Params &params) = 0;
86 /** Instantiate and configure a prepareB Kernel
87 *
88 * @param transformed_a Already reshaped tensor A.
89 * @param transformed_b Already reshaped tensor B.
90 * @param tmp_c Temporary buffer to be used to store intermediate results.
91 * @param c Result tensor C.
92 * @param block_walker Window containing iteration information for the M and batch dimensions.
93 * @param block_sizes Block sizes to use for the matrix multiplication (A & B must have been reshaped using these same block sizes).
94 * @param params M, N, K sizes.
95 * @param alpha Alpha value
96 * @param beta Beta value
97 * @param pretranspose_b Is B also pretransposed ?
98 * @param num_threads Maximum number of threads that might be used for the calculations.
99 *
100 * @return A wrapped specialized MatrixMultiply kernel
101 */
102 virtual std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c,
103 const Window &block_walker, const BlockSizes &block_sizes,
104 const INEGEMMWrapperKernel::Params &params, float alpha, float beta, bool pretranspose_b,
105 unsigned int num_threads) = 0;
106 /** Calculates the block sizes of a given strategy
107 *
108 * @param[in] ci CPUInfo to be used for kernel configuration.
109 * @param[in] params M, N, K sizes.
110 *
111 * @return BlockSizes for a given strategy
112 */
113 virtual BlockSizes calculate_block_sizes_for_strategy(const CPUInfo &ci, const INEGEMMWrapperKernel::Params &params) = 0;
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100114};
115
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000116/** Interleaved Strategy class */
117template <typename StrategyType>
118class InterleavedStrategy : public IInterleavedStrategy
119{
120public:
121 using strategy = StrategyType;
Anthony Barbierac314c22018-09-11 17:49:10 +0100122
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000123public:
124 // Inherited methods overridden
Georgios Pinitas1509e4b2019-01-28 10:01:50 +0000125 unsigned int out_height() const override
126 {
127 return strategy::out_height();
128 }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000129 std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> instantiate_prepareB(const ITensor *b,
130 ITensor *transformed_b,
131 const INEGEMMWrapperKernel::Params &params,
132 const CPUInfo &ci) override
133 {
134 auto prepare_b = support::cpp14::make_unique<NEGEMMInterleavedPrepareBWrapperKernelTemplate<strategy>>();
135 prepare_b->configure(b, transformed_b, false, ci, params);
136 return std::move(prepare_b);
137 }
138 std::unique_ptr<NEGEMMInterleavedTransformAWrapper> instantiate_transformA(const ITensor *a,
139 ITensor *transformed_a,
140 const Window &block_walker,
141 const INEGEMMWrapperKernel::Params &params) override
142 {
143 auto transform_a = support::cpp14::make_unique<NEGEMMInterleavedTransformAWrapperTemplate<strategy>>();
144 transform_a->configure(a, transformed_a, false, block_walker, params);
145 return std::move(transform_a);
146 }
147 std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> instantiate_matrix_multiply(const ITensor *transformed_a, const ITensor *transformed_b, ITensor *tmp_c, ITensor *c,
148 const Window &block_walker, const BlockSizes &block_sizes,
149 const INEGEMMWrapperKernel::Params &params, float alpha, float beta, bool pretranspose_b,
150 unsigned int num_threads) override
151 {
152 auto matrix_multiply = support::cpp14::make_unique<NEGEMMInterleavedMatrixMultiplyWrapperTemplate<strategy>>();
153 matrix_multiply->configure(transformed_a, transformed_b, tmp_c, c, block_walker, block_sizes, params, pretranspose_b, alpha, beta, num_threads);
154 return std::move(matrix_multiply);
155 }
Anthony Barbierac314c22018-09-11 17:49:10 +0100156
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000157 BlockSizes calculate_block_sizes_for_strategy(const CPUInfo &ci, const INEGEMMWrapperKernel::Params &params) override
158 {
159 return calculate_block_sizes<strategy>(ci, params.M, params.N, params.K);
160 }
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100161};
162
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000163/** Create the backend GEMM strategy to use given the provided kernel info
164 *
165 * @param[in] kernel_name Kernel name of the backend strategy to instantiate
166 *
167 * @return The requested kernel strategy if exists else nullptr
168 */
169std::unique_ptr<IInterleavedStrategy> create_strategy(const std::string &kernel_name)
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100170{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000171#if defined(__arm__)
172 if(kernel_name.find("sgemm_8x6") != std::string::npos)
173 {
174 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::sgemm_8x6>>();
175 }
176#endif // defined(__arm__)
177#if defined(__aarch64__)
178 if(kernel_name.find("gemm_s8_4x4") != std::string::npos)
179 {
180 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_s8_4x4>>();
181 }
182 if(kernel_name.find("gemm_s8_12x8") != std::string::npos)
183 {
184 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_s8_12x8>>();
185 }
186 if(kernel_name.find("gemm_u8_4x4") != std::string::npos)
187 {
188 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_u8_4x4>>();
189 }
190 if(kernel_name.find("gemm_u8_12x8") != std::string::npos)
191 {
192 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::gemm_u8_12x8>>();
193 }
194#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
195 if(kernel_name.find("hgemm_24x8") != std::string::npos)
196 {
197 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::hgemm_24x8>>();
198 }
199#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
200 if(kernel_name.find("sgemm_12x8") != std::string::npos)
201 {
202 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::sgemm_12x8>>();
203 }
204#if defined(__ARM_FEATURE_SVE)
205 if(kernel_name.find("interleaved_fp16_mla_3VLx8") != std::string::npos)
206 {
207 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_fp16_mla_3VLx8>>();
208 }
209 if(kernel_name.find("interleaved_fp32_mla_3VLx8") != std::string::npos)
210 {
211 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_fp32_mla_3VLx8>>();
212 }
213 if(kernel_name.find("interleaved_s8s32_dot_3VLx8") != std::string::npos)
214 {
215 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_s8s32_dot_3VLx8>>();
216 }
217 if(kernel_name.find("interleaved_u8u32_dot_3VLx8") != std::string::npos)
218 {
219 return support::cpp14::make_unique<InterleavedStrategy<arm_gemm::interleaved_u8u32_dot_3VLx8>>();
220 }
221#endif // defined(__ARM_FEATURE_SVE)
222#endif // defined(__aarch64__)_
223 return nullptr;
224}
225} // namespace detail
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100226} // namespace arm_compute
227#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDSTRATEGIES_H__ */