blob: 682475c82422bb16583781850263b726dd7850eb [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
25#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
26
27#include "arm_compute/core/NEON/INEKernel.h"
Gian Marcoe75a02b2017-11-08 12:24:09 +000028#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
29#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010030#include "arm_compute/runtime/IFunction.h"
31#include "arm_compute/runtime/IMemoryManager.h"
32#include "arm_compute/runtime/MemoryGroup.h"
Anthony Barbier71d9b572018-07-06 17:05:59 +010033#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010034#include "arm_compute/runtime/Tensor.h"
35
36#include <memory>
37
38namespace arm_compute
39{
40class ITensor;
41
42/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
43 *
44 * -# @ref NEGEMMInterleave4x4Kernel
45 * -# @ref NEGEMMTranspose1xWKernel
46 * -# @ref NEGEMMLowpMatrixMultiplyKernel
Gian Marcoe75a02b2017-11-08 12:24:09 +000047 * -# @ref NEGEMMLowpOffsetContributionKernel
Gian Marco Iodiceab182122017-10-09 15:05:40 +010048 *
49 * otherwise if the DOT product instruction is available:
50 *
Gian Marcoe75a02b2017-11-08 12:24:09 +000051 * -# @ref NEGEMMLowpOffsetContributionKernel
Gian Marco Iodiceab182122017-10-09 15:05:40 +010052 *
53*/
54class NEGEMMLowpMatrixMultiplyCore : public IFunction
55{
56public:
57 /** Constructor */
58 NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
Georgios Pinitas72219332018-06-05 14:56:06 +010059 /** Prevent instances of this class from being copied (As this class contains pointers) */
60 NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete;
61 /** Default move constructor */
62 NEGEMMLowpMatrixMultiplyCore(NEGEMMLowpMatrixMultiplyCore &&) = default;
63 /** Prevent instances of this class from being copied (As this class contains pointers) */
64 NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete;
65 /** Default move assignment operator */
66 NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default;
Gian Marco Iodiceab182122017-10-09 15:05:40 +010067 /** Initialise the kernel's inputs, output
Anthony Barbierf202e502017-11-23 18:02:04 +000068 *
69 * @note GEMM_LOWP: low precision GEMM kernel
70 * This kernel performs the following computations:
71 *
72 * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
73 * -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
74 * -# Compute the matrix product of the resulting a * b in int32.
75 *
Chunosov5124be52017-11-22 20:42:13 +070076 * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
77 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
Gian Marco Iodice4b908652018-10-18 10:21:02 +010078 * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
Chunosov5124be52017-11-22 20:42:13 +070079 * @param[out] output Output tensor. Data type supported: Data type supported: S32
80 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
81 * if the reshape of matrix B should be executed only for the first run
Anthony Barbierf202e502017-11-23 18:02:04 +000082 */
Gian Marco Iodice4b908652018-10-18 10:21:02 +010083 void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000084 /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
85 *
Georgios Pinitas358ca202017-12-07 16:47:52 +000086 * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
87 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
Gian Marco Iodice4b908652018-10-18 10:21:02 +010088 * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
Georgios Pinitas358ca202017-12-07 16:47:52 +000089 * @param[in] output Output tensor. Data type supported: Data type supported: S32
90 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
91 * if the reshape of matrix B should be executed only for the first run
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000092 *
Georgios Pinitas631c41a2017-12-06 11:53:03 +000093 * @return a status
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000094 */
Gian Marco Iodice4b908652018-10-18 10:21:02 +010095 static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
Gian Marco Iodiceab182122017-10-09 15:05:40 +010096
Pablo Tellofc004492018-03-23 11:40:05 +000097 // Inherited methods overridden
Gian Marco Iodiceab182122017-10-09 15:05:40 +010098 void run() override;
Georgios Pinitas72219332018-06-05 14:56:06 +010099 void prepare() override;
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100100
101private:
Gian Marcoe75a02b2017-11-08 12:24:09 +0000102 MemoryGroup _memory_group;
Anthony Barbiereaefd002018-07-20 17:49:35 +0100103 NEGEMMAssemblyDispatch _asm_glue;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000104 std::unique_ptr<INEKernel> _mm_kernel;
105 std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
106 std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
107 NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
108 NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
109 NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
110 Tensor _vector_sum_col;
111 Tensor _vector_sum_row;
112 Tensor _tmp_a;
113 Tensor _tmp_b;
Georgios Pinitas72219332018-06-05 14:56:06 +0100114 const ITensor *_original_b;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000115 int32_t _a_offset;
116 int32_t _b_offset;
Gian Marcoc7f9b892017-11-30 14:31:13 +0000117 bool _run_vector_matrix_multiplication;
118 bool _dot_product_path;
Giorgio Arenabb54e4e2018-04-05 17:20:34 +0100119 bool _reshape_b_only_on_first_run;
Georgios Pinitas72219332018-06-05 14:56:06 +0100120 bool _is_prepared;
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100121};
122}
123#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */