blob: 5e05e93855c84518d3acf11b47c118049a42d62e [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
25#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
26
27#include "arm_compute/core/NEON/INEKernel.h"
Gian Marcoe75a02b2017-11-08 12:24:09 +000028#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
29#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010030#include "arm_compute/runtime/IFunction.h"
31#include "arm_compute/runtime/IMemoryManager.h"
32#include "arm_compute/runtime/MemoryGroup.h"
Pablo Telloeb82fd22018-02-23 13:43:50 +000033#include "arm_compute/runtime/NEON/AssemblyHelper.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010034#include "arm_compute/runtime/Tensor.h"
35
36#include <memory>
37
38namespace arm_compute
39{
40class ITensor;
41
42/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
43 *
44 * -# @ref NEGEMMInterleave4x4Kernel
45 * -# @ref NEGEMMTranspose1xWKernel
46 * -# @ref NEGEMMLowpMatrixMultiplyKernel
Gian Marcoe75a02b2017-11-08 12:24:09 +000047 * -# @ref NEGEMMLowpOffsetContributionKernel
Gian Marco Iodiceab182122017-10-09 15:05:40 +010048 *
49 * otherwise if the DOT product instruction is available:
50 *
Gian Marcoe75a02b2017-11-08 12:24:09 +000051 * -# @ref NEGEMMLowpOffsetContributionKernel
Gian Marco Iodiceab182122017-10-09 15:05:40 +010052 *
53*/
54class NEGEMMLowpMatrixMultiplyCore : public IFunction
55{
56public:
57 /** Constructor */
58 NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
59 /** Initialise the kernel's inputs, output
Anthony Barbierf202e502017-11-23 18:02:04 +000060 *
61 * @note GEMM_LOWP: low precision GEMM kernel
62 * This kernel performs the following computations:
63 *
64 * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
65 * -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
66 * -# Compute the matrix product of the resulting a * b in int32.
67 *
Chunosov5124be52017-11-22 20:42:13 +070068 * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
69 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
70 * @param[out] output Output tensor. Data type supported: Data type supported: S32
71 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
72 * if the reshape of matrix B should be executed only for the first run
Anthony Barbierf202e502017-11-23 18:02:04 +000073 */
Chunosov5124be52017-11-22 20:42:13 +070074 void configure(const ITensor *a, const ITensor *b, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000075 /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
76 *
Georgios Pinitas358ca202017-12-07 16:47:52 +000077 * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
78 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
79 * @param[in] output Output tensor. Data type supported: Data type supported: S32
80 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
81 * if the reshape of matrix B should be executed only for the first run
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000082 *
Georgios Pinitas631c41a2017-12-06 11:53:03 +000083 * @return a status
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000084 */
Georgios Pinitas631c41a2017-12-06 11:53:03 +000085 static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
Gian Marco Iodiceab182122017-10-09 15:05:40 +010086
Pablo Tellofc004492018-03-23 11:40:05 +000087 // Inherited methods overridden
Gian Marco Iodiceab182122017-10-09 15:05:40 +010088 void run() override;
89
90private:
Gian Marcoe75a02b2017-11-08 12:24:09 +000091 MemoryGroup _memory_group;
Pablo Telloeb82fd22018-02-23 13:43:50 +000092 AssemblyKernelGlueU8U32 _asm_glue_unsigned;
93 AssemblyKernelGlueS8S32 _asm_glue_signed;
Gian Marcoe75a02b2017-11-08 12:24:09 +000094 std::unique_ptr<INEKernel> _mm_kernel;
95 std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
96 std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
97 NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
98 NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
99 NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
100 Tensor _vector_sum_col;
101 Tensor _vector_sum_row;
102 Tensor _tmp_a;
103 Tensor _tmp_b;
104 Tensor _workspace;
105 int32_t _a_offset;
106 int32_t _b_offset;
Gian Marcoc7f9b892017-11-30 14:31:13 +0000107 bool _run_vector_matrix_multiplication;
108 bool _dot_product_path;
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100109};
110}
111#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */