/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
25#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
26
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"

#include <cstdint>
#include <memory>
36
37namespace arm_compute
38{
39class ITensor;
40
41/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
42 *
43 * -# @ref NEGEMMInterleave4x4Kernel
44 * -# @ref NEGEMMTranspose1xWKernel
45 * -# @ref NEGEMMLowpMatrixMultiplyKernel
Gian Marcoe75a02b2017-11-08 12:24:09 +000046 * -# @ref NEGEMMLowpOffsetContributionKernel
Gian Marco Iodiceab182122017-10-09 15:05:40 +010047 *
48 * otherwise if the DOT product instruction is available:
49 *
50 * -# @ref NEGEMMInterleaveBlockedKernel
51 * -# @ref NEGEMMLowpAArch64V8P4Kernel
Gian Marcoe75a02b2017-11-08 12:24:09 +000052 * -# @ref NEGEMMLowpOffsetContributionKernel
Gian Marco Iodiceab182122017-10-09 15:05:40 +010053 *
54*/
55class NEGEMMLowpMatrixMultiplyCore : public IFunction
56{
57public:
58 /** Constructor */
59 NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
60 /** Initialise the kernel's inputs, output
61 *
62 * @note GEMM_LOWP: low precision GEMM kernel
63 * This kernel performs the following computations:
64 *
Gian Marcoe75a02b2017-11-08 12:24:09 +000065 * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
66 * -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
67 * -# Compute the matrix product of the resulting a * b in int32.
Gian Marco Iodiceab182122017-10-09 15:05:40 +010068 *
Gian Marcoe75a02b2017-11-08 12:24:09 +000069 * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
Gian Marco Iodiceab182122017-10-09 15:05:40 +010070 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
71 * @param[out] output Output tensor. Data type supported: Data type supported: S32
72 */
73 void configure(const ITensor *a, const ITensor *b, ITensor *output);
Georgios Pinitasa3b1b462017-11-16 19:24:39 +000074 /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
75 *
76 * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
77 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
78 * @param[out] output Output tensor. Data type supported: Data type supported: S32
79 *
80 * @return an error status
81 */
82 static Error validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output);
Gian Marco Iodiceab182122017-10-09 15:05:40 +010083
84 // Inherited methods overridden:
85 void run() override;
86
87private:
Gian Marcoe75a02b2017-11-08 12:24:09 +000088 MemoryGroup _memory_group;
89 std::unique_ptr<INEKernel> _mm_kernel;
90 std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
91 std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
92 NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
93 NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
94 NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
95 Tensor _vector_sum_col;
96 Tensor _vector_sum_row;
97 Tensor _tmp_a;
98 Tensor _tmp_b;
99 Tensor _workspace;
100 int32_t _a_offset;
101 int32_t _b_offset;
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100102};
103}
104#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */