blob: b5ac70c93b006bebf9ddc8b63cbe29588e48df62 [file] [log] [blame]
Georgios Pinitas529b5a22021-07-27 15:55:30 +01001/*
2 * Copyright (c) 2017-2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_CL_FULLY_CONNECTED_H
25#define ARM_COMPUTE_CL_FULLY_CONNECTED_H
26
27#include "arm_compute/core/TensorInfo.h"
28
Georgios Pinitas7891a732021-08-20 21:39:25 +010029#include "src/gpu/cl/ClCompileContext.h"
30#include "src/gpu/cl/IClOperator.h"
Georgios Pinitas529b5a22021-07-27 15:55:30 +010031
32#include <memory>
33
34namespace arm_compute
35{
36namespace opencl
37{
// Forward declarations
// These operators are only held through std::unique_ptr members of ClFullyConnected,
// so this header does not need their full definitions.
class ClConvertFullyConnectedWeights;
class ClFlatten;
class ClGemm;
class ClGemmLowpMatrixMultiplyCore;
class ClTranspose;
44
45/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
46 *
47 * -# @ref opencl::kernels::ClIm2ColKernel (called when the input comes from a convolutional layer)
48 * -# @ref CLTranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +010049 * -# @ref opencl::ClGemm or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
Georgios Pinitas529b5a22021-07-27 15:55:30 +010050 *
51 * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
52 */
53class ClFullyConnected : public IClOperator
54{
55public:
56 ClFullyConnected();
57 ~ClFullyConnected();
58 /** Set the input and output tensors.
59 *
60 * Valid data layouts:
61 * - NHWC
62 * - NCHW
63 *
64 * Valid data type configurations:
65 * |src0 |src1 |src2 |dst |
66 * |:--------------|:------------------|:------|:--------------|
67 * |F16 |F16 |F16 |F16 |
68 * |F32 |F32 |F32 |F32 |
69 * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
70 * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
71 *
72 * @param[in] compile_context The compile context to be used.
73 * @param[in] src Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
74 * @param[in] weights Weights tensor. The weights must be 2 dimensional.
75 * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
76 * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
77 * Data type supported: Same as @p src.
78 * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p src.
79 * @param[out] dst Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
80 * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
81 * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
82 * Data type supported: Same as @p src.
83 * @param[in] fc_info (Optional) Fully connected layer additional info
84 */
85 void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
86 FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
87 /** Static function to check if given info will lead to a valid configuration
88 *
89 * Similar to ClFullyConnected::configure()
90 *
91 * @return a status
92 */
93 static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
94 FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
95
96 // Inherited methods overriden
97 void run(ITensorPack &tensors) override;
98 void prepare(ITensorPack &tensors) override;
99 experimental::MemoryRequirements workspace() const override;
100
101private:
102 void configure_fc_fc(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, const FullyConnectedLayerInfo &fc_info);
103 void configure_conv_fc(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, const FullyConnectedLayerInfo &fc_info);
104 void configure_mm(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *bias, ITensorInfo *dst, const FullyConnectedLayerInfo &fc_info);
105
106private:
107 enum AuxTensorIdx
108 {
109 TransposedWeights = 10,
110 ConvertedWeights = 11,
111 FlattenedSrc = 12,
112 Count = 13
113 };
114
115 std::unique_ptr<ClConvertFullyConnectedWeights> _convert_weights;
116 std::unique_ptr<ClFlatten> _flatten;
117 std::unique_ptr<ClTranspose> _reshape_weights;
118 std::unique_ptr<ClGemm> _mm_gemm;
119 std::unique_ptr<ClGemmLowpMatrixMultiplyCore> _mm_gemmlowp;
120
121 experimental::MemoryRequirements _aux_mem{};
122
123 TensorInfo _flattened_src{};
124 TensorInfo _converted_weights{};
125 TensorInfo _reshaped_weights{};
126
127 TensorInfo _weights_to_use{};
128 int _weights_to_use_idx{ ACL_SRC_1 };
129
130 bool _are_weights_converted{ true };
131 bool _are_weights_reshaped{ true };
132 bool _is_fc_after_conv{ true };
133 bool _is_quantized{ false };
134 bool _is_prepared{ false };
135};
136} // namespace opencl
137} // namespace arm_compute
138#endif /* ARM_COMPUTE_CL_FULLY_CONNECTED_H */