blob: 695dcd5b6efd4d469dc96cfbcf9fa4bc75e1b1e8 [file] [log] [blame]
/*
 * Copyright (c) 2018-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__
25#define __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__
26
27#include "arm_compute/core/NEON/kernels/assembly/Helpers.h"
28#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000029#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.h"
30#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.h"
31#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.h"
Anthony Barbier3d677cc2018-07-23 16:42:59 +010032#include "arm_compute/runtime/IFunction.h"
33#include "arm_compute/runtime/IMemoryManager.h"
34#include "arm_compute/runtime/IScheduler.h"
Michalis Spyrou1a569a32019-09-10 17:20:34 +010035#include "arm_compute/runtime/IWeightsManager.h"
Anthony Barbier3d677cc2018-07-23 16:42:59 +010036#include "arm_compute/runtime/MemoryGroup.h"
37#include "arm_compute/runtime/Tensor.h"
38
39#include <memory>
40
41namespace arm_compute
42{
// Forward declarations
class ITensor;
Anthony Barbier3d677cc2018-07-23 16:42:59 +010045
/** Buffer manager used when reshaping B on the fly
 *
 * The typical workflow is:
 * - lock_to_reshape_if_needed()
 * - If the previous lock was successful: mark_as_reshaped()
 * - wait_for_reshaping() wait for the reshaping to be complete
 * - mark_as_unused() once the thread is done using this given buffer.
 *
 * Calls for different indices might be interleaved, however the calls for a given index must always be in that order.
 */
class IBufferManager
{
public:
    /** Lock a buffer for the given index if it's available, otherwise return immediately
     *
     * @param[in] index Index of the buffer to lock
     *
     * @return True if the buffer has been successfully locked, false if it's already reshaped / being reshaped.
     */
    virtual bool lock_to_reshape_if_needed(unsigned int index) = 0;
    /** Mark a buffer previously locked as reshaped
     *
     * @pre The thread calling this function must have locked the given buffer through lock_to_reshape_if_needed()
     *
     * @param[in] index Index of the buffer to mark as reshaped
     */
    virtual void mark_as_reshaped(unsigned int index) = 0;
    /** Block until the given buffer is marked as reshaped
     *
     * @param[in] index Index of the buffer
     */
    virtual void wait_for_reshaping(unsigned int index) = 0;
    /** Mark a reshaped buffer as unused
     *
     * Once all the users have marked a buffer as unused then it goes back to being free
     *
     * @param[in] index Index of the buffer to mark as unused
     */
    virtual void mark_as_unused(unsigned int index) = 0;

    /** Number of buffers used internally
     *
     * @return The number of buffers used by the manager.
     */
    virtual unsigned int num_buffers() const = 0;
    /** Default destructor */
    virtual ~IBufferManager() = default;
};
92
Anthony Barbier3d677cc2018-07-23 16:42:59 +010093/** Equivalent to arm_gemm::GemmInterleaved but using Compute Library types.
94 */
95class NEGEMMInterleavedWrapper : public IFunction
96{
97public:
Michalis Spyrou1a569a32019-09-10 17:20:34 +010098 NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
99 ~NEGEMMInterleavedWrapper() = default;
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100100
101 NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete;
102 NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete;
103
104 /** Initialise the kernel's input and output.
105 *
106 * @note The input and output tensor must have the same dimensions
107 *
Georgios Pinitas37d080f2019-06-21 18:43:12 +0100108 * @param[in] a Input tensor (Matrix A)
109 * @param[in] b Input tensor (Matrix B)
110 * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
111 * @param[in] alpha Scalar multiplier to apply to AB matrix product.
112 * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
113 * @param[in] gemm_info GEMM meta-data
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100114 */
Georgios Pinitas37d080f2019-06-21 18:43:12 +0100115 void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info);
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100116
117 // Inherited methods overridden:
118 void run() override;
119 void prepare() override;
120
121private:
122 MemoryGroup _memory_group;
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100123 IWeightsManager *_weights_manager;
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100124 bool _is_prepared{ false };
125 bool _pretranspose_b{ false };
126 Window _block_walker{};
127 Window _batch_window{};
128 const ITensor *_a{ nullptr };
129 const ITensor *_b{ nullptr };
130 ITensor *_c{ nullptr };
131 Tensor _transformed_b{};
132 Tensor _transformed_a{};
133 Tensor _tmp_c{};
134 INEGEMMWrapperKernel::Params _params{};
135 BlockSizes _block_sizes{};
136 std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> _prepare_b{ nullptr };
137 std::unique_ptr<NEGEMMInterleavedTransformAWrapper> _transform_a{ nullptr };
138 std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> _matrix_multiply{ nullptr };
Anthony Barbierff0bccf2018-11-30 10:42:40 +0000139 std::unique_ptr<IBufferManager> _buffer_manager{ nullptr };
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100140 std::vector<TransformAWorkload> _a_workloads{};
141 std::vector<PrepareBWorkload> _b_workloads{};
142 std::vector<MatrixMultiplyWorkload> _mm_workloads{};
143 std::vector<IScheduler::Workload> _workloads{};
Anthony Barbierac314c22018-09-11 17:49:10 +0100144 std::string _tag{};
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100145};
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100146} // namespace arm_compute
147#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */