/*
 * Copyright (c) 2018-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__
25#define __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__
26
#include "arm_compute/core/NEON/kernels/assembly/Helpers.h"
#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.h"
#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.h"
#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IScheduler.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"

#include <memory>
#include <string>
#include <vector>
39
40namespace arm_compute
41{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000042// Forward declarations
Anthony Barbier3d677cc2018-07-23 16:42:59 +010043class ITensor;
Anthony Barbier3d677cc2018-07-23 16:42:59 +010044
/** Buffer manager used when reshaping B on the fly
 *
 * The typical workflow for a given buffer index is:
 * - lock_to_reshape_if_needed()
 * - If the previous lock was successful: mark_as_reshaped()
 * - wait_for_reshaping() wait for the reshaping to be complete
 * - mark_as_unused() once the thread is done using this given buffer.
 *
 * Calls for different indices might be interleaved, however the calls for a given index must always be in that order.
 */
class IBufferManager
{
public:
    /** Lock a buffer for the given index if it's available else return
     *
     * @param[in] index Index of the buffer to lock
     *
     * @return True if the buffer has been successfully locked, false if it's already reshaped / being reshaped.
     */
    virtual bool lock_to_reshape_if_needed(unsigned int index) = 0;
    /** Mark a buffer previously locked as reshaped
     *
     * @pre The thread calling this function must have locked the given buffer through lock_to_reshape_if_needed()
     *
     * @param[in] index Index of the buffer to mark as reshaped
     */
    virtual void mark_as_reshaped(unsigned int index) = 0;
    /** Block until the given buffer is marked as reshaped
     *
     * @param[in] index Index of the buffer
     */
    virtual void wait_for_reshaping(unsigned int index) = 0;
    /** Mark a reshaped buffer as unused
     *
     * Once all the users have marked a buffer as unused then it goes back to being free
     *
     * @param[in] index Index of the buffer to mark as unused
     */
    virtual void mark_as_unused(unsigned int index) = 0;

    /** Number of buffers used internally
     *
     * @return The number of buffers used by the manager.
     */
    virtual unsigned int num_buffers() const = 0;
    /** Default destructor (virtual so implementations can be destroyed through an IBufferManager pointer) */
    virtual ~IBufferManager() = default;
};
91
Anthony Barbier3d677cc2018-07-23 16:42:59 +010092/** Equivalent to arm_gemm::GemmInterleaved but using Compute Library types.
93 */
94class NEGEMMInterleavedWrapper : public IFunction
95{
96public:
97 NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000098 ~NEGEMMInterleavedWrapper() = default;
Anthony Barbier3d677cc2018-07-23 16:42:59 +010099
100 NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete;
101 NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete;
102
103 /** Initialise the kernel's input and output.
104 *
105 * @note The input and output tensor must have the same dimensions
106 *
107 * @param[in] a Input tensor (Matrix A)
108 * @param[in] b Input tensor (Matrix B)
109 * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
110 * @param[in] alpha Scalar multiplier to apply to AB matrix product.
111 * @param[in] beta Scalar multiplier to apply to input C matrix before adding product.
112 * @param[in] pretranspose_b If true, pretranspose B once during the prepare() stage instead of on the fly every time.
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100113 */
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000114 void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, bool pretranspose_b);
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100115
116 // Inherited methods overridden:
117 void run() override;
118 void prepare() override;
119
120private:
121 MemoryGroup _memory_group;
122 bool _is_prepared{ false };
123 bool _pretranspose_b{ false };
124 Window _block_walker{};
125 Window _batch_window{};
126 const ITensor *_a{ nullptr };
127 const ITensor *_b{ nullptr };
128 ITensor *_c{ nullptr };
129 Tensor _transformed_b{};
130 Tensor _transformed_a{};
131 Tensor _tmp_c{};
132 INEGEMMWrapperKernel::Params _params{};
133 BlockSizes _block_sizes{};
134 std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> _prepare_b{ nullptr };
135 std::unique_ptr<NEGEMMInterleavedTransformAWrapper> _transform_a{ nullptr };
136 std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> _matrix_multiply{ nullptr };
Anthony Barbierff0bccf2018-11-30 10:42:40 +0000137 std::unique_ptr<IBufferManager> _buffer_manager{ nullptr };
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100138 std::vector<TransformAWorkload> _a_workloads{};
139 std::vector<PrepareBWorkload> _b_workloads{};
140 std::vector<MatrixMultiplyWorkload> _mm_workloads{};
141 std::vector<IScheduler::Workload> _workloads{};
Anthony Barbierac314c22018-09-11 17:49:10 +0100142 std::string _tag{};
Georgios Pinitas1509e4b2019-01-28 10:01:50 +0000143 unsigned int _num_windows{ 1 };
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100144};
Anthony Barbier3d677cc2018-07-23 16:42:59 +0100145} // namespace arm_compute
146#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */