/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <cstring>
#include "winograd.hpp"
using namespace winograd;

/** Get the output shape of a convolution. */
template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
Tensor4DShape WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_output_shape(
  const KernelShape &kernel_shape,
  const Tensor4DShape &in_shape,
  const PaddingType padding
)
{
  return Tensor4DShape {
    in_shape.n_batches,
    (padding == PADDING_SAME) ? in_shape.n_rows : in_shape.n_rows - (kernel_rows - 1),
    (padding == PADDING_SAME) ? in_shape.n_cols : in_shape.n_cols - (kernel_cols - 1),
    kernel_shape.n_output_channels,
    in_shape.ordering
  };
}
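
// Worked example (illustrative): with a 3x3 kernel, PADDING_SAME leaves the
// spatial extents unchanged, while any other padding shrinks each spatial
// dimension by (kernel size - 1); e.g. a 1x56x56xC input maps to a 1x54x54
// output over kernel_shape.n_output_channels channels, since 56 - (3 - 1) = 54.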

/* Get the memory required to transform the kernel.
 */
template <int kernel_rows, int kernel_cols,
          int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_kernel_transform_working_size(const KernelShape &shape)
{
  if (shape.ordering == HWIO)
  {
    // Kernel is already in the correct order, so no additional memory is
    // required.
    return 0;
  }
  else
  {
    // The kernel must be re-ordered into HWIO form; request enough space to
    // hold a copy of the whole tensor.
    return sizeof(TIn) * shape.size();
  }
}
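
// Worked example (illustrative): a 3x3 kernel with 64 input and 128 output
// channels held as 32-bit floats, if not already in HWIO order, needs
// 3 * 3 * 64 * 128 * sizeof(float) = 294912 bytes of scratch for the
// re-ordered copy; an HWIO-ordered kernel needs none.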

/** Get the memory required to store the kernel transformed into the
 * Winograd domain.
 */
template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_kernel_storage_size(const KernelShape &shape)
{
  return N_GEMMS * get_kernel_matrix_size(shape);
}
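
// Note (assumption): N_GEMMS, defined alongside WinogradGEMM in winograd.hpp,
// is taken here to be one GEMM per point of the Winograd input tile, i.e.
// (output_tile_rows + kernel_rows - 1) * (output_tile_cols + kernel_cols - 1);
// under that definition a 3x3 kernel with a 2x2 output tile needs
// (2 + 3 - 1)^2 = 16 GEMMs, so the kernel storage is 16 kernel matrices.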


template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_input_storage_size(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding
)
{
  return N_GEMMS * get_input_matrix_size(kernel_shape, input_shape, padding);
}


template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_output_storage_size(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding
)
{
  return N_GEMMS * get_output_matrix_size(kernel_shape, input_shape, padding);
}


/** Get the memory required to apply a Winograd operator to some input.
 */
template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_working_space_size(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding_type
)
{
  const auto output_shape = get_output_shape(kernel_shape, input_shape, padding_type);

  // Get the memory required to store the matrices
  const size_t matrix_sizes = N_GEMMS * (
    get_input_matrix_size(kernel_shape, input_shape, padding_type) +
    get_output_matrix_size(kernel_shape, input_shape, padding_type)
  );

  // Add additional space to re-order the input and output if the input tensor
  // is not in NHWC format.
  if (input_shape.ordering == NHWC)
  {
    return matrix_sizes;  // No extra space required
  }
  else  // NCHW; must re-order the input and output tensors
  {
    // We only need to re-order the input or the output at any one time, so
    // request enough memory for the larger of the two.
    const size_t extra_memory = std::max(
      sizeof(TIn) * input_shape.size(),
      sizeof(TOut) * output_shape.size()
    );
    return matrix_sizes + extra_memory;
  }
}
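
// Illustrative instance: for a 1x56x56x64 NCHW float input producing a
// 1x54x54x128 float output, the extra re-ordering scratch is
// max(56*56*64, 54*54*128) * sizeof(float) = max(200704, 373248) * 4
// = 1492992 bytes on top of the matrix storage; an NHWC input adds nothing.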


/* Get the memory required by a single "input" matrix.
 */
template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_input_matrix_size(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding_type
)
{
  return get_input_matrix_stride(kernel_shape, input_shape, padding_type) * sizeof(TGIn);
}

template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
int WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_input_matrix_stride(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding_type
)
{
  // Compute shape for the GEMM
  const auto output_shape = get_output_shape(kernel_shape, input_shape, padding_type);
  const int tile_rows = iceildiv(output_shape.n_rows, output_tile_rows);
  const int tile_cols = iceildiv(output_shape.n_cols, output_tile_cols);
  const int M = roundup(input_shape.n_batches * tile_rows * tile_cols, M_BLOCK);
  const int K = kernel_shape.n_input_channels;

  return M * K;
}
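
// Worked example (illustrative): a 54x54 output tiled with 2x2 output tiles
// gives tile_rows = tile_cols = iceildiv(54, 2) = 27; with one batch and 64
// input channels the matrix is M x K = roundup(27 * 27, M_BLOCK) x 64
// elements, i.e. roundup(729, M_BLOCK) * 64.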


/* Get the memory required by a single "output" matrix.
 */
template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_output_matrix_size(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding_type
)
{
  return get_output_matrix_stride(kernel_shape, input_shape, padding_type) * sizeof(TGOut);
}


template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
int WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_output_matrix_stride(
  const KernelShape &kernel_shape,
  const Tensor4DShape &input_shape,
  const PaddingType padding_type
)
{
  // Compute shape for the GEMM
  const auto output_shape = get_output_shape(kernel_shape, input_shape, padding_type);
  const int tile_rows = iceildiv(output_shape.n_rows, output_tile_rows);
  const int tile_cols = iceildiv(output_shape.n_cols, output_tile_cols);
  const int M = roundup(tile_rows * tile_cols, M_BLOCK);
  const int N = roundup(kernel_shape.n_output_channels, N_BLOCK);

  return input_shape.n_batches * M * N;
}
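
// Worked example (illustrative): continuing the 27x27 tile grid above with
// 128 output channels, the stride is
// n_batches * roundup(27 * 27, M_BLOCK) * roundup(128, N_BLOCK) elements.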


/* Get the memory required by a single "kernel" matrix.
 */
template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
size_t WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_kernel_matrix_size(const KernelShape &shape)
{
  return sizeof(TGIn) * get_kernel_matrix_stride(shape);
}

template <int kernel_rows, int kernel_cols, int output_tile_rows, int output_tile_cols, WinogradRoots roots>
template <typename TOut, typename TIn, typename TGIn, typename TGOut>
int WinogradGEMM<kernel_rows, kernel_cols, output_tile_rows, output_tile_cols, roots>::Convolution<TOut, TIn, TGIn, TGOut>::get_kernel_matrix_stride(const KernelShape &shape)
{
  const int K = shape.n_input_channels;
  const int N = roundup(shape.n_output_channels, N_BLOCK);
  return K * N;
}
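
// Worked example (illustrative): 64 input channels and 128 output channels
// give K * N = 64 * roundup(128, N_BLOCK) elements per GEMM; multiplying by
// N_GEMMS (as in get_kernel_storage_size) gives the full transformed kernel.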


// Instantiate required implementations
template class WinogradGEMM<2, 2, 3, 3, WinogradRoots::Integers>::Convolution<float, float, float, float>;
template class WinogradGEMM<4, 4, 3, 3, WinogradRoots::Integers>::Convolution<float, float, float, float>;

template class WinogradGEMM<1, 6, 1, 3, WinogradRoots::Integers>::Convolution<float, float, float, float>;
template class WinogradGEMM<6, 1, 3, 1, WinogradRoots::Integers>::Convolution<float, float, float, float>;

template class WinogradGEMM<2, 2, 5, 5, WinogradRoots::Integers>::Convolution<float, float, float, float>;

template class WinogradGEMM<1, 4, 1, 5, WinogradRoots::Integers>::Convolution<float, float, float, float>;
template class WinogradGEMM<4, 1, 5, 1, WinogradRoots::Integers>::Convolution<float, float, float, float>;

template class WinogradGEMM<1, 2, 1, 7, WinogradRoots::Integers>::Convolution<float, float, float, float>;
template class WinogradGEMM<2, 1, 7, 1, WinogradRoots::Integers>::Convolution<float, float, float, float>;
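
// Illustrative usage sketch (assumptions: PADDING_VALID is the non-SAME
// PaddingType, and kernel_shape/input_shape are KernelShape/Tensor4DShape
// values filled in as elsewhere in this file; this is not part of the
// library's public contract):
//
//   using Conv = WinogradGEMM<2, 2, 3, 3, WinogradRoots::Integers>
//                    ::Convolution<float, float, float, float>;
//   const size_t kernel_bytes  = Conv::get_kernel_storage_size(kernel_shape);
//   const size_t input_bytes   = Conv::get_input_storage_size(kernel_shape, input_shape, PADDING_VALID);
//   const size_t output_bytes  = Conv::get_output_storage_size(kernel_shape, input_shape, PADDING_VALID);
//   const size_t scratch_bytes = Conv::get_working_space_size(kernel_shape, input_shape, PADDING_VALID);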