| /* |
| * Copyright (c) 2017-2018 ARM Limited. |
| * |
| * SPDX-License-Identifier: MIT |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to |
| * deal in the Software without restriction, including without limitation the |
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H__ |
| #define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H__ |
| |
| #include "arm_compute/core/Helpers.h" |
| #include "arm_compute/core/ITensorInfo.h" |
| #include "arm_compute/core/Utils.h" |
| |
| #include <cmath> |
| |
| namespace arm_compute |
| { |
| namespace misc |
| { |
| namespace shape_calculator |
| { |
| inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout) |
| { |
| const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); |
| const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); |
| const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); |
| |
| TensorShape output_shape(input); |
| output_shape.set(idx_w, conv_w); |
| output_shape.set(idx_h, conv_h); |
| output_shape.set(idx_c, input.x() / (conv_w * conv_h)); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm) |
| { |
| TensorShape output_shape = input.tensor_shape(); |
| permute(output_shape, perm); |
| return output_shape; |
| } |
| inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, const unsigned int num_groups = 1) |
| { |
| // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. |
| |
| ARM_COMPUTE_ERROR_ON(num_groups == 0); |
| ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); |
| ARM_COMPUTE_ERROR_ON(weights.data_layout() == DataLayout::NHWC && num_groups > 1); |
| |
| // Calculate output shape |
| TensorShape weights_reshaped{ weights.tensor_shape() }; |
| weights_reshaped.collapse(3); |
| const size_t tmp_dim = weights_reshaped[0]; |
| weights_reshaped.set(0, weights_reshaped[1] / num_groups); |
| weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0)); |
| if(weights.num_dimensions() < 5) |
| { |
| weights_reshaped.set(2, num_groups); |
| } |
| |
| return weights_reshaped; |
| } |
| inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) |
| { |
| // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height |
| ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); |
| const int interleave_width = 4 * mult_interleave4x4_height; |
| TensorShape shape_interleaved_a{ a.tensor_shape() }; |
| shape_interleaved_a.set(0, a.dimension(0) * interleave_width); |
| if(reinterpret_input_as_3d) |
| { |
| const int M = a.dimension(1) * a.dimension(2); |
| const int height = std::ceil(M / static_cast<float>(interleave_width)); |
| shape_interleaved_a.set(1, height); |
| shape_interleaved_a.remove_dimension(2); |
| } |
| else |
| { |
| shape_interleaved_a.set(1, std::ceil(a.dimension(1) / static_cast<float>(interleave_width))); |
| } |
| |
| return shape_interleaved_a; |
| } |
| inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b) |
| { |
| // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ] |
| TensorShape shape_transposed1xW_b{ b.tensor_shape() }; |
| shape_transposed1xW_b.set(0, b.dimension(1) * 16); |
| shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f)); |
| |
| return shape_transposed1xW_b; |
| } |
| inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b, int mult_transpose1xW_width = 1) |
| { |
| // Note: mult_transpose1xW_width expresses the number of chunks with size 1x(W) we want to store on the same row |
| // The transpose1xW output matrix will have the following shape: |
| // [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width |
| ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1); |
| TensorShape shape_transposed1xW_b{ b.tensor_shape() }; |
| const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width; |
| shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width); |
| shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width)))); |
| |
| return shape_transposed1xW_b; |
| } |
| inline TensorShape compute_reductionA_shape(const ITensorInfo &b) |
| { |
| TensorShape shape_vector_sum_col{ b.tensor_shape() }; |
| if(shape_vector_sum_col.num_dimensions() > 1) |
| { |
| shape_vector_sum_col.remove_dimension(1); |
| } |
| |
| return shape_vector_sum_col; |
| } |
| inline TensorShape compute_reductionB_shape(const ITensorInfo &a) |
| { |
| TensorShape shape_vector_sum_row{ a.tensor_shape() }; |
| shape_vector_sum_row.set(Window::DimX, a.dimension(1)); |
| if(a.num_dimensions() > 1) |
| { |
| shape_vector_sum_row.remove_dimension(1); |
| } |
| |
| return shape_vector_sum_row; |
| } |
| inline TensorShape compute_col2im_shape(const ITensorInfo &input, std::pair<unsigned int, unsigned int> convolved_dims) |
| { |
| TensorShape col2im_shape{ input.tensor_shape() }; |
| col2im_shape.shift_right(1); |
| col2im_shape.set(0, convolved_dims.first); |
| col2im_shape.set(1, convolved_dims.second); |
| col2im_shape.set(2, input.tensor_shape()[0]); |
| |
| return col2im_shape; |
| } |
| inline TensorShape compute_transposed_shape(const ITensorInfo &input) |
| { |
| TensorShape shape_transposed{ input.tensor_shape() }; |
| |
| shape_transposed.set(0, input.dimension(1)); |
| shape_transposed.set(1, input.dimension(0)); |
| |
| return shape_transposed; |
| } |
| inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier) |
| { |
| const TensorShape input_shape{ input.tensor_shape() }; |
| const TensorShape weights_shape{ weights.tensor_shape() }; |
| |
| const DataLayout data_layout = input.data_layout(); |
| const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); |
| const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); |
| const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); |
| |
| unsigned int output_width = 0; |
| unsigned int output_height = 0; |
| std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], |
| weights_shape[width_idx], weights_shape[height_idx], |
| conv_info); |
| |
| TensorShape output_shape{ input_shape }; |
| output_shape.set(width_idx, output_width); |
| output_shape.set(height_idx, output_height); |
| output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top, const PadStrideInfo &info) |
| { |
| TensorShape scale_out_shape(input.tensor_shape()); |
| const unsigned int out_x = input.dimension(0) + (input.dimension(0) - 1) * (sx - 1) + inner_border_right + 2 * info.pad().first; |
| const unsigned int out_y = input.dimension(1) + (input.dimension(1) - 1) * (sy - 1) + inner_border_top + 2 * info.pad().second; |
| scale_out_shape.set(0, out_x); |
| scale_out_shape.set(1, out_y); |
| |
| return scale_out_shape; |
| } |
| inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, |
| unsigned int num_groups = 1) |
| { |
| // The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ] if batch_size_on_z == true |
| // or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false |
| |
| ARM_COMPUTE_ERROR_ON(num_groups == 0); |
| ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW); |
| ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z); |
| |
| TensorShape output_shape{ input->tensor_shape() }; |
| |
| const DataLayout data_layout = input->data_layout(); |
| const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); |
| const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); |
| const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); |
| |
| std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); |
| output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT |
| output_shape.set(1, (out_dims.first * out_dims.second)); |
| if(batch_size_on_z && output_shape.num_dimensions() >= 3) |
| { |
| output_shape.remove_dimension(2); |
| } |
| else |
| { |
| output_shape.set(2, num_groups); |
| } |
| |
| return output_shape; |
| } |
| inline TensorShape compute_flatten_shape(const ITensorInfo *input) |
| { |
| // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer. |
| |
| TensorShape output_shape{ input->tensor_shape() }; |
| |
| output_shape.collapse(3); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_interleave_custom_shape(const TensorShape &input, const int x_interleave, const int y_interleave) |
| { |
| TensorShape output_shape{ input }; |
| |
| output_shape.set(0, output_shape.x() * x_interleave); |
| output_shape.set(1, std::ceil(output_shape.y() / static_cast<float>(y_interleave))); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_fully_connected_reshaped_weights_shape(const ITensorInfo *input, bool transpose_weights, bool is_batched_fc_layer, const int interleave) |
| { |
| TensorShape output_shape{ input->tensor_shape() }; |
| |
| // Transpose weights if the user hasn't done it |
| if(transpose_weights) |
| { |
| output_shape = compute_transposed_shape(*input); |
| } |
| |
| // If we run multiple batches we need 1xW transpose, too. |
| if(is_batched_fc_layer) |
| { |
| output_shape = compute_transposed_shape(input->clone()->set_tensor_shape(output_shape)); |
| output_shape = compute_interleave_custom_shape(output_shape, interleave, interleave); |
| } |
| |
| return output_shape; |
| } |
| |
| inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) |
| { |
| TensorShape tensor_shape{ input.tensor_shape() }; |
| |
| const Size2D kernel_size = winograd_info.kernel_size; |
| const Size2D output_tile_size = winograd_info.output_tile_size; |
| const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); |
| |
| tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH)); |
| tensor_shape.set(Window::DimX, input.dimension(3)); |
| tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL))); |
| tensor_shape.set(Window::DimZ, input_tile_size.area()); |
| |
| return tensor_shape; |
| } |
| inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) |
| { |
| const PadStrideInfo conv_info = winograd_info.convolution_info; |
| const Size2D kernel_size = winograd_info.kernel_size; |
| const Size2D output_tile_size = winograd_info.output_tile_size; |
| const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); |
| |
| const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); |
| const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); |
| const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); |
| |
| // Compute the number of output tiles along the x and y direction of size "output_tile_size" |
| const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), |
| kernel_size, |
| output_tile_size, |
| conv_info); |
| |
| const unsigned int width = input.tensor_shape()[idx_c]; |
| const unsigned int height = num_tiles.area(); |
| const unsigned int depth = input_tile_size.area(); |
| |
| TensorShape output_shape{ input.tensor_shape() }; |
| output_shape.set(0, width); |
| output_shape.set(1, height); |
| output_shape.set(2, depth); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) |
| { |
| const PadStrideInfo conv_info = winograd_info.convolution_info; |
| const Size2D kernel_size = winograd_info.kernel_size; |
| const Size2D input_dimensions = winograd_info.input_dimensions; |
| const DataLayout data_layout = winograd_info.output_data_layout; |
| |
| // Compute output shape |
| unsigned int output_width = 0; |
| unsigned int output_height = 0; |
| std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height, |
| kernel_size.width, kernel_size.height, conv_info); |
| |
| TensorShape tensor_shape{ input.tensor_shape() }; |
| |
| // Output dimension |
| const unsigned int out_w = output_width; |
| const unsigned int out_h = output_height; |
| const unsigned int out_c = input.dimension(0); |
| |
| tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH), out_w); |
| tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT), out_h); |
| tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL), out_c); |
| |
| return tensor_shape; |
| } |
| inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) |
| { |
| const TensorShape input_shape{ input.tensor_shape() }; |
| const TensorShape weights_shape{ weights.tensor_shape() }; |
| |
| const size_t idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); |
| const size_t idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); |
| const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); |
| |
| const unsigned int input_width = input_shape[idx_width]; |
| const unsigned int input_height = input_shape[idx_height]; |
| const unsigned int weights_width = weights_shape[idx_width]; |
| const unsigned int weights_height = weights_shape[idx_height]; |
| const unsigned int weights_out_channel = weights_shape[3]; |
| unsigned int output_width = 0; |
| unsigned int output_height = 0; |
| std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); |
| |
| TensorShape output_shape{ input_shape }; |
| output_shape.set(idx_width, output_width); |
| output_shape.set(idx_height, output_height); |
| output_shape.set(idx_channel, weights_out_channel); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_min_max_shape(const ITensorInfo *input) |
| { |
| TensorShape output_shape{ input->tensor_shape() }; |
| output_shape.set(Window::DimX, 2); |
| output_shape.remove_dimension(1); |
| output_shape.remove_dimension(1); |
| |
| return output_shape; |
| } |
| |
| inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info) |
| { |
| unsigned int pooled_w = 0; |
| unsigned int pooled_h = 0; |
| |
| TensorShape output_shape{ input.tensor_shape() }; |
| |
| const bool is_global_pooling = pool_info.is_global_pooling(); |
| const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); |
| const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); |
| const unsigned int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size().width; |
| const unsigned int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size().height; |
| |
| std::tie(pooled_w, pooled_h) = scaled_dimensions(output_shape[idx_width], |
| output_shape[idx_height], |
| pool_size_x, |
| pool_size_y, |
| pool_info.pad_stride_info()); |
| |
| output_shape.set(idx_width, pooled_w); |
| output_shape.set(idx_height, pooled_h); |
| |
| return output_shape; |
| } |
| |
| inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size) |
| { |
| TensorShape output_shape{ input->tensor_shape() }; |
| output_shape.set(1, batch_size); |
| |
| return output_shape; |
| } |
| inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) |
| { |
| ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); |
| ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); |
| |
| const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); |
| const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 1; |
| const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); |
| |
| // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third |
| // dimension of the output tensor |
| const int dim0 = is_interleaved_transposed ? reshape_info.n() : input1.dimension(0); |
| const int dim1 = is_interleaved_transposed ? reshape_info.m() / reshape_info.depth_output_gemm3d() : m / reshape_info.depth_output_gemm3d(); |
| const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; |
| const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3]; |
| |
| TensorShape output_shape{ input0.tensor_shape() }; |
| |
| output_shape.set(0, dim0); |
| output_shape.set(1, dim1); |
| output_shape.set(2, reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : dim2); |
| output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3); |
| output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1); |
| |
| return output_shape; |
| } |
| |
| template <typename T> |
| inline TensorShape extract_shape(T *data) |
| { |
| return data->info()->tensor_shape(); |
| } |
| |
| inline TensorShape extract_shape(ITensorInfo *data) |
| { |
| return data->tensor_shape(); |
| } |
| |
| inline TensorShape extract_shape(const TensorShape *data) |
| { |
| return *data; |
| } |
| |
| template <typename T> |
| inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vector) |
| { |
| TensorShape out_shape = extract_shape(inputs_vector[0]); |
| |
| size_t max_x = 0; |
| size_t max_y = 0; |
| size_t depth = 0; |
| |
| for(const auto &tensor : inputs_vector) |
| { |
| ARM_COMPUTE_ERROR_ON(tensor == nullptr); |
| const TensorShape shape = extract_shape(tensor); |
| max_x = std::max(shape.x(), max_x); |
| max_y = std::max(shape.y(), max_y); |
| depth += shape.z(); |
| } |
| |
| out_shape.set(0, max_x); |
| out_shape.set(1, max_y); |
| out_shape.set(2, depth); |
| |
| return out_shape; |
| } |
| |
| template <typename T> |
| inline TensorShape calculate_width_concatenate_shape(const std::vector<T *> &inputs_vector) |
| { |
| TensorShape out_shape = extract_shape(inputs_vector[0]); |
| |
| size_t width = 0; |
| for(const auto &tensor : inputs_vector) |
| { |
| ARM_COMPUTE_ERROR_ON(tensor == nullptr); |
| const TensorShape shape = extract_shape(tensor); |
| width += shape.x(); |
| } |
| |
| out_shape.set(0, width); |
| |
| return out_shape; |
| } |
| } // namespace shape_calculator |
| } // namespace misc |
| } // namespace arm_compute |
| #endif /* __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H__ */ |