blob: 249e5757f02ca7f1e0a34f7da585150d5ed16296 [file] [log] [blame]
/*
* Copyright (c) 2017 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
/** Re-order a weight tensor from [Output feature map x Input feature map x
* Height x Width] format to [Height x Width x Input feature map x Output
* feature map] format.
*/
template <typename T>
inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
const T* const in, // Input in [Output x Input x Height x Width] form
T* const out, // Output in [Height x Width x Input x Output] form
const int n_output_feature_maps,
const int n_input_feature_maps,
const int n_rows,
const int n_cols,
int in_output_feature_map_stride=0,
int in_input_feature_map_stride=0,
int in_row_stride=0,
int out_row_stride=0,
int out_col_stride=0,
int out_input_feature_map_stride=0
);
/** Re-order a weight tensor from [Height x Width x Input feature map x Output
* feature map] format to [Output feature map x Input feature map x Height x
* Width] format.
*/
template <typename T>
inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
const T* const in, // Input in [Height x Width x Input x Output] form
T* const out, // Output in [Output x Input x Height x Width] form
const int n_rows,
const int n_cols,
const int n_input_feature_maps,
const int n_output_feature_maps,
int in_row_stride=0,
int in_col_stride=0,
int in_input_feature_map_stride=0,
int out_output_feature_map_stride=0,
int out_input_feature_map_stride=0,
int out_row_stride=0
);
/* Re-order a tensor from NCHW format to NHWC.
*/
template <typename T>
inline void nchw_to_nhwc(
const T* const in,
T* const out,
const int n_batches,
const int n_channels,
const int n_rows,
const int n_cols,
int in_batch_stride=0,
int in_channel_stride=0,
int in_row_stride=0,
int out_batch_stride=0,
int out_row_stride=0,
int out_col_stride=0
)
{
// Fill in the stride values
in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
in_channel_stride = (in_channel_stride) ? in_channel_stride
: n_rows * in_row_stride;
in_batch_stride = (in_batch_stride) ? in_batch_stride
: n_channels * in_channel_stride;
out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
out_batch_stride = (out_batch_stride) ? out_batch_stride
: n_rows * out_row_stride;
// Perform the re-ordering
for (int n = 0; n < n_batches; n++)
{
const T* const in_batch = in + n*in_batch_stride;
T* const out_batch = out + n*out_batch_stride;
for (int i = 0; i < n_rows; i++)
{
const T* const in_row = in_batch + i*in_row_stride;
T* const out_row = out_batch + i*out_row_stride;
for (int j = 0; j < n_cols; j++)
{
const T* const in_col = in_row + j;
T* const out_col = out_row + j*out_col_stride;
for (int c = 0; c < n_channels; c++)
{
const T* const in_channel = in_col + c*in_channel_stride;
out_col[c] = *(in_channel);
}
}
}
}
}
/* Re-order a tensor from NHWC format to NCHW.
*/
template <typename T>
inline void nhwc_to_nchw(
const T* const in, // Input data in NHWC form
T* const out, // Output data in NCHW form
const int n_batches,
const int n_rows,
const int n_cols,
const int n_channels,
int in_batch_stride=0,
int in_row_stride=0,
int in_col_stride=0,
int out_batch_stride=0,
int out_channel_stride=0,
int out_row_stride=0
)
{
// Fill in stride values
in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
in_batch_stride = (in_batch_stride) ? in_batch_stride
: n_rows * in_row_stride;
out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
out_channel_stride = (out_channel_stride) ? out_channel_stride
: n_rows * out_row_stride;
out_batch_stride = (out_batch_stride) ? out_batch_stride
: n_channels * out_channel_stride;
// Perform the re-ordering
// For every batch
for (int n = 0; n < n_batches; n++)
{
const T* const in_batch = in + n*in_batch_stride;
T* const out_batch = out + n*out_batch_stride;
// For every row
for (int i = 0; i < n_rows; i++)
{
const T* const in_i = in_batch + i*in_row_stride;
T* const out_i = out_batch + i*out_row_stride;
// For every column
for (int j = 0; j < n_cols; j++)
{
const T* const in_j = in_i + j*in_col_stride;
T* const out_j = out_i + j;
// For every channel
for (int c = 0; c < n_channels; c++)
{
const T* const in_channel = in_j + c;
T* const out_channel = out_j + c*out_channel_stride;
*(out_channel) = *(in_channel);
}
}
}
}
}
/*****************************************************************************/
/* Generic weight re-order implementation.
*/
template <typename T>
inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
const T* const in, // Input in [Output x Input x Height x Width] form
T* const out, // Output in [Height x Width x Input x Output] form
const int n_output_feature_maps,
const int n_input_feature_maps,
const int n_rows,
const int n_cols,
int in_output_feature_map_stride,
int in_input_feature_map_stride,
int in_row_stride,
int out_row_stride,
int out_col_stride,
int out_input_feature_map_stride
)
{
// Fill in stride values
in_row_stride = (in_row_stride)
? in_row_stride
: n_cols;
in_input_feature_map_stride = (in_input_feature_map_stride)
? in_input_feature_map_stride
: n_rows * in_row_stride;
in_output_feature_map_stride = (in_output_feature_map_stride)
? in_output_feature_map_stride
: n_input_feature_maps * in_input_feature_map_stride;
out_input_feature_map_stride = (out_input_feature_map_stride)
? out_input_feature_map_stride
: n_output_feature_maps;
out_col_stride = (out_col_stride)
? out_col_stride
: n_input_feature_maps * out_input_feature_map_stride;
out_row_stride = (out_row_stride)
? out_row_stride
: n_cols * out_col_stride;
// Perform the re-ordering
for (int i = 0; i < n_rows; i++)
{
const T* const in_row = in + i * in_row_stride;
T* out_row = out + i * out_row_stride;
for (int j = 0; j < n_cols; j++)
{
const T* const in_col = in_row + j;
T* const out_col = out_row + j * out_col_stride;
for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
{
const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
{
const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride;
T* const out_ofm = out_ifm + ofm;
*(out_ofm) = *(in_ofm);
}
}
}
}
}
/*****************************************************************************/
/* Generic weight re-order implementation.
*/
template <typename T>
inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
const T* const in, // Input in [Height x Width x Input x Output] form
T* const out, // Output in [Output x Input x Height x Width] form
const int n_rows,
const int n_cols,
const int n_input_feature_maps,
const int n_output_feature_maps,
int in_row_stride,
int in_col_stride,
int in_input_feature_map_stride,
int out_output_feature_map_stride,
int out_input_feature_map_stride,
int out_row_stride
)
{
// Fill in the stride values
in_input_feature_map_stride = (in_input_feature_map_stride)
? in_input_feature_map_stride
: n_output_feature_maps;
in_col_stride = (in_col_stride)
? in_col_stride
: n_input_feature_maps * in_input_feature_map_stride;
in_row_stride = (in_row_stride)
? in_row_stride
: n_cols * in_col_stride;
out_row_stride = (out_row_stride)
? out_row_stride
: n_cols;
out_input_feature_map_stride = (out_input_feature_map_stride)
? out_input_feature_map_stride
: n_rows * out_row_stride;
out_output_feature_map_stride = (out_output_feature_map_stride)
? out_output_feature_map_stride
: n_input_feature_maps * out_input_feature_map_stride;
// Perform the re-ordering
for (int i = 0; i < n_rows; i++)
{
const T* const in_row = in + i * in_row_stride;
T* const out_row = out + i * out_row_stride;
for (int j = 0; j < n_cols; j++)
{
const T* const in_col = in_row + j * in_col_stride;
T* const out_col = out_row + j;
for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
{
const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
{
const T* const in_ofm = in_ifm + ofm;
T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride;
*(out_ofm) = *(in_ofm);
}
}
}
}
}