Apply clang-format on repository
Code is formatted as per a revised clang-format configuration
file (not part of this delivery). Version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, SConscript, ...)
And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow-up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
diff --git a/compute_kernel_writer/src/Error.cpp b/compute_kernel_writer/src/Error.cpp
index c5dae2e..e1e4bff 100644
--- a/compute_kernel_writer/src/Error.cpp
+++ b/compute_kernel_writer/src/Error.cpp
@@ -28,8 +28,8 @@
namespace ckw
{
-std::string create_error_msg(const std::string &file, const std::string &func, const std::string &line,
- const std::string &msg)
+std::string
+create_error_msg(const std::string &file, const std::string &func, const std::string &line, const std::string &msg)
{
std::string err;
err += "[COMPUTE_KERNEL_WRITER][ERROR]:";
@@ -38,4 +38,4 @@
err += " " + msg;
return err;
}
-} // namespace ckw
\ No newline at end of file
+} // namespace ckw
diff --git a/compute_kernel_writer/src/Helpers.cpp b/compute_kernel_writer/src/Helpers.cpp
index 799f79a..82d4c4e 100644
--- a/compute_kernel_writer/src/Helpers.cpp
+++ b/compute_kernel_writer/src/Helpers.cpp
@@ -22,15 +22,15 @@
* SOFTWARE.
*/
-#include "ckw/Error.h"
-
#include "src/Helpers.h"
+#include "ckw/Error.h"
+
namespace ckw
{
std::string dec_to_hex_as_string(int32_t dec)
{
- switch(dec)
+ switch (dec)
{
case 0:
case 1:
diff --git a/compute_kernel_writer/src/ITensorArgument.h b/compute_kernel_writer/src/ITensorArgument.h
index 838bd40..ece45a4 100644
--- a/compute_kernel_writer/src/ITensorArgument.h
+++ b/compute_kernel_writer/src/ITensorArgument.h
@@ -28,6 +28,7 @@
#include "ckw/TensorInfo.h"
#include "ckw/types/TensorComponentType.h"
#include "ckw/types/TensorStorageType.h"
+
#include "src/ITile.h"
#include <string>
@@ -41,8 +42,8 @@
/** Tensor storage variable */
struct TensorStorageVariable
{
- std::string val{ "" }; /** Tensor storage as a string */
- TensorStorageType type{ TensorStorageType::Unknown }; /** Tensor storage type */
+ std::string val{""}; /** Tensor storage as a string */
+ TensorStorageType type{TensorStorageType::Unknown}; /** Tensor storage type */
};
/** Tensor argument base class.
@@ -83,8 +84,8 @@
}
protected:
- TensorInfo _info{}; // Tensor info
- std::string _basename{ "" }; // Tensor name
+ TensorInfo _info{}; // Tensor info
+ std::string _basename{""}; // Tensor name
};
/** Tensor component argument base class */
diff --git a/compute_kernel_writer/src/ITensorComponent.h b/compute_kernel_writer/src/ITensorComponent.h
index e2775b6..f9c9d8f 100644
--- a/compute_kernel_writer/src/ITensorComponent.h
+++ b/compute_kernel_writer/src/ITensorComponent.h
@@ -26,6 +26,7 @@
#define CKW_SRC_ITENSORCOMPONENT_H
#include "ckw/types/TensorComponentType.h"
+
#include "src/ITile.h"
namespace ckw
diff --git a/compute_kernel_writer/src/ITile.h b/compute_kernel_writer/src/ITile.h
index 73b7315..8eaac5a 100644
--- a/compute_kernel_writer/src/ITile.h
+++ b/compute_kernel_writer/src/ITile.h
@@ -37,15 +37,15 @@
/** Tile descriptor which reports the underlying datatype and vector length */
struct TileVariableDescriptor
{
- DataType dt{ DataType::Unknown }; /** Data type */
- int32_t len{ 1 }; /** Number of elements in a single variable. For example, 1 for scalar */
+ DataType dt{DataType::Unknown}; /** Data type */
+ int32_t len{1}; /** Number of elements in a single variable. For example, 1 for scalar */
};
/** Tile variable */
struct TileVariable
{
- std::string str{ "" }; /** Tile variable as a string */
- TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */
+ std::string str{""}; /** Tile variable as a string */
+ TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */
};
/** Interface to provide support for scalar access for a Tile.
diff --git a/compute_kernel_writer/src/Kernel.cpp b/compute_kernel_writer/src/Kernel.cpp
index bfb0f46..12389b3 100644
--- a/compute_kernel_writer/src/Kernel.cpp
+++ b/compute_kernel_writer/src/Kernel.cpp
@@ -23,6 +23,7 @@
*/
#include "ckw/Kernel.h"
+
#include "ckw/types/TargetLanguage.h"
namespace ckw
diff --git a/compute_kernel_writer/src/KernelArgument.cpp b/compute_kernel_writer/src/KernelArgument.cpp
index a31ca17..a640d36 100644
--- a/compute_kernel_writer/src/KernelArgument.cpp
+++ b/compute_kernel_writer/src/KernelArgument.cpp
@@ -23,6 +23,7 @@
*/
#include "ckw/KernelArgument.h"
+
#include "ckw/Error.h"
namespace ckw
diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp
index 0bea120..a478231 100644
--- a/compute_kernel_writer/src/KernelWriter.cpp
+++ b/compute_kernel_writer/src/KernelWriter.cpp
@@ -23,14 +23,16 @@
*/
#include "ckw/KernelWriter.h"
+
#include "ckw/Error.h"
#include "ckw/TileOperand.h"
#include "ckw/types/TargetArchitecture.h"
#include "ckw/types/TargetLanguage.h"
-#include "src/TileView.h"
+
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/TileView.h"
#include <tuple>
@@ -42,7 +44,7 @@
std::unique_ptr<KernelWriter> KernelWriter::create_instance(TargetArchitecture architecture, TargetLanguage language)
{
CKW_UNUSED(architecture);
- switch(language)
+ switch (language)
{
case TargetLanguage::OpenCL:
// Currently this is the oldest and the only supported GPU architecture.
@@ -95,7 +97,7 @@
std::tuple<ITile &, TileArea> KernelWriter::get_tile(const TileOperand &operand)
{
- return { *operand._tile, { operand._row_start, operand._row_end, operand._col_start, operand._col_end } };
+ return {*operand._tile, {operand._row_start, operand._row_end, operand._col_start, operand._col_end}};
}
TensorOperand KernelWriter::create_tensor_operand(ITensor &tensor)
diff --git a/compute_kernel_writer/src/Tensor3dMapper.cpp b/compute_kernel_writer/src/Tensor3dMapper.cpp
index 7384b92..acef641 100644
--- a/compute_kernel_writer/src/Tensor3dMapper.cpp
+++ b/compute_kernel_writer/src/Tensor3dMapper.cpp
@@ -26,19 +26,19 @@
#include "ckw/Error.h"
#include "ckw/types/TensorSamplerTypes.h"
+
#include "src/ITensor.h"
#include "src/ITile.h"
namespace ckw
{
-Tensor3dMapper::Tensor3dMapper(ITensor *tensor, TensorSamplerFormat format)
- : _tensor(tensor), _format(format)
+Tensor3dMapper::Tensor3dMapper(ITensor *tensor, TensorSamplerFormat format) : _tensor(tensor), _format(format)
{
}
TileVariable Tensor3dMapper::dim_x() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -51,7 +51,7 @@
TileVariable Tensor3dMapper::dim_y() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
return _tensor->component(TensorComponentType::Dim1xDim2).scalar(0, 0);
@@ -67,10 +67,10 @@
{
TileVariable dim_one;
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
- dim_one = _tensor->component(TensorComponentType::Dim3).scalar(0, 0);
+ dim_one = _tensor->component(TensorComponentType::Dim3).scalar(0, 0);
dim_one.str = "1";
return dim_one;
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -85,7 +85,7 @@
{
TileVariable dim_one;
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -98,7 +98,7 @@
TileVariable Tensor3dMapper::stride_x() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -111,7 +111,7 @@
TileVariable Tensor3dMapper::stride_y() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -126,10 +126,10 @@
{
TileVariable stride_zero;
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
- stride_zero = _tensor->component(TensorComponentType::Stride3).scalar(0, 0);
+ stride_zero = _tensor->component(TensorComponentType::Stride3).scalar(0, 0);
stride_zero.str = "0";
return stride_zero;
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -142,7 +142,7 @@
TileVariable Tensor3dMapper::stride_batch() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -152,4 +152,4 @@
return _tensor->component(TensorComponentType::Unknown).scalar(0, 0);
}
}
-} // namespace ckw
\ No newline at end of file
+} // namespace ckw
diff --git a/compute_kernel_writer/src/Tensor3dMapper.h b/compute_kernel_writer/src/Tensor3dMapper.h
index fa68ac2..e94b595 100644
--- a/compute_kernel_writer/src/Tensor3dMapper.h
+++ b/compute_kernel_writer/src/Tensor3dMapper.h
@@ -74,8 +74,8 @@
TileVariable stride_batch() const;
private:
- ITensor *_tensor;
- TensorSamplerFormat _format;
+ ITensor *_tensor;
+ TensorSamplerFormat _format;
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp
index 5ad24c6..bf11d0d 100644
--- a/compute_kernel_writer/src/TensorOperand.cpp
+++ b/compute_kernel_writer/src/TensorOperand.cpp
@@ -23,13 +23,13 @@
*/
#include "ckw/TensorOperand.h"
+
#include "src/ITensor.h"
namespace ckw
{
-TensorOperand::TensorOperand(ITensor &tensor)
- : _tensor(tensor)
+TensorOperand::TensorOperand(ITensor &tensor) : _tensor(tensor)
{
}
@@ -108,4 +108,4 @@
return TileOperand(_tensor.component(TensorComponentType::OffsetFirstElement));
}
-} // namespace ckw
\ No newline at end of file
+} // namespace ckw
diff --git a/compute_kernel_writer/src/TensorSampler.cpp b/compute_kernel_writer/src/TensorSampler.cpp
index 2ee8df4..91d5af2 100644
--- a/compute_kernel_writer/src/TensorSampler.cpp
+++ b/compute_kernel_writer/src/TensorSampler.cpp
@@ -32,7 +32,11 @@
TensorSamplerAddressModeX address_mode_x,
TensorSamplerAddressModeY address_mode_y,
TensorSamplerAddressModeZ address_mode_z)
- : _storage(storage), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
+ : _storage(storage),
+ _format(format),
+ _address_mode_x(address_mode_x),
+ _address_mode_y(address_mode_y),
+ _address_mode_z(address_mode_z)
{
}
diff --git a/compute_kernel_writer/src/TensorUtils.cpp b/compute_kernel_writer/src/TensorUtils.cpp
index 2483609..17fc954 100644
--- a/compute_kernel_writer/src/TensorUtils.cpp
+++ b/compute_kernel_writer/src/TensorUtils.cpp
@@ -23,6 +23,7 @@
*/
#include "src/TensorUtils.h"
+
#include "ckw/Error.h"
#include "ckw/TensorInfo.h"
#include "ckw/types/TensorComponentType.h"
@@ -31,10 +32,10 @@
{
TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayoutComponent component)
{
- switch(layout)
+ switch (layout)
{
case TensorDataLayout::Nhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Dim0;
@@ -49,7 +50,7 @@
return TensorComponentType::Unknown;
}
case TensorDataLayout::Ndhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Dim0;
@@ -73,10 +74,10 @@
TensorComponentType get_tensor_stride(TensorDataLayout layout, TensorDataLayoutComponent component)
{
- switch(layout)
+ switch (layout)
{
case TensorDataLayout::Nhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Stride0;
@@ -91,7 +92,7 @@
return TensorComponentType::Unknown;
}
case TensorDataLayout::Ndhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Stride0;
diff --git a/compute_kernel_writer/src/TileInfo.cpp b/compute_kernel_writer/src/TileInfo.cpp
index 66d8cb1..273266e 100644
--- a/compute_kernel_writer/src/TileInfo.cpp
+++ b/compute_kernel_writer/src/TileInfo.cpp
@@ -26,18 +26,15 @@
namespace ckw
{
-TileInfo::TileInfo(DataType dt)
- : _dt(dt), _shape({ { 1, 1 } })
+TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}})
{
}
-TileInfo::TileInfo(DataType dt, int32_t w)
- : _dt(dt), _shape({ { w, 1 } })
+TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}})
{
}
-TileInfo::TileInfo(DataType dt, int32_t h, int32_t w)
- : _dt(dt), _shape({ { w, h } })
+TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}})
{
}
diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp
index 3dfa2b8..865ef85 100644
--- a/compute_kernel_writer/src/TileOperand.cpp
+++ b/compute_kernel_writer/src/TileOperand.cpp
@@ -23,7 +23,9 @@
*/
#include "ckw/TileOperand.h"
+
#include "ckw/Error.h"
+
#include "src/ITile.h"
namespace ckw
@@ -34,7 +36,8 @@
{
}
-TileOperand::TileOperand(const TileOperand &operand, int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end)
+TileOperand::TileOperand(
+ const TileOperand &operand, int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end)
: _tile(operand._tile), _row_start(row_start), _row_end(row_end), _col_start(col_start), _col_end(col_end)
{
CKW_ASSERT(row_start >= 0 && row_start < _tile->info().height());
@@ -50,7 +53,8 @@
CKW_ASSERT(col_start >= 0 && _col_start + col_start < _col_end);
CKW_ASSERT(col_end > col_start && _col_start + col_end <= _col_end);
- return TileOperand(*this, _row_start + row_start, _row_start + row_end, _col_start + col_start, _col_start + col_end);
+ return TileOperand(*this, _row_start + row_start, _row_start + row_end, _col_start + col_start,
+ _col_start + col_end);
}
TileOperand TileOperand::row(int32_t row) const
diff --git a/compute_kernel_writer/src/TileView.h b/compute_kernel_writer/src/TileView.h
index e0d034f..50ae66b 100644
--- a/compute_kernel_writer/src/TileView.h
+++ b/compute_kernel_writer/src/TileView.h
@@ -27,6 +27,7 @@
#include "ckw/Error.h"
#include "ckw/types/DataType.h"
+
#include "src/ITile.h"
#include <cstdint>
@@ -81,8 +82,7 @@
*
* @param[in] tile The tile object.
*/
- TileView(const T &tile)
- : _tile(&tile), _area(0, tile.info().height(), 0, tile.info().width())
+ TileView(const T &tile) : _tile(&tile), _area(0, tile.info().height(), 0, tile.info().width())
{
}
@@ -91,8 +91,7 @@
* @param[in] tile The tile object.
* @param[in] area The rectangular active area.
*/
- TileView(const T &tile, const TileArea &area)
- : _tile(&tile), _area(area)
+ TileView(const T &tile, const TileArea &area) : _tile(&tile), _area(area)
{
}
@@ -176,7 +175,8 @@
/** Get whether the tile view refers to the whole tile. */
bool is_full_tile() const
{
- return row_start() == 0 && row_end() == _tile->info().height() && col_start() == 0 && col_end() == _tile->info().width();
+ return row_start() == 0 && row_end() == _tile->info().height() && col_start() == 0 &&
+ col_end() == _tile->info().width();
}
private:
diff --git a/compute_kernel_writer/src/cl/CLHelpers.cpp b/compute_kernel_writer/src/cl/CLHelpers.cpp
index ff4408b..8e4a932 100644
--- a/compute_kernel_writer/src/cl/CLHelpers.cpp
+++ b/compute_kernel_writer/src/cl/CLHelpers.cpp
@@ -28,6 +28,7 @@
#include "ckw/types/DataType.h"
#include "ckw/types/Operators.h"
#include "ckw/types/TensorStorageType.h"
+
#include "src/types/DataTypeHelpers.h"
namespace ckw
@@ -35,7 +36,7 @@
bool cl_validate_vector_length(int32_t len)
{
bool valid_vector_length = true;
- if(len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16))
+ if (len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16))
{
valid_vector_length = false;
}
@@ -44,14 +45,14 @@
std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len)
{
- if(cl_validate_vector_length(len) == false)
+ if (cl_validate_vector_length(len) == false)
{
CKW_THROW_MSG("Unsupported vector length");
return "";
}
std::string res;
- switch(dt)
+ switch (dt)
{
case DataType::Fp32:
res += "float";
@@ -85,7 +86,7 @@
return "";
}
- if(len > 1)
+ if (len > 1)
{
res += std::to_string(len);
}
@@ -95,7 +96,7 @@
int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width)
{
- switch(width)
+ switch (width)
{
case 1:
return 1;
@@ -128,7 +129,7 @@
std::string cl_get_variable_storagetype_as_string(TensorStorageType storage)
{
std::string res;
- switch(storage)
+ switch (storage)
{
case TensorStorageType::BufferUint8Ptr:
res += "__global uchar*";
@@ -148,7 +149,7 @@
std::string cl_get_assignment_op_as_string(AssignmentOp op)
{
- switch(op)
+ switch (op)
{
case AssignmentOp::Increment:
return "+=";
@@ -163,34 +164,34 @@
std::tuple<bool, std::string> cl_get_unary_op(UnaryOp op)
{
- switch(op)
+ switch (op)
{
case UnaryOp::LogicalNot:
- return { false, "!" };
+ return {false, "!"};
case UnaryOp::BitwiseNot:
- return { false, "~" };
+ return {false, "~"};
case UnaryOp::Exp:
- return { true, "exp" };
+ return {true, "exp"};
case UnaryOp::Tanh:
- return { true, "tanh" };
+ return {true, "tanh"};
case UnaryOp::Sqrt:
- return { true, "sqrt" };
+ return {true, "sqrt"};
case UnaryOp::Erf:
- return { true, "erf" };
+ return {true, "erf"};
case UnaryOp::Fabs:
- return { true, "fabs" };
+ return {true, "fabs"};
case UnaryOp::Log:
- return { true, "log" };
+ return {true, "log"};
case UnaryOp::Round:
- return { true, "round" };
+ return {true, "round"};
default:
CKW_THROW_MSG("Unsupported unary operation!");
@@ -201,52 +202,52 @@
{
const auto is_float = is_data_type_float(data_type);
- switch(op)
+ switch (op)
{
case BinaryOp::Add:
- return { false, "+" };
+ return {false, "+"};
case BinaryOp::Sub:
- return { false, "-" };
+ return {false, "-"};
case BinaryOp::Mul:
- return { false, "*" };
+ return {false, "*"};
case BinaryOp::Div:
- return { false, "/" };
+ return {false, "/"};
case BinaryOp::Mod:
- return { false, "%" };
+ return {false, "%"};
case BinaryOp::Equal:
- return { false, "==" };
+ return {false, "=="};
case BinaryOp::Less:
- return { false, "<" };
+ return {false, "<"};
case BinaryOp::LessEqual:
- return { false, "<=" };
+ return {false, "<="};
case BinaryOp::Greater:
- return { false, ">" };
+ return {false, ">"};
case BinaryOp::GreaterEqual:
- return { false, ">=" };
+ return {false, ">="};
case BinaryOp::LogicalAnd:
- return { false, "&&" };
+ return {false, "&&"};
case BinaryOp::LogicalOr:
- return { false, "||" };
+ return {false, "||"};
case BinaryOp::BitwiseXOR:
- return { false, "^" };
+ return {false, "^"};
case BinaryOp::Min:
- return { true, is_float ? "fmin" : "min" };
+ return {true, is_float ? "fmin" : "min"};
case BinaryOp::Max:
- return { true, is_float ? "fmax" : "max" };
+ return {true, is_float ? "fmax" : "max"};
default:
CKW_THROW_MSG("Unsupported binary operator/function!");
@@ -255,13 +256,13 @@
std::tuple<bool, std::string> cl_get_ternary_op(TernaryOp op)
{
- switch(op)
+ switch (op)
{
case TernaryOp::Select:
- return { true, "select" };
+ return {true, "select"};
case TernaryOp::Clamp:
- return { true, "clamp" };
+ return {true, "clamp"};
default:
CKW_THROW_MSG("Unsupported ternary function!");
@@ -273,7 +274,7 @@
std::string data_type;
const int32_t w = cl_round_up_to_nearest_valid_vector_width(width);
data_type += cl_get_variable_datatype_as_string(dt, 1);
- if(w != 1)
+ if (w != 1)
{
data_type += std::to_string(w);
}
@@ -284,7 +285,7 @@
{
std::vector<int32_t> x;
- switch(vector_width)
+ switch (vector_width)
{
case 0:
break;
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index 2db9c13..62e6853 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -31,14 +31,15 @@
#include "ckw/types/DataType.h"
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TargetLanguage.h"
-#include "src/ITensorComponent.h"
-#include "src/TileView.h"
+
#include "src/cl/CLHelpers.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
#include "src/cl/helpers/CLMemoryOpBufferHelper.h"
#include "src/cl/helpers/CLMemoryOpImage2dHelper.h"
#include "src/cl/helpers/ICLMemoryOpHelper.h"
+#include "src/ITensorComponent.h"
+#include "src/TileView.h"
#include "src/types/DataTypeHelpers.h"
#include <algorithm>
@@ -63,14 +64,14 @@
// Create the list of arguments.
std::vector<KernelArgument> arguments;
- for(const auto &tensor : _tensors)
+ for (const auto &tensor : _tensors)
{
const auto tensor_id = tensor->info().id();
const auto storages = tensor->storages();
const auto components = tensor->components();
- for(const auto &storage : storages)
+ for (const auto &storage : storages)
{
code += cl_get_variable_storagetype_as_string(storage.type);
code += " ";
@@ -80,7 +81,7 @@
arguments.emplace_back(tensor_id, storage.type);
}
- for(const auto &component : components)
+ for (const auto &component : components)
{
const auto &tile = component->tile();
const auto &tile_info = tile.info();
@@ -96,7 +97,7 @@
}
}
- if(code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n')
+ if (code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n')
{
// Remove the last comma in the argument list.
code.pop_back();
@@ -127,11 +128,12 @@
const std::string src_prefix = broadcast_src_x ? "(" + data_type_str + ")" : "";
CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match.");
- CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1,
+ "Tile height must match or source is broadcasting in y dimension.");
CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
append_code(dst_view.vector(y).str, " = ", src_prefix, src_view.vector(y).str, ";\n");
}
@@ -158,13 +160,15 @@
const std::string prefix = broadcast_x ? "(" + dst_type_str + ")" : "";
CKW_ASSERT_MSG(src_view.data_type() != dst_view.data_type(), "Source and destination type must be different.");
- CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1,
+ "Tile height must match or source is broadcasting in y dimension.");
CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", src_view.vector(y).str, ");\n");
+ append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(",
+ src_view.vector(y).str, ");\n");
}
}
@@ -189,11 +193,12 @@
const auto op_suffix = op_is_func ? ")" : "";
CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match.");
- CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1,
+ "Tile height must match or source is broadcasting in y dimension.");
CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
append_code(dst_view.vector(y).str, " = ", src_prefix, op_prefix, src_view.vector(y).str, op_suffix, ";\n");
}
@@ -214,27 +219,28 @@
CKW_ASSERT_MSG(lhs_view.data_type() == rhs_view.data_type(), "LHS and RHS type must match.");
- CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, "LHS tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, "RHS tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1,
+ "LHS tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1,
+ "RHS tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, "LHS tile width must match destination or LHS is broadcasting in x dimension.");
- CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, "RHS tile width must match destination or RHS is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1,
+ "LHS tile width must match destination or LHS is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1,
+ "RHS tile width must match destination or RHS is broadcasting in x dimension.");
- if(op == BinaryOp::MatMul_Nt_T)
+ if (op == BinaryOp::MatMul_Nt_T)
{
CKW_ASSERT(is_data_type_float(data_type));
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- for(int32_t x = 0; x < dst_w; ++x)
+ for (int32_t x = 0; x < dst_w; ++x)
{
- for(int32_t k = 0; k < lhs_w; ++k)
+ for (int32_t k = 0; k < lhs_w; ++k)
{
- append_code(
- dst_view.scalar(x, y).str, " = fma(",
- lhs_view.scalar(k, y).str, ", ",
- rhs_view.scalar(k, x).str, ", ",
- dst_view.scalar(x, y).str, ");\n");
+ append_code(dst_view.scalar(x, y).str, " = fma(", lhs_view.scalar(k, y).str, ", ",
+ rhs_view.scalar(k, x).str, ", ", dst_view.scalar(x, y).str, ");\n");
}
}
}
@@ -258,14 +264,16 @@
const std::string op_suffix = op_is_func ? ");\n" : ";\n";
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix, rhs_view.vector(y).str, op_suffix);
+ append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix,
+ rhs_view.vector(y).str, op_suffix);
}
}
}
-void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third)
+void CLKernelWriter::op_ternary(
+ const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third)
{
const auto dst_view = to_cl_tile_view(dst);
const auto first_view = to_cl_tile_view(first);
@@ -297,37 +305,42 @@
CKW_ASSERT_MSG(second_view.data_type() == dst_view.data_type(), "2nd source and destination type must match.");
CKW_ASSERT_MSG(third_view.data_type() == dst_view.data_type(), "3rd source and destination type must match.");
- CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1, "1st tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1, "2nd tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1, "3rd tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1,
+ "1st tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1,
+ "2nd tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1,
+ "3rd tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(first_w == dst_w || first_w == 1, "1st tile width must match or source is broadcasting in x dimension.");
- CKW_ASSERT_MSG(second_w == dst_w || second_w == 1, "2nd tile width must match or source is broadcasting in x dimension.");
- CKW_ASSERT_MSG(third_w == dst_w || third_w == 1, "3rd tile width must match or source is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(first_w == dst_w || first_w == 1,
+ "1st tile width must match or source is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(second_w == dst_w || second_w == 1,
+ "2nd tile width must match or source is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(third_w == dst_w || third_w == 1,
+ "3rd tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- append_code(
- dst_view.vector(y).str, " = ", op_name, "(",
- first_prefix, first_view.vector(y).str, ", ",
- second_prefix, second_view.vector(y).str, ", ",
- third_prefix, third_view.vector(y).str, ");\n");
+ append_code(dst_view.vector(y).str, " = ", op_name, "(", first_prefix, first_view.vector(y).str, ", ",
+ second_prefix, second_view.vector(y).str, ", ", third_prefix, third_view.vector(y).str, ");\n");
}
}
-void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if)
+void CLKernelWriter::op_if_generic(
+ const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if)
{
const auto lhs_view = to_cl_tile_view(lhs);
const auto rhs_view = to_cl_tile_view(rhs);
const auto op_name = std::get<1>(cl_get_binary_op(op, lhs_view.data_type()));
- CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal || op == BinaryOp::GreaterEqual || op == BinaryOp::Greater);
+ CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal ||
+ op == BinaryOp::GreaterEqual || op == BinaryOp::Greater);
CKW_ASSERT(lhs_view.is_scalar());
CKW_ASSERT(rhs_view.is_scalar());
- if(is_else_if)
+ if (is_else_if)
{
append_code("else ");
}
@@ -337,12 +350,18 @@
append_code("}\n");
}
-void CLKernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body)
+void CLKernelWriter::op_if(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body)
{
op_if_generic(lhs, op, rhs, body, false /* is_else_if */);
}
-void CLKernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body)
+void CLKernelWriter::op_else_if(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body)
{
op_if_generic(lhs, op, rhs, body, true /* is_else_if */);
}
@@ -354,10 +373,13 @@
append_code("}\n");
}
-void CLKernelWriter::op_for_loop(
- const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value,
- const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value,
- const std::function<void()> &body)
+void CLKernelWriter::op_for_loop(const TileOperand &var,
+ BinaryOp cond_op,
+ const TileOperand &cond_value,
+ const TileOperand &update_var,
+ AssignmentOp update_op,
+ const TileOperand &update_value,
+ const std::function<void()> &body)
{
const auto var_view = to_cl_tile_view(var);
const auto cond_value_view = to_cl_tile_view(cond_value);
@@ -373,11 +395,12 @@
CKW_ASSERT(update_var_view.data_type() == update_value_view.data_type());
const auto cond_op_name = std::get<1>(cl_get_binary_op(cond_op, var_view.data_type()));
- CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal || cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater);
+ CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal ||
+ cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater);
- append_code(
- "for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ",
- update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ", update_value_view.scalar(0, 0).str, ")\n{\n");
+ append_code("for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ",
+ update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ",
+ update_value_view.scalar(0, 0).str, ")\n{\n");
write_body(body);
append_code("}\n");
}
@@ -404,7 +427,7 @@
std::string format_code;
std::string args_code;
- for(auto &op : operands)
+ for (auto &op : operands)
{
const auto tile_view = to_cl_tile_view(op);
@@ -416,12 +439,12 @@
// Construct the format specifier to print out one row of the tile.
std::string row_format("%");
- if(width > 1)
+ if (width > 1)
{
row_format += "v" + std::to_string(width);
}
- switch(data_type)
+ switch (data_type)
{
case DataType::Fp32:
row_format += "hlg";
@@ -452,7 +475,7 @@
CKW_THROW_MSG("Unsupported data type!");
}
- if(width > 1)
+ if (width > 1)
{
row_format = "[" + row_format + "]";
}
@@ -460,14 +483,14 @@
// Construct the format specifier for the printf statement.
format_code += name + " = ";
- if(height == 1)
+ if (height == 1)
{
format_code += row_format;
}
else
{
format_code += "[" + row_format;
- for(int32_t row = 1; row < height; ++row)
+ for (int32_t row = 1; row < height; ++row)
{
format_code += ", " + row_format;
}
@@ -477,7 +500,7 @@
format_code += "\\n";
// Construct the variable arguments for the printf statement.
- for(int32_t row = 0; row < height; ++row)
+ for (int32_t row = 0; row < height; ++row)
{
args_code += ", " + tile_view.vector(row).str;
}
@@ -527,19 +550,14 @@
const int32_t width = tile_info.width();
const DataType data_type = tile_info.data_type();
- CKW_ASSERT_MSG(
- std::find_if(
- _tiles.begin(), _tiles.end(),
- [=](const std::unique_ptr<CLTile> &e)
- {
- return e->name() == fullname;
- })
- == _tiles.end(),
- "There is already a tile with name: " + fullname);
+ CKW_ASSERT_MSG(std::find_if(_tiles.begin(), _tiles.end(),
+ [=](const std::unique_ptr<CLTile> &e)
+ { return e->name() == fullname; }) == _tiles.end(),
+ "There is already a tile with name: " + fullname);
auto tile = std::make_unique<CLTile>(fullname, tile_info);
- for(int32_t row = 0; row < height; ++row)
+ for (int32_t row = 0; row < height; ++row)
{
const std::string cl_type = cl_get_variable_datatype_as_string(data_type, width);
append_code(cl_type, " ", tile->vector(row).str, ";\n");
@@ -578,40 +596,40 @@
{
bool found = false;
- for(const auto &t : _tiles)
+ for (const auto &t : _tiles)
{
- if(&tile == t.get())
+ if (&tile == t.get())
{
found = true;
break;
}
}
- for(const auto &t : _constant_tiles)
+ for (const auto &t : _constant_tiles)
{
- if(&tile == t.get())
+ if (&tile == t.get())
{
found = true;
break;
}
}
- if(!found)
+ if (!found)
{
- for(const auto &t : _tensors)
+ for (const auto &t : _tensors)
{
const auto components = t->components();
- for(const auto component : components)
+ for (const auto component : components)
{
- if(&tile == &component->tile())
+ if (&tile == &component->tile())
{
found = true;
break;
}
}
- if(found)
+ if (found)
{
break;
}
@@ -622,66 +640,106 @@
}
#endif // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED
- return { static_cast<CLTile &>(tile), area };
+ return {static_cast<CLTile &>(tile), area};
}
-void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_load(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch)
{
- const CLTile dilation_x({ { "1" } }, DataType::Int32);
- const CLTile dilation_y({ { "1" } }, DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y)
+void CLKernelWriter::op_load_dilated(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y)
{
const auto dil_x_view = to_cl_tile_view(dilation_x);
const auto dil_y_view = to_cl_tile_view(dilation_y);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_store(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch)
{
- const CLTile dilation_x({ { "1" } }, DataType::Int32);
- const CLTile dilation_y({ { "1" } }, DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y)
+void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y)
{
const auto dil_x_view = to_cl_tile_view(dilation_x);
const auto dil_y_view = to_cl_tile_view(dilation_y);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_load_indirect(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch)
{
- const CLTile dilation_x({ { "1" } }, DataType::Int32);
- const CLTile dilation_y({ { "1" } }, DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+ true /* indirect buffer */);
}
-void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer)
+void CLKernelWriter::op_load_store(MemoryOperation op,
+ const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileView<CLTile> &dilation_x,
+ const TileView<CLTile> &dilation_y,
+ bool indirect_buffer)
{
CKW_UNUSED(dilation_x);
CKW_ASSERT(dilation_x.is_scalar());
CKW_ASSERT(dilation_y.is_scalar());
CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
- if(indirect_buffer)
+ if (indirect_buffer)
{
CKW_ASSERT(dilation_y.scalar(0, 0).str == "((int)(1))" && dilation_x.scalar(0, 0).str == "((int)(1))");
}
@@ -689,7 +747,7 @@
ITensor &tensor = get_tensor(tensor_op);
std::unique_ptr<ICLMemoryOpHelper> helper;
- switch(sampler.storage())
+ switch (sampler.storage())
{
case TensorStorageType::BufferUint8Ptr:
helper = std::make_unique<CLMemoryOpBufferHelper>(this, &tensor, &sampler, op);
@@ -717,13 +775,13 @@
helper->initialize(&tile, &x_tile, &z_tile, &batch_tile);
- for(int row = 0; row < tile.info().height(); ++row)
+ for (int row = 0; row < tile.info().height(); ++row)
{
- if(!indirect_buffer)
+ if (!indirect_buffer)
{
std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
- if(dilation_y.scalar(0, 0).str != "((int)(1))")
+ if (dilation_y.scalar(0, 0).str != "((int)(1))")
{
coord_y += " * " + dilation_y.scalar(0, 0).str;
}
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.h b/compute_kernel_writer/src/cl/CLKernelWriter.h
index d7cf24d..6485bae 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.h
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.h
@@ -26,6 +26,7 @@
#define CKW_SRC_CL_CLKERNELWRITER_H
#include "ckw/KernelWriter.h"
+
#include "src/TileView.h"
#include <memory>
@@ -73,7 +74,11 @@
void op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) override;
- void op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) override;
+ void op_ternary(const TileOperand &dst,
+ TernaryOp op,
+ const TileOperand &first,
+ const TileOperand &second,
+ const TileOperand &third) override;
// =============================================================================================
// Flow control
@@ -81,14 +86,18 @@
void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
- void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
+ void
+ op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
void op_else(const std::function<void()> &body) override;
- void op_for_loop(
- const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value,
- const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value,
- const std::function<void()> &body) override;
+ void op_for_loop(const TileOperand &var,
+ BinaryOp cond_op,
+ const TileOperand &cond_value,
+ const TileOperand &update_var,
+ AssignmentOp update_op,
+ const TileOperand &update_value,
+ const std::function<void()> &body) override;
void op_return() override;
@@ -132,26 +141,49 @@
// Memory Operations
// =============================================================================================
- void op_load(
- const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
+ void op_load(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) override;
- void op_load_dilated(
- const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y) override;
+ void op_load_dilated(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y) override;
- void op_store(
- const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
+ void op_store(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) override;
- void op_store_dilated(
- const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y) override;
+ void op_store_dilated(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y) override;
- void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
+ void op_load_indirect(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) override;
protected:
/** Return a tile view containing a reference to @ref CLTile object and the active area.
@@ -181,9 +213,17 @@
// For helper functions
private:
/** Helper method to consolidate all load/store logic in this class */
- void op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer);
+ void op_load_store(MemoryOperation op,
+ const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileView<CLTile> &dilation_x,
+ const TileView<CLTile> &dilation_y,
+ bool indirect_buffer);
/** This function is the generic function to write both `if` and `else if` blocks.
*
@@ -195,7 +235,11 @@
* @param[in] body The function that writes the body of the else-if block.
* @param[in] is_else_if True if this is an `else if` block, otherwise this is an `if` block.
*/
- void op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if);
+ void op_if_generic(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body,
+ bool is_else_if);
// For attributes
private:
diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.cpp b/compute_kernel_writer/src/cl/CLTensorArgument.cpp
index 7d4dc95..e53de28 100644
--- a/compute_kernel_writer/src/cl/CLTensorArgument.cpp
+++ b/compute_kernel_writer/src/cl/CLTensorArgument.cpp
@@ -23,11 +23,13 @@
*/
#include "src/cl/CLTensorArgument.h"
+
#include "ckw/Error.h"
-#include "src/ITensorArgument.h"
-#include "src/ITensorComponent.h"
+
#include "src/cl/CLHelpers.h"
#include "src/cl/CLTensorComponent.h"
+#include "src/ITensorArgument.h"
+#include "src/ITensorComponent.h"
#include "src/types/TensorComponentType.h"
#include <algorithm>
@@ -48,25 +50,23 @@
{
// Return the component if it has already been created.
{
- const auto it = std::find_if(
- _components_used.begin(), _components_used.end(),
- [=](const std::unique_ptr<CLTensorComponent> &item)
- {
- return item->component_type() == x;
- });
+ const auto it =
+ std::find_if(_components_used.begin(), _components_used.end(),
+ [=](const std::unique_ptr<CLTensorComponent> &item) { return item->component_type() == x; });
- if(it != _components_used.end())
+ if (it != _components_used.end())
{
return **it;
}
}
- if(_return_dims_by_value)
+ if (_return_dims_by_value)
{
uint32_t component_type = static_cast<uint32_t>(x);
- const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0;
- const bool is_folded_dimensions = (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0;
+ const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0;
+ const bool is_folded_dimensions =
+ (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0;
constexpr auto bitmask_all = static_cast<uint32_t>(TensorComponentIndexBitmask::All);
constexpr auto bitmask_index_0 = static_cast<uint32_t>(TensorComponentIndexBitmask::Index0);
@@ -83,16 +83,16 @@
CKW_ASSERT(bitmask_index_2 == bitmask_index_3 >> 4);
// If we have a dimension or folded dimensions, we can return the corresponding value if it is not dynamic (not equal to -1)
- if(is_dimension == true || is_folded_dimensions == true)
+ if (is_dimension == true || is_folded_dimensions == true)
{
component_type = component_type & bitmask_all;
int32_t idx = 1;
- for(int32_t i = 0; i < tensor_component_index_max_count; ++i)
+ for (int32_t i = 0; i < tensor_component_index_max_count; ++i)
{
uint32_t dim_idx = component_type & bitmask_index_0;
- if(dim_idx == 0)
+ if (dim_idx == 0)
{
// Stop at the first nibble containing 0
break;
@@ -104,7 +104,7 @@
// Get the dimension value
const int32_t dim_val = _info.shape()[dim_idx];
- if(dim_val == kDynamicTensorDimensionValue)
+ if (dim_val == kDynamicTensorDimensionValue)
{
// We cannot return the dimension by value if it is dynamic.
// Therefore, force the idx variable to kDynamicTensorDimensionValue and break the loop.
@@ -118,7 +118,7 @@
component_type >>= 4;
}
- if(idx != kDynamicTensorDimensionValue)
+ if (idx != kDynamicTensorDimensionValue)
{
_components_used.emplace_back(std::make_unique<CLTensorComponent>(*this, x, idx));
@@ -141,14 +141,10 @@
{
// Return the storage if it has already been created.
{
- const auto it = std::find_if(
- _storages_used.begin(), _storages_used.end(),
- [=](const TensorStorageVariable &item)
- {
- return item.type == x;
- });
+ const auto it = std::find_if(_storages_used.begin(), _storages_used.end(),
+ [=](const TensorStorageVariable &item) { return item.type == x; });
- if(it != _storages_used.end())
+ if (it != _storages_used.end())
{
return *it;
}
@@ -167,7 +163,7 @@
{
std::string var_name = _basename;
- switch(x)
+ switch (x)
{
case TensorStorageType::BufferUint8Ptr:
var_name += "_ptr";
@@ -198,9 +194,9 @@
{
std::vector<const ITensorComponent *> components;
- for(const auto &component : _components_used)
+ for (const auto &component : _components_used)
{
- if(component->is_assignable())
+ if (component->is_assignable())
{
components.push_back(component.get());
}
diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.h b/compute_kernel_writer/src/cl/CLTensorArgument.h
index 4cbbee2..35df514 100644
--- a/compute_kernel_writer/src/cl/CLTensorArgument.h
+++ b/compute_kernel_writer/src/cl/CLTensorArgument.h
@@ -26,7 +26,9 @@
#include "ckw/types/TensorComponentType.h"
#include "ckw/types/TensorStorageType.h"
+
#include "src/ITensor.h"
+
#include <memory>
#include <string>
#include <vector>
@@ -67,7 +69,7 @@
* unlike @ref CLTensorComponent::component which is for the public API and only returns
* a reference to a generic @ref ITile object.
*/
- CLTensorComponent& cl_component(TensorComponentType component_type);
+ CLTensorComponent &cl_component(TensorComponentType component_type);
// Inherited method overridden
TensorStorageVariable &storage(TensorStorageType x) override;
@@ -78,7 +80,7 @@
private:
std::string create_storage_name(TensorStorageType x) const;
- bool _return_dims_by_value{ false };
+ bool _return_dims_by_value{false};
std::vector<TensorStorageVariable> _storages_used{};
std::vector<std::unique_ptr<CLTensorComponent>> _components_used{};
};
diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.cpp b/compute_kernel_writer/src/cl/CLTensorComponent.cpp
index c29b307..dbe2036 100644
--- a/compute_kernel_writer/src/cl/CLTensorComponent.cpp
+++ b/compute_kernel_writer/src/cl/CLTensorComponent.cpp
@@ -23,8 +23,10 @@
*/
#include "src/cl/CLTensorComponent.h"
+
#include "ckw/Error.h"
#include "ckw/types/TensorComponentType.h"
+
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
@@ -38,7 +40,7 @@
{
std::string var_name(name);
- switch(x)
+ switch (x)
{
case TensorComponentType::OffsetFirstElement:
var_name += "_offset_first_element";
@@ -93,12 +95,13 @@
} // namespace
CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type)
- : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)), _component_type(component_type)
+ : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)),
+ _component_type(component_type)
{
}
CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type, int32_t value)
- : CLTile({ { std::to_string(value) } }, DataType::Int32), _component_type(component_type)
+ : CLTile({{std::to_string(value)}}, DataType::Int32), _component_type(component_type)
{
CKW_UNUSED(tensor);
}
diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.h b/compute_kernel_writer/src/cl/CLTensorComponent.h
index 42a4266..731597e 100644
--- a/compute_kernel_writer/src/cl/CLTensorComponent.h
+++ b/compute_kernel_writer/src/cl/CLTensorComponent.h
@@ -26,8 +26,9 @@
#define CKW_SRC_CL_CLTENSORCOMPONENT_H
#include "ckw/types/TensorComponentType.h"
-#include "src/ITensorComponent.h"
+
#include "src/cl/CLTile.h"
+#include "src/ITensorComponent.h"
namespace ckw
{
@@ -72,7 +73,7 @@
TensorComponentType component_type() const override;
private:
- TensorComponentType _component_type{ TensorComponentType::Unknown };
+ TensorComponentType _component_type{TensorComponentType::Unknown};
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/CLTile.cpp b/compute_kernel_writer/src/cl/CLTile.cpp
index 0cce69a..f6e271e 100644
--- a/compute_kernel_writer/src/cl/CLTile.cpp
+++ b/compute_kernel_writer/src/cl/CLTile.cpp
@@ -21,20 +21,20 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "src/cl/CLTile.h"
+
#include "ckw/Error.h"
#include "ckw/TileInfo.h"
-#include "src/Helpers.h"
#include "src/cl/CLHelpers.h"
-#include "src/cl/CLTile.h"
+#include "src/Helpers.h"
#include <algorithm>
#include <vector>
namespace ckw
{
-CLTile::CLTile(const std::string &name, const TileInfo &info)
- : _is_constant(false)
+CLTile::CLTile(const std::string &name, const TileInfo &info) : _is_constant(false)
{
validate_tile_info(info);
@@ -42,8 +42,7 @@
_info = info;
}
-CLTile::CLTile(const TileContainer &vals, DataType dt)
- : _is_constant(true)
+CLTile::CLTile(const TileContainer &vals, DataType dt) : _is_constant(true)
{
const int32_t w = vals[0].size();
const int32_t h = vals.size();
@@ -56,9 +55,9 @@
_vals = TileContainer(h, std::vector<std::string>(w));
- for(int32_t y = 0; y < h; ++y)
+ for (int32_t y = 0; y < h; ++y)
{
- for(int32_t x = 0; x < w; ++x)
+ for (int32_t x = 0; x < w; ++x)
{
_vals[y][x] = vals[y][x];
}
@@ -81,7 +80,7 @@
col = clamp(col, static_cast<int32_t>(0), _info.width() - 1);
row = clamp(row, static_cast<int32_t>(0), _info.height() - 1);
- if(_is_constant)
+ if (_is_constant)
{
// We can use the vector method to retrieve the scalar variable stored in the constant tile
return vector(row, col, 1);
@@ -94,7 +93,7 @@
t.desc.len = 1;
// This check is required because if the width has only one element, we cannot use .s0
- if(_info.width() != 1)
+ if (_info.width() != 1)
{
// Automatic broadcasting
t.str += ".s" + dec_to_hex_as_string(col);
@@ -109,7 +108,7 @@
// Clamp to nearest valid edge
row = clamp(row, static_cast<int32_t>(0), _info.height() - 1);
- if(_is_constant)
+ if (_is_constant)
{
return vector(row, 0, _info.width());
}
@@ -138,14 +137,14 @@
t.desc.dt = _info.data_type();
t.desc.len = width;
- if(_is_constant)
+ if (_is_constant)
{
// The vector has the following form: ((data_typeN)(val0, val1,..., ValN-1))
t.str = "((" + cl_get_variable_datatype_as_string(t.desc.dt, width) + ")";
t.str += "(";
int32_t col = col_start;
- for(; col < width - 1; ++col)
+ for (; col < width - 1; ++col)
{
t.str += _vals[row][col];
t.str += ", ";
@@ -157,10 +156,10 @@
{
t.str = create_var_name(row);
- if(_info.width() != 1 && _info.width() != width)
+ if (_info.width() != 1 && _info.width() != width)
{
t.str += ".s";
- for(int i = 0; i < width; ++i)
+ for (int i = 0; i < width; ++i)
{
t.str += dec_to_hex_as_string(col_start + i);
}
@@ -174,11 +173,11 @@
{
std::vector<TileVariable> vars;
- if(_is_constant)
+ if (_is_constant)
{
- for(int32_t y = 0; y < _info.height(); ++y)
+ for (int32_t y = 0; y < _info.height(); ++y)
{
- for(int32_t x = 0; x < _info.width(); ++x)
+ for (int32_t x = 0; x < _info.width(); ++x)
{
// We can use the vector method to retrieve all the scalar variables stored in the constant tile
TileVariable t = vector(y, x, 1);
@@ -188,7 +187,7 @@
}
else
{
- for(int32_t y = 0; y < _info.height(); ++y)
+ for (int32_t y = 0; y < _info.height(); ++y)
{
TileVariable t;
t.str = create_var_name(y);
@@ -211,7 +210,7 @@
std::string var_name = _basename;
// If a scalar variable, we do not append the row index
- if(_info.height() > 1)
+ if (_info.height() > 1)
{
var_name += "__";
var_name += std::to_string(row);
@@ -222,7 +221,7 @@
std::vector<int32_t> CLTile::supported_vector_lengths() const
{
- return std::vector<int32_t>{ 1, 2, 3, 4, 8, 16 };
+ return std::vector<int32_t>{1, 2, 3, 4, 8, 16};
}
void CLTile::validate_tile_info(const TileInfo &info) const
diff --git a/compute_kernel_writer/src/cl/CLTile.h b/compute_kernel_writer/src/cl/CLTile.h
index 1fb0fc9..498cf51 100644
--- a/compute_kernel_writer/src/cl/CLTile.h
+++ b/compute_kernel_writer/src/cl/CLTile.h
@@ -25,6 +25,7 @@
#define COMPUTE_KERNEL_WRITER_SRC_CL_CLTILE_H
#include "src/ITile.h"
+
#include <string>
namespace ckw
@@ -75,9 +76,9 @@
std::string create_var_name(int32_t row) const;
- TileInfo _info{ DataType::Unknown };
- std::string _basename{ "" };
- bool _is_constant{ false };
+ TileInfo _info{DataType::Unknown};
+ std::string _basename{""};
+ bool _is_constant{false};
TileContainer _vals{};
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
index f906bcd..a98ebed 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
@@ -28,20 +28,25 @@
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TensorStorageType.h"
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
#include "src/cl/CLHelpers.h"
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
namespace ckw
{
-bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst)
{
CKW_UNUSED(writer, tensor, mapper, op, dst);
- if(sampler->storage() != TensorStorageType::BufferUint8Ptr)
+ if (sampler->storage() != TensorStorageType::BufferUint8Ptr)
{
return false;
}
@@ -97,15 +102,15 @@
*/
void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b)
{
- _dst = dst;
+ _dst = dst;
CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst));
_ls_width_full = dst->info().width();
- _coord_x = x->scalar(0, 0).str;
- _coord_z = z->scalar(0, 0).str;
- _coord_b = b->scalar(0, 0).str;
- _coord_orig_z = _coord_z;
+ _coord_x = x->scalar(0, 0).str;
+ _coord_z = z->scalar(0, 0).str;
+ _coord_b = b->scalar(0, 0).str;
+ _coord_orig_z = _coord_z;
out_of_bound_initialize_x(_coord_x);
out_of_bound_initialize_z(_coord_z);
@@ -126,10 +131,10 @@
out_of_bound_finalize_y(dst);
// The left over load/store will be written in the finalize stage
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
int32_t col_start = 0;
- for(int32_t partial_width : _ls_width_part)
+ for (int32_t partial_width : _ls_width_part)
{
const std::string dst = _dst->vector(row_id, col_start, partial_width).str;
const std::string coord_x = _coord_x + " + " + std::to_string(col_start);
@@ -150,13 +155,13 @@
void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
{
- if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+ if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
{
- TensorInfo tensor_info = _tensor->info();
- TensorShape shape = tensor_info.shape();
+ TensorInfo tensor_info = _tensor->info();
+ TensorShape shape = tensor_info.shape();
_ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full);
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->op_write_raw_code("if(" + coord + " > 0)\n{\n");
}
@@ -165,14 +170,14 @@
void CLMemoryOpBufferHelper::out_of_bound_finalize_x()
{
- if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+ if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
{
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->op_write_raw_code("}\nelse\n{\n");
out_of_bound_initialize_z(_coord_orig_z);
- for(LeftoverDescriptor leftover_desc : _leftovers_x)
+ for (LeftoverDescriptor leftover_desc : _leftovers_x)
{
out_of_bound_initialize_y(leftover_desc.coord);
_writer->op_write_raw_code(leftover_desc.statement);
@@ -191,7 +196,7 @@
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
// Not to be moved outside the case because it marks the relevant tensor component as used even if we dont't use the variable
@@ -212,7 +217,7 @@
{
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
_writer->op_write_raw_code("}\nelse\n{\n");
@@ -234,7 +239,7 @@
CKW_UNUSED(coord);
const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::None:
break;
@@ -247,7 +252,7 @@
{
const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::None:
break;
@@ -256,13 +261,15 @@
}
}
-std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vector_width, const std::string &data,
- const std::string &address) const
+std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &address) const
{
- switch(op)
+ switch (op)
{
case MemoryOperation::Load:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
}
@@ -272,7 +279,7 @@
}
break;
case MemoryOperation::Store:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
}
@@ -288,26 +295,28 @@
return "";
}
-std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, const std::string &y, const std::string &z,
- const std::string &b) const
+std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x,
+ const std::string &y,
+ const std::string &z,
+ const std::string &b) const
{
TensorStorageType tensor_storage = _sampler->storage();
CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr);
- const std::string ptr_buf = _tensor->storage(tensor_storage).val;
- const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
+ const std::string ptr_buf = _tensor->storage(tensor_storage).val;
+ const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
std::string address;
address += "(__global ";
address += dst_type;
address += "*)(";
address += ptr_buf;
- if(x != "0" && (_mapper->dim_x().str != "1"))
+ if (x != "0" && (_mapper->dim_x().str != "1"))
{
address += " + (";
address += x + ") * sizeof(" + dst_type + ")";
}
- if(y != "0")
+ if (y != "0")
{
const std::string stride_y = _mapper->stride_y().str;
address += " + (";
@@ -315,7 +324,7 @@
address += " * ";
address += stride_y;
}
- if(z != "0" && (_mapper->dim_z().str != "1"))
+ if (z != "0" && (_mapper->dim_z().str != "1"))
{
const std::string stride_z = _mapper->stride_z().str;
address += " + (";
@@ -323,7 +332,7 @@
address += " * ";
address += stride_z;
}
- if(b != "0" && (_mapper->dim_batch().str != "1"))
+ if (b != "0" && (_mapper->dim_batch().str != "1"))
{
const std::string stride_b = _mapper->stride_batch().str;
address += " + (";
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
index 9bcd571..4e1a842 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
@@ -27,9 +27,9 @@
#include "src/cl/helpers/ICLMemoryOpHelper.h"
+#include <cstdint>
#include <string>
#include <vector>
-#include <cstdint>
namespace ckw
{
@@ -65,20 +65,25 @@
struct LeftoverDescriptor
{
LeftoverDescriptor(const std::string &dst, const std::string &coord, const std::string &statement)
- : dst(dst), coord(coord), statement(statement)
+ : dst(dst), coord(coord), statement(statement)
{
}
- std::string dst{}; // Describes the destination tile or part of it
- std::string coord{}; // Describes the coordinate to be used in boundary checks
- std::string statement{}; // Describes the memory operation statement
+ std::string dst{}; // Describes the destination tile or part of it
+ std::string coord{}; // Describes the coordinate to be used in boundary checks
+ std::string statement{}; // Describes the memory operation statement
};
std::vector<int32_t> _ls_width_part{};
std::vector<LeftoverDescriptor> _leftovers_x{};
std::string _coord_orig_z{};
- static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst);
+ static bool validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst);
void out_of_bound_initialize_x(const std::string &coord);
void out_of_bound_finalize_x();
@@ -87,8 +92,10 @@
void out_of_bound_initialize_z(const std::string &coord);
void out_of_bound_finalize_z();
- std::string to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const;
- std::string to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
+ std::string
+ to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const;
+ std::string
+ to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
index 55f88f4..b7d146b 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
@@ -28,11 +28,11 @@
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TensorStorageType.h"
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
namespace ckw
{
@@ -66,31 +66,36 @@
{
}
-bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst)
{
CKW_UNUSED(writer, tensor, mapper);
- if(dst->info().width() != 4)
+ if (dst->info().width() != 4)
{
return false;
}
- if(sampler->address_mode_x() != TensorSamplerAddressModeX::None)
+ if (sampler->address_mode_x() != TensorSamplerAddressModeX::None)
{
return false;
}
- if(sampler->address_mode_z() != TensorSamplerAddressModeZ::None)
+ if (sampler->address_mode_z() != TensorSamplerAddressModeZ::None)
{
return false;
}
- if(sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load)
+ if (sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load)
{
return false;
}
- if(sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store)
+ if (sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store)
{
return false;
}
- if((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
+ if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
{
return false;
}
@@ -102,7 +107,7 @@
CKW_UNUSED(coord);
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::SkipLessThanZero:
_writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
@@ -118,7 +123,7 @@
void CLMemoryOpImage2dHelper::out_of_bound_finalize_y()
{
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::SkipLessThanZero:
_writer->op_write_raw_code("}\n");
@@ -131,15 +136,19 @@
}
}
-std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const
+std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &sampler,
+ const std::string &address) const
{
CKW_UNUSED(vector_width);
const TensorStorageType tensor_storage = _sampler->storage();
- const std::string image2d_obj = _tensor->storage(tensor_storage).val;
- const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h";
+ const std::string image2d_obj = _tensor->storage(tensor_storage).val;
+ const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h";
- switch(op)
+ switch (op)
{
case MemoryOperation::Load:
return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + address + ")";
@@ -155,7 +164,7 @@
{
const auto address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::None:
return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
@@ -167,17 +176,19 @@
}
}
-std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z,
+std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x,
+ const std::string &y,
+ const std::string &z,
const std::string &b) const
{
std::string coord_x = "(" + x + ") >> 2";
std::string coord_y = "(";
- if(y != "0")
+ if (y != "0")
{
coord_y += y;
}
- if(z != "0" && (_mapper->dim_z().str != "1"))
+ if (z != "0" && (_mapper->dim_z().str != "1"))
{
const std::string dim = _mapper->dim_y().str;
coord_y += " + (";
@@ -185,7 +196,7 @@
coord_y += " * ";
coord_y += dim;
}
- if(b != "0" && (_mapper->dim_batch().str != "1"))
+ if (b != "0" && (_mapper->dim_batch().str != "1"))
{
const std::string dim0 = _mapper->dim_y().str;
const std::string dim1 = _mapper->dim_z().str;
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h
index 73bede7..fd9b097 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h
@@ -59,14 +59,24 @@
void finalize() override;
private:
- static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst);
+ static bool validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst);
void out_of_bound_initialize_y(const std::string &coord);
void out_of_bound_finalize_y();
- std::string to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const;
+ std::string to_ls_image2d(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &sampler,
+ const std::string &address) const;
std::string to_ls_image2d_sampler() const;
- std::string to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
+ std::string
+ to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h
index 7f36343..f46fee9 100644
--- a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h
@@ -26,6 +26,7 @@
#define CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H
#include "ckw/TensorSampler.h"
+
#include "src/Tensor3dMapper.h"
#include <cstdint>
@@ -98,16 +99,16 @@
virtual void finalize() = 0;
protected:
- CLKernelWriter *_writer{ nullptr };
- ITensor *_tensor{ nullptr };
- TensorSampler *_sampler{ nullptr };
- MemoryOperation _op;
- std::unique_ptr<Tensor3dMapper> _mapper{ nullptr };
- const CLTile *_dst{ nullptr };
- int32_t _ls_width_full{ 0 };
- std::string _coord_x{};
- std::string _coord_z{};
- std::string _coord_b{};
+ CLKernelWriter *_writer{nullptr};
+ ITensor *_tensor{nullptr};
+ TensorSampler *_sampler{nullptr};
+ MemoryOperation _op;
+ std::unique_ptr<Tensor3dMapper> _mapper{nullptr};
+ const CLTile *_dst{nullptr};
+ int32_t _ls_width_full{0};
+ std::string _coord_x{};
+ std::string _coord_z{};
+ std::string _coord_b{};
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/types/ConstantData.cpp b/compute_kernel_writer/src/types/ConstantData.cpp
index d2155cf..67b1103 100644
--- a/compute_kernel_writer/src/types/ConstantData.cpp
+++ b/compute_kernel_writer/src/types/ConstantData.cpp
@@ -30,52 +30,51 @@
{
namespace
{
- template<typename T>
- inline typename std::enable_if<std::is_same<T, float>::value, std::string>::type to_str(T value)
- {
- std::stringstream ss;
- ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value;
- return ss.str();
- }
-
- template<typename T>
- inline typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type to_str(T value)
- {
- return std::to_string(value);
- }
-
- template<typename T>
- inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value)
- {
- return std::to_string((int) value);
- }
+template <typename T>
+inline typename std::enable_if<std::is_same<T, float>::value, std::string>::type to_str(T value)
+{
+ std::stringstream ss;
+ ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value;
+ return ss.str();
}
-template<typename T>
+template <typename T>
+inline typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type
+to_str(T value)
+{
+ return std::to_string(value);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value)
+{
+ return std::to_string((int)value);
+}
+} // namespace
+
+template <typename T>
ConstantData::ConstantData(std::initializer_list<std::initializer_list<T>> values, DataType data_type)
: _data_type(data_type)
{
CKW_ASSERT(validate<T>(data_type));
CKW_ASSERT(values.size() > 0);
- for(auto value_arr: values)
+ for (auto value_arr : values)
{
// Each row must have the same number of elements
CKW_ASSERT(value_arr.size() == (*values.begin()).size());
StringVector vec;
- std::transform(value_arr.begin(), value_arr.end(),
- std::back_inserter(vec),
- [](T val) { return to_str(val); });
+ std::transform(value_arr.begin(), value_arr.end(), std::back_inserter(vec), [](T val) { return to_str(val); });
_values.push_back(std::move(vec));
}
}
-template<typename T>
+template <typename T>
bool ConstantData::validate(DataType data_type)
{
- switch(data_type)
+ switch (data_type)
{
case DataType::Fp32:
case DataType::Fp16:
@@ -107,7 +106,7 @@
template bool ConstantData::validate<bool>(DataType);
template bool ConstantData::validate<float>(DataType);
-const std::vector<std::vector<std::string>>& ConstantData::values() const
+const std::vector<std::vector<std::string>> &ConstantData::values() const
{
return _values;
}