blob: 6a9d585c142d1bb03e29964f2064b0e798b46849 [file] [log] [blame]
/*
* Copyright (c) 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef ARM_COMPUTE_ACL_HPP_
#define ARM_COMPUTE_ACL_HPP_
#include "arm_compute/Acl.h"
#include <cstdint>
#include <cstdlib>
#include <initializer_list>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
#include <exception>
#endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
// Helper Macros
/* Casts x to void so that an otherwise unused variable or parameter counts as used.
* The macro is local to this header: it is #undef'ed again just before the include guard closes. */
#define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x)
namespace acl
{
// Forward declarations
class Context;
class Queue;
class Tensor;
class TensorPack;
/** Status code enum
*
* Each enumerator takes the value of the C API constant it is initialized from,
* so values can be converted to and from the C status type with a plain cast.
*/
enum class StatusCode
{
Success = AclSuccess, /**< Same value as AclSuccess */
RuntimeError = AclRuntimeError, /**< Same value as AclRuntimeError */
OutOfMemory = AclOutOfMemory, /**< Same value as AclOutOfMemory */
Unimplemented = AclUnimplemented, /**< Same value as AclUnimplemented */
UnsupportedTarget = AclUnsupportedTarget, /**< Same value as AclUnsupportedTarget */
InvalidArgument = AclInvalidArgument, /**< Same value as AclInvalidArgument */
InvalidTarget = AclInvalidTarget, /**< Same value as AclInvalidTarget */
UnsupportedConfig = AclUnsupportedConfig, /**< Same value as AclUnsupportedConfig */
InvalidObjectState = AclInvalidObjectState, /**< Same value as AclInvalidObjectState */
};
/**< Utility namespace containing helpers functions */
namespace detail
{
/** Construct to handle destruction of objects
*
* The primary template is empty on purpose: only the specialisations generated
* by OBJECT_DELETER below provide a static Destroy() function, so using the
* deleter with a handle type that has no specialisation fails to compile.
*
* @tparam T Object base type
*/
template <typename T>
struct ObjectDeleter
{
};
/* Generates the ObjectDeleter specialisation for the handle type `obj`.
* Its static Destroy() forwards the handle to the C function `func` and returns that function's AclStatus.
* The macro is #undef'ed right after the specialisations are emitted. */
#define OBJECT_DELETER(obj, func) \
template <> \
struct ObjectDeleter<obj> \
\
{ \
static inline AclStatus Destroy(obj v) \
{ \
return func(v); \
} \
};
/* One specialisation per library handle type, each paired with its C destroy entry point */
OBJECT_DELETER(AclContext, AclDestroyContext)
OBJECT_DELETER(AclQueue, AclDestroyQueue)
OBJECT_DELETER(AclTensor, AclDestroyTensor)
OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack)
OBJECT_DELETER(AclOperator, AclDestroyOperator)
#undef OBJECT_DELETER
/** Convert a strongly typed enumerator to its plain C enum counterpart
 *
 * The value is first converted to the underlying integral type of @p SE and
 * then cast to @p E. No range or validity check is performed.
 *
 * @tparam E  Plain old C enum (or integral type) to convert to
 * @tparam SE Strongly typed enum to convert from
 *
 * @param[in] v Value to convert
 *
 * @return The value of @p v expressed as @p E
 */
template <typename E, typename SE>
constexpr E as_cenum(SE v) noexcept
{
    using underlying_t = typename std::underlying_type<SE>::type;
    return static_cast<E>(static_cast<underlying_t>(v));
}
/** Convert a plain old enumerator to a strongly typed enum
 *
 * This is a direct static_cast; the caller is responsible for @p val being a
 * value that is meaningful for @p SE.
 *
 * @tparam SE Strongly typed enum to convert to
 * @tparam E  Plain old C enum (or integral type) to convert from
 *
 * @param[in] val Value to convert
 *
 * @return The value of @p val expressed as @p SE
 */
template <typename SE, typename E>
constexpr SE as_enum(E val) noexcept
{
    return static_cast<SE>(val);
}
/** Object base class for library objects
*
* Class is defining basic common interface for all the library objects
*
* @tparam T Object type to be templated on
*/
template <typename T>
class ObjectBase
{
public:
/** Destructor */
~ObjectBase() = default;
/** Copy constructor */
ObjectBase(const ObjectBase<T> &) = default;
/** Move Constructor */
ObjectBase(ObjectBase<T> &&) = default;
/** Copy assignment operator */
ObjectBase<T> &operator=(const ObjectBase<T> &) = default;
/** Move assignment operator */
ObjectBase<T> &operator=(ObjectBase<T> &&) = default;
/** Reset object value
*
* @param [in] val Value to set
*/
void reset(T *val)
{
_object.reset(val, detail::ObjectDeleter<T *>::Destroy);
}
/** Access uderlying object
*
* @return Underlying object
*/
const T *get() const
{
return _object.get();
}
/** Access uderlying object
*
* @return Underlying object
*/
T *get()
{
return _object.get();
}
protected:
/** Constructor */
ObjectBase() = default;
protected:
std::shared_ptr<T> _object{nullptr}; /**< Library object */
};
/** Equality operator for library objects
 *
 * Two wrappers compare equal when they refer to the same underlying object,
 * i.e. the comparison is on the wrapped pointers.
 *
 * @tparam T Parameter to template on
 *
 * @param[in] lhs Left hand-side argument
 * @param[in] rhs Right hand-side argument
 *
 * @return True if both wrappers hold the same underlying pointer, else false
 */
template <typename T>
bool operator==(const ObjectBase<T> &lhs, const ObjectBase<T> &rhs)
{
    const T *const lhs_object = lhs.get();
    const T *const rhs_object = rhs.get();
    return lhs_object == rhs_object;
}
/** Inequality operator for library objects
 *
 * Defined as the negation of the equality operator.
 *
 * @tparam T Parameter to template on
 *
 * @param[in] lhs Left hand-side argument
 * @param[in] rhs Right hand-side argument
 *
 * @return True if the objects are NOT equal, else false
 */
template <typename T>
bool operator!=(const ObjectBase<T> &lhs, const ObjectBase<T> &rhs)
{
    const bool equal = (lhs == rhs);
    return !equal;
}
} // namespace detail
#if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
/** Status class
 *
 * Exception type (derived from std::exception) that carries a status code
 * together with an explanatory message.
 *
 * @note Class is visible only when exceptions are enabled during compilation
 */
class Status : public std::exception
{
public:
    /** Constructor
     *
     * @param[in] status Status returned
     * @param[in] msg    Error message to be bound with the exception (copied)
     */
    Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg)
    {
    }

    /** Returns an explanatory exception message
     *
     * @return Status message; the pointer refers to storage owned by this object
     */
    const char *what() const noexcept override
    {
        return _msg.c_str();
    }

    /** Underlying status accessor
     *
     * @return Status code
     */
    StatusCode status() const
    {
        return _status;
    }

    /** Explicit status converter
     *
     * @return Status code
     */
    explicit operator StatusCode() const
    {
        return status();
    }

private:
    StatusCode  _status; /**< Status code */
    std::string _msg;    /**< Status message */
};
/** Reports an error status by throwing a Status exception in case of failure
 *
 * Nothing happens when @p status is StatusCode::Success.
 *
 * @note This implementation is used when exceptions are enabled during compilation
 *
 * @param[in] status Status to report
 * @param[in] msg    Explanatory error message
 *
 * @throws Status when @p status is anything other than StatusCode::Success
 */
static inline void report_status(StatusCode status, const std::string &msg)
{
    if (status == StatusCode::Success)
    {
        return;
    }
    throw Status(status, msg);
}
#else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
/** Reports a status code
 *
 * Intentionally a no-op: both arguments are discarded, so failures are only
 * visible through whatever status the caller inspects itself.
 *
 * @note This implementation is used when exceptions are disabled during compilation
 * @note Message is suppressed and not reported in this case
 *
 * @param[in] status Status to report (ignored)
 * @param[in] msg    Explanatory error message (ignored)
 */
static inline void report_status(StatusCode status, const std::string &msg)
{
    static_cast<void>(status);
    static_cast<void>(msg);
}
#endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
/** Target enum
*
* Enumerators take the values of the corresponding C API target constants.
*/
enum class Target
{
Cpu = AclCpu, /**< Cpu target that leverages SIMD */
GpuOcl = AclGpuOcl /**< Gpu target that leverages OpenCL */
};
/** Available execution modes
*
* Enumerators take the values of the corresponding C API execution mode constants.
*/
enum class ExecutionMode
{
FastRerun =
AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */
};
/** Context class
 *
 * Context acts as a central aggregate service for further objects created from it.
 * It provides, internally, common facilities in order to avoid the use of global
 * statically initialized objects that can lead to important side-effects under
 * specific execution contexts.
 *
 * For example, a context contains allocators for object creation, for further backing memory allocation,
 * any serialization interfaces and other modules that affect the construction of objects,
 * like program caches for OpenCL.
 */
class Context : public detail::ObjectBase<AclContext_>
{
public:
    /** Context options */
    struct Options
    {
        static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */

        /** Default Constructor
         *
         * @note By default no precision loss is enabled for operators
         * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator
         */
        Options()
            : Options(ExecutionMode::FastRerun /* mode */,
                      AclCpuCapabilitiesAuto /* caps */,
                      false /* enable_fast_math */,
                      nullptr /* kernel_config */,
                      num_threads_auto /* max_compute_units */,
                      nullptr /* allocator */)
        {
        }

        /** Constructor
         *
         * @param[in] mode              Execution mode to be used
         * @param[in] caps              Capabilities to be used
         * @param[in] enable_fast_math  Allow precision loss in favor of performance
         * @param[in] kernel_config     Kernel configuration file containing construction tuning meta-data.
         *                              Only the pointer is stored, the string is not copied.
         * @param[in] max_compute_units Max compute units that are expected to be used
         * @param[in] allocator         Allocator to be used for internal memory allocation.
         *                              Only the pointer is stored.
         */
        Options(ExecutionMode         mode,
                AclTargetCapabilities caps,
                bool                  enable_fast_math,
                const char           *kernel_config,
                int32_t               max_compute_units,
                AclAllocator         *allocator)
        {
            copts.mode               = detail::as_cenum<AclExecutionMode>(mode);
            copts.capabilities       = caps;
            copts.enable_fast_math   = enable_fast_math;
            copts.kernel_config_file = kernel_config;
            copts.max_compute_units  = max_compute_units;
            copts.allocator          = allocator;
        }

        AclContextOptions copts{}; /**< Underlying C options structure handed to the C API */
    };

public:
    /** Constructor
     *
     * @note Serves as a simpler delegate constructor
     * @note As context options, default conservative options will be used
     *
     * @param[in]  target Target to create context for
     * @param[out] status Status information if requested
     */
    explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status)
    {
    }

    /** Constructor
     *
     * @note With exceptions enabled a failure is reported by throwing, in which case
     *       @p status is not written.
     *
     * @param[in]  target  Target to create context for
     * @param[in]  options Context construction options
     * @param[out] status  Status information if requested
     */
    Context(Target target, const Options &options, StatusCode *status = nullptr)
    {
        // Start from a null handle: if creation fails without writing the output
        // parameter, the wrapper must not end up owning an indeterminate pointer.
        AclContext ctx = nullptr;
        const auto st =
            detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
        reset(ctx);
        report_status(st, "[Compute Library] Failed to create context");
        if (status)
        {
            *status = st;
        }
    }
};
/** Available tuning modes
*
* Enumerators take the values of the corresponding C API tuning mode constants.
*/
enum class TuningMode
{
Rapid = AclRapid,
Normal = AclNormal,
Exhaustive = AclExhaustive
};
/** Queue class
 *
 * Queue is responsible for the execution related aspects, with main responsibilities those of
 * scheduling and tuning operators.
 *
 * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently.
 *
 * @note An operator might depend on the maximum possible compute units that are provided in the context,
 *       thus in cases where the number of the scheduling units of the queue are greater might lead to errors.
 */
class Queue : public detail::ObjectBase<AclQueue_>
{
public:
    /** Queue options */
    struct Options
    {
        /** Default Constructor
         *
         * As default options, no tuning will be performed, and the number of scheduling units will
         * depend on internal device discovery functionality
         */
        Options() : opts{AclTuningModeNone, 0}
        {
        }

        /** Constructor
         *
         * @param[in] mode          Tuning mode to be used
         * @param[in] compute_units Number of scheduling units to be used
         */
        Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum<AclTuningMode>(mode), compute_units}
        {
        }

        AclQueueOptions opts; /**< Underlying C options structure handed to the C API */
    };

public:
    /** Constructor
     *
     * @note Serves as a simpler delegate constructor
     * @note As queue options, default conservative options will be used
     *
     * @param[in]  ctx    Context to create queue for
     * @param[out] status Status information if requested
     */
    explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status)
    {
    }

    /** Constructor
     *
     * @note @p options deliberately has no default value: a defaulted @p options made
     *       `Queue(ctx)` ambiguous with the simpler constructor above, which already
     *       covers the "default options" case.
     * @note With exceptions enabled a failure is reported by throwing, in which case
     *       @p status is not written.
     *
     * @param[in]  ctx     Context from where the queue will be created from
     * @param[in]  options Queue options to be used
     * @param[out] status  Status information if requested
     */
    explicit Queue(Context &ctx, const Options &options, StatusCode *status = nullptr)
    {
        // Start from a null handle so a failed creation never leaves the wrapper
        // owning an indeterminate pointer.
        AclQueue   queue = nullptr;
        const auto st    = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts));
        reset(queue);
        report_status(st, "[Compute Library] Failed to create queue!");
        if (status)
        {
            *status = st;
        }
    }

    /** Block until all the tasks of the queue have been marked as finished
     *
     * @return Status code
     */
    StatusCode finish()
    {
        return detail::as_enum<StatusCode>(AclQueueFinish(_object.get()));
    }
};
/** Data type enumeration
*
* Enumerators take the values of the corresponding C API data type constants.
*/
enum class DataType
{
Unknown = AclDataTypeUnknown,
UInt8 = AclUInt8,
Int8 = AclInt8,
UInt16 = AclUInt16,
Int16 = AclInt16,
UInt32 = AclUint32,
Int32 = AclInt32,
Float16 = AclFloat16,
BFloat16 = AclBFloat16,
Float32 = AclFloat32,
};
/** Tensor Descriptor class
*
* Structure that contains all the required meta-data to represent a tensor
*/
class TensorDescriptor
{
public:
/** Constructor
*
* @param[in] shape Shape of the tensor
* @param[in] data_type Data type of the tensor
*/
TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type) : _shape(shape), _data_type(data_type)
{
_cdesc.ndims = _shape.size();
_cdesc.shape = _shape.data();
_cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
_cdesc.strides = nullptr;
_cdesc.boffset = 0;
}
/** Constructor
*
* @param[in] desc C-type descriptor
*/
explicit TensorDescriptor(const AclTensorDescriptor &desc)
{
_cdesc = desc;
_data_type = detail::as_enum<DataType>(desc.data_type);
_shape.reserve(desc.ndims);
for (int32_t d = 0; d < desc.ndims; ++d)
{
_shape.emplace_back(desc.shape[d]);
}
}
/** Get underlying C tensor descriptor
*
* @return Underlying structure
*/
const AclTensorDescriptor *get() const
{
return &_cdesc;
}
/** Operator to compare two TensorDescriptor
*
* @param[in] other The instance to compare against
*
* @return True if two instances have the same shape and data type
*/
bool operator==(const TensorDescriptor &other)
{
bool is_same = true;
is_same &= _data_type == other._data_type;
is_same &= _shape.size() == other._shape.size();
if (is_same)
{
for (uint32_t d = 0; d < _shape.size(); ++d)
{
is_same &= _shape[d] == other._shape[d];
}
}
return is_same;
}
private:
std::vector<int32_t> _shape{};
DataType _data_type{};
AclTensorDescriptor _cdesc{};
};
/** Import memory types
*
* Enumerators take the values of the corresponding AclImportMemoryType constants.
*/
enum class ImportType
{
Host = AclImportMemoryType::AclHostPtr /**< Same value as AclHostPtr; presumably a host-side pointer - confirm against the C API */
};
/** Tensor class
 *
 * Tensor is a mathematical construct that can represent an N-Dimensional space.
 *
 * @note Maximum dimensionality support is 6 internally at the moment
 */
class Tensor : public detail::ObjectBase<AclTensor_>
{
public:
    /** Constructor
     *
     * @note Tensor memory is allocated (delegates with the allocate flag set to true)
     *
     * @param[in]  ctx    Context from where the tensor will be created from
     * @param[in]  desc   Tensor descriptor to be used
     * @param[out] status Status information if requested
     */
    Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status)
    {
    }

    /** Constructor
     *
     * @note With exceptions enabled a failure is reported by throwing, in which case
     *       @p status is not written.
     *
     * @param[in]  ctx      Context from where the tensor will be created from
     * @param[in]  desc     Tensor descriptor to be used
     * @param[in]  allocate Flag to indicate if the tensor needs to be allocated
     * @param[out] status   Status information if requested
     */
    Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
    {
        // Start from a null handle so a failed creation never leaves the wrapper
        // owning an indeterminate pointer.
        AclTensor  tensor = nullptr;
        const auto st     = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
        reset(tensor);
        report_status(st, "[Compute Library] Failed to create tensor!");
        if (status)
        {
            *status = st;
        }
    }

    /** Maps the backing memory of a given tensor that can be used by the host to access any contents
     *
     * @return A valid non-zero pointer in case of success else nullptr
     */
    void *map()
    {
        void      *handle = nullptr;
        const auto st     = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
        report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
        return handle;
    }

    /** Unmaps tensor's memory
     *
     * @param[in] handle Handle to unmap
     *
     * @return Status code
     */
    StatusCode unmap(void *handle)
    {
        const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
        report_status(st, "[Compute Library] Failed to unmap the tensor!");
        return st;
    }

    /** Import external memory to a given tensor object
     *
     * @param[in] handle External memory handle
     * @param[in] type   Type of memory to be imported
     *
     * @return Status code
     */
    StatusCode import(void *handle, ImportType type)
    {
        const auto st = detail::as_enum<StatusCode>(
            AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
        report_status(st, "[Compute Library] Failed to import external memory to tensor!");
        return st;
    }

    /** Get the size of the tensor in byte
     *
     * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.).
     *
     * @return The size of the tensor in byte; stays 0 if the query fails without writing a size
     */
    uint64_t get_size()
    {
        uint64_t   size{0};
        const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
        report_status(st, "[Compute Library] Failed to get the size of the tensor");
        return size;
    }

    /** Get the descriptor of this tensor
     *
     * @return The descriptor describing the characteristics of this tensor
     */
    TensorDescriptor get_descriptor()
    {
        // Value-initialize: when exceptions are disabled a failed query is not
        // reported, and the descriptor is still converted below. A zeroed
        // descriptor (no dimensions, null shape) is safe to convert, an
        // indeterminate one is not.
        AclTensorDescriptor desc{};
        const auto          st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
        report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
        return TensorDescriptor(desc);
    }
};
/** Tensor pack class
 *
 * Pack is a utility construct that is used to create a collection of tensors that can then
 * be passed into operator as inputs.
 */
class TensorPack : public detail::ObjectBase<AclTensorPack_>
{
public:
    /** Pack pair construct */
    struct PackPair
    {
        /** Constructor
         *
         * @param[in] tensor_  Tensor to pack
         * @param[in] slot_id_ Slot identification of the tensor in respect with the operator
         */
        PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_)
        {
        }

        Tensor *tensor{nullptr};         /**< Tensor object (not owned) */
        int32_t slot_id{AclSlotUnknown}; /**< Slot id in respect with the operator */
    };

public:
    /** Constructor
     *
     * @note With exceptions enabled a failure is reported by throwing, in which case
     *       @p status is not written.
     *
     * @param[in]  ctx    Context from where the tensor pack will be created from
     * @param[out] status Status information if requested
     */
    explicit TensorPack(Context &ctx, StatusCode *status = nullptr)
    {
        // Start from a null handle so a failed creation never leaves the wrapper
        // owning an indeterminate pointer.
        AclTensorPack pack = nullptr;
        const auto    st   = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
        reset(pack);
        report_status(st, "[Compute Library] Failure during tensor pack creation");
        if (status)
        {
            *status = st;
        }
    }

    /** Add tensor to tensor pack
     *
     * @param[in] tensor  Tensor to be added in the pack
     * @param[in] slot_id Slot id of the tensor in respect with the operator
     *
     * @return Status code
     */
    StatusCode add(Tensor &tensor, int32_t slot_id)
    {
        return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id));
    }

    /** Add a list of tensors to a tensor pack
     *
     * @param[in] packed Pair packs to be added
     *
     * @return Status code
     */
    StatusCode add(std::initializer_list<PackPair> packed)
    {
        const size_t size = packed.size();

        std::vector<int32_t>   slots;
        std::vector<AclTensor> tensors;
        slots.reserve(size);
        tensors.reserve(size);

        for (const PackPair &p : packed)
        {
            slots.push_back(p.slot_id);
            // The C API expects the underlying library handle. p.tensor points to the
            // C++ wrapper object, so the handle has to be fetched through get();
            // casting the wrapper pointer itself would hand over an unrelated address.
            // A null wrapper pointer is forwarded as a null handle.
            tensors.push_back(p.tensor != nullptr ? p.tensor->get() : nullptr);
        }
        return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
    }
};
/** Operator class
 *
 * Operators are the basic algorithmic blocks responsible for performing distinct operations
 */
class Operator : public detail::ObjectBase<AclOperator_>
{
public:
    /** Run an operator on a given input list
     *
     * @param[in,out] queue Queue to schedule the operator on
     * @param[in,out] pack  Tensor list to be used as input
     *
     * @return Status Code
     */
    StatusCode run(Queue &queue, TensorPack &pack)
    {
        // C status -> strongly typed status is the as_enum direction, as used by
        // every other wrapper in this header.
        return detail::as_enum<StatusCode>(AclRunOperator(_object.get(), queue.get(), pack.get()));
    }

protected:
    /** Constructor, only reachable from concrete operator wrappers */
    Operator() = default;
};
/// Operators
/** Activation descriptor, an alias of the C API activation descriptor structure */
using ActivationDesc = AclActivationDescriptor;

/** Activation operator class
 *
 * Wraps an operator handle created through AclActivation.
 */
class Activation : public Operator
{
public:
    /** Constructor
     *
     * @note With exceptions enabled a failure is reported by throwing, in which case
     *       @p status is not written.
     *
     * @param[in]  ctx    Context from where the operator will be created from
     * @param[in]  src    Descriptor passed to the C API as source tensor descriptor
     * @param[in]  dst    Descriptor passed to the C API as destination tensor descriptor
     * @param[in]  desc   Activation descriptor, forwarded to the C API unchanged
     * @param[out] status Status information if requested
     */
    Activation(Context                &ctx,
               const TensorDescriptor &src,
               const TensorDescriptor &dst,
               const ActivationDesc   &desc,
               StatusCode             *status = nullptr)
    {
        // Start from a null handle so a failed creation never leaves the wrapper
        // owning an indeterminate pointer.
        AclOperator op = nullptr;
        const auto  st = detail::as_enum<StatusCode>(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc));
        reset(op);
        report_status(st, "[Compute Library] Failure during Activation operator creation");
        if (status)
        {
            *status = st;
        }
    }
};
} // namespace acl
#undef ARM_COMPUTE_IGNORE_UNUSED
#endif /* ARM_COMPUTE_ACL_HPP_ */