blob: 902a508b914d7c57c76a51c2c9a1230096ed1e77 [file] [log] [blame]
Georgios Pinitas8a5146f2021-01-12 15:51:07 +00001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_ACLTYPES_H_
25#define ARM_COMPUTE_ACLTYPES_H_
26
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
29
30#ifdef __cplusplus
31extern "C" {
32#endif /* __cplusplus */
33
/*
 * Opaque object handles.
 * Each handle is a pointer to a struct type that is only declared, not defined, here.
 */

/** Opaque Context object */
typedef struct AclContext_ *AclContext;

/** Opaque Queue object */
typedef struct AclQueue_ *AclQueue;

/** Opaque Tensor object */
typedef struct AclTensor_ *AclTensor;

/** Opaque Tensor pack object */
typedef struct AclTensorPack_ *AclTensorPack;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000042
/** Capabilities bitfield, stored in a 64-bit unsigned integer.
 *
 * @note If multiple capabilities are enabled ComputeLibrary will pick the best possible.
 */
typedef uint64_t AclTargetCapabilities;
45
/** Error codes returned by the public entry-points.
 *
 * A fixed underlying type (`enum X : T`) is C++11 syntax and is not valid C before C23,
 * so it is only requested when the header is compiled as C++. C callers get a plain
 * enumeration with the same enumerator values.
 */
#ifdef __cplusplus
typedef enum AclStatus : int32_t
#else  /* __cplusplus */
typedef enum AclStatus
#endif /* __cplusplus */
{
    AclSuccess            = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
    AclRuntimeError       = 1, /**< Call failed during execution */
    AclOutOfMemory        = 2, /**< Call failed due to failure to allocate resources */
    AclUnimplemented      = 3, /**< Call failed as requested capability is not implemented */
    AclUnsupportedTarget  = 4, /**< Call failed as an invalid backend was requested */
    AclInvalidTarget      = 5, /**< Call failed as an invalid target was passed.
                                    NOTE(review): wording inferred from the enumerator name; the previous text
                                    duplicated the AclInvalidArgument description - confirm */
    AclInvalidArgument    = 6, /**< Call failed as invalid argument was passed */
    AclUnsupportedConfig  = 7, /**< Call failed as configuration is unsupported */
    AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
} AclStatus;
59
/** Supported targets */
typedef enum AclTarget
{
    /** Cpu target that uses SIMD extensions */
    AclCpu = 0,
    /** OpenCL target for GPU */
    AclGpuOcl = 1,
} AclTarget;
66
/** Execution mode types */
typedef enum AclExecutionMode
{
    /** Prioritize performance when multiple iterations are performed */
    AclPreferFastRerun = 0,
    /** Prioritize performance when a single iteration is expected to be performed */
    AclPreferFastStart = 1,
} AclExecutionMode;
73
/** Available CPU capabilities.
 *
 * Apart from Auto (0) and All (every bit set), each enumerator occupies a single bit.
 */
typedef enum AclCpuCapabilities
{
    /** Automatic discovery of capabilities */
    AclCpuCapabilitiesAuto = 0,

    /** Enable NEON optimized paths */
    AclCpuCapabilitiesNeon = (1 << 0),
    /** Enable SVE optimized paths */
    AclCpuCapabilitiesSve = (1 << 1),
    /** Enable SVE2 optimized paths */
    AclCpuCapabilitiesSve2 = (1 << 2),
    /* Bits 3, 4, 5, 6 are reserved */

    /** Enable float16 data-type support */
    AclCpuCapabilitiesFp16 = (1 << 7),
    /** Enable bfloat16 data-type support */
    AclCpuCapabilitiesBf16 = (1 << 8),
    /* Bits 9, 10, 11, 12 are reserved */

    /** Enable paths that use the udot/sdot instructions */
    AclCpuCapabilitiesDot = (1 << 13),
    /** Enable paths that use the mmla integer instructions */
    AclCpuCapabilitiesMmlaInt8 = (1 << 14),
    /** Enable paths that use the mmla float instructions */
    AclCpuCapabilitiesMmlaFp = (1 << 15),

    /** Enable all paths */
    AclCpuCapabilitiesAll = ~0
} AclCpuCapabilities;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000094
/** Allocator interface that can be passed to a context */
typedef struct AclAllocator
{
    /** Allocate a block of size bytes of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] size      Size of the allocation
     *
     * @return A pointer to the allocated block if successful else NULL
     */
    void *(*alloc)(void *user_data, size_t size);

    /** Release a block of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] ptr       Pointer to the block to release (presumably one returned by alloc - confirm)
     */
    void (*free)(void *user_data, void *ptr);

    /** Allocate a block of size bytes of memory with an alignment request.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] size      Size of the allocation
     * @param[in] alignment Alignment requested for the allocation (presumably in bytes - confirm)
     *
     * @return A pointer to the allocated block if successful else NULL
     */
    void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);

    /** Release a block of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] ptr       Pointer to the block to release (presumably one returned by aligned_alloc - confirm)
     */
    void (*aligned_free)(void *user_data, void *ptr);

    /** User provided information */
    void *user_data;
} AclAllocator;
130
131/**< Context options */
132typedef struct AclContextOptions
133{
134 AclExecutionMode mode; /**< Execution mode to use */
135 AclTargetCapabilities capabilities; /**< Target capabilities */
136 bool enable_fast_math; /**< Allow precision loss */
137 const char *kernel_config_file; /**< Kernel cofiguration file */
138 int32_t max_compute_units; /**< Max compute units that can be used by a queue created from the context.
139 If <=0 the system will use the hw concurency insted */
140 AclAllocator *allocator; /**< Allocator to be used by all the memory internally */
141} AclContextOptions;
142
/** Supported tuning modes */
typedef enum
{
    /** No tuning */
    AclTuningModeNone = 0,
    /** Fast tuning mode, testing a small portion of the tuning space */
    AclRapid = 1,
    /** Normal tuning mode, gives a good balance between tuning time and performance */
    AclNormal = 2,
    /** Exhaustive tuning mode, increased tuning time but with best results */
    AclExhaustive = 3,
} AclTuningMode;
151
152/**< Queue options */
153typedef struct
154{
155 AclTuningMode mode; /**< Tuning mode */
156 int32_t compute_units; /**< Compute Units that the queue will deploy */
157} AclQueueOptions;
158
/** Supported data types.
 *
 * NOTE(review): AclUint32 is spelled with a lower-case 'i', unlike AclUInt8 and AclUInt16.
 * It is left as is because the enumerator name is part of the public interface.
 */
typedef enum AclDataType
{
    /** Unknown data type */
    AclDataTypeUnknown = 0,
    /** 8-bit unsigned integer */
    AclUInt8 = 1,
    /** 8-bit signed integer */
    AclInt8 = 2,
    /** 16-bit unsigned integer */
    AclUInt16 = 3,
    /** 16-bit signed integer */
    AclInt16 = 4,
    /** 32-bit unsigned integer */
    AclUint32 = 5,
    /** 32-bit signed integer */
    AclInt32 = 6,
    /** 16-bit floating point */
    AclFloat16 = 7,
    /** 16-bit brain floating point */
    AclBFloat16 = 8,
    /** 32-bit floating point */
    AclFloat32 = 9,
} AclDataType;
173
/** Supported data layouts for operations */
typedef enum AclDataLayout
{
    /** Unknown data layout */
    AclDataLayoutUnknown = 0,
    /** Native, performant, Compute Library data layout */
    AclNhwc = 1,
    /** Data layout where width is the fastest changing dimension */
    AclNchw = 2,
} AclDataLayout;
181
/** Type of memory to be imported */
typedef enum AclImportMemoryType
{
    /** Host allocated memory */
    AclHostPtr = 0
} AclImportMemoryType;
187
188/**< Tensor Descriptor */
189typedef struct AclTensorDescriptor
190{
191 int32_t ndims; /**< Number or dimensions */
192 int32_t *shape; /**< Tensor Shape */
193 AclDataType data_type; /**< Tensor Data type */
194 int64_t *strides; /**< Strides on each dimension. Linear memory is assumed if nullptr */
195 int64_t boffset; /**< Offset in terms of bytes for the first element */
196} AclTensorDescriptor;
197
/** Slot type of a tensor.
 *
 * AclSrc and AclSrc0 share the same value (0).
 */
typedef enum
{
    /** Unknown slot */
    AclSlotUnknown = -1,
    /** Source slot; same value as AclSrc0 */
    AclSrc = 0,
    /** Source slot 0; same value as AclSrc */
    AclSrc0 = 0,
    /** Source slot 1 */
    AclSrc1 = 1,
    /** Destination slot */
    AclDst = 30,
    /** NOTE(review): presumably the base slot for a vector of sources - confirm */
    AclSrcVec = 256,
} AclTensorSlot;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000208
209#ifdef __cplusplus
210}
211#endif /* __cplusplus */
212#endif /* ARM_COMPUTE_ACLTYPES_H_ */