/*
2 * Copyright (c) 2019 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"
25
26#include "arm_compute/core/Types.h"
27#include "arm_compute/runtime/NEON/NEScheduler.h"
28#include "support/ToolchainSupport.h"
29
30namespace arm_compute
31{
32NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
33 : _memory_group(std::move(memory_manager)),
34 _permute_deltas_kernel(),
35 _flatten_deltas_kernel(),
36 _permute_scores_kernel(),
37 _flatten_scores_kernel(),
38 _compute_anchors_kernel(),
39 _bounding_box_kernel(),
40 _memset_kernel(),
41 _padded_copy_kernel(),
42 _cpp_nms_kernel(),
43 _is_nhwc(false),
44 _deltas_permuted(),
45 _deltas_flattened(),
46 _scores_permuted(),
47 _scores_flattened(),
48 _all_anchors(),
49 _all_proposals(),
50 _keeps_nms_unused(),
51 _classes_nms_unused(),
52 _proposals_4_roi_values(),
53 _num_valid_proposals(nullptr),
54 _scores_out(nullptr)
55{
56}
57
58void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
59 const GenerateProposalsInfo &info)
60{
61 ARM_COMPUTE_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
62 ARM_COMPUTE_ERROR_THROW_ON(NEGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info));
63
64 _is_nhwc = scores->info()->data_layout() == DataLayout::NHWC;
65 const DataType data_type = deltas->info()->data_type();
66 const int num_anchors = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::CHANNEL));
67 const int feat_width = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::WIDTH));
68 const int feat_height = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::HEIGHT));
69 const int total_num_anchors = num_anchors * feat_width * feat_height;
70 const int pre_nms_topN = info.pre_nms_topN();
71 const int post_nms_topN = info.post_nms_topN();
72 const size_t values_per_roi = info.values_per_roi();
73
74 // Compute all the anchors
75 _memory_group.manage(&_all_anchors);
76 _compute_anchors_kernel.configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
77
78 const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
79 _deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, data_type));
80 _memory_group.manage(&_deltas_flattened);
81
82 // Permute and reshape deltas
83 if(!_is_nhwc)
84 {
85 _memory_group.manage(&_deltas_permuted);
86 _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
87 _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened);
88 _deltas_permuted.allocator()->allocate();
89 }
90 else
91 {
92 _flatten_deltas_kernel.configure(deltas, &_deltas_flattened);
93 }
94
95 const TensorShape flatten_shape_scores(1, total_num_anchors);
96 _scores_flattened.allocator()->init(TensorInfo(flatten_shape_scores, 1, data_type));
97 _memory_group.manage(&_scores_flattened);
98 // Permute and reshape scores
99 if(!_is_nhwc)
100 {
101 _memory_group.manage(&_scores_permuted);
102 _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
103 _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened);
104 _scores_permuted.allocator()->allocate();
105 }
106 else
107 {
108 _flatten_scores_kernel.configure(scores, &_scores_flattened);
109 }
110
111 // Bounding box transform
112 _memory_group.manage(&_all_proposals);
113 BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
114 _bounding_box_kernel.configure(&_all_anchors, &_all_proposals, &_deltas_flattened, bbox_info);
115 _deltas_flattened.allocator()->allocate();
116 _all_anchors.allocator()->allocate();
117
118 // The original layer implementation first selects the best pre_nms_topN anchors (thus having a lightweight sort)
119 // that are then transformed by bbox_transform. The boxes generated are then fed into a non-sorting NMS operation.
120 // Since we are reusing the NMS layer and we don't implement any CL/sort, we let NMS do the sorting (of all the input)
121 // and the filtering
122 const int scores_nms_size = std::min<int>(std::min<int>(post_nms_topN, pre_nms_topN), total_num_anchors);
123 const float min_size_scaled = info.min_size() * info.im_scale();
124 _memory_group.manage(&_classes_nms_unused);
125 _memory_group.manage(&_keeps_nms_unused);
126
127 // Note that NMS needs outputs preinitialized.
128 auto_init_if_empty(*scores_out->info(), TensorShape(scores_nms_size), 1, data_type);
129 auto_init_if_empty(*_proposals_4_roi_values.info(), TensorShape(values_per_roi, scores_nms_size), 1, data_type);
130 auto_init_if_empty(*num_valid_proposals->info(), TensorShape(scores_nms_size), 1, DataType::U32);
131
132 // Initialize temporaries (unused) outputs
133 _classes_nms_unused.allocator()->init(TensorInfo(TensorShape(8, 1), 1, data_type));
134 _keeps_nms_unused.allocator()->init(*scores_out->info());
135
136 // Save the output (to map and unmap them at run)
137 _scores_out = scores_out;
138 _num_valid_proposals = num_valid_proposals;
139
140 _memory_group.manage(&_proposals_4_roi_values);
141
142 const BoxNMSLimitInfo box_nms_info(0.0f, info.nms_thres(), scores_nms_size, false, NMSType::LINEAR, 0.5f, 0.001f, true, min_size_scaled, info.im_width(), info.im_height());
143 _cpp_nms_kernel.configure(&_scores_flattened /*scores_in*/,
144 &_all_proposals /*boxes_in,*/,
145 nullptr /* batch_splits_in*/,
146 scores_out /* scores_out*/,
147 &_proposals_4_roi_values /*boxes_out*/,
148 &_classes_nms_unused /*classes*/,
149 nullptr /*batch_splits_out*/,
150 &_keeps_nms_unused /*keeps*/,
151 num_valid_proposals /* keeps_size*/,
152 box_nms_info);
153
154 _keeps_nms_unused.allocator()->allocate();
155 _classes_nms_unused.allocator()->allocate();
156 _all_proposals.allocator()->allocate();
157 _scores_flattened.allocator()->allocate();
158
159 // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
160 _padded_copy_kernel.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
161 _proposals_4_roi_values.allocator()->allocate();
162
163 _memset_kernel.configure(proposals, PixelValue());
164}
165
166Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
167 const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info)
168{
169 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
170 ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW, DataLayout::NHWC);
171 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(scores, deltas);
172
173 const int num_anchors = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::CHANNEL));
174 const int feat_width = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::WIDTH));
175 const int feat_height = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::HEIGHT));
176 const int num_images = scores->dimension(3);
177 const int total_num_anchors = num_anchors * feat_width * feat_height;
178 const int values_per_roi = info.values_per_roi();
179
180 ARM_COMPUTE_RETURN_ERROR_ON(num_images > 1);
181
182 TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
183 ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
184
185 TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true);
186 TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true);
187 if(scores->data_layout() == DataLayout::NHWC)
188 {
189 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(deltas, &deltas_permuted_info);
190 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(scores, &scores_permuted_info);
191 }
192 else
193 {
194 ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
195 ARM_COMPUTE_RETURN_ON_ERROR(NEPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
196 }
197
198 TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
199 ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info));
200
201 TensorInfo scores_flattened_info(scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true));
202 TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
203
204 ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&scores_permuted_info, &scores_flattened_info));
205 ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(),
206 1.f)));
207
208 ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(&proposals_4_roi_values, proposals, PaddingList{ { 0, 1 } }));
209
210 if(num_valid_proposals->total_size() > 0)
211 {
212 ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->num_dimensions() > 1);
213 ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->dimension(0) > 1);
214 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(num_valid_proposals, 1, DataType::U32);
215 }
216
217 if(proposals->total_size() > 0)
218 {
219 ARM_COMPUTE_RETURN_ERROR_ON(proposals->num_dimensions() > 2);
220 ARM_COMPUTE_RETURN_ERROR_ON(proposals->dimension(0) != size_t(values_per_roi) + 1);
221 ARM_COMPUTE_RETURN_ERROR_ON(proposals->dimension(1) != size_t(total_num_anchors));
222 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(proposals, deltas);
223 }
224
225 if(scores_out->total_size() > 0)
226 {
227 ARM_COMPUTE_RETURN_ERROR_ON(scores_out->num_dimensions() > 1);
228 ARM_COMPUTE_RETURN_ERROR_ON(scores_out->dimension(0) != size_t(total_num_anchors));
229 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(scores_out, scores);
230 }
231
232 return Status{};
233}
234
// Executes the pipeline configured in configure(). Kernel order is data-dependent
// (anchors -> permute/flatten -> bbox transform -> NMS -> memset -> padded copy)
// and must not be reordered.
void NEGenerateProposalsLayer::run()
{
    // Acquire all the temporaries; released automatically when scope_mg goes out of scope.
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Compute all the anchors
    NEScheduler::get().schedule(&_compute_anchors_kernel, Window::DimY);

    // Transpose and reshape the inputs (only needed for NCHW, see configure())
    if(!_is_nhwc)
    {
        NEScheduler::get().schedule(&_permute_deltas_kernel, Window::DimY);
        NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY);
    }

    NEScheduler::get().schedule(&_flatten_deltas_kernel, Window::DimY);
    NEScheduler::get().schedule(&_flatten_scores_kernel, Window::DimY);

    // Build the boxes
    NEScheduler::get().schedule(&_bounding_box_kernel, Window::DimY);

    // Non maxima suppression
    // NOTE(review): dispatched through CPPScheduler (not NEScheduler) since it is a CPP
    // reference kernel — confirm this is intentional rather than a leftover.
    CPPScheduler::get().schedule(&_cpp_nms_kernel, Window::DimX);

    // Add dummy batch indexes: memset zeroes the whole output first so the padded
    // batch-id column stays zero, then the ROI values are copied in with a one-column offset.
    NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
    NEScheduler::get().schedule(&_padded_copy_kernel, Window::DimY);
}
264} // namespace arm_compute