blob: 86236e8854b5532f812fa6d8043edf816028ff8f [file] [log] [blame]
Georgios Pinitasd8734b52017-12-22 15:27:52 +00001/*
Sheri Zhang1d359272021-06-10 13:56:11 +01002 * Copyright (c) 2018-2021 Arm Limited.
Georgios Pinitasd8734b52017-12-22 15:27:52 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitasd9eb2752018-04-03 13:44:29 +010024#include "arm_compute/graph/mutators/InPlaceOperationMutator.h"
Georgios Pinitasd8734b52017-12-22 15:27:52 +000025
SiCongLibc4e3112021-06-29 13:18:30 +010026#include "arm_compute/core/Helpers.h"
Sheri Zhang1d359272021-06-10 13:56:11 +010027#include "arm_compute/core/Validate.h"
Georgios Pinitasd9eb2752018-04-03 13:44:29 +010028#include "arm_compute/graph/Graph.h"
29#include "arm_compute/graph/Logger.h"
SiCongLibc4e3112021-06-29 13:18:30 +010030#include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h"
31#include "arm_compute/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.h"
32#include "support/Cast.h"
33
34using namespace arm_compute::utils::cast;
Georgios Pinitasd8734b52017-12-22 15:27:52 +000035
36namespace arm_compute
37{
Georgios Pinitasd9eb2752018-04-03 13:44:29 +010038namespace graph
Georgios Pinitasd8734b52017-12-22 15:27:52 +000039{
Michele Di Giorgio294f6ff2020-06-19 12:11:06 +010040namespace
41{
42// Check if the output edges of the parent node are separate tensors. If not,
43// it means the same output is connected to multiple nodes and computations on
44// these nodes cannot be done in-place.
45bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge)
46{
47 const auto parent_node = input_edge->producer();
48 const auto input_tensor = input_edge->tensor();
49 const auto input_edge_id = input_edge->id();
50
51 if(parent_node == nullptr)
52 {
53 return false;
54 }
55
56 const auto output_edges = parent_node->output_edges();
57
58 // If the output is connected to only one edge, then computations can
59 // be done in-place.
60 if(output_edges.size() == 1)
61 {
62 return true;
63 }
64
65 return std::all_of(output_edges.begin(),
66 output_edges.end(),
67 [&](const EdgeID & edge_id)
68 {
69 // Skip check on current input edge
70 if(edge_id == input_edge_id)
71 {
72 return true;
73 }
74
75 auto edge = g.edge(edge_id);
76 return edge->tensor() != input_tensor;
77 });
78}
Sheri Zhang1d359272021-06-10 13:56:11 +010079
80// If do in-place calculation, then need to use the new output and inherit original output's accessor
81void set_new_output_and_inherit_accessor(std::unique_ptr<INode> &node, Tensor *orig_output, Tensor *new_output)
82{
83 ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : "
84 << node->id() << " and name : " << node->name() << std::endl);
85 // Update accessor
86 new_output->set_accessor(orig_output->extract_accessor());
87 // Update output
88 node->set_output_tensor(new_output->id(), 0);
89}
90
SiCongLibc4e3112021-06-29 13:18:30 +010091// Try to mutate the node to perform the depthwise in-place calculation
92void try_in_place_depthwiseconv(std::unique_ptr<INode> &node)
93{
94 // Get input edge
95 Edge *input_edge = node->input_edge(0);
96 Edge *weight_edge = node->input_edge(1);
97 ARM_COMPUTE_ERROR_ON(input_edge == nullptr || weight_edge == nullptr);
98
99 auto input_tensor = input_edge->tensor();
100 auto weight_tensor = weight_edge->tensor();
101 ARM_COMPUTE_ERROR_ON(input_tensor == nullptr || weight_tensor == nullptr);
102
103 const auto input_shape = input_tensor->desc().shape;
104 const auto qinfo_input = input_tensor->desc().quant_info;
105
106 const auto weight_shape = weight_tensor->desc().shape;
107 const auto weight_layout = weight_tensor->desc().layout;
108
109 // Extract PadStrideInfo and depth multiplier
110 PadStrideInfo conv_info{};
111 unsigned int depth_multiplier{};
112 if(node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer)
113 {
114 conv_info = polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info();
115 depth_multiplier = polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier();
116 }
117 else if(node->type() == NodeType::DepthwiseConvolutionLayer)
118 {
119 conv_info = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->convolution_info();
120 depth_multiplier = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->depth_multiplier();
121 }
122
123 // Get current output tensor
124 auto current_output_tensor = node->output(0);
125 ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
126 const auto out_shape = current_output_tensor->desc().shape;
127 const auto qinfo_out = current_output_tensor->desc().quant_info;
128
129 bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) && (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr);
130
131 // Specify conditions with which input can be in-placed
132 input_can_in_place &= weight_layout == input_tensor->desc().layout && weight_layout == DataLayout::NHWC;
133
134 const int weights_width_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::WIDTH);
135 const int weights_height_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::HEIGHT);
136 const bool is_1x1 = weight_shape[weights_width_idx] == 1U && weight_shape[weights_height_idx] == 1U;
137 input_can_in_place &= is_1x1;
138
139 input_can_in_place &= depth_multiplier == 1;
140 input_can_in_place &= conv_info.stride() == std::make_pair(1U, 1U);
141 input_can_in_place &= !conv_info.has_padding();
142 // NOTE: Dilation should also be (1, 1). However currently dilation is not supported in the depthwise conv node
143
144 if(input_can_in_place)
145 {
146 set_new_output_and_inherit_accessor(node, current_output_tensor, input_tensor);
147 }
148 else
149 {
150 ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n");
151 }
152}
153
Sheri Zhang1d359272021-06-10 13:56:11 +0100154// Try to mutate the node to perform the elementwise in-place calculation
155void try_in_place_elementwise(std::unique_ptr<INode> &node)
156{
157 // Get input edge
158 Edge *input0_edge = node->input_edge(0);
159 Edge *input1_edge = node->input_edge(1);
160 ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr);
161
162 auto input0_tensor = input0_edge->tensor();
163 auto input1_tensor = input1_edge->tensor();
164 ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr);
165
166 const auto shape0 = input0_tensor->desc().shape;
167 const auto shape1 = input1_tensor->desc().shape;
168 const auto qinfo0 = input0_tensor->desc().quant_info;
169 const auto qinfo1 = input1_tensor->desc().quant_info;
170
171 const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
172 // Inputs are not broadcast compatible
173 if(out_shape.total_size() == 0)
174 {
175 return;
176 }
177
178 // Get current output tensor
179 auto current_output_tensor = node->output(0);
180 ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
181 const auto qinfo_out = current_output_tensor->desc().quant_info;
182
183 // Can do in place, if the input has same shape as output, has same quntisation info as output, and input doesn't have accessor.
184 bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (input0_tensor->accessor() == nullptr);
185 bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (input1_tensor->accessor() == nullptr);
186
187 if(input0_can_in_place)
188 {
189 set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor);
190 }
191 else if(input1_can_in_place)
192 {
193 set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor);
194 }
195 else
196 {
197 ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n");
198 }
199}
Michele Di Giorgio294f6ff2020-06-19 12:11:06 +0100200} // namespace
201
Georgios Pinitasd8734b52017-12-22 15:27:52 +0000202const char *InPlaceOperationMutator::name()
203{
204 return "InPlaceOperationMutator";
205}
206
Georgios Pinitasf4261ad2019-12-02 11:58:19 +0000207IGraphMutator::MutationType InPlaceOperationMutator::type() const
208{
209 return IGraphMutator::MutationType::Backend;
210}
211
Georgios Pinitasd8734b52017-12-22 15:27:52 +0000212void InPlaceOperationMutator::mutate(Graph &g)
213{
Michele Di Giorgio3be0b8c2020-06-18 15:28:54 +0100214 std::set<NodeType> in_place_nodes =
215 {
216 NodeType::ActivationLayer,
217 NodeType::BatchNormalizationLayer,
218 NodeType::EltwiseLayer,
Manuel Bottini80feed52020-06-03 13:20:41 +0100219 NodeType::UnaryEltwiseLayer,
SiCongLibc4e3112021-06-29 13:18:30 +0100220 NodeType::DepthwiseConvolutionLayer,
221 NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer,
Michele Di Giorgio3be0b8c2020-06-18 15:28:54 +0100222 NodeType::PrintLayer
223 };
Georgios Pinitasd8734b52017-12-22 15:27:52 +0000224
225 // Not interested in the order of nodes
226 for(auto &node : g.nodes())
227 {
228 if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
229 {
230 // Get input edge
231 Edge *input_edge = node->input_edge(0);
232
233 // Check if parent has a single output if yes then force in place calculation else not
Michele Di Giorgio294f6ff2020-06-19 12:11:06 +0100234 if((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge))
Georgios Pinitasd8734b52017-12-22 15:27:52 +0000235 {
Sheri Zhang1d359272021-06-10 13:56:11 +0100236 if(node->type() == NodeType::EltwiseLayer)
Isabella Gottardi2ea37612019-07-16 11:48:51 +0100237 {
Sheri Zhang1d359272021-06-10 13:56:11 +0100238 try_in_place_elementwise(node);
Isabella Gottardi2ea37612019-07-16 11:48:51 +0100239 }
SiCongLibc4e3112021-06-29 13:18:30 +0100240 else if(node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer || node->type() == NodeType::DepthwiseConvolutionLayer)
241 {
242 try_in_place_depthwiseconv(node);
243 }
Isabella Gottardi2ea37612019-07-16 11:48:51 +0100244 else
Georgios Pinitasd3a78ab2018-06-18 15:35:09 +0100245 {
Sheri Zhang1d359272021-06-10 13:56:11 +0100246 // Get current and new output tensors
247 auto current_output_tensor = node->output(0);
248 auto new_output_tensor = input_edge->tensor();
249
250 ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
251
252 // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different
253 if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
254 {
255 ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n");
256 }
257 else
258 {
259 set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor);
260 }
Georgios Pinitasd3a78ab2018-06-18 15:35:09 +0100261 }
Georgios Pinitasd8734b52017-12-22 15:27:52 +0000262 }
263 }
264 }
265}
Georgios Pinitasd9eb2752018-04-03 13:44:29 +0100266} // namespace graph
Georgios Pinitasd8734b52017-12-22 15:27:52 +0000267} // namespace arm_compute