/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "src/runtime/gpu/cl/operators/ClPixelWiseMultiplication.h"
25
26#include "arm_compute/runtime/CL/CLScheduler.h"
27#include "src/core/CL/kernels/CLFillBorderKernel.h"
28#include "src/core/gpu/cl/ClCompileContext.h"
29#include "src/core/gpu/cl/kernels/ClPixelWiseMultiplicationKernel.h"
30
31namespace arm_compute
32{
33namespace opencl
34{
35namespace
36{
37ITensorPack select_border_input(ITensorPack &tensors)
38{
39 ITensorPack pack;
40 if(tensors.get_tensor(TensorType::ACL_DST)->info()->dimension(0) > 1)
41 {
42 if(tensors.get_const_tensor(TensorType::ACL_SRC_1)->info()->dimension(0) == 1)
43 {
44 pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_1));
45 }
46 else
47 {
48 pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(TensorType::ACL_SRC_0));
49 }
50 }
51 return pack;
52}
53} // namespace
54
55void ClPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
56 ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
57{
58 auto k = std::make_unique<kernels::ClPixelWiseMultiplicationKernel>();
59 k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
60 _kernel = std::move(k);
61
62 if(dst->dimension(0) > 1)
63 {
64 ITensorInfo *broadcasted_info = (src1->dimension(0) == 1) ? src1 : src2;
65
66 if(broadcasted_info->dimension(0) == 1)
67 {
68 auto b = std::make_unique<CLFillBorderKernel>();
69 b->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
70 _border_handler = std::move(b);
71 }
72 }
73}
74
75Status ClPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
76 ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
77{
78 return kernels::ClPixelWiseMultiplicationKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
79}
80
81void ClPixelWiseMultiplication::run(ITensorPack &tensors)
82{
83 if(_border_handler)
84 {
85 auto border_pack = select_border_input(tensors);
86 CLScheduler::get().enqueue_op(*_border_handler, border_pack);
87 }
88 ICLOperator::run(tensors);
89}
90
91void ClComplexPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
92{
93 auto k = std::make_unique<kernels::ClComplexPixelWiseMultiplicationKernel>();
94 k->configure(compile_context, src1, src2, dst, act_info);
95 _kernel = std::move(k);
96
97 if(dst->dimension(0) > 1)
98 {
99 ITensorInfo *broadcasted_info = (src1->dimension(0) == 1) ? src1 : src2;
100
101 if(broadcasted_info->dimension(0) == 1)
102 {
103 auto b = std::make_unique<CLFillBorderKernel>();
104 b->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
105 _border_handler = std::move(b);
106 }
107 }
108}
109
110Status ClComplexPixelWiseMultiplication::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
111{
112 return kernels::ClComplexPixelWiseMultiplicationKernel::validate(src1, src2, dst, act_info);
113}
114
115void ClComplexPixelWiseMultiplication::run(ITensorPack &tensors)
116{
117 if(_border_handler)
118 {
119 auto border_pack = select_border_input(tensors);
120 CLScheduler::get().enqueue_op(*_border_handler, border_pack);
121 }
122 ICLOperator::run(tensors);
123}
124} // namespace opencl
125} // namespace arm_compute