COMPMID-2451: Use kernel lws_hint() on enqueue
Avoid querying the device's default local work-group size (LWS) on every
kernel enqueue; pass the kernel's lws_hint() instead, as this value is
already cached in the kernel during configuration.
Change-Id: Ia26ecb712caeb8f042356815e0cfd23522764d27
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1803
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
index 0c1206a..557046e 100644
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
+++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -96,7 +96,7 @@
add_2D_tensor_argument(idx, _input1, slice);
add_2D_tensor_argument(idx, _input2, slice);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
index aa06d3a..075a7b1 100644
--- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
@@ -173,7 +173,7 @@
add_1D_tensor_argument(idx, _block_shape, vector_slice);
}
add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
+ enqueue(queue, *this, slice_in, lws_hint());
++batch_id;
}
diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
index dd301cd..2d05f2e 100644
--- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -82,7 +82,7 @@
add_2D_tensor_argument(idx, _input1, slice);
add_2D_tensor_argument(idx, _input2, slice);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
index aa84618..b3efab8 100644
--- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,7 +83,7 @@
add_2D_tensor_argument(idx, _input1, slice);
add_2D_tensor_argument(idx, _input2, slice);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
index ad1f923..d8ac486 100644
--- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,7 +83,7 @@
add_2D_tensor_argument(idx, _input1, slice);
add_2D_tensor_argument(idx, _input2, slice);
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
index 7c30a94..045bd02 100644
--- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
+++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
@@ -127,6 +127,6 @@
add_2D_tensor_argument(idx, _deltas, slice);
// Note that we don't need to loop over the slices, as we are sure that we are dealing with all 2D tensors
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
index 126614a..d029efe 100644
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp
@@ -281,7 +281,7 @@
_kernel.setArg(idx++, slice.y().end());
}
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp
index 8bddba8..d2a0f98 100644
--- a/src/core/CL/kernels/CLChannelExtractKernel.cpp
+++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -181,7 +181,7 @@
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, slice);
add_2D_tensor_argument(idx, _output, win_sub);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
index ace3fd5..8185676 100644
--- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -107,6 +107,6 @@
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, window);
add_2D_tensor_argument(idx, _output, window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp
index c87768a..c49251b 100644
--- a/src/core/CL/kernels/CLCopyKernel.cpp
+++ b/src/core/CL/kernels/CLCopyKernel.cpp
@@ -259,7 +259,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && _output_window.slide_window_slice_3D(out_slice));
}
@@ -272,7 +272,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLCropKernel.cpp b/src/core/CL/kernels/CLCropKernel.cpp
index f8a2456..90e054b 100644
--- a/src/core/CL/kernels/CLCropKernel.cpp
+++ b/src/core/CL/kernels/CLCropKernel.cpp
@@ -127,6 +127,6 @@
add_3D_tensor_argument(idx, _output, window);
add_argument(idx, _start.x);
add_argument(idx, _start.y);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 295fb5c..50f6546 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -129,7 +129,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out));
break;
@@ -148,7 +148,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
break;
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 2d8de75..c985811 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -147,7 +147,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
index f2d65e8..49a5590 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
@@ -119,7 +119,7 @@
add_3D_tensor_argument(idx, _input, slice_in);
add_argument(idx, batch_id);
add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
+ enqueue(queue, *this, slice_in, lws_hint());
++batch_id;
}
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp
index b73ccf5..582c600 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.cpp
@@ -141,7 +141,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_2D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice) && window.slide_window_slice_2D(slice_out));
}
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
index 6b6438a..1fd6312 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp
@@ -121,6 +121,6 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, window);
add_2D_tensor_argument(idx, _output, window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
index 2dad729..0f029fd 100644
--- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp
@@ -119,7 +119,7 @@
unsigned int idx = 0;
add_1D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_1D(slice) && window.slide_window_slice_3D(slice_out));
}
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 12d36cd..10a2878 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -131,7 +131,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
index 03edcef..c4ab504 100644
--- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
+++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp
@@ -128,7 +128,7 @@
unsigned int idx = 0;
add_1D_tensor_argument(idx, _input, collapsed);
add_1D_tensor_argument(idx, _output, collapsed);
- enqueue(queue, *this, collapsed);
+ enqueue(queue, *this, collapsed, lws_hint());
}
while(window.slide_window_slice_1D(collapsed));
}
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index c63b3be..475a52b 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -180,7 +180,7 @@
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _tensor, slice);
- enqueue(queue, *this, slice, cl::NullRange);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp
index 831173d..7c39d87 100644
--- a/src/core/CL/kernels/CLFloorKernel.cpp
+++ b/src/core/CL/kernels/CLFloorKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -124,7 +124,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
index bf827bf..423e583 100644
--- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
@@ -209,6 +209,6 @@
{
add_1D_tensor_argument(idx, _bn_gamma, slice_1d);
}
- enqueue(queue, *this, slice_3d);
+ enqueue(queue, *this, slice_3d, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
index 557e82d..8720123 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp
@@ -174,7 +174,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx1, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
index b7eff0f..7c06638 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -176,7 +176,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx1, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
index b7730d5..2967a73 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -173,7 +173,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx1, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 621bd2b..8175f60 100644
--- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -170,7 +170,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx1, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index 0c0b0ec..3a59b43 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -214,7 +214,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_2D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(collapsed.slide_window_slice_2D(slice_out));
}
diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
index 5b9e68d..2ca4132 100644
--- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
@@ -166,7 +166,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_3D(slice));
}
\ No newline at end of file
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index 412821b..4e91cf2 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -131,6 +131,6 @@
add_4D_tensor_argument(idx, _input, window_collapsed);
add_1D_tensor_argument(idx, _indices, window_collapsed);
add_4D_tensor_argument(idx, _output, window_collapsed);
- enqueue(queue, *this, window_collapsed);
+ enqueue(queue, *this, window_collapsed, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
index ab95ddc..79e364c 100644
--- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
+++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
@@ -124,6 +124,6 @@
// Note that we don't need to loop over the slices, as we are launching exactly
// as many threads as all the anchors generated
- enqueue(queue, *this, collapsed);
+ enqueue(queue, *this, collapsed, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
index 70af541..ea292c0 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
@@ -162,5 +162,5 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, window);
add_4D_tensor_argument(idx, _output, window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 8816138..2ef977b 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -178,7 +178,7 @@
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _sum, sum_slice);
add_2D_tensor_argument(idx, _output, in_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
}
@@ -194,7 +194,7 @@
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _sum, sum_slice);
add_2D_tensor_argument(idx, _output, in_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
}
@@ -210,7 +210,7 @@
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _sum, sum_slice);
add_3D_tensor_argument(idx, _output, in_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice));
}
diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp
index 40ed630..68a210c 100644
--- a/src/core/CL/kernels/CLLKTrackerKernel.cpp
+++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -83,7 +83,7 @@
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
@@ -112,7 +112,7 @@
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
CLLKTrackerStage0Kernel::CLLKTrackerStage0Kernel()
@@ -202,7 +202,7 @@
add_2D_tensor_argument(idx, _old_scharr_gx, window);
add_2D_tensor_argument(idx, _old_scharr_gy, window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel()
@@ -284,5 +284,5 @@
unsigned int idx = 0;
add_2D_tensor_argument(idx, _new_input, window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index 0cde9c5..7bfd6d6 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -132,7 +132,7 @@
// Set slice step equal to height to force gws[1] to 1,
// as each thread calculates the sum across all rows and columns equal to the number of elements processed by each work-item
slice.set_dimension_step(Window::DimY, _input->info()->dimension(1));
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
diff --git a/src/core/CL/kernels/CLMemsetKernel.cpp b/src/core/CL/kernels/CLMemsetKernel.cpp
index 80caf94..1426420 100644
--- a/src/core/CL/kernels/CLMemsetKernel.cpp
+++ b/src/core/CL/kernels/CLMemsetKernel.cpp
@@ -103,7 +103,7 @@
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _tensor, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
index 92b5f8d..0591274 100644
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
@@ -157,7 +157,7 @@
// Set inputs
add_3D_tensor_argument(idx, _input, slice);
add_1D_tensor_argument(idx, _output, output_slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
index 0c7f3bc..e865b45 100644
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -134,7 +134,7 @@
{
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
@@ -226,7 +226,7 @@
{
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index d35164d..5270e31 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -136,7 +136,7 @@
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp
index a5fc1a7..9cb72b3 100644
--- a/src/core/CL/kernels/CLPermuteKernel.cpp
+++ b/src/core/CL/kernels/CLPermuteKernel.cpp
@@ -141,7 +141,7 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, slice_in);
add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
+ enqueue(queue, *this, slice_in, lws_hint());
}
while(window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
}
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index 76403d1..50cdc9c 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -274,7 +274,7 @@
add_3D_tensor_argument(idx, _input1, slice_input1);
add_3D_tensor_argument(idx, _input2, slice_input2);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
@@ -409,7 +409,7 @@
add_3D_tensor_argument(idx, _input1, slice_input1);
add_3D_tensor_argument(idx, _input2, slice_input2);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
index c76d839..12ea193 100644
--- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -214,6 +214,6 @@
unsigned int idx = 0;
add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
index 22d4e33..493255f 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
@@ -126,7 +126,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
index 47dc62c..50729f2 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
@@ -149,6 +149,6 @@
add_argument<cl_uint>(idx, _input->info()->strides_in_bytes()[3]);
add_argument<cl_uint>(idx, _output->info()->strides_in_bytes()[3]);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index c32ec1b..9858a4d 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -143,6 +143,6 @@
add_argument<cl_uint>(idx, _input->info()->strides_in_bytes()[3]);
add_argument<cl_uint>(idx, _output->info()->strides_in_bytes()[3]);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 9db8ae6..ee33a72 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -285,7 +285,7 @@
unsigned int idx = 0;
add_1D_tensor_argument(idx, _input, in_slice);
add_1D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window_in.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(out_slice));
}
@@ -331,7 +331,7 @@
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
}
@@ -349,7 +349,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice));
}
@@ -367,7 +367,7 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, in_slice);
add_4D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice));
}
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index 33c5f2d..12161fc 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -105,7 +105,7 @@
add_2D_tensor_argument(idx, _output, slice);
add_2D_tensor_argument(idx, _map_x, slice);
add_2D_tensor_argument(idx, _map_y, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
index aa1339d..040e442 100644
--- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -122,6 +122,6 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, window_collapsed);
add_3D_tensor_argument(idx, _output, window_collapsed);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
/** [CLReshapeLayerKernel Kernel] **/
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
index 1c1fedc..94b0d38 100644
--- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
@@ -118,7 +118,7 @@
add_2D_tensor_argument_if((_run_scharr_x), idx, _output_x, slice);
add_2D_tensor_argument_if((_run_scharr_y), idx, _output_y, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window.slide_window_slice_2D(slice));
}
diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
index d2f676b..a3441cd 100644
--- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
@@ -201,7 +201,7 @@
add_argument(idx, batch_id);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
++batch_id;
}
while(window.slide_window_slice_3D(slice_out));
diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
index 57d8305..20454b3 100644
--- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
@@ -120,7 +120,7 @@
add_4D_tensor_argument(idx, _input, slice_in);
add_argument(idx, batch_id);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
++batch_id;
}
diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp
index ccbe1fc..d5bbae6 100644
--- a/src/core/CL/kernels/CLStackLayerKernel.cpp
+++ b/src/core/CL/kernels/CLStackLayerKernel.cpp
@@ -132,5 +132,5 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, slice_in);
add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
+ enqueue(queue, *this, slice_in, lws_hint());
}
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index 7559e7a..bcd1bdc 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -145,7 +145,7 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, slice);
add_4D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_4D(slice));
}
diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
index ce5ed86..331b02d 100644
--- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
@@ -160,7 +160,7 @@
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
+ enqueue(queue, *this, slice_out, lws_hint());
}
while(collapsed_window.slide_window_slice_3D(slice_out) && collapsed_window.slide_window_slice_3D(slice_in));
}
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 590af4d..9330b3b 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -156,7 +156,7 @@
}
// Run kernel
- enqueue(queue, *this, in_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
}
while(window.slide_window_slice_4D(in_slice) && out_window.slide_window_slice_2D(out_slice));
}
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index 88cdde7..92ffc9f 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -137,6 +137,6 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, window);
add_4D_tensor_argument(idx, _output, window);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
index 55cc465..5a508e4 100644
--- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
+++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -150,5 +150,5 @@
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, window);
add_3D_tensor_argument(idx, _output, window_out);
- enqueue(queue, *this, window);
+ enqueue(queue, *this, window, lws_hint());
}