APPBROWSER-391: Fix GLES COMPUTE alignment issues APPBROWSER-402: Performance optimization for squeezenet/xray model Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>

commit: 4406fd6cc4abded564d3791324e1f48bdfd34273 [log] [tgz]
author: Frank Lei <frank.lei@arm.com> Thu Feb 01 14:47:14 2018 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> Fri Nov 02 16:47:40 2018 +0000
tree: 22fe402fe9ac7ca338df49e9eccd6eb1587ae875
parent: 898d399a0f62c15612a52df4bff5018e783214e4 [diff] [blame]
diff --git a/src/core/GLES_COMPUTE/cs_shaders/scale.cs b/src/core/GLES_COMPUTE/cs_shaders/scale.cs
index b2689a2..b72c339 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/scale.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/scale.cs

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,23 +29,23 @@
 // We DO have to use highp for DATA_TYPE_FP16 float here to calculate the coordinates of source tensor. float is highp by default, but we still write it down here to make it more clearly, and mediump is only used for src/dst tensor in shader body.
 precision highp float;
 
-/** Performs an affine transformation on an image interpolating with the NEAREAST NEIGHBOUR method. Input and output are single channel FP16.
+/** Performs an affine transformation on an tensor interpolating with the NEAREAST NEIGHBOUR method. Input and output are single channel FP16.
  *
  * @param[in]  src_ptr      Pointer to the source tensor. Supported data types: FP16.
  * @param[in]  src_attrs    The attributes of the source tensor
  * @param[out] dst_ptr      Pointer to the destination tensor. Supported data types: FP16. (Must be the same as the input)
  * @param[in]  dst_attrs    The attributes of the destination tensor
- * @param[in]  input_width  Input image width
- * @param[in]  input_height Input image height
+ * @param[in]  input_width  Input tensor width
+ * @param[in]  input_height Input tensor height
  * @param[in]  scale        The scale factor along x/y dimension
  */
 SHADER_PARAMS_DECLARATION
 {
-    ImageAttributes src_attrs;
-    ImageAttributes dst_attrs;
-    float           input_width;
-    float           input_height;
-    vec2            scale;
+    Tensor3DAttributes src_attrs;
+    Tensor3DAttributes dst_attrs;
+    float              input_width;
+    float              input_height;
+    vec2               scale;
 };
 
 #if defined(DATA_TYPE_FP16)
@@ -75,8 +75,8 @@
 
 void main()
 {
-    ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
-    ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);
+    Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(src_attrs, src_shift);
+    Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
 
     vec4[2] tc = clamp_to_border_with_size(transform_nearest(vec2(gl_GlobalInvocationID.x << uint(2), gl_GlobalInvocationID.y), scale), input_width, input_height, float(BORDER_SIZE));
 
@@ -85,7 +85,7 @@
 
     for(int i = 0; i < 4; i++)
     {
-        uint offset_in_bytes = image_offset_in_bytes(src_iter, int(tc[0][i]), int(tc[1][i]));
+        uint offset_in_bytes = tensor3D_offset_in_bytes(src_iter, int(tc[0][i]), int(tc[1][i]), int(gl_GlobalInvocationID.z));
 
         s = LOAD_UNPACK2_HALF(src_ptr, uint(offset_in_bytes >> src_shift));
 
@@ -107,15 +107,15 @@
 
 void main()
 {
-    ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
-    ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);
+    Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(src_attrs, src_shift);
+    Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
 
     uvec2 tc = uvec2(gl_GlobalInvocationID.x << uint(2), gl_GlobalInvocationID.y >> uint(1));
 
     mediump vec4 s = vec4(0.0f);
     mediump      vec4[2] d;
 
-    s = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, int(tc[0]), int(tc[1])));
+    s = LOAD_UNPACK4_HALF(src_ptr, TENSOR3D_OFFSET(src_iter, int(tc[0]), int(tc[1]), int(gl_GlobalInvocationID.z)));
 
     d[0] = vec4(s.x, s.x, s.y, s.y);
     d[1] = vec4(s.z, s.z, s.w, s.w);
commit	4406fd6cc4abded564d3791324e1f48bdfd34273	[log] [tgz]
author	Frank Lei <frank.lei@arm.com>	Thu Feb 01 14:47:14 2018 +0800
committer	Anthony Barbier <anthony.barbier@arm.com>	Fri Nov 02 16:47:40 2018 +0000
tree	22fe402fe9ac7ca338df49e9eccd6eb1587ae875
parent	898d399a0f62c15612a52df4bff5018e783214e4 [diff] [blame]