blob: ac94b693e376b3b8b41107b87fdb3f8440ea042f [file] [log] [blame]
Giorgio Arena93a690e2017-08-01 16:09:33 +01001/*
Gian Marcoc799ed82018-02-01 16:57:48 +00002 * Copyright (c) 2017-2018 ARM Limited.
Giorgio Arena93a690e2017-08-01 16:09:33 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "helpers.h"
26
#if defined(CONV_STRIDE_X)

/* Select the 1x3 horizontal convolution implementation matching the
 * compile-time horizontal stride. CONV_STRIDE_X must be 1, 2 or 3;
 * any other value is rejected at build time. */
#if CONV_STRIDE_X == 1
#define convolution1x3 convolution1x3_stride_1
#elif CONV_STRIDE_X == 2
#define convolution1x3 convolution1x3_stride_2
#elif CONV_STRIDE_X == 3
#define convolution1x3 convolution1x3_stride_3
#else /* CONV_STRIDE_X */
#error "Stride not supported"
#endif /* CONV_STRIDE_X */
38
39/** Compute a 1D horizontal convolution of size 3 and stride 1 for floating point type.
40 *
41 * @param[in] left_pixel Pointer to the left pixel.
42 * @param[in] left_coeff Weight of the left pixel
43 * @param[in] middle_coeff Weight of the middle pixel
44 * @param[in] right_coeff Weight of the right pixel
45 *
46 * @return a float2 containing 2 convoluted values.
47 */
48inline float2 convolution1x3_stride_1(__global const uchar *left_pixel,
49 const float left_coeff,
50 const float middle_coeff,
51 const float right_coeff)
52{
53 float4 temp = vload4(0, (__global float *)left_pixel);
54
55 float2 left = CONVERT(temp.s01, float2);
56 float2 middle = CONVERT(temp.s12, float2);
57 float2 right = CONVERT(temp.s23, float2);
58
59 return left * (float2)left_coeff + middle * (float2)middle_coeff + right * (float2)right_coeff;
60}
61
62/** Compute a 1D horizontal convolution of size 3 and stride 2 for floating point type.
63 *
64 * @param[in] left_pixel Pointer to the left pixel.
65 * @param[in] left_coeff Weight of the left pixel
66 * @param[in] middle_coeff Weight of the middle pixel
67 * @param[in] right_coeff Weight of the right pixel
68 *
69 * @return a float2 containing 2 convoluted values.
70 */
71inline float2 convolution1x3_stride_2(__global const uchar *left_pixel,
72 const float left_coeff,
73 const float middle_coeff,
74 const float right_coeff)
75{
76 float4 temp0 = vload4(0, (__global float *)left_pixel);
77 float temp1 = *((__global float *)(left_pixel + 4 * sizeof(float)));
78
79 float2 left = CONVERT(temp0.s02, float2);
80 float2 middle = CONVERT(temp0.s13, float2);
81 float2 right = CONVERT((float2)(temp0.s2, temp1), float2);
82
83 return left * (float2)left_coeff + middle * (float2)middle_coeff + right * (float2)right_coeff;
84}
85
86/** Compute a 1D horizontal convolution of size 3 and stride 3 for floating point type.
87 *
88 * @param[in] left_pixel Pointer to the left pixel.
89 * @param[in] left_coeff Weight of the left pixel
90 * @param[in] middle_coeff Weight of the middle pixel
91 * @param[in] right_coeff Weight of the right pixel
92 *
93 * @return a float2 containing 2 convoluted values.
94 */
95inline float2 convolution1x3_stride_3(__global const uchar *left_pixel,
96 const float left_coeff,
97 const float middle_coeff,
98 const float right_coeff)
99{
100 float4 temp0 = vload4(0, (__global float *)left_pixel);
101 float2 temp1 = vload2(0, (__global float *)(left_pixel + 4 * sizeof(float)));
102
103 float2 left = CONVERT(temp0.s03, float2);
104 float2 middle = CONVERT((float2)(temp0.s1, temp1.s0), float2);
105 float2 right = CONVERT((float2)(temp0.s2, temp1.s1), float2);
106
107 return left * (float2)left_coeff + middle * (float2)middle_coeff + right * (float2)right_coeff;
108}
109
/** Apply a 3x3 convolution matrix to a single channel F32 input image and return the result.
 *
 * Convolution matrix layout:
 *
 * [ mat0, mat1, mat2 ]\n
 * [ mat3, mat4, mat5 ]\n
 * [ mat6, mat7, mat8 ]\n
 *
 * @param[in] src  A pointer to source Image structure
 * @param[in] mat0 Coefficient from the convolution matrix
 * @param[in] mat1 Coefficient from the convolution matrix
 * @param[in] mat2 Coefficient from the convolution matrix
 * @param[in] mat3 Coefficient from the convolution matrix
 * @param[in] mat4 Coefficient from the convolution matrix
 * @param[in] mat5 Coefficient from the convolution matrix
 * @param[in] mat6 Coefficient from the convolution matrix
 * @param[in] mat7 Coefficient from the convolution matrix
 * @param[in] mat8 Coefficient from the convolution matrix
 *
 * @return a float2 containing 2 convoluted values.
 */
inline float2 convolution3x3(
    Image      *src,
    const float mat0, const float mat1, const float mat2,
    const float mat3, const float mat4, const float mat5,
    const float mat6, const float mat7, const float mat8)
{
    float2 pixels;

    // Accumulate one 1x3 horizontal convolution per row of the 3x3 window
    pixels = convolution1x3(offset(src, 0, 0), mat0, mat1, mat2);
    pixels += convolution1x3(offset(src, 0, 1), mat3, mat4, mat5);
    pixels += convolution1x3(offset(src, 0, 2), mat6, mat7, mat8);

    return pixels;
}
146
/** This OpenCL kernel computes the depthwise convolution 3x3
 *
 * @note The horizontal stride must be passed at compile time using -DCONV_STRIDE_X (1, 2 or 3).
 *
 * @param[in] src_ptr                               Pointer to the source image. Supported data types: F32
 * @param[in] src_stride_x                          Stride of the source image in X dimension (in bytes)
 * @param[in] src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y                          Stride of the source image in Y dimension (in bytes)
 * @param[in] src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] src_offset_first_element_in_bytes     The offset of the first element in the source image
 * @param[in] src_stride_z                          Stride of the source tensor in Z dimension (in bytes)
 * @param[in] src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_ptr                               Pointer to the destination tensor. Supported data types: F32
 * @param[in] dst_stride_x                          Stride of the destination tensor in X dimension (in bytes)
 * @param[in] dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes)
 * @param[in] dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes)
 * @param[in] dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_offset_first_element_in_bytes    The offset of the first element in the destination tensor
 * @param[in] weights_ptr                           Pointer to the weights tensor. Supported data types: F32
 * @param[in] weights_stride_x                      Stride of the weights tensor in X dimension (in bytes)
 * @param[in] weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes)
 * @param[in] weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes)
 * @param[in] weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
 * @param[in] biases_ptr                            (Optional) Pointer to the biases vector. Supported data types: F32
 * @param[in] biases_stride_x                       (Optional) Stride of the biases vector in X dimension (in bytes)
 * @param[in] biases_step_x                         (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] biases_offset_first_element_in_bytes  (Optional) The offset of the first element in the biases vector
 */
__kernel void depthwise_convolution_3x3(
    TENSOR3D_DECLARATION(src),
    TENSOR3D_DECLARATION(dst),
    TENSOR3D_DECLARATION(weights)
#if defined(HAS_BIAS)
    ,
    VECTOR_DECLARATION(biases)
#endif //defined(HAS_BIAS)
)
{
    Image    src     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
    Image    dst     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
    Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT(weights);
#if defined(HAS_BIAS)
    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
#endif //defined(HAS_BIAS)

    // Byte offsets of the three weight rows (row i starts at i * weights_stride_y)
    uchar3 offset          = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y;
    float3 weights_values0 = vload3(0, (__global float *)(weights.ptr + offset.s0));
    float3 weights_values1 = vload3(0, (__global float *)(weights.ptr + offset.s1));
    float3 weights_values2 = vload3(0, (__global float *)(weights.ptr + offset.s2));

    // Two horizontally-adjacent output values per work-item
    float2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2,
                                   weights_values1.s0, weights_values1.s1, weights_values1.s2,
                                   weights_values2.s0, weights_values2.s1, weights_values2.s2);
#if defined(HAS_BIAS)
    // One bias value per channel, indexed by the Z (channel) global id
    pixels += (float2)(*((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x)));
#endif //defined(HAS_BIAS)

    vstore2(pixels, 0, (__global float *)dst.ptr);
}
Giorgio Arena9fe41442017-08-23 16:36:24 +0100209#endif //defined(CONV_STRIDE_X)
210
/** Accumulate a 1x3 convolution at stride 1 for two adjacent outputs into acc.
 *
 * acc.s0 accumulates src0.s012 * weights_row0, acc.s1 accumulates src0.s123 * weights_row0.
 * Written as explicit fma chains to map onto Bifrost's fused multiply-add pipelines.
 */
#define CONVOLUTION1x3_BIFROST2X1_STRIDE1(acc, src0, weights_row0) \
    ({                                                             \
        acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0);            \
        acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0);            \
        acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0);            \
        acc.s1 = fma(src0.s1, weights_row0.s0, acc.s1);            \
        acc.s1 = fma(src0.s2, weights_row0.s1, acc.s1);            \
        acc.s1 = fma(src0.s3, weights_row0.s2, acc.s1);            \
    })
220
/** Accumulate a 1x3 convolution at stride 2 for two adjacent outputs into acc.
 *
 * acc.s0 accumulates src0.s012 * weights_row0; acc.s1 accumulates
 * (src0.s2, src0.s3, src1.s0) * weights_row0 — the fifth input element comes
 * from src1. Written as explicit fma chains for Bifrost's FMA pipelines.
 */
#define CONVOLUTION1x3_BIFROST2X1_STRIDE2(acc, src0, src1, weights_row0) \
    ({                                                                   \
        acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0);                  \
        acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0);                  \
        acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0);                  \
        acc.s1 = fma(src0.s2, weights_row0.s0, acc.s1);                  \
        acc.s1 = fma(src0.s3, weights_row0.s1, acc.s1);                  \
        acc.s1 = fma(src1.s0, weights_row0.s2, acc.s1);                  \
    })
230
/** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both
 * stride_x and stride_y are equal to 1
 *
 * @param[in] src_ptr                               Pointer to the source image. Supported data types: F32
 * @param[in] src_stride_x                          Stride of the source image in X dimension (in bytes)
 * @param[in] src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y                          Stride of the source image in Y dimension (in bytes)
 * @param[in] src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] src_offset_first_element_in_bytes     The offset of the first element in the source image
 * @param[in] src_stride_z                          Stride of the source tensor in Z dimension (in bytes)
 * @param[in] src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_ptr                               Pointer to the destination tensor. Supported data types: F32
 * @param[in] dst_stride_x                          Stride of the destination tensor in X dimension (in bytes)
 * @param[in] dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes)
 * @param[in] dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes)
 * @param[in] dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor
 * @param[in] weights_ptr                           Pointer to the weights tensor. Supported data types: F32
 * @param[in] weights_stride_x                      Stride of the weights tensor in X dimension (in bytes)
 * @param[in] weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes)
 * @param[in] weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes)
 * @param[in] weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
 * @param[in] biases_ptr                            (Optional) Pointer to the biases vector. Supported data types: F32
 * @param[in] biases_stride_x                       (Optional) Stride of the biases vector in X dimension (in bytes)
 * @param[in] biases_step_x                         (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] biases_offset_first_element_in_bytes  (Optional) The offset of the first element in the biases vector
 */
__kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost(
    TENSOR3D_DECLARATION(src),
    TENSOR3D_DECLARATION(dst),
    TENSOR3D_DECLARATION(weights)
#if defined(HAS_BIAS)
    ,
    VECTOR_DECLARATION(biases)
#endif //defined(HAS_BIAS)
)
{
    Image    src     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
    Image    dst     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
    Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT(weights);

    // Accumulators for the 4 output rows (2 output columns each)
    float2 pixels0 = 0.0f;
    float2 pixels1 = 0.0f;
    float2 pixels2 = 0.0f;
    float2 pixels3 = 0.0f;

    __global uchar *weights_addr = (__global uchar *)weights.ptr;
    __global uchar *src_addr     = (__global uchar *)offset(&src, 0, 0);

    // Load the weights
    float3 weights_row0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y));
    float3 weights_row1 = vload3(0, (__global float *)(weights_addr + 1 * weights_stride_y));
    float3 weights_row2 = vload3(0, (__global float *)(weights_addr + 2 * weights_stride_y));

    // Note: Since each work-item computes 4x2 elements, we need to load 6 rows from the input tensor
    float4 src00 = vload4(0, (__global float *)(src_addr + 0 * src_stride_y)); // Row0
    float4 src10 = vload4(0, (__global float *)(src_addr + 1 * src_stride_y)); // Row1
    float4 src20 = vload4(0, (__global float *)(src_addr + 2 * src_stride_y)); // Row2
    float4 src30 = vload4(0, (__global float *)(src_addr + 3 * src_stride_y)); // Row3
    float4 src40 = vload4(0, (__global float *)(src_addr + 4 * src_stride_y)); // Row4
    float4 src50 = vload4(0, (__global float *)(src_addr + 5 * src_stride_y)); // Row5

    // Each output row n convolves input rows n, n+1, n+2 against weight rows 0, 1, 2
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels0, src00, weights_row0);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels0, src10, weights_row1);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels0, src20, weights_row2);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels1, src10, weights_row0);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels1, src20, weights_row1);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels1, src30, weights_row2);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels2, src20, weights_row0);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels2, src30, weights_row1);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels2, src40, weights_row2);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels3, src30, weights_row0);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels3, src40, weights_row1);
    CONVOLUTION1x3_BIFROST2X1_STRIDE1(pixels3, src50, weights_row2);

#ifdef HAS_BIAS
    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);

    // One bias value per channel (Z), added to all 8 outputs
    float bias = *((__global float *)(vector_offset(&biases, get_global_id(2))));

    pixels0 += (float2)bias;
    pixels1 += (float2)bias;
    pixels2 += (float2)bias;
    pixels3 += (float2)bias;
#endif /* defined(HAS_BIAS) */

    vstore2(pixels0, 0, (__global float *)(dst.ptr + 0 * dst_stride_y));
    vstore2(pixels1, 0, (__global float *)(dst.ptr + 1 * dst_stride_y));
    vstore2(pixels2, 0, (__global float *)(dst.ptr + 2 * dst_stride_y));
    vstore2(pixels3, 0, (__global float *)(dst.ptr + 3 * dst_stride_y));
}
327
/** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both
 * stride_x and stride_y are equal to 2
 *
 * @param[in] src_ptr                               Pointer to the source image. Supported data types: F32
 * @param[in] src_stride_x                          Stride of the source image in X dimension (in bytes)
 * @param[in] src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y                          Stride of the source image in Y dimension (in bytes)
 * @param[in] src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] src_offset_first_element_in_bytes     The offset of the first element in the source image
 * @param[in] src_stride_z                          Stride of the source tensor in Z dimension (in bytes)
 * @param[in] src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_ptr                               Pointer to the destination tensor. Supported data types: F32
 * @param[in] dst_stride_x                          Stride of the destination tensor in X dimension (in bytes)
 * @param[in] dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes)
 * @param[in] dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes)
 * @param[in] dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor
 * @param[in] weights_ptr                           Pointer to the weights tensor. Supported data types: F32
 * @param[in] weights_stride_x                      Stride of the weights tensor in X dimension (in bytes)
 * @param[in] weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes)
 * @param[in] weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes)
 * @param[in] weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
 * @param[in] biases_ptr                            (Optional) Pointer to the biases vector. Supported data types: F32
 * @param[in] biases_stride_x                       (Optional) Stride of the biases vector in X dimension (in bytes)
 * @param[in] biases_step_x                         (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] biases_offset_first_element_in_bytes  (Optional) The offset of the first element in the biases vector
 */
__kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost(
    TENSOR3D_DECLARATION(src),
    TENSOR3D_DECLARATION(dst),
    TENSOR3D_DECLARATION(weights)
#if defined(HAS_BIAS)
    ,
    VECTOR_DECLARATION(biases)
#endif //defined(HAS_BIAS)
)
{
    Image    src     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
    Image    dst     = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
    Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT(weights);

    // Accumulators for the 2 output rows (2 output columns each)
    float2 pixels0 = 0.0f;
    float2 pixels1 = 0.0f;

    __global uchar *weights_addr = (__global uchar *)weights.ptr;
    __global uchar *src_addr     = (__global uchar *)offset(&src, 0, 0);

    // Load the weights
    float3 weights_row0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y));
    float3 weights_row1 = vload3(0, (__global float *)(weights_addr + 1 * weights_stride_y));
    float3 weights_row2 = vload3(0, (__global float *)(weights_addr + 2 * weights_stride_y));

    // Note: Since each work-item computes 2x2 elements at stride 2, we need to load 5 rows
    // of 6 values each (a float4 plus a float2 tail) from the input tensor
    float4 src00 = vload4(0, (__global float *)(src_addr + 0 * src_stride_y)); // Row0
    float2 src01 = vload2(2, (__global float *)(src_addr + 0 * src_stride_y)); // Row0
    float4 src10 = vload4(0, (__global float *)(src_addr + 1 * src_stride_y)); // Row1
    float2 src11 = vload2(2, (__global float *)(src_addr + 1 * src_stride_y)); // Row1
    float4 src20 = vload4(0, (__global float *)(src_addr + 2 * src_stride_y)); // Row2
    float2 src21 = vload2(2, (__global float *)(src_addr + 2 * src_stride_y)); // Row2
    float4 src30 = vload4(0, (__global float *)(src_addr + 3 * src_stride_y)); // Row3
    float2 src31 = vload2(2, (__global float *)(src_addr + 3 * src_stride_y)); // Row3
    float4 src40 = vload4(0, (__global float *)(src_addr + 4 * src_stride_y)); // Row4
    float2 src41 = vload2(2, (__global float *)(src_addr + 4 * src_stride_y)); // Row4

    // Output row n convolves input rows 2n, 2n+1, 2n+2 against weight rows 0, 1, 2
    CONVOLUTION1x3_BIFROST2X1_STRIDE2(pixels0, src00, src01, weights_row0);
    CONVOLUTION1x3_BIFROST2X1_STRIDE2(pixels0, src10, src11, weights_row1);
    CONVOLUTION1x3_BIFROST2X1_STRIDE2(pixels0, src20, src21, weights_row2);
    CONVOLUTION1x3_BIFROST2X1_STRIDE2(pixels1, src20, src21, weights_row0);
    CONVOLUTION1x3_BIFROST2X1_STRIDE2(pixels1, src30, src31, weights_row1);
    CONVOLUTION1x3_BIFROST2X1_STRIDE2(pixels1, src40, src41, weights_row2);

#ifdef HAS_BIAS
    Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);

    // One bias value per channel (Z), added to all 4 outputs
    float bias = *((__global float *)(vector_offset(&biases, get_global_id(2))));

    pixels0 += (float2)bias;
    pixels1 += (float2)bias;
#endif /* defined(HAS_BIAS) */

    vstore2(pixels0, 0, (__global float *)(dst.ptr + 0 * dst_stride_y));
    vstore2(pixels1, 0, (__global float *)(dst.ptr + 1 * dst_stride_y));
}
416
Giorgio Arena9fe41442017-08-23 16:36:24 +0100417#if defined(SRC_WIDTH) && defined(DATA_TYPE)
418/** This kernel reshapes each of the tensor's low three dimensions to single rows.
419 *
420 * @note Datatype and source width should be given as a preprocessor argument using -DDATA_TYPE=type and -DSRC_WIDTH=width. e.g. -DSRC_WIDTH=128
421 *
Georgios Pinitas81a26ad2017-10-23 20:29:30 +0100422 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
423 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
424 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
425 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
426 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
427 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
428 * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
429 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
430 * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr
431 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
432 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
433 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
434 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
435 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
436 * @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: F16/F32
437 * @param[in] biases_stride_x (Optional) Stride of the biases vector in X dimension (in bytes)
438 * @param[in] biases_step_x (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes)
439 * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector
Giorgio Arena9fe41442017-08-23 16:36:24 +0100440 */
Georgios Pinitas81a26ad2017-10-23 20:29:30 +0100441__kernel void depthwise_weights_reshape(
442 TENSOR3D_DECLARATION(src),
443 IMAGE_DECLARATION(dst)
444#ifdef HAS_BIAS
445 ,
446 VECTOR_DECLARATION(biases)
447#endif /* HAS_BIAS */
448)
Giorgio Arena9fe41442017-08-23 16:36:24 +0100449{
450 Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Georgios Pinitas81a26ad2017-10-23 20:29:30 +0100451#ifdef HAS_BIAS
452 Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
453#endif /* HAS_BIAS */
Giorgio Arena9fe41442017-08-23 16:36:24 +0100454
455 __global DATA_TYPE *input_ptr = (__global DATA_TYPE *)src.ptr;
456 __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes + get_global_id(1) * SRC_WIDTH * dst_stride_x + get_global_id(2) * dst_stride_y;
457
458 for(int i = 0; i < SRC_WIDTH; ++i, ++input_ptr)
459 {
460 *((__global DATA_TYPE *)(output_ptr + i * dst_stride_x)) = *input_ptr;
461 }
Georgios Pinitas81a26ad2017-10-23 20:29:30 +0100462
463#if defined(HAS_BIAS)
464 if(get_global_id(1) == 0)
465 {
466 *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x));
467 }
468#endif // defined(HAS_BIAS)
Giorgio Arena9fe41442017-08-23 16:36:24 +0100469}
470#endif //defined(SRC_WIDTH) && defined(DATA_TYPE)
471
Jaroslaw Rzepeckia1ed41f2017-10-13 11:13:58 +0100472#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE)
/** This kernel performs a reshaping of the input tensor to a tensor used to perform depthwise convolution using vector to matrix multiplication.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
 * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_LEFT, -DPAD_TOP, -DPAD_RIGHT, -DPAD_BOTTOM, -DKERNEL_WIDTH, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT
 *
 * @param[in] src_ptr                           Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
 * @param[in] src_stride_x                      Stride of the source tensor in X dimension (in bytes)
 * @param[in] src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] src_stride_y                      Stride of the source tensor in Y dimension (in bytes)
 * @param[in] src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
 * @param[in] src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[out] dst_ptr                          Pointer to the destination tensor. Supported data types: same as @p src_ptr
 * @param[in] dst_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in] dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] dst_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in] dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] dst_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in] dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
{
    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);

    // Map the linear patch index (global id 1) back to the top-left source
    // coordinate of the KERNEL_WIDTH x KERNEL_HEIGHT patch, in the padded frame
    const int src_pixel_linear = get_global_id(1) * STRIDE_X;
    const int full_length      = SRC_WIDTH + PAD_LEFT + PAD_RIGHT;
    const int max_initial_x    = STRIDE_X * (((full_length - KERNEL_WIDTH) / STRIDE_X) + 1);

    const int src_x = -PAD_LEFT + src_pixel_linear % max_initial_x;
    const int src_y = -PAD_TOP + src_pixel_linear / max_initial_x * STRIDE_Y;
    const int src_z = get_global_id(2);

    __global uchar *input_ptr      = src_ptr + src_offset_first_element_in_bytes + src_z * src_stride_z;
    __global DATA_TYPE *output_ptr = ((__global DATA_TYPE *)(dst.ptr));

    // Copy the patch row-major into the destination; out-of-bounds (padding)
    // positions are written as zero
    for(int y = src_y; y < src_y + KERNEL_HEIGHT; ++y)
    {
        for(int x = src_x; x < src_x + KERNEL_WIDTH; ++x, ++output_ptr)
        {
            if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT)
            {
                *output_ptr = 0;
            }
            else
            {
                *output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y));
            }
        }
    }
#if defined(HAS_BIAS)
    // Append a trailing 1 after the patch — presumably so the bias term is folded
    // into the subsequent vector-matrix multiplication; confirm against the GEMV caller
    *output_ptr = (DATA_TYPE)(1);
#endif // defined(HAS_BIAS)
}
528
#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE)
Giorgio Arena9fe41442017-08-23 16:36:24 +0100530
531#if defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE)
532
533/** This kernel performs a reshaping of the output of the depthwise generic convolution.
534 *
535 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
536 * @note The convolution information must be passed at compile time using -DCONV_WIDTH, -DCONV_HEIGHT, e.g -DCONV_WIDTH=32, -DCONV_HEIGHT=42
537 *
538 * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32
539 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
540 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
541 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
542 * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
543 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
544 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
545 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
546 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
547 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
548 * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
549 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
550 */
551__kernel void depthwise_vector_to_tensor(
552 VECTOR_DECLARATION(src),
553 TENSOR3D_DECLARATION(dst))
554{
555 Vector src = CONVERT_TO_VECTOR_STRUCT(src);
556
557 const int patch_size = CONV_WIDTH * CONV_HEIGHT;
558 const int id0 = get_global_id(0);
559 const int z = id0 / patch_size;
560 const int index2D = id0 - z * patch_size;
561
562 __global uchar *out_ptr = dst_ptr + dst_offset_first_element_in_bytes + index2D % CONV_WIDTH * dst_stride_x + index2D / CONV_WIDTH * dst_stride_y + z * dst_stride_z;
563 *((__global DATA_TYPE *)out_ptr) = *((__global DATA_TYPE *)src.ptr);
564}
565
566#endif //defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE)