Blame - src/core/CL/cl_kernels/fixed_point.h - ml/ComputeLibrary

2017-06-23 18:03:44 +0100

[diff] [blame]

309

}

310

311

/* Instantiations of DIVQ_SAT_IMPL, whose definition lies before this point in the file.
 * NOTE(review): presumably each line generates the div_sat_<second argument> function that
 * DIV_SAT_OP_EXPAND / DIV_SAT_OP_VEC_EXPAND resolve to, with the third argument naming a
 * wider intermediate type - confirm against the macro definition.
 * The first three lines cover vector types, the last two the scalar qs8 / qs16 types. */
DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16)

Georgios Pinitas

2017-06-22 18:13:55 +0100

[diff] [blame]

312

DIVQ_SAT_IMPL(qs16, qs16x8, qs32x8)

Georgios Pinitas

0979675

2017-07-10 16:05:21 +0100

[diff] [blame]

313

DIVQ_SAT_IMPL(qs16, qs16x16, qs32x16)

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame^]

314

DIVQ_SAT_IMPL(qs8, qs8, qs16)

315

DIVQ_SAT_IMPL(qs16, qs16, qs32)

Georgios Pinitas

2017-06-23 18:03:44 +0100

[diff] [blame]

316

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame^]

317

/* Scalar saturating division dispatch: pastes `type` onto `div_sat_` to select the function
 * and forwards (a, b, position) to it. Callers use DIV_SAT_OP_EXPAND; the extra level lets
 * macro arguments be expanded before the ## pasting in the _STR variant takes place. */
#define DIV_SAT_OP_EXPAND_STR(a, b, type, position) div_sat_##type((a), (b), (position))

318

#define DIV_SAT_OP_EXPAND(a, b, type, position) DIV_SAT_OP_EXPAND_STR(a, b, type, position)

319

320

/* Vector counterpart: the called function is named div_sat_<type>x<size>. */
#define DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position))

321

#define DIV_SAT_OP_VEC_EXPAND(a, b, type, size, position) DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position)

Georgios Pinitas

2017-06-23 18:03:44 +0100

[diff] [blame]

322

Michalis Spyrou

2017-06-20 15:00:14 +0100

[diff] [blame]

323

/** Saturating exponential of a fixed point vector
 *
 * Expands to a function named exp_sat_<type>(VopA, fixed_point_position).
 *
 * @note The implementation uses a Taylor polynomial to approximate the exponential function.
 *
 * @param[in] stype the actual scalar data type.
 * @param[in] type  the actual data type.
 * @param[in] size  the number of the calculated elements.
 *
 * @return The result of the fixed point exponential. The result is saturated in case of overflow
 */
#define EXPQ_IMPL(stype, type, size) \
    inline type exp_sat_##type(type VopA, int fixed_point_position) \
    { \
        /* Each constant is shifted right by (14 - position), then rounded by the "+ 1, >> 1" step, */ \
        /* i.e. it is rescaled from 15 fractional bits to fixed_point_position fractional bits. */ \
        type const_one = (type)(1 << (fixed_point_position)); \
        type ln2       = (type)(((0x58B9 >> (14 - fixed_point_position)) + 1) >> 1); \
        /* 0x38AA only carries the fractional part; the integer part is OR-ed in through const_one */ \
        type inv_ln2   = (type)(((0x38AA >> (14 - fixed_point_position)) + 1) >> 1) | const_one; \
        type A         = (type)(((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1); \
        type B         = (type)(((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1); \
        type C         = (type)(((0x1693 >> (14 - fixed_point_position)) + 1) >> 1); \
        type D         = (type)(((0x0592 >> (14 - fixed_point_position)) + 1) >> 1); \
        /* m = VopA * inv_ln2; dec_m keeps only its integer part */ \
        type m     = MUL_SAT_OP_EXPAND(VopA, inv_ln2, stype, size, fixed_point_position); \
        type dec_m = m >> (type)fixed_point_position; \
        /* alpha = |VopA - dec_m * ln2| is the argument fed to the polynomial */ \
        type alpha = MUL_SAT_OP_EXPAND(dec_m << (type)fixed_point_position, ln2, stype, size, fixed_point_position); \
        alpha      = CONVERT(abs_diff(VopA, alpha), type); \
        /* Horner evaluation of 1 + alpha * (A + alpha * (B + alpha * (C + alpha * D))) */ \
        type sum = add_sat(MUL_SAT_OP_EXPAND(alpha, D, stype, size, fixed_point_position), C); \
        sum      = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), B); \
        sum      = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), A); \
        sum      = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), const_one); \
        /* Scale by 2^dec_m; yield stype##_MAX where sum lacks the leading zeros for the shift */ \
        return select((type)stype##_MAX, select(sum << dec_m, sum >> -dec_m, dec_m < (type)0), clz(sum) > dec_m); /* Saturate result if needed */ \
    }

Georgios Pinitas

2017-06-23 18:03:44 +0100

[diff] [blame]

353

Michalis Spyrou

2017-06-20 15:00:14 +0100

[diff] [blame]

354

/* Generate exp_sat_qs8x16, exp_sat_qs16x8 and exp_sat_qs16x16 from EXPQ_IMPL. */
EXPQ_IMPL(qs8, qs8x16, 16)

Georgios Pinitas

2017-06-22 18:13:55 +0100

[diff] [blame]

355

EXPQ_IMPL(qs16, qs16x8, 8)

Georgios Pinitas

0979675

2017-07-10 16:05:21 +0100

[diff] [blame]

356

EXPQ_IMPL(qs16, qs16x16, 16)

Georgios Pinitas

2017-06-23 18:03:44 +0100

[diff] [blame]

357

Michalis Spyrou

2017-06-20 15:00:14 +0100

[diff] [blame]

358

/* Saturating exponential dispatch: calls exp_sat_<type>x<size>((a), (position)).
 * Callers use EXP_OP_EXPAND; the extra level lets macro arguments be expanded before the
 * ## pasting in the _STR variant takes place. */
#define EXP_OP_EXPAND_STR(a, type, size, position) exp_sat_##type##x##size((a), (position))

Georgios Pinitas

2017-06-23 18:03:44 +0100

[diff] [blame]

359

#define EXP_OP_EXPAND(a, type, size, position) EXP_OP_EXPAND_STR(a, type, size, position)

360

Michalis Spyrou

2017-06-20 15:00:14 +0100

[diff] [blame]

361

/** Saturating logarithm of a fixed point vector
 *
 * Expands to a function named log_sat_<type>(VopA, fixed_point_position).
 *
 * @note The implementation uses a Taylor polynomial to approximate the logarithm function.
 * @note Inputs smaller than one are handled through their reciprocal and a negated result;
 *       negative inputs return 0.
 *
 * @param[in] stype the actual scalar data type.
 * @param[in] type  the actual data type.
 * @param[in] size  the number of the calculated elements.
 *
 * @return The result of the fixed point logarithm. The result is saturated in case of overflow
 */
#define LOGQ_IMPL(stype, type, size) \
    inline type log_sat_##type(type VopA, int fixed_point_position) \
    { \
        type const_one = (type)(1 << (fixed_point_position)); \
        type ln2       = (type)(0x58B9 >> (15 - fixed_point_position)); /* 0.6931472 */ \
        type A         = (type)(0x5C0F >> (14 - fixed_point_position)); /* 1.4384189 */ \
        type B         = -(type)(0x56AE >> (15 - fixed_point_position)); /* -0.6771900 */ \
        type C         = (type)(0x2933 >> (15 - fixed_point_position)); /* 0.3218538 */ \
        type D         = -(type)(0x0AA7 >> (15 - fixed_point_position)); /* -0.0832229 */ \
        /* Work on 1 / VopA where VopA < 1; the sign is restored in the return statement */ \
        type inter_a = select(VopA, DIV_SAT_OP_VEC_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one); \
        /* shift_val is derived from the leading zeros of the integer part of inter_a */ \
        type shift_val = (type)(15 - stype##_SHIFT) - clz(inter_a >> (type)fixed_point_position); \
        inter_a        = inter_a >> shift_val; \
        inter_a        = sub_sat(inter_a, const_one); \
        /* Horner evaluation of inter_a * (A + inter_a * (B + inter_a * (C + inter_a * D))) */ \
        type sum = add_sat(MUL_SAT_OP_EXPAND(inter_a, D, stype, size, fixed_point_position), C); \
        sum      = add_sat(MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position), B); \
        sum      = add_sat(MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position), A); \
        sum      = MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position); \
        /* Add the shift count back as an integer offset, then scale the total by ln2 */ \
        sum = MUL_SAT_OP_EXPAND(add_sat(sum, shift_val << (type)fixed_point_position), ln2, stype, size, fixed_point_position); \
        return select(select(sum, -sum, VopA < const_one), (type)0, VopA < (type)0); /* Negate for inputs below one; negative inputs give 0 */ \
    }

391

392

/* Generate log_sat_qs8x16, log_sat_qs16x8 and log_sat_qs16x16 from LOGQ_IMPL. */
LOGQ_IMPL(qs8, qs8x16, 16)

Georgios Pinitas

2017-06-22 18:13:55 +0100

[diff] [blame]

393

LOGQ_IMPL(qs16, qs16x8, 8)

Michele Di Giorgio

6c92834

2017-06-22 16:55:57 +0100

[diff] [blame]

394

LOGQ_IMPL(qs16, qs16x16, 16)

Michalis Spyrou

2017-06-20 15:00:14 +0100

[diff] [blame]

395

396

/* Saturating logarithm dispatch: calls log_sat_<type>x<size>((a), (position)).
 * Callers use LOG_OP_EXPAND; the extra level lets macro arguments be expanded before the
 * ## pasting in the _STR variant takes place. */
#define LOG_OP_EXPAND_STR(a, type, size, position) log_sat_##type##x##size((a), (position))

397

#define LOG_OP_EXPAND(a, type, size, position) LOG_OP_EXPAND_STR(a, type, size, position)

398

399

/** Saturating inverse square root of a fixed point vector
 *
 * Expands to a function named invsqrt_sat_<type>(VopA, fixed_point_position).
 *
 * @note The implementation uses Newton's method to approximate the inverse square root function:
 *       x = x * (3 - temp * x * x) / 2, run 3 times for one-byte scalar types and 5 times otherwise.
 *
 * @param[in] stype the actual scalar data type.
 * @param[in] type  the actual data type.
 * @param[in] size  the number of the calculated elements.
 *
 * @return The result of the fixed point inverse square root. The result is saturated in case of overflow
 */
#define INVSQRTQ_IMPL(stype, type, size) \
    inline type invsqrt_sat_##type(type VopA, int fixed_point_position) \
    { \
        type const_three = (type)(3 << (fixed_point_position)); \
        /* Shift amount computed from the leading zeros of the input; may be negative */ \
        type shift_value = (type)(16 - stype##_SHIFT) - (clz(VopA) + (type)fixed_point_position); \
        /* Right shift for non-negative amounts, otherwise a left shift that yields stype##_MAX on overflow */ \
        type temp = select(VopA >> shift_value, select((type)stype##_MAX, VopA << (-shift_value), clz(VopA) > (-shift_value)), shift_value < (type)0); \
        type x    = temp; \
        /* Perform more iterations if datatype is QS16 */ \
        const int num_iterations = (sizeof((stype)(1)) > 1) ? 5 : 3; \
        for(int i = 0; i < num_iterations; ++i) \
        { \
            x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
        } \
        /* Undo the initial scaling with half of its magnitude */ \
        type shift_value2 = select(shift_value >> 1, (-shift_value) >> 1, shift_value < (type)0); \
        return select(x >> shift_value2, select((type)stype##_MAX, x << shift_value2, clz(x) > shift_value2), shift_value < (type)0); /* Saturate result if needed */ \
    }

427

428

/* Generate invsqrt_sat_qs8x16 and invsqrt_sat_qs16x8 from INVSQRTQ_IMPL. */
INVSQRTQ_IMPL(qs8, qs8x16, 16)

Georgios Pinitas

2017-06-22 18:13:55 +0100

[diff] [blame]

429

INVSQRTQ_IMPL(qs16, qs16x8, 8)

Michalis Spyrou

2017-06-20 15:00:14 +0100

[diff] [blame]

430

431

/* Saturating inverse square root dispatch: calls invsqrt_sat_<type>x<size>((a), (position)).
 * Callers use INVSQRT_OP_EXPAND; the extra level lets macro arguments be expanded before the
 * ## pasting in the _STR variant takes place. */
#define INVSQRT_OP_EXPAND_STR(a, type, size, position) invsqrt_sat_##type##x##size((a), (position))

432

#define INVSQRT_OP_EXPAND(a, type, size, position) INVSQRT_OP_EXPAND_STR(a, type, size, position)

433

Georgios Pinitas

2017-06-22 18:13:55 +0100

[diff] [blame]

434

/** Saturating hyperbolic tangent of a fixed point vector
 *
 * Expands to a function named tanh_sat_<type>(VopA, fixed_point_position) that evaluates
 *
 * tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
 *
 * using the saturating multiply, exponential, add, subtract and divide helpers.
 *
 * @param[in] stype the actual scalar data type.
 * @param[in] type  the actual data type.
 * @param[in] size  the number of the calculated elements.
 *
 * @return The result of the fixed point hyperbolic tangent. The result is saturated in case of overflow
 */
#define TANHQ_IMPL(stype, type, size) \
    inline type tanh_sat_##type(type VopA, int fixed_point_position) \
    { \
        type const_one = (type)(1 << (fixed_point_position)); \
        type const_two = (type)(2 << (fixed_point_position)); \
        /* exp2x = e^(2 * VopA) */ \
        type exp2x = EXP_OP_EXPAND(MUL_SAT_OP_EXPAND(const_two, VopA, stype, size, fixed_point_position), stype, size, fixed_point_position); \
        type num   = SUB_SAT_OP_EXPAND(exp2x, const_one, stype, size); \
        type den   = ADD_SAT_OP_EXPAND(exp2x, const_one, stype, size); \
        return DIV_SAT_OP_VEC_EXPAND(num, den, stype, size, fixed_point_position); \
    }

454

455

/* Generate tanh_sat_qs8x16 and tanh_sat_qs16x8 from TANHQ_IMPL. */
TANHQ_IMPL(qs8, qs8x16, 16)

456

TANHQ_IMPL(qs16, qs16x8, 8)

457

458

/* Saturating hyperbolic tangent dispatch: calls tanh_sat_<type>x<size>((a), (position)).
 * Callers use TANH_OP_EXPAND; the extra level lets macro arguments be expanded before the
 * ## pasting in the _STR variant takes place. */
#define TANH_OP_EXPAND_STR(a, type, size, position) tanh_sat_##type##x##size((a), (position))

459

#define TANH_OP_EXPAND(a, type, size, position) TANH_OP_EXPAND_STR(a, type, size, position)

460

steniu01

da37e2f

2017-06-29 10:14:58 +0100

[diff] [blame]

461

/* Aliases that map to the float16 vector type.
 * NOTE(review): presumably these exist so that names assembled as <type>x<size> or <type>_TYPE
 * by other macros resolve for float16 - confirm against their users elsewhere in the file. */
#define floatx16 float16

462

#define float16_TYPE float16

463

464

/** Conversion from a floating point vector to a fixed point vector
 *
 * Expands to a function named convert_<out_type>_<in_type>(a, fixed_point_position) that
 * multiplies the input by 2^fixed_point_position, adds +0.5 where the input is greater than
 * zero and -0.5 elsewhere, and passes the result to CONVERT.
 *
 * @note The rounding offsets are single-precision literals (0.5f), as in SQCVT_SAT_IMPL, so
 *       the expression does not rely on double-precision support.
 *
 * @param[in] in_type  the floating point vector type to convert from.
 * @param[in] out_type the fixed point vector type to convert to.
 *
 * @return The converted fixed point vector
 */
#define CONVERTQ_DOWN_IMPL(in_type, out_type) \
    inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \
    { \
        return CONVERT(a * (1 << fixed_point_position) + select((in_type)-0.5f, (in_type)0.5f, isgreater(a, (in_type)0)), out_type); \
    }

469

470

/* Generate convert_qs8x16_float16 and convert_qs16x16_float16 from CONVERTQ_DOWN_IMPL. */
CONVERTQ_DOWN_IMPL(float16, qs8x16)

471

CONVERTQ_DOWN_IMPL(float16, qs16x16)

472

473

/** Saturating conversion from a floating point vector to a fixed point vector
 *
 * Expands to a function named convert_<out_type>_<in_type>_sat(a, fixed_point_position) that
 * multiplies the input by 2^fixed_point_position, adds +0.5 where the input is greater than
 * zero and -0.5 elsewhere, and passes the result to CONVERT_SAT.
 *
 * @note The rounding offsets are single-precision literals (0.5f), as in SQCVT_SAT_IMPL, so
 *       the expression does not rely on double-precision support.
 *
 * @param[in] in_type  the floating point vector type to convert from.
 * @param[in] out_type the fixed point vector type to convert to.
 *
 * @return The converted fixed point vector
 */
#define CONVERTQ_DOWN_SAT_IMPL(in_type, out_type) \
    inline out_type convert_##out_type##_##in_type##_sat(in_type a, int fixed_point_position) \
    { \
        return CONVERT_SAT(a * (1 << fixed_point_position) + select((in_type)-0.5f, (in_type)0.5f, isgreater(a, (in_type)0)), out_type); \
    }

478

479

/* Generate convert_qs8x16_float16_sat and convert_qs16x16_float16_sat from CONVERTQ_DOWN_SAT_IMPL. */
CONVERTQ_DOWN_SAT_IMPL(float16, qs8x16)

480

CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16)

481

482

/** Conversion from a fixed point vector to a floating point vector
 *
 * Expands to a function named convert_<out_type>_<in_type>(a, fixed_point_position) that
 * passes the input to CONVERT and divides the result by 2^fixed_point_position.
 *
 * @param[in] in_type  the fixed point vector type to convert from.
 * @param[in] out_type the floating point vector type to convert to.
 *
 * @return The converted floating point vector
 */
#define CONVERTQ_UP_IMPL(in_type, out_type) \
    inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \
    { \
        return CONVERT(a, out_type) / (1 << fixed_point_position); \
    }

487

488

/* Generate convert_float16_qs8x16 and convert_float16_qs16x16 from CONVERTQ_UP_IMPL. */
CONVERTQ_UP_IMPL(qs8x16, float16)

489

CONVERTQ_UP_IMPL(qs16x16, float16)

490

Michalis Spyrou

172e570

2017-06-26 14:18:47 +0100

[diff] [blame]

491

/** Saturating conversion of a scalar float to a fixed point value
 *
 * Expands to a function named sqcvt_<type>_sat(a, fixed_point_position) that multiplies the
 * input by 2^fixed_point_position, adds -0.5f for negative inputs and +0.5f otherwise, and
 * passes the result to CONVERT_SAT.
 *
 * @param[in] type the fixed point data type to convert to.
 *
 * @return The converted fixed point value
 */
#define SQCVT_SAT_IMPL(type) \
    inline type sqcvt_##type##_sat(float a, int fixed_point_position) \
    { \
        return CONVERT_SAT((a * (1 << fixed_point_position) + ((a < 0) ? -0.5f : 0.5f)), type); \
    }

/* Generate sqcvt_qs8_sat and sqcvt_qs16_sat from SQCVT_SAT_IMPL. */
SQCVT_SAT_IMPL(qs8)

SQCVT_SAT_IMPL(qs16)

/* Saturating float-to-fixed-point dispatch: calls sqcvt_<type>_sat((a), (position)).
 * Callers use SQCVT_SAT_OP_EXPAND; the extra level lets macro arguments be expanded before the
 * ## pasting in the _STR variant takes place. Unlike the other *_OP_EXPAND wrappers here, the
 * outer macro already parenthesises `a`, so the argument ends up wrapped twice (harmless). */
#define SQCVT_SAT_OP_EXPAND_STR(a, type, position) sqcvt_##type##_sat((a), (position))

501

#define SQCVT_SAT_OP_EXPAND(a, type, position) SQCVT_SAT_OP_EXPAND_STR((a), type, position)

502

Georgios Pinitas