Blame - src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp - ml/ComputeLibrary

*reinterpret_cast<int8_t *>(out.ptr() + x) = finalize_quantization(in_value, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift,

154

static_cast<int8_t>(_min), static_cast<int8_t>(_max), is_bounded_relu);

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

}

},

in, out, bias);

}

else

{

execute_window_loop(win_collapsed, [&](const Coordinates &)

162

{

163

// Compute 16 elements per iteration

164

int x = window_start_x;

165

for(; x <= (window_end_x - window_step_x); x += window_step_x)

{

int32x4x4_t in_s32 =

{

{

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 0),

171

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 4),

172

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 8),

173

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 12)

}

};

vst1q_s8(reinterpret_cast<int8_t *>(out.ptr() + x),

Michalis Spyrou

70d43a3

2020-06-22 17:05:43 +0100

[diff] [blame]

178

finalize_quantization(in_s32, _result_fixedpoint_multiplier, _result_shift, result_offset_after_shift_s32, min_s8, max_s8, is_bounded_relu));

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

179

}

180

181

// Compute left-over elements

182

for(; x < window_end_x; ++x)

183

{

184

const int32_t in_value = *(reinterpret_cast<const int32_t *>(in.ptr()) + x);

185

186

// Finalize and store the result

Michalis Spyrou

70d43a3

2020-06-22 17:05:43 +0100

[diff] [blame]

187

*reinterpret_cast<int8_t *>(out.ptr() + x) = finalize_quantization(in_value, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift,

188

static_cast<int8_t>(_min), static_cast<int8_t>(_max), is_bounded_relu);

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

}

},

in, out);

}

}

/** Default constructor.
 *
 * Produces an unconfigured kernel: the dispatch pointer and the three tensor
 * pointers are null, and every quantization parameter (multiplier, shift,
 * offset, min, max) is zero.
 */
NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel()
    : _func(nullptr),
      _input(nullptr),
      _bias(nullptr),
      _output(nullptr),
      _result_fixedpoint_multiplier(0),
      _result_shift(0),
      _result_offset_after_shift(0),
      _min(0),
      _max(0)
{
}

void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,

201

int result_offset_after_shift, int min, int max)

202

{

203

// Perform validate step

204

ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

205

ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), min, max));

_input = input;

_bias = bias;

_output = output;

_result_fixedpoint_multiplier = result_fixedpoint_multiplier;

211

_result_shift = result_shift;

212

_result_offset_after_shift = result_offset_after_shift;

_min = min;

_max = max;

// Configure kernel window

217

auto win_config = validate_and_configure_window(input->info(), output->info());

218

ARM_COMPUTE_ERROR_THROW_ON(win_config.first);

219

INEKernel::configure(win_config.second);

220

221

// Check if we need to clamp the result using min and max

Giorgio Arena

1856ff7

2020-02-07 13:46:45 +0000

[diff] [blame]

222

const bool is_bounded_relu = !(min <= -128 && max >= 127);

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

223

_func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<false>;

224

}

225

226

Status NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)

227

{

228

ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

229

ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, min, max));

230