Blame - src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp - ml/ComputeLibrary

*reinterpret_cast<int8_t *>(out.ptr() + x) = finalize_quantization(in_value, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift,

156

static_cast<int8_t>(_min), static_cast<int8_t>(_max), is_bounded_relu);

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

}

},

in, out, bias);

}

else

{

execute_window_loop(win_collapsed, [&](const Coordinates &)

164

{

165

// Compute 16 elements per iteration

166

int x = window_start_x;

167

for(; x <= (window_end_x - window_step_x); x += window_step_x)

{

int32x4x4_t in_s32 =

{

{

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 0),

173

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 4),

174

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 8),

175

vld1q_s32(reinterpret_cast<const int32_t *>(in.ptr()) + x + 12)

}

};

vst1q_s8(reinterpret_cast<int8_t *>(out.ptr() + x),

Michalis Spyrou

70d43a3

2020-06-22 17:05:43 +0100

[diff] [blame]

180

finalize_quantization(in_s32, _result_fixedpoint_multiplier, _result_shift, result_offset_after_shift_s32, min_s8, max_s8, is_bounded_relu));

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

181

}

182

183

// Compute left-over elements

184

for(; x < window_end_x; ++x)

185

{

186

const int32_t in_value = *(reinterpret_cast<const int32_t *>(in.ptr()) + x);

187

188

// Finalize and store the result

Michalis Spyrou

70d43a3

2020-06-22 17:05:43 +0100

[diff] [blame]

189

*reinterpret_cast<int8_t *>(out.ptr() + x) = finalize_quantization(in_value, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift,

190

static_cast<int8_t>(_min), static_cast<int8_t>(_max), is_bounded_relu);

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

}

},

in, out);

}

}

// Default constructor: leaves the kernel unconfigured. Every pointer member (including the
// run<> dispatch pointer) starts as nullptr and every numeric parameter starts at 0;
// configure() assigns the real values.
NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel()
    : _func(nullptr),
      _input(nullptr),
      _bias(nullptr),
      _output(nullptr),
      _result_fixedpoint_multiplier(0),
      _result_shift(0),
      _result_offset_after_shift(0),
      _min(0),
      _max(0)
{
}

void NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift,

203

int result_offset_after_shift, int min, int max)

204

{

205

// Perform validate step

206

ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

207

ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), min, max));

_input = input;

_bias = bias;

_output = output;

_result_fixedpoint_multiplier = result_fixedpoint_multiplier;

213

_result_shift = result_shift;

214

_result_offset_after_shift = result_offset_after_shift;

_min = min;

_max = max;

// Configure kernel window

219

auto win_config = validate_and_configure_window(input->info(), output->info());

220

ARM_COMPUTE_ERROR_THROW_ON(win_config.first);

221

INEKernel::configure(win_config.second);

222

223

// Check if we need to clamp the result using min and max

Giorgio Arena

1856ff7

2020-02-07 13:46:45 +0000

[diff] [blame]

224

const bool is_bounded_relu = !(min <= -128 && max >= 127);

Georgios Pinitas

448a81f

2019-11-21 14:10:25 +0000

[diff] [blame]

225

_func = is_bounded_relu ? &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<true> : &NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::run<false>;

226

}

227

228

Status NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min, int max)

229

{

230

ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

231

ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, min, max));

232