blob: a9863c02118aeb0b37c6c8f698c5e546e0c8a8ea [file] [log] [blame]
/*
 * Copyright (c) 2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#include "VisualWakeWordProcessing.hpp"
18#include "ImageUtils.hpp"
19#include "VisualWakeWordModel.hpp"
20#include "log_macros.h"
21
22namespace arm {
23namespace app {
24
Richard Burtonb40ecf82022-04-22 16:14:57 +010025 VisualWakeWordPreProcess::VisualWakeWordPreProcess(TfLiteTensor* inputTensor)
26 :m_inputTensor{inputTensor}
27 {}
Richard Burtonc20be972022-04-19 17:01:08 +010028
29 bool VisualWakeWordPreProcess::DoPreProcess(const void* data, size_t inputSize)
30 {
31 if (data == nullptr) {
32 printf_err("Data pointer is null");
33 }
34
35 auto input = static_cast<const uint8_t*>(data);
Richard Burtonc20be972022-04-19 17:01:08 +010036
Richard Burtonb40ecf82022-04-22 16:14:57 +010037 auto unsignedDstPtr = static_cast<uint8_t*>(this->m_inputTensor->data.data);
Richard Burtonc20be972022-04-19 17:01:08 +010038
39 /* VWW model has one channel input => Convert image to grayscale here.
40 * We expect images to always be RGB. */
41 image::RgbToGrayscale(input, unsignedDstPtr, inputSize);
42
43 /* VWW model pre-processing is image conversion from uint8 to [0,1] float values,
44 * then quantize them with input quantization info. */
Richard Burtonb40ecf82022-04-22 16:14:57 +010045 QuantParams inQuantParams = GetTensorQuantParams(this->m_inputTensor);
Richard Burtonc20be972022-04-19 17:01:08 +010046
Richard Burtonb40ecf82022-04-22 16:14:57 +010047 auto signedDstPtr = static_cast<int8_t*>(this->m_inputTensor->data.data);
48 for (size_t i = 0; i < this->m_inputTensor->bytes; i++) {
Richard Burtonc20be972022-04-19 17:01:08 +010049 auto i_data_int8 = static_cast<int8_t>(
50 ((static_cast<float>(unsignedDstPtr[i]) / 255.0f) / inQuantParams.scale) + inQuantParams.offset
51 );
52 signedDstPtr[i] = std::min<int8_t>(INT8_MAX, std::max<int8_t>(i_data_int8, INT8_MIN));
53 }
54
55 debug("Input tensor populated \n");
56
57 return true;
58 }
59
Richard Burtonb40ecf82022-04-22 16:14:57 +010060 VisualWakeWordPostProcess::VisualWakeWordPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
Richard Burtonc20be972022-04-19 17:01:08 +010061 const std::vector<std::string>& labels, std::vector<ClassificationResult>& results)
Richard Burtonb40ecf82022-04-22 16:14:57 +010062 :m_outputTensor{outputTensor},
63 m_vwwClassifier{classifier},
Richard Burtonc20be972022-04-19 17:01:08 +010064 m_labels{labels},
65 m_results{results}
Richard Burtonb40ecf82022-04-22 16:14:57 +010066 {}
Richard Burtonc20be972022-04-19 17:01:08 +010067
68 bool VisualWakeWordPostProcess::DoPostProcess()
69 {
70 return this->m_vwwClassifier.GetClassificationResults(
Richard Burtonb40ecf82022-04-22 16:14:57 +010071 this->m_outputTensor, this->m_results,
Richard Burtonc20be972022-04-19 17:01:08 +010072 this->m_labels, 1, true);
73 }
74
75} /* namespace app */
76} /* namespace arm */