L2Normalization.cpp 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. /*
  2. * Copyright (C) 2019 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
#include <algorithm>
#include <cmath>
#include <vector>

#include "CpuOperationUtils.h"
#include "OperationResolver.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "Tracing.h"
  20. namespace android {
  21. namespace nn {
  22. namespace l2_norm {
  // Name of the operation; used in the "unsupported tensor type" messages and at registration.
  constexpr char kOperationName[] = "L2_NORMALIZATION";

  // Input operands. The axis scalar is optional: validate() also accepts kNumInputs - 1 inputs,
  // in which case prepare() and execute() use axis -1.
  constexpr uint32_t kNumInputs = 2;
  constexpr uint32_t kInputTensor = 0;
  constexpr uint32_t kAxisScalar = 1;

  // Output operands.
  constexpr uint32_t kNumOutputs = 1;
  constexpr uint32_t kOutputTensor = 0;
  29. namespace {
  30. inline bool l2normFloat32Impl(const float* inputData, const Shape& inputShape, int32_t axis,
  31. float* outputData, const Shape& outputShape) {
  32. NNTRACE_TRANS("l2normFloat32");
  33. const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
  34. const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
  35. const uint32_t innerSize =
  36. getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
  37. for (uint32_t outer = 0; outer < outerSize; ++outer) {
  38. const float* inputBeg = inputData + outer * axisSize * innerSize;
  39. const float* inputEnd = inputBeg + axisSize * innerSize;
  40. float* outputBeg = outputData + outer * axisSize * innerSize;
  41. for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
  42. float sum = 0.0f;
  43. for (const float* p = inputBeg; p < inputEnd; p += innerSize) {
  44. float val = *p;
  45. sum += val * val;
  46. }
  47. float l2_norm = std::sqrt(sum);
  48. float* pOut = outputBeg;
  49. for (const float* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
  50. *pOut = *p / l2_norm;
  51. }
  52. }
  53. }
  54. return true;
  55. }
  56. inline bool l2normQuant8Impl(const uint8_t* inputData, const Shape& inputShape, int32_t axis,
  57. uint8_t* outputData, const Shape& outputShape) {
  58. NNTRACE_TRANS("l2normQuant8");
  59. const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
  60. const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
  61. const uint32_t innerSize =
  62. getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
  63. for (uint32_t outer = 0; outer < outerSize; ++outer) {
  64. const uint8_t* inputBeg = inputData + outer * axisSize * innerSize;
  65. const uint8_t* inputEnd = inputBeg + axisSize * innerSize;
  66. uint8_t* outputBeg = outputData + outer * axisSize * innerSize;
  67. for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
  68. int32_t sum = 0;
  69. for (const uint8_t* p = inputBeg; p < inputEnd; p += innerSize) {
  70. int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
  71. sum += val * val;
  72. }
  73. int32_t invMultiplier, invShift;
  74. tflite::GetInvSqrtQuantizedMultiplierExp(sum, -1, &invMultiplier, &invShift);
  75. uint8_t* pOut = outputBeg;
  76. for (const uint8_t* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
  77. int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
  78. int32_t scaledVal = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
  79. val * 128, invMultiplier, invShift) +
  80. 128;
  81. *pOut = static_cast<uint8_t>(std::min(std::max(scaledVal, 0), 255));
  82. }
  83. }
  84. }
  85. return true;
  86. }
  87. bool l2normFloat32(const float* inputData, const Shape& inputShape, int32_t axis, float* outputData,
  88. const Shape& outputShape) {
  89. int32_t ndim = getNumberOfDimensions(inputShape);
  90. NN_CHECK(handleNegativeAxis(inputShape, &axis));
  91. // TFLite optimized implementation only supports computation along the last axis
  92. if (axis == ndim - 1) {
  93. NNTRACE_COMP("optimized_ops::L2Normalization::float");
  94. tflite::L2NormalizationParams param = {.input_zero_point = 0};
  95. tflite::optimized_ops::L2Normalization(param, convertShapeToTflshape(inputShape), inputData,
  96. convertShapeToTflshape(outputShape), outputData);
  97. return true;
  98. } else {
  99. return l2normFloat32Impl(inputData, inputShape, axis, outputData, outputShape);
  100. }
  101. }
  102. bool l2normFloat16(const _Float16* inputData, const Shape& inputShape, int32_t axis,
  103. _Float16* outputData, const Shape& outputShape) {
  104. NNTRACE_TRANS("l2normFloat16");
  105. std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
  106. convertFloat16ToFloat32(inputData, &inputDataFloat32);
  107. std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
  108. l2normFloat32(inputDataFloat32.data(), inputShape, axis, outputDataFloat32.data(), outputShape);
  109. convertFloat32ToFloat16(outputDataFloat32, outputData);
  110. return true;
  111. }
  112. bool l2normQuant8(const uint8_t* inputData, const Shape& inputShape, int32_t axis,
  113. uint8_t* outputData, const Shape& outputShape) {
  114. int32_t ndim = getNumberOfDimensions(inputShape);
  115. NN_CHECK(handleNegativeAxis(inputShape, &axis));
  116. // TFLite optimized implementation only supports computation along the last axis
  117. if (axis == ndim - 1) {
  118. NNTRACE_COMP("optimized_ops::L2Normalization::uint8");
  119. tflite::L2NormalizationParams param = {.input_zero_point = inputShape.offset};
  120. tflite::optimized_ops::L2Normalization(param, convertShapeToTflshape(inputShape), inputData,
  121. convertShapeToTflshape(outputShape), outputData);
  122. return true;
  123. } else {
  124. return l2normQuant8Impl(inputData, inputShape, axis, outputData, outputShape);
  125. }
  126. }
  127. } // namespace
  128. bool validate(const IOperationValidationContext* context) {
  129. NN_RET_CHECK(context->getNumInputs() == kNumInputs ||
  130. context->getNumInputs() == kNumInputs - 1);
  131. NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
  132. const OperandType inputType = context->getInputType(kInputTensor);
  133. std::vector<OperandType> inExpectedTypes = {inputType};
  134. if (inputType == OperandType::TENSOR_FLOAT16 || inputType == OperandType::TENSOR_QUANT8_ASYMM) {
  135. NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_2));
  136. } else if (inputType == OperandType::TENSOR_FLOAT32) {
  137. NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_0));
  138. } else {
  139. NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
  140. }
  141. if (context->getNumInputs() == kNumInputs) {
  142. inExpectedTypes.push_back(OperandType::INT32);
  143. NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_2));
  144. } else if (context->getInputShape(kInputTensor).dimensions.size() != 4) {
  145. NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_2));
  146. }
  147. return validateInputTypes(context, inExpectedTypes) &&
  148. validateOutputTypes(context, {inputType});
  149. }
  150. bool prepare(IOperationExecutionContext* context) {
  151. const Shape& input = context->getInputShape(kInputTensor);
  152. int32_t numDimensions = getNumberOfDimensions(input);
  153. int32_t axis = context->getNumInputs() == kNumInputs
  154. ? context->getInputValue<int32_t>(kAxisScalar)
  155. : -1;
  156. NN_RET_CHECK_GE(axis, -numDimensions);
  157. NN_RET_CHECK_LT(axis, numDimensions);
  158. Shape output = context->getOutputShape(kOutputTensor);
  159. output.type = input.type;
  160. output.dimensions = input.dimensions;
  161. if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
  162. output.scale = 1.0f / 128.0f;
  163. output.offset = 128;
  164. } else {
  165. output.scale = 0;
  166. output.offset = 0;
  167. }
  168. return context->setOutputShape(kOutputTensor, output);
  169. }
  170. bool execute(IOperationExecutionContext* context) {
  171. int32_t axis = context->getNumInputs() == kNumInputs
  172. ? context->getInputValue<int32_t>(kAxisScalar)
  173. : -1;
  174. NN_RET_CHECK(handleNegativeAxis(context->getInputShape(kInputTensor), &axis));
  175. switch (context->getInputType(kInputTensor)) {
  176. case OperandType::TENSOR_FLOAT32:
  177. return l2normFloat32(context->getInputBuffer<float>(kInputTensor),
  178. context->getInputShape(kInputTensor), axis,
  179. context->getOutputBuffer<float>(kOutputTensor),
  180. context->getOutputShape(kOutputTensor));
  181. case OperandType::TENSOR_FLOAT16:
  182. return l2normFloat16(context->getInputBuffer<_Float16>(kInputTensor),
  183. context->getInputShape(kInputTensor), axis,
  184. context->getOutputBuffer<_Float16>(kOutputTensor),
  185. context->getOutputShape(kOutputTensor));
  186. case OperandType::TENSOR_QUANT8_ASYMM:
  187. return l2normQuant8(context->getInputBuffer<uint8_t>(kInputTensor),
  188. context->getInputShape(kInputTensor), axis,
  189. context->getOutputBuffer<uint8_t>(kOutputTensor),
  190. context->getOutputShape(kOutputTensor));
  191. default:
  192. NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
  193. }
  194. }
  195. } // namespace l2_norm
  196. NN_REGISTER_OPERATION(L2_NORMALIZATION, l2_norm::kOperationName, l2_norm::validate,
  197. l2_norm::prepare, l2_norm::execute);
  198. } // namespace nn
  199. } // namespace android