// GroupedConv2D.cpp
/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

#include "tensorflow/lite/kernels/internal/common.h"

#include "CpuOperationUtils.h"
#include "Operations.h"
#include "Tracing.h"
  22. namespace android {
  23. namespace nn {
// Shared prologue for the grouped-convolution kernels below.
// Expands to local variables holding the NHWC dimensions of the tensors;
// it expects `inputShape`, `filterShape`, `outputShape` and `numGroups`
// to be in scope at the expansion site.
// NOTE(review): assumes `numGroups` is nonzero and evenly divides
// `outputDepth` — TODO confirm callers validate this before dispatch.
#define ANDROID_NN_GROUPED_CONV_PARAMETERS                      \
    uint32_t numBatches = getSizeOfDimension(inputShape, 0);    \
    uint32_t inputHeight = getSizeOfDimension(inputShape, 1);   \
    uint32_t inputWidth = getSizeOfDimension(inputShape, 2);    \
    uint32_t inputDepth = getSizeOfDimension(inputShape, 3);    \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
    uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
    uint32_t filterDepth = getSizeOfDimension(filterShape, 3);  \
    uint32_t outputHeight = getSizeOfDimension(outputShape, 1); \
    uint32_t outputWidth = getSizeOfDimension(outputShape, 2);  \
    uint32_t outputDepth = getSizeOfDimension(outputShape, 3);  \
    uint32_t outputGroupDepth = outputDepth / numGroups;
  36. bool groupedConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
  37. const Shape& filterShape, const float* biasData, const Shape& biasShape,
  38. int32_t padding_left, int32_t padding_right, int32_t padding_top,
  39. int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
  40. int32_t numGroups, int32_t activation, float* outputData,
  41. const Shape& outputShape) {
  42. NNTRACE_TRANS("groupConvFloat32");
  43. ANDROID_NN_GROUPED_CONV_PARAMETERS
  44. float output_activation_min = 0.0f, output_activation_max = 0.0f;
  45. CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
  46. const float* inputBase = inputData;
  47. float* outPtr = outputData;
  48. for (uint32_t b = 0; b < numBatches; b++) {
  49. for (uint32_t h = 0; h < outputHeight; h++) {
  50. for (uint32_t w = 0; w < outputWidth; w++) {
  51. const float* filterBase = filterData;
  52. for (uint32_t g = 0; g < numGroups; g++) {
  53. for (uint32_t d = 0; d < outputGroupDepth; d++) {
  54. int32_t wInputOrigin =
  55. static_cast<int32_t>(w) * stride_width - padding_left;
  56. int32_t hInputOrigin =
  57. static_cast<int32_t>(h) * stride_height - padding_top;
  58. float sum = 0.0f;
  59. for (uint32_t i = 0; i < filterHeight; i++) {
  60. for (uint32_t j = 0; j < filterWidth; j++) {
  61. for (uint32_t k = 0; k < filterDepth; k++) {
  62. int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
  63. int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
  64. uint32_t dInput = filterDepth * g + k;
  65. if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
  66. wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
  67. uint32_t filterIndex =
  68. i * filterWidth * filterDepth + j * filterDepth + k;
  69. uint32_t inputIndex = hInput * inputWidth * inputDepth +
  70. wInput * inputDepth + dInput;
  71. sum += filterBase[filterIndex] * inputBase[inputIndex];
  72. }
  73. }
  74. }
  75. }
  76. sum += biasData[g * outputGroupDepth + d];
  77. sum = std::max(std::min(sum, output_activation_max), output_activation_min);
  78. outPtr[d] = sum;
  79. filterBase += filterHeight * filterWidth * filterDepth;
  80. }
  81. outPtr += outputGroupDepth;
  82. }
  83. }
  84. }
  85. inputBase += inputHeight * inputWidth * inputDepth;
  86. }
  87. return true;
  88. }
  89. bool groupedConvQuant8(const uint8_t* inputData, const Shape& inputShape, const uint8_t* filterData,
  90. const Shape& filterShape, const int32_t* biasData, const Shape& biasShape,
  91. int32_t padding_left, int32_t padding_right, int32_t padding_top,
  92. int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
  93. int32_t numGroups, int32_t activation, uint8_t* outputData,
  94. const Shape& outputShape) {
  95. NNTRACE_TRANS("groupConvQuant8");
  96. ANDROID_NN_GROUPED_CONV_PARAMETERS
  97. int32_t inputOffset = -inputShape.offset;
  98. int32_t filterOffset = -filterShape.offset;
  99. int32_t outputOffset = outputShape.offset;
  100. double realMultiplier = 0.0;
  101. int32_t outputMultiplier = 0;
  102. int32_t outputShift = 0;
  103. NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
  104. &realMultiplier));
  105. int exponent;
  106. NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
  107. outputShift = -exponent;
  108. int32_t output_activation_min = 0, output_activation_max = 0;
  109. CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
  110. &output_activation_max);
  111. const uint8_t* inputBase = inputData;
  112. uint8_t* outPtr = outputData;
  113. for (uint32_t b = 0; b < numBatches; b++) {
  114. for (uint32_t h = 0; h < outputHeight; h++) {
  115. for (uint32_t w = 0; w < outputWidth; w++) {
  116. const uint8_t* filterBase = filterData;
  117. for (uint32_t g = 0; g < numGroups; g++) {
  118. for (uint32_t d = 0; d < outputGroupDepth; d++) {
  119. int32_t wInputOrigin =
  120. static_cast<int32_t>(w) * stride_width - padding_left;
  121. int32_t hInputOrigin =
  122. static_cast<int32_t>(h) * stride_height - padding_top;
  123. int32_t sum = 0.0f;
  124. for (uint32_t i = 0; i < filterHeight; i++) {
  125. for (uint32_t j = 0; j < filterWidth; j++) {
  126. for (uint32_t k = 0; k < filterDepth; k++) {
  127. int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
  128. int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
  129. uint32_t dInput = filterDepth * g + k;
  130. if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
  131. wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
  132. uint32_t filterIndex =
  133. i * filterWidth * filterDepth + j * filterDepth + k;
  134. uint32_t inputIndex = hInput * inputWidth * inputDepth +
  135. wInput * inputDepth + dInput;
  136. sum += (static_cast<int32_t>(filterBase[filterIndex]) +
  137. filterOffset) *
  138. (static_cast<int32_t>(inputBase[inputIndex]) +
  139. inputOffset);
  140. }
  141. }
  142. }
  143. }
  144. sum += biasData[g * outputGroupDepth + d];
  145. sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier,
  146. -outputShift);
  147. sum += outputOffset;
  148. sum = std::max(std::min(sum, output_activation_max), output_activation_min);
  149. outPtr[d] = static_cast<uint8_t>(sum);
  150. filterBase += filterHeight * filterWidth * filterDepth;
  151. }
  152. outPtr += outputGroupDepth;
  153. }
  154. }
  155. }
  156. inputBase += inputHeight * inputWidth * inputDepth;
  157. }
  158. return true;
  159. }
  160. bool groupedConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
  161. const int8_t* filterData, const Shape& filterShape,
  162. const float* filterScales, const int32_t* biasData,
  163. const Shape& biasShape, int32_t padding_left,
  164. int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
  165. int32_t stride_width, int32_t stride_height, int32_t numGroups,
  166. int32_t activation, uint8_t* outputData,
  167. const Shape& outputShape) {
  168. NNTRACE_TRANS("groupConvQuant8");
  169. ANDROID_NN_GROUPED_CONV_PARAMETERS
  170. int32_t inputOffset = -inputShape.offset;
  171. int32_t outputOffset = outputShape.offset;
  172. auto realMultiplier = std::vector<double>(outputDepth, .0f);
  173. auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
  174. auto outputShift = std::vector<int32_t>(outputDepth, 0);
  175. for (int i = 0; i < outputDepth; ++i) {
  176. Shape filterChannelShape = filterShape;
  177. filterChannelShape.scale = filterScales[i];
  178. Shape biasChannelShape = biasShape;
  179. biasChannelShape.scale = filterScales[i] * inputShape.scale;
  180. NN_RET_CHECK(GetQuantizedConvolutionMultipler(
  181. inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
  182. int exponent;
  183. NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
  184. outputShift[i] = -exponent;
  185. }
  186. int32_t output_activation_min = 0, output_activation_max = 0;
  187. CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
  188. &output_activation_max);
  189. const uint8_t* inputBase = inputData;
  190. uint8_t* outPtr = outputData;
  191. for (uint32_t b = 0; b < numBatches; b++) {
  192. for (uint32_t h = 0; h < outputHeight; h++) {
  193. for (uint32_t w = 0; w < outputWidth; w++) {
  194. const int8_t* filterBase = filterData;
  195. for (uint32_t g = 0; g < numGroups; g++) {
  196. for (uint32_t d = 0; d < outputGroupDepth; d++) {
  197. int32_t wInputOrigin =
  198. static_cast<int32_t>(w) * stride_width - padding_left;
  199. int32_t hInputOrigin =
  200. static_cast<int32_t>(h) * stride_height - padding_top;
  201. int32_t sum = 0.0f;
  202. for (uint32_t i = 0; i < filterHeight; i++) {
  203. for (uint32_t j = 0; j < filterWidth; j++) {
  204. for (uint32_t k = 0; k < filterDepth; k++) {
  205. int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
  206. int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
  207. uint32_t dInput = filterDepth * g + k;
  208. if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
  209. wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
  210. uint32_t filterIndex =
  211. i * filterWidth * filterDepth + j * filterDepth + k;
  212. uint32_t inputIndex = hInput * inputWidth * inputDepth +
  213. wInput * inputDepth + dInput;
  214. sum += (static_cast<int32_t>(filterBase[filterIndex])) *
  215. (static_cast<int32_t>(inputBase[inputIndex]) +
  216. inputOffset);
  217. }
  218. }
  219. }
  220. }
  221. int channelIndex = g * outputGroupDepth + d;
  222. sum += biasData[channelIndex];
  223. sum = tflite::MultiplyByQuantizedMultiplier(
  224. sum, outputMultiplier[channelIndex], -outputShift[channelIndex]);
  225. sum += outputOffset;
  226. sum = std::max(std::min(sum, output_activation_max), output_activation_min);
  227. outPtr[d] = static_cast<uint8_t>(sum);
  228. filterBase += filterHeight * filterWidth * filterDepth;
  229. }
  230. outPtr += outputGroupDepth;
  231. }
  232. }
  233. }
  234. inputBase += inputHeight * inputWidth * inputDepth;
  235. }
  236. return true;
  237. }
  238. bool groupedConvFloat16(const _Float16* inputData, const Shape& inputShape,
  239. const _Float16* filterData, const Shape& filterShape,
  240. const _Float16* biasData, const Shape& biasShape, int32_t padding_left,
  241. int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
  242. int32_t stride_width, int32_t stride_height, int32_t numGroups,
  243. int32_t activation, _Float16* outputData, const Shape& outputShape) {
  244. NNTRACE_TRANS("groupConvFloat16");
  245. std::vector<float> inputData_float32(getNumberOfElements(inputShape));
  246. std::vector<float> filterData_float32(getNumberOfElements(filterShape));
  247. std::vector<float> biasData_float32(getNumberOfElements(biasShape));
  248. std::vector<float> outputData_float32(getNumberOfElements(outputShape));
  249. convertFloat16ToFloat32(inputData, &inputData_float32);
  250. convertFloat16ToFloat32(filterData, &filterData_float32);
  251. convertFloat16ToFloat32(biasData, &biasData_float32);
  252. groupedConvFloat32(inputData_float32.data(), inputShape, filterData_float32.data(), filterShape,
  253. biasData_float32.data(), biasShape, padding_left, padding_right, padding_top,
  254. padding_bottom, stride_width, stride_height, numGroups, activation,
  255. outputData_float32.data(), outputShape);
  256. convertFloat32ToFloat16(outputData_float32, outputData);
  257. return true;
  258. }
  259. #undef ANDROID_NN_GROUPED_CONV_PARAMETERS
  260. } // namespace nn
  261. } // namespace android