/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
#define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H

#include "HalInterfaces.h"
#include "OperationResolver.h"
#include "OperationsUtils.h"
#include "Utils.h"

#include <android-base/macros.h>
#include <ui/GraphicBuffer.h>

#include <algorithm>
#include <optional>
#include <vector>

namespace android {
namespace nn {

// Information we maintain about each operand during execution that
// may change during execution.
struct RunTimeOperandInfo {
    // TODO Storing the type here is redundant, as it won't change during execution.
    OperandType type;

    // The type and dimensions of the operand. The dimensions can
    // change at runtime. We include the type because it's useful
    // to pass together with the dimensions to the functions implementing
    // the operators.
    //
    // A dimension being zero has different meanings for different operands at different stages:
    // - Model inputs:
    //     * Specified in the model: implies "dynamic", and must be fully specified in the request.
    //     * Specified in the request: illegal.
    // - Constant operands: illegal.
    // - Model outputs and internal operands:
    //     * Before evaluation: implies unknown, to be deduced from execution.
    //     * After evaluation:
    //         - If isSufficient reports true: the tensor is zero-sized.
    //         - Otherwise: implies unknown.
    std::vector<uint32_t> dimensions;

    float scale;
    int32_t zeroPoint;

    // Where the operand's data is stored. Check the corresponding
    // location information in the model to figure out if this points
    // to memory we have allocated for a temporary operand.
    uint8_t* buffer;
    // The length of the buffer.
    uint32_t length;
    // Whether this is a temporary variable, a model input, a constant, etc.
    OperandLifeTime lifetime;
    // Keeps track of how many operations have yet to make use
    // of this temporary variable. When the count is decremented to 0,
    // we free the buffer. For non-temporary variables, this count is
    // always 0.
    uint32_t numberOfUsesLeft;
    Operand::ExtraParams extraParams;

    Shape shape() const {
        return {
                .type = type,
                .dimensions = dimensions,
                .scale = scale,
                .offset = zeroPoint,
                .extraParams = extraParams,
        };
    }

    bool isSufficient() const {
        if (isExtensionOperandType(type)) {
            // We don't know sizes of extension types.
            return true;
        }
        return length >= nonExtensionOperandSizeOfData(type, dimensions);
    }
};
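
// Illustrative usage sketch (not part of this header): how execution code
// might inspect a RunTimeOperandInfo after an operation has run. The "output"
// variable is hypothetical.
//
//     RunTimeOperandInfo& output = ...;   // entry in the operand table
//     Shape shape = output.shape();       // type, dimensions, quantization
//     if (!output.isSufficient()) {
//         // The buffer is smaller than the deduced dimensions require; the
//         // runtime learns about this through the OutputShape results.
//     }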

// Used to keep a pointer to each of the memory pools.
//
// RunTimePoolInfo references a region of memory. Other RunTimePoolInfo objects
// may reference the same region of memory by either:
// (1) copying an existing RunTimePoolInfo object, or
// (2) creating multiple RunTimePoolInfo objects from the same memory resource
//     (e.g., "createFromHidlMemory" or "createFromExistingBuffer")
//
// If the underlying region of memory is mapped by "createFromHidlMemory", the
// mapping will be sustained until it is no longer referenced by any
// RunTimePoolInfo objects.
class RunTimePoolInfo {
   public:
    static std::optional<RunTimePoolInfo> createFromHidlMemory(const hidl_memory& hidlMemory);
    static RunTimePoolInfo createFromExistingBuffer(uint8_t* buffer);

    uint8_t* getBuffer() const;
    bool update() const;
    hidl_memory getHidlMemory() const;

   private:
    class RunTimePoolInfoImpl;
    RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl);

    std::shared_ptr<const RunTimePoolInfoImpl> mImpl;
};

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools);
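
// Illustrative usage sketch (not part of this header): the shared-mapping
// behavior described above. "mem" is a hypothetical hidl_memory handle.
//
//     std::optional<RunTimePoolInfo> a = RunTimePoolInfo::createFromHidlMemory(mem);
//     if (a.has_value()) {
//         RunTimePoolInfo b = *a;          // copy: references the same mapping
//         uint8_t* data = b.getBuffer();   // valid while either object lives
//     }                                    // mapping released with the last reference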

// This class is used to execute a model on the CPU.
class CpuExecutor {
   public:
    // This constructor allows clients of CpuExecutor to provide custom CPU
    // operation implementations. It is used by a sample driver to test
    // extension support.
    //
    // Note that it is not possible to provide custom CPU implementations for
    // non-OperationResolver operations (b/124041202).
    //
    // The operation resolver must outlive the executor.
    explicit CpuExecutor(const IOperationResolver* operationResolver)
        : mOperationResolver(operationResolver) {}

    CpuExecutor() : CpuExecutor(BuiltinOperationResolver::get()) {}

    // Executes the model. The results will be stored at the locations
    // specified in the request.
    // The model must outlive the executor. We prevent it from being modified
    // while this is executing.
    int run(const Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);

    const std::vector<OutputShape>& getOutputShapes() const {
        CHECK(mFinished) << "getOutputShapes() called on an unfinished CpuExecutor.";
        return mOutputShapes;
    }

   private:
    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
    // Runs one operation of the graph.
    int executeOperation(const Operation& entry);
    // Decrements the usage count for the operands listed. Frees the memory
    // allocated for any temporary variable with a count of zero.
    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);
    // Frees the memory allocated for any temporary variable, and sets the
    // output operand shapes to be returned to the runtime.
    void finish(int result);

    // The model and the request that we'll execute. Only valid while run()
    // is being executed.
    const Model* mModel = nullptr;
    const Request* mRequest = nullptr;

    // We're copying the list of all the dimensions from the model, as
    // these may be modified when we run the operations. Since we're
    // making a full copy, the indexes used in the operand description
    // stay valid.
    // std::vector<uint32_t> mDimensions;

    // Runtime information about all the operands.
    std::vector<RunTimeOperandInfo> mOperands;
    // The output operand shapes to be returned to the runtime.
    std::vector<OutputShape> mOutputShapes;
    // Whether execution is finished and mOutputShapes is ready.
    bool mFinished = false;

    const IOperationResolver* mOperationResolver;
};
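
// Illustrative usage sketch (not part of this header): driving a CpuExecutor.
// The model, request, and pool vectors are assumed to have been built
// elsewhere; the success constant follows the NNAPI error-code convention.
//
//     CpuExecutor executor;  // uses BuiltinOperationResolver by default
//     int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);
//     if (n == ANEURALNETWORKS_NO_ERROR) {
//         const std::vector<OutputShape>& shapes = executor.getOutputShapes();
//         // shapes[i] carries the deduced dimensions and an isSufficient flag
//         // for each model output.
//     }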

// Class for setting reasonable OpenMP threading settings. (OpenMP is used by
// the Eigen matrix library.)
//
// Currently sets a low blocktime: the time OpenMP threads busy-wait for more
// work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
// The default is 200ms; we set it to 20ms here, see b/109645291. This keeps the
// cores enabled throughout inference computation without too much extra power
// consumption afterwards.
//
// The OpenMP settings are thread-local (applying only to worker threads formed
// from that thread), see https://software.intel.com/en-us/node/522688 and
// http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class
// ensures that within the scope in which an object is instantiated we use the
// right settings (scopes may be nested), as long as no other library changes
// them. (Note that in current NNAPI usage only one instance is used in the
// CpuExecutor thread.)
//
// TODO(mikie): consider also setting the number of threads used. Using as many
// threads as there are cores results in more variable performance: if we don't
// get all cores for our threads, the latency is doubled as we wait for one core
// to do twice the amount of work. Reality is complicated though as not all
// cores are the same. Decision to be based on benchmarking against a
// representative set of workloads and devices. I'm keeping the code here for
// reference.
// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
class ScopedOpenmpSettings {
   public:
    ScopedOpenmpSettings();
    ~ScopedOpenmpSettings();
    DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings);

   private:
    int mBlocktimeInitial;
#if NNAPI_LIMIT_CPU_THREADS
    int mMaxThreadsInitial;
#endif
};
#endif  // NNAPI_OPENMP
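
// Illustrative usage sketch (not part of this header): RAII use of the
// settings. The constructor applies the low blocktime; the destructor
// restores the previous values when the scope ends.
//
//     #ifdef NNAPI_OPENMP
//     {
//         ScopedOpenmpSettings openMpSettings;
//         // ... run the Eigen/OpenMP-backed computation here ...
//     }  // previous OpenMP settings restored here
//     #endif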

namespace {

template <typename T>
T getScalarData(const RunTimeOperandInfo& info) {
    // TODO: Check buffer is at least as long as size of data.
    T* data = reinterpret_cast<T*>(info.buffer);
    return data[0];
}

inline bool IsNullInput(const RunTimeOperandInfo* input) {
    return input->lifetime == OperandLifeTime::NO_VALUE;
}

inline int NumInputsWithValues(const Operation& operation,
                               std::vector<RunTimeOperandInfo>& operands) {
    const std::vector<uint32_t>& inputs = operation.inputs;
    return std::count_if(inputs.begin(), inputs.end(),
                         [&operands](uint32_t i) { return !IsNullInput(&operands[i]); });
}

inline int NumOutputs(const Operation& operation) {
    return operation.outputs.size();
}

inline size_t NumDimensions(const RunTimeOperandInfo* operand) {
    return operand->shape().dimensions.size();
}

inline uint32_t SizeOfDimension(const RunTimeOperandInfo* operand, int i) {
    return operand->shape().dimensions[i];
}

inline RunTimeOperandInfo* GetInput(const Operation& operation,
                                    std::vector<RunTimeOperandInfo>& operands, int index) {
    return &operands[operation.inputs[index]];
}

inline RunTimeOperandInfo* GetOutput(const Operation& operation,
                                     std::vector<RunTimeOperandInfo>& operands, int index) {
    return &operands[operation.outputs[index]];
}
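
// Illustrative usage sketch (not part of this header): how an operation
// implementation might use the helpers above. The function name "addPrepare"
// and the input layout are hypothetical.
//
//     bool addPrepare(const Operation& operation,
//                     std::vector<RunTimeOperandInfo>& operands) {
//         const RunTimeOperandInfo* in0 = GetInput(operation, operands, 0);
//         const RunTimeOperandInfo* in1 = GetInput(operation, operands, 1);
//         // Scalar parameters are read directly from the operand's buffer.
//         int32_t activation = getScalarData<int32_t>(*GetInput(operation, operands, 2));
//         return !IsNullInput(in0) && NumDimensions(in0) == NumDimensions(in1);
//     }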

}  // anonymous namespace

}  // namespace nn
}  // namespace android

#endif  // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H