/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "OperationResolver.h"
#include "Operations.h"
#include "OperationsUtils.h"
#include "Tracing.h"

#include "Eigen/Core"
// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
#include <omp.h>
#endif  // NNAPI_OPENMP

#include <android/hardware_buffer.h>
#include <sys/mman.h>

namespace android {
namespace nn {

namespace {

class OperationExecutionContext : public IOperationExecutionContext {
    DISALLOW_IMPLICIT_CONSTRUCTORS(OperationExecutionContext);

   public:
    OperationExecutionContext(const Operation* operation, RunTimeOperandInfo* operands)
        : operation(operation), operands(operands) {}

    uint32_t getNumInputs() const override;
    OperandType getInputType(uint32_t index) const override;
    Shape getInputShape(uint32_t index) const override;
    const void* getInputBuffer(uint32_t index) const override;
    const Operand::ExtraParams getInputExtraParams(uint32_t index) const override;

    uint32_t getNumOutputs() const override;
    OperandType getOutputType(uint32_t index) const override;
    Shape getOutputShape(uint32_t index) const override;
    void* getOutputBuffer(uint32_t index) override;

    // Return false on failure and store the result code.
    // Use getResultCode() to retrieve it at the end of the operation execution.
    bool setOutputShape(uint32_t index, const Shape& shape) override;
    int getResultCode() const;

    bool isOmittedInput(uint32_t index) const override;
    bool isOmittedOutput(uint32_t index) const override;

    // Return false if any of the inputs or outputs is omitted, i.e. has lifetime of NO_VALUE.
    bool checkNoOmittedOperand() const;
    // Return false if any of the inputs has a dimension of 0.
    bool checkNoZeroSizedInput() const;

   private:
    const RunTimeOperandInfo* getInputInfo(uint32_t index) const;
    const RunTimeOperandInfo* getOutputInfo(uint32_t index) const;
    RunTimeOperandInfo* getOutputInfo(uint32_t index);

    const Operation* operation;
    RunTimeOperandInfo* operands;

    int result = ANEURALNETWORKS_NO_ERROR;
};
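// Illustrative sketch (not part of this file): an OperationResolver-based kernel is expected to
// work purely through the IOperationExecutionContext interface declared above, roughly like:
//   bool execute(IOperationExecutionContext* context) {
//       const float* in = static_cast<const float*>(context->getInputBuffer(0));
//       float* out = static_cast<float*>(context->getOutputBuffer(0));
//       // ... compute using context->getInputShape(0), report shapes via setOutputShape() ...
//       return true;
//   }
// The exact kernel signature lives in OperationResolver.h; the snippet above is only an
// assumption-based example of how these accessors are consumed.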
const RunTimeOperandInfo* OperationExecutionContext::getInputInfo(uint32_t index) const {
    CHECK(index < operation->inputs.size());
    return &operands[operation->inputs[index]];
}

const RunTimeOperandInfo* OperationExecutionContext::getOutputInfo(uint32_t index) const {
    CHECK(index < operation->outputs.size());
    return &operands[operation->outputs[index]];
}

RunTimeOperandInfo* OperationExecutionContext::getOutputInfo(uint32_t index) {
    CHECK(index < operation->outputs.size());
    return &operands[operation->outputs[index]];
}

OperandType OperationExecutionContext::getInputType(uint32_t index) const {
    return getInputInfo(index)->type;
}

Shape OperationExecutionContext::getInputShape(uint32_t index) const {
    return getInputInfo(index)->shape();
}

const void* OperationExecutionContext::getInputBuffer(uint32_t index) const {
    return getInputInfo(index)->buffer;
}

const Operand::ExtraParams OperationExecutionContext::getInputExtraParams(uint32_t index) const {
    return getInputInfo(index)->extraParams;
}

OperandType OperationExecutionContext::getOutputType(uint32_t index) const {
    return getOutputInfo(index)->type;
}

Shape OperationExecutionContext::getOutputShape(uint32_t index) const {
    return getOutputInfo(index)->shape();
}

void* OperationExecutionContext::getOutputBuffer(uint32_t index) {
    return getOutputInfo(index)->buffer;
}

uint32_t OperationExecutionContext::getNumInputs() const {
    return operation->inputs.size();
}

uint32_t OperationExecutionContext::getNumOutputs() const {
    return operation->outputs.size();
}

int OperationExecutionContext::getResultCode() const {
    return result;
}

// TODO: Return error code directly once we've fully integrated OperationResolver with all ops.
// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape, int* result) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type) {
            LOG(ERROR) << "Invalid type for model output";
            *result = ANEURALNETWORKS_OP_FAILED;
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM) {
            if (info->scale != shape.scale) {
                LOG(ERROR) << "Invalid scale for model output";
                *result = ANEURALNETWORKS_OP_FAILED;
                return false;
            }
            if (info->zeroPoint != shape.offset) {
                LOG(ERROR) << "Invalid zeroPoint for model output";
                *result = ANEURALNETWORKS_OP_FAILED;
                return false;
            }
        }
        if (info->extraParams != shape.extraParams) {
            LOG(ERROR) << "Invalid extraParams for model output";
            *result = ANEURALNETWORKS_OP_FAILED;
            return false;
        }
    }

    std::vector<uint32_t> combined;
    if (!combineDimensions(shape.dimensions, info->dimensions, &combined)) {
        LOG(ERROR) << "Invalid dimensions for model operand";
        *result = ANEURALNETWORKS_OP_FAILED;
        return false;
    }
    info->dimensions = combined;
    info->type = shape.type;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
    info->extraParams = shape.extraParams;

    // Allocate the buffer only if the combined dimensions are fully specified.
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        if (isExtensionOperandType(info->type)) {
            LOG(ERROR) << "Cannot allocate a temporary variable of an extension type";
            *result = ANEURALNETWORKS_OP_FAILED;
            return false;
        }
        uint32_t length = nonExtensionOperandSizeOfData(info->type, info->dimensions);
        if (length > 0) {
            info->buffer = new uint8_t[length];
            if (info->buffer == nullptr) {
                *result = ANEURALNETWORKS_OUT_OF_MEMORY;
                return false;
            }
            info->length = length;
        }
    }
    if (!info->isSufficient()) {
        uint32_t length = nonExtensionOperandSizeOfData(info->type, info->dimensions);
        LOG(ERROR) << "Insufficient size for model operand: require = " << length
                   << ", provided = " << info->length;
        *result = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
        return false;
    }
    *result = ANEURALNETWORKS_NO_ERROR;
    return true;
}
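// Typical call pattern in CpuExecutor::executeOperation() below (illustrative sketch;
// "fooPrepare"/"fooEval" are placeholders for the per-operation prepare/eval helpers):
//   Shape outShape = output.shape();
//   success = fooPrepare(input.shape(), &outShape) &&                     // validate & compute shape
//             setInfoAndAllocateIfNeeded(&output, outShape, &result) &&   // commit shape, allocate
//             fooEval(input.buffer, input.shape(), output.buffer, outShape);  // run the kernel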
bool OperationExecutionContext::setOutputShape(uint32_t index, const Shape& shape) {
    return setInfoAndAllocateIfNeeded(getOutputInfo(index), shape, &result);
}

bool OperationExecutionContext::isOmittedInput(uint32_t index) const {
    return getInputInfo(index)->lifetime == OperandLifeTime::NO_VALUE;
}

bool OperationExecutionContext::isOmittedOutput(uint32_t index) const {
    return getOutputInfo(index)->lifetime == OperandLifeTime::NO_VALUE;
}

bool OperationExecutionContext::checkNoOmittedOperand() const {
    for (uint32_t i = 0; i < operation->inputs.size(); i++) {
        NN_RET_CHECK(!isOmittedInput(i)) << getOperationName(operation->type) << " input operand "
                                         << i << " is required but missing.";
    }
    for (uint32_t i = 0; i < operation->outputs.size(); i++) {
        NN_RET_CHECK(!isOmittedOutput(i)) << getOperationName(operation->type) << " output operand "
                                          << i << " is required but missing.";
    }
    return true;
}

bool OperationExecutionContext::checkNoZeroSizedInput() const {
    for (uint32_t i = 0; i < operation->inputs.size(); i++) {
        if (isOmittedInput(i)) continue;
        for (uint32_t j = 0; j < getInputInfo(i)->dimensions.size(); j++) {
            NN_RET_CHECK_NE(getInputInfo(i)->dimensions[j], 0)
                    << getOperationName(operation->type)
                    << " does not support zero-sized tensor, but input " << i << " dimension " << j
                    << " is 0.";
        }
    }
    return true;
}

}  // namespace

// Used to keep a pointer to a memory pool.
//
// In the case of an "mmap_fd" pool, owns the mmap region
// returned by getBuffer() -- i.e., that region goes away
// when the RunTimePoolInfo is destroyed or is assigned to.
class RunTimePoolInfo::RunTimePoolInfoImpl {
   public:
    RunTimePoolInfoImpl(const hidl_memory& hidlMemory, uint8_t* buffer, const sp<IMemory>& memory,
                        const sp<GraphicBuffer>& graphicBuffer);

    // rule of five...
    ~RunTimePoolInfoImpl();
    RunTimePoolInfoImpl(const RunTimePoolInfoImpl&) = delete;
    RunTimePoolInfoImpl(RunTimePoolInfoImpl&&) noexcept = delete;
    RunTimePoolInfoImpl& operator=(const RunTimePoolInfoImpl&) = delete;
    RunTimePoolInfoImpl& operator=(RunTimePoolInfoImpl&&) noexcept = delete;

    uint8_t* getBuffer() const { return mBuffer; }
    bool update() const;
    hidl_memory getHidlMemory() const { return mHidlMemory; }

   private:
    const hidl_memory mHidlMemory;     // always used
    uint8_t* const mBuffer = nullptr;  // always used
    const sp<IMemory> mMemory;         // only used when hidlMemory.name() == "ashmem"
    const sp<GraphicBuffer>
            mGraphicBuffer;  // only used when hidlMemory.name() == "hardware_buffer_blob"
};

RunTimePoolInfo::RunTimePoolInfoImpl::RunTimePoolInfoImpl(const hidl_memory& hidlMemory,
                                                          uint8_t* buffer,
                                                          const sp<IMemory>& memory,
                                                          const sp<GraphicBuffer>& graphicBuffer)
    : mHidlMemory(hidlMemory), mBuffer(buffer), mMemory(memory), mGraphicBuffer(graphicBuffer) {}

RunTimePoolInfo::RunTimePoolInfoImpl::~RunTimePoolInfoImpl() {
    if (mBuffer == nullptr) {
        return;
    }
    const std::string memType = mHidlMemory.name();
    if (memType == "ashmem") {
        // nothing to do
    } else if (memType == "mmap_fd") {
        const size_t size = mHidlMemory.size();
        if (munmap(mBuffer, size)) {
            LOG(ERROR) << "RunTimePoolInfoImpl::~RunTimePoolInfoImpl(): Can't munmap";
        }
    } else if (memType == "hardware_buffer_blob") {
        mGraphicBuffer->unlock();
    } else if (memType == "") {
        // Represents a POINTER argument; nothing to do
    } else {
        LOG(ERROR) << "RunTimePoolInfoImpl::~RunTimePoolInfoImpl(): unsupported hidl_memory type";
    }
}

// Makes sure the output data is correctly updated after execution.
bool RunTimePoolInfo::RunTimePoolInfoImpl::update() const {
    const std::string memType = mHidlMemory.name();
    if (memType == "ashmem") {
        mMemory->commit();
        return true;
    }
    if (memType == "mmap_fd") {
        int prot = mHidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            const size_t size = mHidlMemory.size();
            return msync(mBuffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

// TODO: short term, make shared memory mapping and updating a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
std::optional<RunTimePoolInfo> RunTimePoolInfo::createFromHidlMemory(
        const hidl_memory& hidlMemory) {
    uint8_t* buffer = nullptr;
    sp<IMemory> memory;
    sp<GraphicBuffer> graphicBuffer;
    const auto& memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            return std::nullopt;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            return std::nullopt;
        }
    } else if (memType == "mmap_fd") {
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2], hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "RunTimePoolInfo::createFromHidlMemory(): Can't mmap the file descriptor.";
            return std::nullopt;
        }
    } else if (memType == "hardware_buffer_blob") {
        auto handle = hidlMemory.handle();
        auto format = AHARDWAREBUFFER_FORMAT_BLOB;
        auto usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
        const uint32_t width = hidlMemory.size();
        const uint32_t height = 1;  // height is always 1 for BLOB mode AHardwareBuffer.
        const uint32_t layers = 1;  // layers is always 1 for BLOB mode AHardwareBuffer.
        const uint32_t stride = hidlMemory.size();
        graphicBuffer = new GraphicBuffer(handle, GraphicBuffer::HandleWrapMethod::CLONE_HANDLE,
                                          width, height, format, layers, usage, stride);
        void* gBuffer = nullptr;
        int32_t outBytesPerPixel, outBytesPerStride;
        status_t status =
                graphicBuffer->lock(usage, &gBuffer, &outBytesPerPixel, &outBytesPerStride);
        if (status != NO_ERROR) {
            LOG(ERROR) << "RunTimePoolInfo: Can't lock the AHardwareBuffer.";
            return std::nullopt;
        }
        buffer = static_cast<uint8_t*>(gBuffer);
    } else {
        LOG(ERROR) << "RunTimePoolInfo::createFromHidlMemory(): unsupported hidl_memory type";
        return std::nullopt;
    }
    const auto impl =
            std::make_shared<const RunTimePoolInfoImpl>(hidlMemory, buffer, memory, graphicBuffer);
    return {RunTimePoolInfo(impl)};
}
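// Typical lifecycle (illustrative): a driver maps every request pool up front, hands the
// resulting RunTimePoolInfos to CpuExecutor::run(), and the pools are flushed back to the client
// via update() once execution finishes, e.g.:
//   std::vector<RunTimePoolInfo> requestPoolInfos;
//   if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) { /* fail */ }
//   int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);
// "executor" above is a hypothetical CpuExecutor instance used only for illustration.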
RunTimePoolInfo RunTimePoolInfo::createFromExistingBuffer(uint8_t* buffer) {
    const auto impl =
            std::make_shared<const RunTimePoolInfoImpl>(hidl_memory{}, buffer, nullptr, nullptr);
    return {impl};
}

RunTimePoolInfo::RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl)
    : mImpl(impl) {}

uint8_t* RunTimePoolInfo::getBuffer() const {
    return mImpl->getBuffer();
}

bool RunTimePoolInfo::update() const {
    return mImpl->update();
}

hidl_memory RunTimePoolInfo::getHidlMemory() const {
    return mImpl->getHidlMemory();
}

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    CHECK(poolInfos != nullptr);
    poolInfos->clear();
    poolInfos->reserve(pools.size());
    for (const auto& pool : pools) {
        if (std::optional<RunTimePoolInfo> poolInfo = RunTimePoolInfo::createFromHidlMemory(pool)) {
            poolInfos->push_back(*poolInfo);
        } else {
            LOG(ERROR) << "Could not map pools";
            poolInfos->clear();
            return false;
        }
    }
    return true;
}

template <typename T>
inline bool convertToNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) {
    uint32_t spatialSize = fromDim[2] * fromDim[3];
    for (uint32_t n = 0; n < fromDim[0]; n++) {
        for (uint32_t hw = 0; hw < spatialSize; hw++) {
            for (uint32_t c = 0; c < fromDim[1]; c++) {
                uint32_t fromIndex = n * fromDim[1] * spatialSize + c * spatialSize + hw;
                *to++ = from[fromIndex];
            }
        }
    }
    return true;
}

template <typename T>
inline bool convertFromNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) {
    uint32_t spatialSize = fromDim[1] * fromDim[2];
    for (uint32_t n = 0; n < fromDim[0]; n++) {
        for (uint32_t c = 0; c < fromDim[3]; c++) {
            for (uint32_t hw = 0; hw < spatialSize; hw++) {
                uint32_t fromIndex = n * spatialSize * fromDim[3] + hw * fromDim[3] + c;
                *to++ = from[fromIndex];
            }
        }
    }
    return true;
}
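// Index math, for reference: convertToNhwcImpl reads an NCHW tensor (fromDim = {N, C, H, W}) and
// writes elements sequentially in NHWC order, i.e. destination element (n, h*W + w, c) comes from
// source index n*C*H*W + c*H*W + (h*W + w). For example, with fromDim = {1, 2, 2, 2}, the NHWC
// element (n=0, hw=1, c=1) is written as the 4th output value and read from source index 5.
// convertFromNhwcImpl is the inverse mapping, with fromDim given in NHWC order ({N, H, W, C}).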
static bool convertToNhwc(RunTimeOperandInfo& to, const RunTimeOperandInfo& from,
                          std::unique_ptr<uint8_t[]>& ptr_guard, bool data_layout) {
    int result;
    if (from.dimensions.size() != 4) {
        LOG(ERROR) << "Error converting a non-4-D tensor to NHWC layout";
        return false;
    }
    to.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
    if (data_layout) {
        // convert dimensions
        Shape inShape = from.shape();
        auto& fromDim = from.dimensions;
        inShape.dimensions = {fromDim[0], fromDim[2], fromDim[3], fromDim[1]};
        // allocate buffer
        to.buffer = nullptr;
        if (!setInfoAndAllocateIfNeeded(&to, inShape, &result)) {
            return false;
        }
        ptr_guard.reset(to.buffer);
        // convert value
        if (from.type == OperandType::TENSOR_FLOAT32) {
            return convertToNhwcImpl<float>(reinterpret_cast<float*>(to.buffer),
                                            reinterpret_cast<const float*>(from.buffer), fromDim);
        } else if (from.type == OperandType::TENSOR_FLOAT16) {
            return convertToNhwcImpl<_Float16>(reinterpret_cast<_Float16*>(to.buffer),
                                               reinterpret_cast<const _Float16*>(from.buffer),
                                               fromDim);
        } else if (from.type == OperandType::TENSOR_QUANT8_ASYMM) {
            return convertToNhwcImpl<uint8_t>(reinterpret_cast<uint8_t*>(to.buffer),
                                              reinterpret_cast<const uint8_t*>(from.buffer),
                                              fromDim);
        } else {
            LOG(ERROR) << "Unsupported data type";
            return false;
        }
    } else {
        to = from;
    }
    return true;
}

static bool convertFromNhwc(RunTimeOperandInfo& to, const RunTimeOperandInfo& from,
                            bool data_layout, int* result) {
    if (from.dimensions.size() != 4) {
        LOG(ERROR) << "Error converting a non-4-D tensor from NHWC layout";
        return false;
    }
    if (data_layout) {
        // convert dimensions
        Shape outShape = from.shape();
        auto& fromDim = from.dimensions;
        outShape.dimensions = {fromDim[0], fromDim[3], fromDim[1], fromDim[2]};
        // allocate buffer
        if (!setInfoAndAllocateIfNeeded(&to, outShape, result)) {
            return false;
        }
        // convert value
        if (from.type == OperandType::TENSOR_FLOAT32) {
            return convertFromNhwcImpl<float>(reinterpret_cast<float*>(to.buffer),
                                              reinterpret_cast<const float*>(from.buffer), fromDim);
        } else if (from.type == OperandType::TENSOR_FLOAT16) {
            return convertFromNhwcImpl<_Float16>(reinterpret_cast<_Float16*>(to.buffer),
                                                 reinterpret_cast<const _Float16*>(from.buffer),
                                                 fromDim);
        } else if (from.type == OperandType::TENSOR_QUANT8_ASYMM) {
            return convertFromNhwcImpl<uint8_t>(reinterpret_cast<uint8_t*>(to.buffer),
                                                reinterpret_cast<const uint8_t*>(from.buffer),
                                                fromDim);
        } else {
            LOG(ERROR) << "Unsupported data type";
            return false;
        }
    } else {
        Shape outShape = from.shape();
        to.buffer = from.buffer;
        to.length = from.length;
        if (!setInfoAndAllocateIfNeeded(&to, outShape, result)) {
            return false;
        }
    }
    return true;
}
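// Note on data_layout: the optional layout parameter of the NHWC-based operations below selects
// NCHW when true. In that case the input is transposed into a temporary NHWC buffer before the
// kernel runs and the result is transposed back afterwards; when false, the operand buffers are
// used directly and only the shape/allocation bookkeeping above is performed.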
// Ignore the .pools entry in model and request. This will have been taken care of
// by the caller.
int CpuExecutor::run(const Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    NNTRACE_CPU(NNTRACE_PHASE_EXECUTION, "run");
    VLOG(CPUEXE) << "CpuExecutor::run() with request(" << SHOW_IF_DEBUG(toString(request)) << ")";
    // b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
    ScopedOpenmpSettings openMpSettings;
#endif  // NNAPI_OPENMP
    mModel = &model;
    mRequest = &request;  // TODO check if mRequest is needed
    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            finish(n);
            return n;
        }
    }
    for (auto& runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto& runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    finish(ANEURALNETWORKS_NO_ERROR);
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}

bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
        to.extraParams = from.extraParams;
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](
                                      const std::vector<uint32_t>& indexes,
                                      const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model. That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
                to.length = 0;
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.length = from.location.length;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);
    return true;
}

void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // Check if it's a static or model input/output.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0 && info.buffer != nullptr) {
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}
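// Reference-counting note: a TEMPORARY_VARIABLE operand starts with numberOfUsesLeft equal to its
// consumer count (set in initializeRunTimeInfo above). Each call to this helper decrements the
// count for the given operand indexes and releases the lazily allocated heap buffer once the
// count reaches zero. Constants and model inputs/outputs start at zero, so they are skipped.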
  596. int CpuExecutor::executeOperation(const Operation& operation) {
  597. // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
  598. const hidl_vec<uint32_t>& ins = operation.inputs;
  599. const hidl_vec<uint32_t>& outs = operation.outputs;
  600. bool success = false;
  601. int result = ANEURALNETWORKS_NO_ERROR;
  602. // Function to verify that the number of input and output parameters
  603. // matches what is expected. Also checks that all the parameters have
  604. // values. This function is to be used only for operations that do not
  605. // accept optional arguments.
  606. // TODO Have a version that works for optional arguments.
  607. auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
  608. size_t requiredOuts) -> bool {
  609. auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
  610. const char* type) -> bool {
  611. size_t actualCount = indexes.size();
  612. if (actualCount != requiredCount) {
  613. LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type
  614. << " operands. Got " << actualCount << " of " << requiredCount;
  615. return false;
  616. }
  617. for (size_t i = 0; i < actualCount; i++) {
  618. if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
  619. LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand "
  620. << i << " is required but missing.";
  621. return false;
  622. }
  623. }
  624. return true;
  625. };
  626. auto verifyNoZeroSizedInputs = [&operation, this](const hidl_vec<uint32_t>& indexes) {
  627. for (size_t i = 0; i < indexes.size(); i++) {
  628. for (size_t j = 0; j < mOperands[indexes[i]].dimensions.size(); j++) {
  629. if (mOperands[indexes[i]].dimensions[j] == 0) {
  630. LOG(ERROR) << getOperationName(operation.type)
  631. << " does not support zero-sized tensor, but input " << i
  632. << " dimension " << j << " is zero.";
  633. return false;
  634. }
  635. }
  636. }
  637. return true;
  638. };
  639. return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out") &&
  640. verifyNoZeroSizedInputs(ins);
  641. };
  642. switch (operation.type) {
  643. case OperationType::OEM_OPERATION: {
  644. LOG(ERROR) << "OEM operation not supported for CPU execution";
  645. success = false;
  646. } break;
  647. case OperationType::FLOOR: {
  648. if (!allParametersPresent(1, 1)) {
  649. return ANEURALNETWORKS_BAD_DATA;
  650. }
  651. const RunTimeOperandInfo& input = mOperands[ins[0]];
  652. RunTimeOperandInfo& output = mOperands[outs[0]];
  653. Shape outShape = output.shape();
  654. if (!floorPrepare(input.shape(), &outShape) ||
  655. !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
  656. break;
  657. }
  658. if (input.type == OperandType::TENSOR_FLOAT32) {
  659. success = floorFloat32(reinterpret_cast<const float*>(input.buffer),
  660. reinterpret_cast<float*>(output.buffer), outShape);
  661. } else if (input.type == OperandType::TENSOR_FLOAT16) {
  662. success = floorFloat16(reinterpret_cast<const _Float16*>(input.buffer),
  663. reinterpret_cast<_Float16*>(output.buffer), outShape);
  664. }
  665. } break;
  666. case OperationType::DEPTHWISE_CONV_2D: {
  667. const size_t inCount = ins.size();
  668. if ((inCount != 14 && inCount != 12 && inCount != 11 && inCount != 9 && inCount != 8) ||
  669. !allParametersPresent(inCount, 1)) {
  670. return ANEURALNETWORKS_BAD_DATA;
  671. }
  672. const RunTimeOperandInfo& input = mOperands[ins[0]];
  673. const RunTimeOperandInfo& filter = mOperands[ins[1]];
  674. const RunTimeOperandInfo& bias = mOperands[ins[2]];
  675. int32_t padding_left, padding_right;
  676. int32_t padding_top, padding_bottom;
  677. int32_t padding_implicit = 0;
  678. int32_t stride_width, stride_height;
  679. int32_t dilation_width_factor = 1, dilation_height_factor = 1;
  680. int32_t depth_multiplier;
  681. int32_t activation;
  682. bool data_layout = false;
  683. bool useImplicitPadding = false;
  684. if ((inCount >= 9 && mOperands[ins[8]].type == OperandType::BOOL) || inCount == 8) {
  685. padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
  686. stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
  687. stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
  688. depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
  689. activation = getScalarData<int32_t>(mOperands[ins[7]]);
  690. if (inCount >= 9) {
  691. data_layout = getScalarData<bool>(mOperands[ins[8]]);
  692. }
  693. if (inCount == 11) {
  694. dilation_width_factor = getScalarData<int32_t>(mOperands[ins[9]]);
  695. dilation_height_factor = getScalarData<int32_t>(mOperands[ins[10]]);
  696. }
  697. useImplicitPadding = true;
  698. } else if (inCount >= 11 && mOperands[ins[8]].type == OperandType::INT32) {
  699. padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
  700. padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
  701. padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
  702. padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
  703. stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
  704. stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
  705. depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
  706. activation = getScalarData<int32_t>(mOperands[ins[10]]);
  707. if (inCount >= 12) {
  708. data_layout = getScalarData<bool>(mOperands[ins[11]]);
  709. }
  710. if (inCount == 14) {
  711. dilation_width_factor = getScalarData<int32_t>(mOperands[ins[12]]);
  712. dilation_height_factor = getScalarData<int32_t>(mOperands[ins[13]]);
  713. }
  714. } else {
  715. return ANEURALNETWORKS_BAD_DATA;
  716. }
  717. RunTimeOperandInfo& output = mOperands[outs[0]];
  718. Shape outShape = output.shape();
  719. RunTimeOperandInfo input_tmp, output_tmp;
  720. std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
  721. if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
  722. success = false;
  723. break;
  724. }
  725. output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
  726. output_tmp.buffer = data_layout ? nullptr : output.buffer;
  727. output_tmp.length = data_layout ? 0 : output.length;
  728. if (useImplicitPadding) {
  729. Shape inputShape = input_tmp.shape();
  730. Shape filterShape = filter.shape();
  731. int32_t input_width = getSizeOfDimension(inputShape, 2);
  732. int32_t input_height = getSizeOfDimension(inputShape, 1);
  733. int32_t filter_width = getSizeOfDimension(filterShape, 2);
  734. int32_t filter_height = getSizeOfDimension(filterShape, 1);
  735. calculateExplicitPadding(input_width, stride_width, dilation_width_factor,
  736. filter_width, padding_implicit, &padding_left,
  737. &padding_right);
  738. calculateExplicitPadding(input_height, stride_height, dilation_height_factor,
  739. filter_height, padding_implicit, &padding_top,
  740. &padding_bottom);
  741. }
  742. if (!depthwiseConvPrepare(input_tmp.shape(), filter.shape(), bias.shape(), padding_left,
  743. padding_right, padding_top, padding_bottom, stride_width,
  744. stride_height, depth_multiplier, dilation_width_factor,
  745. dilation_height_factor, &outShape) ||
  746. !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
  747. if (!data_layout) output.dimensions = output_tmp.dimensions;
  748. success = false;
  749. break;
  750. }
  751. if (input_tmp.type == OperandType::TENSOR_FLOAT32) {
  752. success = depthwiseConvFloat32(
  753. reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
  754. reinterpret_cast<const float*>(filter.buffer), filter.shape(),
  755. reinterpret_cast<const float*>(bias.buffer), bias.shape(), padding_left,
  756. padding_right, padding_top, padding_bottom, stride_width, stride_height,
  757. dilation_width_factor, dilation_height_factor, depth_multiplier, activation,
  758. reinterpret_cast<float*>(output_tmp.buffer), outShape);
  759. } else if (input_tmp.type == OperandType::TENSOR_FLOAT16) {
  760. success = depthwiseConvFloat16(
  761. reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
  762. reinterpret_cast<const _Float16*>(filter.buffer), filter.shape(),
  763. reinterpret_cast<const _Float16*>(bias.buffer), bias.shape(), padding_left,
  764. padding_right, padding_top, padding_bottom, stride_width, stride_height,
  765. dilation_width_factor, dilation_height_factor, depth_multiplier, activation,
  766. reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
  767. } else if (input_tmp.type == OperandType::TENSOR_QUANT8_ASYMM) {
  768. if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
  769. success = depthwiseConvQuant8PerChannel(
  770. reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
  771. reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(),
  772. filter.extraParams.channelQuant().scales.data(),
  773. reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
  774. padding_left, padding_right, padding_top, padding_bottom, stride_width,
  775. stride_height, dilation_width_factor, dilation_height_factor,
  776. depth_multiplier, activation,
  777. reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
  778. } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
  779. success = depthwiseConvQuant8(
  780. reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
  781. reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
  782. reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
  783. padding_left, padding_right, padding_top, padding_bottom, stride_width,
  784. stride_height, dilation_width_factor, dilation_height_factor,
  785. depth_multiplier, activation,
  786. reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
  787. }
  788. }
  789. if (data_layout) {
  790. output_tmp_guard.reset(output_tmp.buffer);
  791. }
  792. if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
  793. success = false;
  794. break;
  795. }
  796. } break;
  797. case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
  798. const size_t inCount = ins.size();
  799. if ((inCount != 6 && inCount != 5) || !allParametersPresent(inCount, 1)) {
  800. return ANEURALNETWORKS_BAD_DATA;
  801. }
  802. const RunTimeOperandInfo& input = mOperands[ins[0]];
  803. int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
  804. float bias = (input.type == OperandType::TENSOR_FLOAT16)
  805. ? getScalarData<_Float16>(mOperands[ins[2]])
  806. : getScalarData<float>(mOperands[ins[2]]);
  807. float alpha = (input.type == OperandType::TENSOR_FLOAT16)
  808. ? getScalarData<_Float16>(mOperands[ins[3]])
  809. : getScalarData<float>(mOperands[ins[3]]);
  810. float beta = (input.type == OperandType::TENSOR_FLOAT16)
  811. ? getScalarData<_Float16>(mOperands[ins[4]])
  812. : getScalarData<float>(mOperands[ins[4]]);
  813. const int32_t axis = inCount == 6 ? getScalarData<int32_t>(mOperands[ins[5]]) : -1;
  814. RunTimeOperandInfo& output = mOperands[outs[0]];
  815. Shape outShape = output.shape();
  816. if (!genericNormalizationPrepare(input.shape(), &outShape) ||
  817. !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
  818. success = false;
  819. break;
  820. }
  821. if (input.type == OperandType::TENSOR_FLOAT32) {
  822. success = localResponseNormFloat32(
  823. reinterpret_cast<const float*>(input.buffer), input.shape(), radius, bias,
  824. alpha, beta, axis, reinterpret_cast<float*>(output.buffer), outShape);
  825. } else if (input.type == OperandType::TENSOR_FLOAT16) {
  826. success = localResponseNormFloat16(reinterpret_cast<const _Float16*>(input.buffer),
  827. input.shape(), radius, bias, alpha, beta, axis,
  828. reinterpret_cast<_Float16*>(output.buffer),
  829. outShape);
  830. }
  831. } break;
  832. case OperationType::RESHAPE: {
  833. if (!allParametersPresent(2, 1)) {
  834. return ANEURALNETWORKS_BAD_DATA;
  835. }
  836. const RunTimeOperandInfo& input = mOperands[ins[0]];
  837. const RunTimeOperandInfo& targetShape = mOperands[ins[1]];
  838. RunTimeOperandInfo& output = mOperands[outs[0]];
  839. Shape outShape = output.shape();
  840. success = reshapePrepare(input.shape(),
  841. reinterpret_cast<const int32_t*>(targetShape.buffer),
  842. getNumberOfElements(targetShape.shape()), &outShape) &&
  843. setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
  844. copyData(input.buffer, input.shape(), output.buffer, outShape);
  845. } break;
  846. case OperationType::DEPTH_TO_SPACE: {
  847. const size_t inCount = ins.size();
  848. if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) {
  849. return ANEURALNETWORKS_BAD_DATA;
  850. }
  851. const RunTimeOperandInfo& input = mOperands[ins[0]];
  852. int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
  853. bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false;
  854. RunTimeOperandInfo& output = mOperands[outs[0]];
  855. Shape outShape = output.shape();
  856. RunTimeOperandInfo input_tmp, output_tmp;
  857. std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
  858. if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
  859. success = false;
  860. break;
  861. }
  862. output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
  863. output_tmp.buffer = data_layout ? nullptr : output.buffer;
  864. output_tmp.length = data_layout ? 0 : output.length;
  865. if (!depthToSpacePrepare(input_tmp.shape(), blockSize, &outShape) ||
  866. !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
  867. if (!data_layout) output.dimensions = output_tmp.dimensions;
  868. break;
  869. }
  870. switch (input_tmp.type) {
  871. case OperandType::TENSOR_FLOAT32: {
  872. success = depthToSpaceGeneric(
  873. reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
  874. blockSize, reinterpret_cast<float*>(output_tmp.buffer), outShape);
  875. break;
  876. }
  877. case OperandType::TENSOR_FLOAT16: {
  878. success = depthToSpaceGeneric(
  879. reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
  880. blockSize, reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
  881. break;
  882. }
  883. case OperandType::TENSOR_QUANT8_ASYMM: {
  884. success = depthToSpaceGeneric(
  885. reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
  886. blockSize, reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
  887. break;
  888. }
  889. default: {
  890. LOG(ERROR) << "Unsupported data type";
  891. success = false;
  892. }
  893. }
  894. if (data_layout) {
  895. output_tmp_guard.reset(output_tmp.buffer);
  896. }
  897. if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
  898. success = false;
  899. break;
  900. }
  901. } break;
  902. case OperationType::SPACE_TO_DEPTH: {
  903. const size_t inCount = ins.size();
  904. if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) {
  905. return ANEURALNETWORKS_BAD_DATA;
  906. }
  907. const RunTimeOperandInfo& input = mOperands[ins[0]];
  908. int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
  909. bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false;
  910. RunTimeOperandInfo& output = mOperands[outs[0]];
  911. Shape outShape = output.shape();
  912. RunTimeOperandInfo input_tmp, output_tmp;
  913. std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
  914. if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
  915. success = false;
  916. break;
  917. }
  918. output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
  919. output_tmp.buffer = data_layout ? nullptr : output.buffer;
  920. output_tmp.length = data_layout ? 0 : output.length;
  921. if (!spaceToDepthPrepare(input_tmp.shape(), blockSize, &outShape) ||
  922. !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
  923. if (!data_layout) output.dimensions = output_tmp.dimensions;
  924. break;
  925. }
  926. switch (input_tmp.type) {
  927. case OperandType::TENSOR_FLOAT32: {
  928. success = spaceToDepthGeneric(
  929. reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
  930. blockSize, reinterpret_cast<float*>(output_tmp.buffer), outShape);
  931. break;
  932. }
  933. case OperandType::TENSOR_FLOAT16: {
  934. success = spaceToDepthGeneric(
  935. reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
  936. blockSize, reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
  937. break;
  938. }
  939. case OperandType::TENSOR_QUANT8_ASYMM: {
  940. success = spaceToDepthGeneric(
  941. reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
  942. blockSize, reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
  943. break;
  944. }
  945. default: {
  946. LOG(ERROR) << "Unsupported data type";
  947. success = false;
  948. }
  949. }
  950. if (data_layout) {
  951. output_tmp_guard.reset(output_tmp.buffer);
  952. }
  953. if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
  954. success = false;
  955. break;
  956. }
  957. } break;
  958. case OperationType::EMBEDDING_LOOKUP: {
  959. const RunTimeOperandInfo& values = mOperands[ins[EmbeddingLookup::kValueTensor]];
  960. const RunTimeOperandInfo& lookups = mOperands[ins[EmbeddingLookup::kLookupTensor]];
  961. RunTimeOperandInfo& output = mOperands[outs[EmbeddingLookup::kOutputTensor]];
  962. Shape outputShape;
  963. EmbeddingLookup lookup(operation, mOperands);
  964. success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
  965. setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lookup.Eval();
  966. } break;
  967. case OperationType::HASHTABLE_LOOKUP: {
  968. const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]];
  969. const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]];
  970. const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]];
  971. RunTimeOperandInfo& output = mOperands[outs[HashtableLookup::kOutputTensor]];
  972. RunTimeOperandInfo& hits = mOperands[outs[HashtableLookup::kHitsTensor]];
  973. Shape outputShape, hitShape;
  974. HashtableLookup lookup(operation, mOperands);
  975. success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
  976. &outputShape, &hitShape) &&
  977. setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
  978. setInfoAndAllocateIfNeeded(&hits, hitShape, &result) && lookup.Eval();
  979. } break;
  980. case OperationType::LSH_PROJECTION: {
  981. RunTimeOperandInfo& output = mOperands[outs[LSHProjection::kOutputTensor]];
  982. Shape outputShape;
  983. if (!LSHProjection::Prepare(operation, mOperands, &outputShape) ||
  984. !setInfoAndAllocateIfNeeded(&output, outputShape, &result)) {
  985. break;
  986. }
  987. LSHProjection lsh(operation, mOperands);
  988. const RunTimeOperandInfo& hash = mOperands[ins[LSHProjection::kHashTensor]];
  989. switch (hash.type) {
  990. case OperandType::TENSOR_FLOAT32: {
  991. success = lsh.Eval<float>();
  992. break;
  993. }
  994. case OperandType::TENSOR_FLOAT16: {
  995. success = lsh.Eval<_Float16>();
  996. break;
  997. }
  998. default: {
  999. success = false;
  1000. LOG(ERROR) << "Unsupported data type";
  1001. }
  1002. }
  1003. } break;
  1004. case OperationType::BIDIRECTIONAL_SEQUENCE_LSTM: {
  1005. const auto merge_outputs = getScalarData<bool>(
  1006. mOperands[ins[BidirectionalSequenceLSTM::kMergeOutputsParam]]);
  1007. RunTimeOperandInfo& fwOutput =
  1008. mOperands[outs[BidirectionalSequenceLSTM::kFwOutputTensor]];
  1009. Shape fwOutputShape, bwOutputShape;
  1010. BidirectionalSequenceLSTM lstm(operation, mOperands);
  1011. success = lstm.Prepare(operation, mOperands, &fwOutputShape, &bwOutputShape) &&
  1012. setInfoAndAllocateIfNeeded(&fwOutput, fwOutputShape, &result);
  1013. if (!merge_outputs) {
  1014. RunTimeOperandInfo& bwOutput =
  1015. mOperands[outs[BidirectionalSequenceLSTM::kBwOutputTensor]];
  1016. success = success && setInfoAndAllocateIfNeeded(&bwOutput, bwOutputShape, &result);
  1017. }
  1018. success = success && lstm.Eval();
  1019. } break;
  1020. case OperationType::LSTM: {
  1021. RunTimeOperandInfo& scratch = mOperands[outs[LSTMCell::kScratchBufferTensor]];
  1022. RunTimeOperandInfo& outputStateOut = mOperands[outs[LSTMCell::kOutputStateOutTensor]];
  1023. RunTimeOperandInfo& cellStateOut = mOperands[outs[LSTMCell::kCellStateOutTensor]];
  1024. RunTimeOperandInfo& output = mOperands[outs[LSTMCell::kOutputTensor]];
  1025. Shape scratchShape, outputStateShape, cellStateShape, outputShape;
  1026. LSTMCell lstm_cell(operation, mOperands);
  1027. success = lstm_cell.Prepare(operation, mOperands, &scratchShape, &outputStateShape,
  1028. &cellStateShape, &outputShape) &&
  1029. setInfoAndAllocateIfNeeded(&scratch, scratchShape, &result) &&
  1030. setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape, &result) &&
  1031. setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape, &result) &&
  1032. setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lstm_cell.Eval();
  1033. } break;
  1034. case OperationType::RANDOM_MULTINOMIAL: {
  1035. const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]];
  1036. const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]];
  1037. const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]];
  1038. RunTimeOperandInfo& output = mOperands[outs[Multinomial::kOutputTensor]];
  1039. Shape outputShape;
  1040. Multinomial multinomial(operation, mOperands);
  1041. success = Multinomial::Prepare(operation, mOperands, &outputShape) &&
  1042. setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
  1043. multinomial.Eval();
  1044. } break;
  1045. case OperationType::RNN: {
  1046. RunTimeOperandInfo& hiddenStateOut = mOperands[outs[RNN::kHiddenStateOutTensor]];
  1047. RunTimeOperandInfo& output = mOperands[outs[RNN::kOutputTensor]];
  1048. Shape hiddenStateShape, outputShape;
  1049. RNN rnn_cell(operation, mOperands);
  1050. success = RNN::Prepare(operation, mOperands, &hiddenStateShape, &outputShape) &&
  1051. setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape, &result) &&
  1052. setInfoAndAllocateIfNeeded(&output, outputShape, &result) && rnn_cell.Eval();
  1053. } break;
  1054. case OperationType::SVDF: {
  1055. RunTimeOperandInfo& stateOut = mOperands[outs[SVDF::kStateOutTensor]];
  1056. RunTimeOperandInfo& output = mOperands[outs[SVDF::kOutputTensor]];
  1057. Shape stateShape, outputShape;
  1058. SVDF svdf(operation, mOperands);
  1059. success = SVDF::Prepare(operation, mOperands, &stateShape, &outputShape) &&
  1060. setInfoAndAllocateIfNeeded(&stateOut, stateShape, &result) &&
  1061. setInfoAndAllocateIfNeeded(&output, outputShape, &result) && svdf.Eval();
  1062. } break;
        case OperationType::BATCH_TO_SPACE_ND: {
            const size_t inCount = ins.size();
            if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
            bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false;
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            RunTimeOperandInfo input_tmp, output_tmp;
            std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
            if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
                success = false;
                break;
            }
            output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
            output_tmp.buffer = data_layout ? nullptr : output.buffer;
            output_tmp.length = data_layout ? 0 : output.length;
            if (!batchToSpacePrepare(input_tmp.shape(),
                                     reinterpret_cast<const int32_t*>(blockSize.buffer),
                                     blockSize.shape(), &outShape) ||
                !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
                if (!data_layout) output.dimensions = output_tmp.dimensions;
                break;
            }
            switch (input_tmp.type) {
                case OperandType::TENSOR_FLOAT32: {
                    success = batchToSpaceGeneric(
                            reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int32_t*>(blockSize.buffer),
                            reinterpret_cast<float*>(output_tmp.buffer), outShape);
                    break;
                }
                case OperandType::TENSOR_FLOAT16: {
                    success = batchToSpaceGeneric(
                            reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int32_t*>(blockSize.buffer),
                            reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
                    break;
                }
                case OperandType::TENSOR_QUANT8_ASYMM: {
                    success = batchToSpaceGeneric(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int32_t*>(blockSize.buffer),
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                    break;
                }
                default: {
                    LOG(ERROR) << "Unsupported data type";
                    success = false;
                }
            }
            if (data_layout) {
                output_tmp_guard.reset(output_tmp.buffer);
            }
            if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
                success = false;
                break;
            }
        } break;
        case OperationType::SPACE_TO_BATCH_ND: {
            const size_t inCount = ins.size();
            if ((inCount != 4 && inCount != 3) || !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
            const RunTimeOperandInfo& paddings = mOperands[ins[2]];
            bool data_layout = inCount == 4 ? getScalarData<bool>(mOperands[ins[3]]) : false;
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            RunTimeOperandInfo input_tmp, output_tmp;
            std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
            if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
                success = false;
                break;
            }
            output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
            output_tmp.buffer = data_layout ? nullptr : output.buffer;
            output_tmp.length = data_layout ? 0 : output.length;
            if (!spaceToBatchPrepare(
                        input_tmp.shape(), reinterpret_cast<const int32_t*>(blockSize.buffer),
                        blockSize.shape(), reinterpret_cast<const int32_t*>(paddings.buffer),
                        paddings.shape(), &outShape) ||
                !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
                if (!data_layout) output.dimensions = output_tmp.dimensions;
                break;
            }
            switch (input_tmp.type) {
                case OperandType::TENSOR_FLOAT32: {
                    success = spaceToBatchGeneric(
                            reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int32_t*>(blockSize.buffer),
                            reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(),
                            reinterpret_cast<float*>(output_tmp.buffer), outShape);
                    break;
                }
                case OperandType::TENSOR_FLOAT16: {
                    success = spaceToBatchGeneric(
                            reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int32_t*>(blockSize.buffer),
                            reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(),
                            reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
                    break;
                }
                case OperandType::TENSOR_QUANT8_ASYMM: {
                    success = spaceToBatchGeneric(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int32_t*>(blockSize.buffer),
                            reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(),
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                    break;
                }
                default: {
                    LOG(ERROR) << "Unsupported data type";
                    success = false;
                }
            }
            if (data_layout) {
                output_tmp_guard.reset(output_tmp.buffer);
            }
            if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
                success = false;
                break;
            }
        } break;
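        // PAD pads with a default value (0 for float tensors, the zero point for
        // TENSOR_QUANT8_ASYMM); PAD_V2 takes the pad value as an explicit third input.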
        case OperationType::PAD:
        case OperationType::PAD_V2: {
            const bool isV2 = operation.type == OperationType::PAD_V2;
            if (!allParametersPresent(isV2 ? 3 : 2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& paddings = mOperands[ins[1]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            if (!padPrepare(input.shape(), reinterpret_cast<const int32_t*>(paddings.buffer),
                            paddings.shape(), &outShape) ||
                !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
                break;
            }
            if (input.type == OperandType::TENSOR_FLOAT32) {
                float pad_value = isV2 ? getScalarData<float>(mOperands[ins[2]]) : 0;
                success = padGeneric(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                     reinterpret_cast<const int32_t*>(paddings.buffer), pad_value,
                                     reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_FLOAT16) {
                _Float16 pad_value = isV2 ? getScalarData<_Float16>(mOperands[ins[2]]) : 0;
                success = padGeneric(reinterpret_cast<const _Float16*>(input.buffer), input.shape(),
                                     reinterpret_cast<const int32_t*>(paddings.buffer),
                                     static_cast<_Float16>(pad_value),
                                     reinterpret_cast<_Float16*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                uint8_t pad_value =
                        isV2 ? getScalarData<uint8_t>(mOperands[ins[2]]) : outShape.offset;
                success = padGeneric(input.buffer, input.shape(),
                                     reinterpret_cast<const int32_t*>(paddings.buffer), pad_value,
                                     output.buffer, outShape);
            }
        } break;
        case OperationType::CAST: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            success = cast::prepare(input.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      cast::eval(input.buffer, input.shape(), output.buffer, outShape);
        } break;
        case OperationType::SQUEEZE: {
            if (ins.size() != 2 || outs.size() != 1 ||
                mOperands[ins[0]].lifetime == OperandLifeTime::NO_VALUE ||
                mOperands[outs[0]].lifetime == OperandLifeTime::NO_VALUE) {
                LOG(ERROR) << "Wrong input/output count or lifetime for SQUEEZE op.";
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            success = squeezePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(squeezeDims.buffer),
                                     squeezeDims.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      copyData(input.buffer, input.shape(), output.buffer, outShape);
        } break;
        case OperationType::STRIDED_SLICE: {
            if (!allParametersPresent(7, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& begins = mOperands[ins[1]];
            const RunTimeOperandInfo& ends = mOperands[ins[2]];
            const RunTimeOperandInfo& strides = mOperands[ins[3]];
            int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]);
            int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]);
            int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]);
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            success =
                    stridedSlicePrepare(
                            input.shape(), reinterpret_cast<const int32_t*>(begins.buffer),
                            begins.shape(), reinterpret_cast<const int32_t*>(ends.buffer),
                            ends.shape(), reinterpret_cast<const int32_t*>(strides.buffer),
                            strides.shape(), beginMask, endMask, shrinkAxisMask, &outShape) &&
                    setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                    stridedSliceGeneric(input.buffer, input.shape(),
                                        reinterpret_cast<const int32_t*>(begins.buffer),
                                        reinterpret_cast<const int32_t*>(ends.buffer),
                                        reinterpret_cast<const int32_t*>(strides.buffer), beginMask,
                                        endMask, shrinkAxisMask, output.buffer, outShape);
        } break;
        case OperationType::MEAN: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& axis = mOperands[ins[1]];
            int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]);
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            if (!meanPrepare(input.shape(), reinterpret_cast<const int32_t*>(axis.buffer),
                             axis.shape(), keepDims > 0, &outShape) ||
                !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
                break;
            }
            if (input.type == OperandType::TENSOR_FLOAT16) {
                success = meanFloat16(reinterpret_cast<_Float16*>(input.buffer), input.shape(),
                                      reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(),
                                      keepDims > 0, reinterpret_cast<_Float16*>(output.buffer),
                                      outShape);
            } else if (input.type == OperandType::TENSOR_FLOAT32) {
                success = meanGeneric<float, float>(
                        reinterpret_cast<float*>(input.buffer), input.shape(),
                        reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), keepDims > 0,
                        reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = meanGeneric<uint8_t, int32_t>(
                        reinterpret_cast<uint8_t*>(input.buffer), input.shape(),
                        reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), keepDims > 0,
                        reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::ARGMAX:
        case OperationType::ARGMIN: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            const bool isArgMin = operation.type == OperationType::ARGMIN;
            success = argMinMaxPrepare(input.shape(), axis, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      argMinMaxGeneric(input.buffer, input.shape(), axis, isArgMin, output.buffer,
                                       outShape);
        } break;
        case OperationType::EXPAND_DIMS: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            success = expand_dims::prepare(input.shape(), axis, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      expand_dims::eval(input.buffer, input.shape(), axis, output.buffer, outShape);
        } break;
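        // SPLIT checks that the declared number of outputs matches the output list, prepares a
        // shape for each slice, and then dispatches to a type-specific split kernel.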
        case OperationType::SPLIT: {
            if (ins.size() != 3) {
                LOG(ERROR) << "Wrong input count for SPLIT op.";
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);
            const int32_t numOutputs = getScalarData<int32_t>(mOperands[ins[2]]);
            if (numOutputs != outs.size()) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            std::vector<Shape> outputShapes(numOutputs);
            for (int i = 0; i < numOutputs; ++i) {
                outputShapes[i] = mOperands[outs[i]].shape();
            }
            success = splitPrepare(input.shape(), axis, numOutputs, &outputShapes);
            for (int i = 0; i < numOutputs; ++i) {
                success = success && setInfoAndAllocateIfNeeded(&(mOperands[outs[i]]),
                                                                outputShapes[i], &result);
            }
            switch (input.type) {
                case OperandType::TENSOR_FLOAT16: {
                    std::vector<_Float16*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<_Float16*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitFloat16(reinterpret_cast<const _Float16*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_FLOAT32: {
                    std::vector<float*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<float*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitFloat32(reinterpret_cast<const float*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_INT32: {
                    std::vector<int32_t*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<int32_t*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitInt32(reinterpret_cast<const int32_t*>(input.buffer),
                                         input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_QUANT8_ASYMM: {
                    std::vector<uint8_t*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<uint8_t*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                          input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                default: {
                    return ANEURALNETWORKS_BAD_DATA;
                }
            }
        } break;
        case OperationType::MAXIMUM:
        case OperationType::MINIMUM: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outputShape = output.shape();
            const bool isMinimum = operation.type == OperationType::MINIMUM;
            success = maximum_minimum::prepare(in1.shape(), in2.shape(), &outputShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
                      maximum_minimum::eval(in1.buffer, in1.shape(), in2.buffer, in2.shape(),
                                            isMinimum, output.buffer, outputShape);
        } break;
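        // GROUPED_CONV_2D accepts two signatures: 12 inputs with explicit padding
        // (left/right/top/bottom) or 9 inputs with an implicit padding scheme, which is converted
        // to explicit padding via calculateExplicitPadding() once the NHWC input shape is known.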
        case OperationType::GROUPED_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 12 && inCount != 9) || !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias = mOperands[ins[2]];
            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t padding_implicit = 0;
            int32_t stride_width, stride_height;
            int32_t numGroups;
            int32_t activation;
            bool data_layout = false;
            if (inCount == 12) {
                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
                numGroups = getScalarData<int32_t>(mOperands[ins[9]]);
                activation = getScalarData<int32_t>(mOperands[ins[10]]);
                data_layout = getScalarData<bool>(mOperands[ins[11]]);
            } else {
                padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
                numGroups = getScalarData<int32_t>(mOperands[ins[6]]);
                activation = getScalarData<int32_t>(mOperands[ins[7]]);
                data_layout = getScalarData<bool>(mOperands[ins[8]]);
            }
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            RunTimeOperandInfo input_tmp, output_tmp;
            std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
            if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
                success = false;
                break;
            }
            output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
            output_tmp.buffer = data_layout ? nullptr : output.buffer;
            output_tmp.length = data_layout ? 0 : output.length;
            if (inCount == 9) {
                Shape inputShape = input_tmp.shape();
                Shape filterShape = filter.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }
            if (!groupedConvPrepare(input_tmp.shape(), filter.shape(), bias.shape(), padding_left,
                                    padding_right, padding_top, padding_bottom, stride_width,
                                    stride_height, numGroups, &outShape) ||
                !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
                if (!data_layout) output.dimensions = output_tmp.dimensions;
                success = false;
                break;
            }
            if (input_tmp.type == OperandType::TENSOR_FLOAT32) {
                success = groupedConvFloat32(
                        reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
                        reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                        reinterpret_cast<const float*>(bias.buffer), bias.shape(), padding_left,
                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
                        numGroups, activation, reinterpret_cast<float*>(output_tmp.buffer),
                        outShape);
            } else if (input_tmp.type == OperandType::TENSOR_FLOAT16) {
                success = groupedConvFloat16(
                        reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
                        reinterpret_cast<const _Float16*>(filter.buffer), filter.shape(),
                        reinterpret_cast<const _Float16*>(bias.buffer), bias.shape(), padding_left,
                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
                        numGroups, activation, reinterpret_cast<_Float16*>(output_tmp.buffer),
                        outShape);
            } else if (input_tmp.type == OperandType::TENSOR_QUANT8_ASYMM) {
                if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
                    success = groupedConvQuant8PerChannel(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(),
                            filter.extraParams.channelQuant().scales.data(),
                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
                            stride_height, numGroups, activation,
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
                    success = groupedConvQuant8(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
                            stride_height, numGroups, activation,
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                }
            }
            if (data_layout) {
                output_tmp_guard.reset(output_tmp.buffer);
            }
            if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
                success = false;
                break;
            }
        } break;
        case OperationType::TILE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& multiples = mOperands[ins[1]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            success =
                    tile::prepare(input.shape(), reinterpret_cast<const int32_t*>(multiples.buffer),
                                  multiples.shape(), &outShape) &&
                    setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                    tile::eval(input.buffer, input.shape(),
                               reinterpret_cast<const int32_t*>(multiples.buffer), output.buffer,
                               outShape);
        } break;
        case OperationType::QUANTIZED_16BIT_LSTM: {
            if (!allParametersPresent(15, 2)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& cellStateOut =
                    mOperands[outs[QuantizedLSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo& output = mOperands[outs[QuantizedLSTMCell::kOutputTensor]];
            Shape cellStateOutShape, outputShape;
            QuantizedLSTMCell quantizedLSTMCell(operation, mOperands);
            success = QuantizedLSTMCell::prepare(operation, mOperands, &cellStateOutShape,
                                                 &outputShape) &&
                      setInfoAndAllocateIfNeeded(&cellStateOut, cellStateOutShape, &result) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
                      quantizedLSTMCell.eval();
        } break;
        case OperationType::POW: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& base = mOperands[ins[0]];
            const RunTimeOperandInfo& exponent = mOperands[ins[1]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();
            success = pow::prepare(base.shape(), exponent.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      pow::eval(base.buffer, base.shape(), exponent.buffer, exponent.shape(),
                                output.buffer, outShape);
        } break;
        case OperationType::TOPK_V2: {
            if (!allParametersPresent(2, 2)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t k = getScalarData<int32_t>(mOperands[ins[1]]);
            RunTimeOperandInfo& values = mOperands[outs[0]];
            Shape valuesShape = values.shape();
            RunTimeOperandInfo& indices = mOperands[outs[1]];
            Shape indicesShape = indices.shape();
            success = topk_v2::prepare(input.shape(), k, &valuesShape, &indicesShape) &&
                      setInfoAndAllocateIfNeeded(&values, valuesShape, &result) &&
                      setInfoAndAllocateIfNeeded(&indices, indicesShape, &result) &&
                      topk_v2::eval(input.buffer, input.shape(), k, values.buffer, valuesShape,
                                    indices.buffer, indicesShape);
        } break;
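        // Any operation without a dedicated case above is dispatched through the operation
        // resolver: the registered prepare() computes output shapes and execute() runs the
        // kernel, with optional flags relaxing the checks for omitted or zero-sized operands.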
        default: {
            const OperationRegistration* operationRegistration =
                    mOperationResolver->findOperation(operation.type);
            if (operationRegistration == nullptr) {
                LOG(ERROR) << getOperationName(operation.type) << " not registered";
            } else if (operationRegistration->prepare == nullptr ||
                       operationRegistration->execute == nullptr) {
                LOG(ERROR) << "Incomplete operation registration: "
                           << getOperationName(operation.type);
            } else {
                OperationExecutionContext context(&operation, mOperands.data());
                success = operationRegistration->flags.allowOmittedOperand ||
                          context.checkNoOmittedOperand();
                success = success && (operationRegistration->flags.allowZeroSizedInput ||
                                      context.checkNoZeroSizedInput());
                success = success && operationRegistration->prepare(&context) &&
                          operationRegistration->execute(&context);
                result = context.getResultCode();
            }
        }
    }
    if (!success && result == ANEURALNETWORKS_NO_ERROR) {
        result = ANEURALNETWORKS_OP_FAILED;
    }
    if (result != ANEURALNETWORKS_NO_ERROR) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return result;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}
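
// Called once execution of the whole model is complete (successfully or not): releases any
// remaining temporary buffers and, on success or OUTPUT_INSUFFICIENT_SIZE, records the final
// output shapes so they can be reported back to the caller.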
void CpuExecutor::finish(int result) {
    // Free allocated temporary operands.
    for (auto& info : mOperands) {
        if (info.lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info.buffer != nullptr) {
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }

    // Only report the output shapes when the result code is NO_ERROR or
    // OUTPUT_INSUFFICIENT_SIZE.
    if (result == ANEURALNETWORKS_NO_ERROR || result == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
        const auto& outputs = mModel->outputIndexes;
        mOutputShapes.resize(outputs.size());
        for (uint32_t i = 0; i < outputs.size(); i++) {
            const uint32_t operandIndex = outputs[i];
            RunTimeOperandInfo& from = mOperands[operandIndex];
            mOutputShapes[i].dimensions = from.dimensions;
            mOutputShapes[i].isSufficient = from.isSufficient();
        }
    } else {
        mOutputShapes.clear();
    }

    mModel = nullptr;
    mRequest = nullptr;
    mFinished = true;
}
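
// Scoped adjustment of the OpenMP runtime while a CPU execution is in flight: the KMP blocktime
// is lowered so worker threads spin for at most 20 ms before sleeping, and (when
// NNAPI_LIMIT_CPU_THREADS is enabled) the Eigen thread pool is shrunk on larger core counts.
// The previous settings are restored in the destructor.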
// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
ScopedOpenmpSettings::ScopedOpenmpSettings() {
    mBlocktimeInitial = kmp_get_blocktime();
    kmp_set_blocktime(20);  // ms, see b/109645291
#if NNAPI_LIMIT_CPU_THREADS
    // Code not yet enabled. The thread count below still needs to be tuned based on
    // benchmarking. See the longer comment by the class declaration.
    mMaxThreadsInitial = Eigen::nbThreads();
    const int nProcs = omp_get_num_procs();
    int threads = nProcs;
    if (nProcs >= 8) {
        threads = nProcs - 4;
    } else if (nProcs >= 4) {
        threads = nProcs - 2;
    }
    Eigen::setNbThreads(threads);
#endif
}

ScopedOpenmpSettings::~ScopedOpenmpSettings() {
    kmp_set_blocktime(mBlocktimeInitial);
#if NNAPI_LIMIT_CPU_THREADS
    Eigen::setNbThreads(mMaxThreadsInitial);
#endif
}
#endif  // NNAPI_OPENMP

}  // namespace nn
}  // namespace android