// rsCpuScriptGroup2.cpp
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config.h"
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuExecutable.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"

using std::string;
using std::vector;
  24. namespace android {
  25. namespace renderscript {
  26. namespace {
  27. const size_t DefaultKernelArgCount = 2;
// Kernel-expansion entry point for the fallback (non-fused) path. The launch
// sets kinfo->usr to this batch's List<CPUClosure*>; for each closure we patch
// the launch info in place (input pointers/strides, output pointer, and usr,
// which intrinsics expect to be their own "this"), run the closure's expanded
// kernel over [xstart, xend), and afterwards restore the fields we changed so
// the caller's launch info stays consistent across row invocations.
void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
               uint32_t xend, uint32_t outstep) {
    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);

    // Save the fields we are about to overwrite so they can be restored below.
    const size_t oldInLen = mutable_kinfo->inLen;
    decltype(mutable_kinfo->inStride) oldInStride;
    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));

    for (CPUClosure* cpuClosure : closures) {
        const Closure* closure = cpuClosure->mClosure;

        // There had better be enough space in mutable_kinfo
        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);

        // Point each input at the cell (xstart, current.y) of the closure's
        // argument allocation.
        for (size_t i = 0; i < closure->mNumArg; i++) {
            const void* arg = closure->mArgs[i];
            const Allocation* a = (const Allocation*)arg;
            const uint32_t eStride = a->mHal.state.elementSizeBytes;
            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
                    eStride * xstart;
            if (kinfo->dim.y > 1) {
                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
            }
            mutable_kinfo->inPtr[i] = ptr;
            mutable_kinfo->inStride[i] = eStride;
        }
        mutable_kinfo->inLen = closure->mNumArg;

        // Same addressing for the closure's return-value allocation.
        const Allocation* out = closure->mReturnValue;
        const uint32_t ostep = out->mHal.state.elementSizeBytes;
        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
                ostep * xstart;
        if (kinfo->dim.y > 1) {
            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
        }
        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);

        // The implementation of an intrinsic relies on kinfo->usr being
        // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object)
        mutable_kinfo->usr = cpuClosure->mSi;

        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
    }

    // Restore the patched fields for the next invocation of this function.
    mutable_kinfo->inLen = oldInLen;
    mutable_kinfo->usr = &closures;
    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
}
  69. } // namespace
  70. Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
  71. mGroup(group), mFunc(nullptr) {
  72. mName = strndup(name, strlen(name));
  73. }
  74. Batch::~Batch() {
  75. for (CPUClosure* c : mClosures) {
  76. delete c;
  77. }
  78. free(mName);
  79. }
// Returns true if |cpuClosure| cannot join this batch and must start a new
// one. A closure conflicts when:
//   - either side is an invoke (invokes are always batched alone);
//   - it depends on a global written by an already-batched closure, or on a
//     field (non-null fieldId) of a batched closure's argument; or
//   - it is not chained to the LAST batched closure via that closure's return
//     value feeding this closure's first argument — the only shape the bcc
//     kernel-fusion pass accepts.
bool Batch::conflict(CPUClosure* cpuClosure) const {
    if (mClosures.empty()) {
        return false;
    }

    const Closure* closure = cpuClosure->mClosure;

    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
        // An invoke should be in a batch by itself, so it conflicts with any other
        // closure.
        return true;
    }

    const auto& globalDeps = closure->mGlobalDeps;
    const auto& argDeps = closure->mArgDeps;

    for (CPUClosure* c : mClosures) {
        const Closure* batched = c->mClosure;
        // Depending on a global set by a batched closure forces a new batch.
        if (globalDeps.find(batched) != globalDeps.end()) {
            return true;
        }
        const auto& it = argDeps.find(batched);
        if (it != argDeps.end()) {
            const auto& args = (*it).second;
            for (const auto &p1 : *args) {
                // A non-null fieldId means this closure reads a field of a
                // batched closure's argument — not fusable.
                if (p1.second.get() != nullptr) {
                    return true;
                }
            }
        }
    }

    // The compiler fusion pass in bcc expects that kernels chained up through
    // (1st) input and output.

    const Closure* lastBatched = mClosures.back()->mClosure;
    const auto& it = argDeps.find(lastBatched);

    if (it == argDeps.end()) {
        return true;
    }

    const auto& args = (*it).second;
    for (const auto &p1 : *args) {
        if (p1.first == 0 && p1.second.get() == nullptr) {
            // The new closure depends on the last batched closure's return
            // value (fieldId being nullptr) for its first argument (argument 0)
            return false;
        }
    }

    return true;
}
// Partitions the group's closures into batches of fusable kernels (invokes
// always get a batch of their own, per Batch::conflict), then tries to
// compile the batches into one shared object. If compilation produced both
// the dlopen handle and the executable wrapper, each batch's fused entry
// point is resolved from the shared object.
CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
                                         const ScriptGroupBase *sg) :
    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
    mExecutable(nullptr), mScriptObj(nullptr) {
    rsAssert(!mGroup->mClosures.empty());

    mCpuRefImpl->lockMutex();

    Batch* batch = new Batch(this, "Batch0");
    int i = 0;
    for (Closure* closure: mGroup->mClosures) {
        CPUClosure* cc;
        const IDBase* funcID = closure->mFunctionID.get();
        RsdCpuScriptImpl* si =
                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
        if (closure->mIsKernel) {
            // Capture the expanded-kernel pointer so the fallback path can
            // run this closure without the fused shared object.
            MTLaunchStructForEach mtls;
            si->forEachKernelSetup(funcID->mSlot, &mtls);
            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
        } else {
            cc = new CPUClosure(closure, si);
        }

        if (batch->conflict(cc)) {
            // Close the current batch and open a fresh one named "Batch<i>".
            mBatches.push_back(batch);
            std::stringstream ss;
            ss << "Batch" << ++i;
            std::string batchStr(ss.str());
            batch = new Batch(this, batchStr.c_str());
        }

        batch->mClosures.push_back(cc);
    }

    rsAssert(!batch->mClosures.empty());
    mBatches.push_back(batch);

#ifndef RS_COMPATIBILITY_LIB
    compile(mGroup->mCacheDir);
    // compile() may fail silently; only resolve entry points when both the
    // shared-object handle and the executable wrapper exist.
    if (mScriptObj != nullptr && mExecutable != nullptr) {
        for (Batch* batch : mBatches) {
            batch->resolveFuncPtr(mScriptObj);
        }
    }
#endif  // RS_COMPATIBILITY_LIB

    mCpuRefImpl->unlockMutex();
}
  165. void Batch::resolveFuncPtr(void* sharedObj) {
  166. std::string funcName(mName);
  167. if (mClosures.front()->mClosure->mIsKernel) {
  168. funcName.append(".expand");
  169. }
  170. mFunc = dlsym(sharedObj, funcName.c_str());
  171. rsAssert (mFunc != nullptr);
  172. }
  173. CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
  174. for (Batch* batch : mBatches) {
  175. delete batch;
  176. }
  177. delete mExecutable;
  178. // TODO: move this dlclose into ~ScriptExecutable().
  179. if (mScriptObj != nullptr) {
  180. dlclose(mScriptObj);
  181. }
  182. }
  183. namespace {
  184. #ifndef RS_COMPATIBILITY_LIB
// Returns the path of the core bitcode library to link against, and writes
// the path of the relaxed-precision variant (or "") to *coreLibRelaxedPath.
// Selection is: debug context -> debug library; otherwise an architecture-
// specific library chosen at preprocessing time.
string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
    *coreLibRelaxedPath = "";

    // If we're debugging, use the debug library.
    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
        return SYSLIBPATH_BC"/libclcore_debug.bc";
    }

    // Check for a platform specific library
#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
    // NEON-capable ARMv7a devices can use an accelerated math library
    // for all reduced precision scripts.
    // ARMv8 does not use NEON, as ASIMD can be used with all precision
    // levels.
    *coreLibRelaxedPath = SYSLIBPATH_BC"/libclcore_neon.bc";
#endif

#if defined(__i386__) || defined(__x86_64__)
    // x86 devices will use an optimized library.
    return SYSLIBPATH_BC"/libclcore_x86.bc";
#else
    return SYSLIBPATH_BC"/libclcore.bc";
#endif
}
  206. void setupCompileArguments(
  207. const vector<const char*>& inputs, const vector<string>& kernelBatches,
  208. const vector<string>& invokeBatches,
  209. const char* outputDir, const char* outputFileName,
  210. const char* coreLibPath, const char* coreLibRelaxedPath,
  211. const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
  212. int optLevel, vector<const char*>* args) {
  213. args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
  214. args->push_back("-fPIC");
  215. args->push_back("-embedRSInfo");
  216. if (emitGlobalInfo) {
  217. args->push_back("-rs-global-info");
  218. if (emitGlobalInfoSkipConstant) {
  219. args->push_back("-rs-global-info-skip-constant");
  220. }
  221. }
  222. args->push_back("-mtriple");
  223. args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
  224. args->push_back("-bclib");
  225. args->push_back(coreLibPath);
  226. args->push_back("-bclib_relaxed");
  227. args->push_back(coreLibRelaxedPath);
  228. for (const char* input : inputs) {
  229. args->push_back(input);
  230. }
  231. for (const string& batch : kernelBatches) {
  232. args->push_back("-merge");
  233. args->push_back(batch.c_str());
  234. }
  235. for (const string& batch : invokeBatches) {
  236. args->push_back("-invoke");
  237. args->push_back(batch.c_str());
  238. }
  239. args->push_back("-output_path");
  240. args->push_back(outputDir);
  241. args->push_back("-O");
  242. switch (optLevel) {
  243. case 0:
  244. args->push_back("0");
  245. break;
  246. case 3:
  247. args->push_back("3");
  248. break;
  249. default:
  250. ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
  251. args->push_back("3");
  252. break;
  253. }
  254. // The output filename has to be the last, in case we need to pop it out and
  255. // replace with a different name.
  256. args->push_back("-o");
  257. args->push_back(outputFileName);
  258. }
  259. void generateSourceSlot(RsdCpuReferenceImpl* ctxt,
  260. const Closure& closure,
  261. const std::vector<const char*>& inputs,
  262. std::stringstream& ss) {
  263. const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
  264. const Script* script = funcID->mScript;
  265. rsAssert (!script->isIntrinsic());
  266. const RsdCpuScriptImpl *cpuScript =
  267. (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
  268. const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
  269. const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
  270. inputs.begin();
  271. ss << index << "," << funcID->mSlot << ".";
  272. }
  273. #endif // RS_COMPATIBILTY_LIB
  274. } // anonymous namespace
  275. // This function is used by the debugger to inspect ScriptGroup
  276. // compilations.
  277. //
  278. // "__attribute__((noinline))" and "__asm__" are used to prevent the
  279. // function call from being eliminated as a no-op (see the "noinline"
  280. // attribute in gcc documentation).
  281. //
  282. // "__attribute__((weak))" is used to prevent callers from recognizing
  283. // that this is guaranteed to be the function definition, recognizing
  284. // that certain arguments are unused, and optimizing away the passing
  285. // of those arguments (see the LLVM optimization
  286. // DeadArgumentElimination). Theoretically, the compiler could get
  287. // aggressive enough with link-time optimization that even marking the
  288. // entry point as a weak definition wouldn't solve the problem.
  289. //
// @param groupName      name of the script group being compiled
// @param groupNameSize  length of groupName as passed by the caller
// @param kernel         array of expanded-kernel function pointers
// @param kernelCount    number of entries in |kernel|
extern __attribute__((noinline)) __attribute__((weak))
void debugHintScriptGroup2(const char* groupName,
                           const uint32_t groupNameSize,
                           const ExpandFuncTy* kernel,
                           const uint32_t kernelCount) {
    ALOGV("group name: %d:%s\n", groupNameSize, groupName);
    // The empty __asm__ barrier keeps this loop from being optimized away so
    // a debugger breakpoint can observe each kernel address.
    for (uint32_t i=0; i < kernelCount; ++i) {
        const char* f1 = (const char*)(kernel[i]);
        __asm__ __volatile__("");
        ALOGV(" closure: %p\n", (const void*)f1);
    }
    // do nothing, this is just a hook point for the debugger.
    return;
}
// Fuses the group's batches into one shared object via bcc, reusing a cached
// shared object from |cacheDir| when its embedded checksum matches the
// current command line and inputs. On success mScriptObj holds the dlopen
// handle and mExecutable the wrapper used for global-variable access. Every
// failure returns early with both left null, so execution falls back to the
// unfused path.
void CpuScriptGroup2Impl::compile(const char* cacheDir) {
#ifndef RS_COMPATIBILITY_LIB
    // A single-closure group gains nothing from fusion.
    if (mGroup->mClosures.size() < 2) {
        return;
    }

    const int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
    if (optLevel == 0) {
        std::vector<ExpandFuncTy> kernels;
        for (const Batch* b : mBatches)
            for (const CPUClosure* c : b->mClosures)
                kernels.push_back(c->mFunc);

        if (kernels.size()) {
            // pass this information on to the debugger via a hint function.
            debugHintScriptGroup2(mGroup->mName,
                                  strlen(mGroup->mName),
                                  kernels.data(),
                                  kernels.size());
        }

        // skip script group compilation forcing the driver to use the fallback
        // execution path which currently has better support for debugging.
        return;
    }

    // Collect the distinct bitcode files of all scripts in the group, ordered
    // by strcmp so the "<inputIndex>,<slot>" batch encoding is deterministic.
    auto comparator = [](const char* str1, const char* str2) -> bool {
        return strcmp(str1, str2) < 0;
    };
    std::set<const char*, decltype(comparator)> inputSet(comparator);

    for (Closure* closure : mGroup->mClosures) {
        const Script* script = closure->mFunctionID.get()->mScript;

        // If any script is an intrinsic, give up trying fusing the kernels.
        if (script->isIntrinsic()) {
            return;
        }

        const RsdCpuScriptImpl *cpuScript =
                (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);

        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
        inputSet.insert(bitcodeFilename);
    }

    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());

    // Encode each batch as "<name>:<input>,<slot>.<input>,<slot>..." for the
    // -merge (kernel) and -invoke flags of bcc.
    std::vector<string> kernelBatches;
    std::vector<string> invokeBatches;

    int i = 0;
    for (const auto& batch : mBatches) {
        rsAssert(batch->size() > 0);

        std::stringstream ss;
        ss << batch->mName << ":";

        if (!batch->mClosures.front()->mClosure->mIsKernel) {
            // An invoke batch always holds exactly one closure.
            rsAssert(batch->size() == 1);
            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
            invokeBatches.push_back(ss.str());
        } else {
            for (const auto& cpuClosure : batch->mClosures) {
                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
            }
            kernelBatches.push_back(ss.str());
        }
    }

    rsAssert(cacheDir != nullptr);
    string objFilePath(cacheDir);
    objFilePath.append("/");
    objFilePath.append(mGroup->mName);
    objFilePath.append(".o");

    const char* resName = mGroup->mName;
    string coreLibRelaxedPath;
    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
                                               &coreLibRelaxedPath);

    vector<const char*> arguments;
    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
                          emitGlobalInfo, emitGlobalInfoSkipConstant,
                          optLevel, &arguments);

    // The checksum over the command line and all inputs decides whether a
    // cached shared object can be reused.
    // NOTE(review): unique_ptr<const char> releases with scalar delete;
    // whether that matches rsuJoinStrings' allocation (new[]/malloc?) should
    // be confirmed against its implementation.
    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
                                                       arguments.data()));

    inputs.push_back(coreLibPath.c_str());
    inputs.push_back(coreLibRelaxedPath.c_str());

    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
                                               inputs.data(), inputs.size());

    if (checksum == 0) {
        return;
    }

    std::stringstream ss;
    ss << std::hex << checksum;
    std::string checksumStr(ss.str());

    //===--------------------------------------------------------------------===//
    // Try to load a shared lib from code cache matching filename and checksum
    //===--------------------------------------------------------------------===//

    bool alreadyLoaded = false;
    std::string cloneName;

    // Never reuse the cache when recompilation is forced or in a debug
    // context (debug wants freshly built, unoptimized code).
    const bool useRSDebugContext =
            (mCpuRefImpl->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG);
    const bool reuse = !is_force_recompile() && !useRSDebugContext;
    if (reuse) {
        mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
                                                           &alreadyLoaded);
    }

    if (mScriptObj != nullptr) {
        // A shared library named resName is found in code cache directory
        // cacheDir, and loaded with the handle stored in mScriptObj.
        mExecutable = ScriptExecutable::createFromSharedObject(
                mScriptObj, checksum);
        if (mExecutable != nullptr) {
            // The loaded shared library in mScriptObj has a matching checksum.
            // An executable object has been created.
            return;
        }
        ALOGV("Failed to create an executable object from so file due to "
              "mismatching checksum");
        if (alreadyLoaded) {
            // The shared object found in code cache has already been loaded.
            // A different file name is needed for the new shared library, to
            // avoid corrupting the currently loaded instance.
            cloneName.append(resName);
            cloneName.append("#");
            cloneName.append(SharedLibraryUtils::getRandomString(6).c_str());
            // The last element in arguments is the output filename.
            arguments.pop_back();
            arguments.push_back(cloneName.c_str());
        }
        dlclose(mScriptObj);
        mScriptObj = nullptr;
    }

    //===--------------------------------------------------------------------===//
    // Fuse the input kernels and generate native code in an object file
    //===--------------------------------------------------------------------===//
    arguments.push_back("-build-checksum");
    arguments.push_back(checksumStr.c_str());
    arguments.push_back(nullptr);

    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
                                      arguments.size()-1,
                                      arguments.data());
    if (!compiled) {
        return;
    }

    //===--------------------------------------------------------------------===//
    // Create and load the shared lib
    //===--------------------------------------------------------------------===//

    std::string SOPath;

    if (!SharedLibraryUtils::createSharedLibrary(
            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName,
            reuse, &SOPath)) {
        ALOGE("Failed to link object file '%s'", resName);
        unlink(objFilePath.c_str());
        return;
    }

    // The intermediate object file is no longer needed once linked.
    unlink(objFilePath.c_str());

    if (reuse) {
        mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
    } else {
        mScriptObj = SharedLibraryUtils::loadAndDeleteSharedLibrary(SOPath.c_str());
    }
    if (mScriptObj == nullptr) {
        ALOGE("Unable to load '%s'", resName);
        return;
    }

    if (alreadyLoaded) {
        // Delete the temporary, random-named file that we created to avoid
        // interfering with an already loaded shared library.
        string cloneFilePath(cacheDir);
        cloneFilePath.append("/");
        cloneFilePath.append(cloneName.c_str());
        cloneFilePath.append(".so");
        unlink(cloneFilePath.c_str());
    }

    mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj);
#endif  // RS_COMPATIBILITY_LIB
}
  471. void CpuScriptGroup2Impl::execute() {
  472. for (auto batch : mBatches) {
  473. batch->setGlobalsForBatch();
  474. batch->run();
  475. }
  476. }
  477. void Batch::setGlobalsForBatch() {
  478. for (CPUClosure* cpuClosure : mClosures) {
  479. const Closure* closure = cpuClosure->mClosure;
  480. const IDBase* funcID = closure->mFunctionID.get();
  481. Script* s = funcID->mScript;;
  482. for (const auto& p : closure->mGlobals) {
  483. const int64_t value = p.second.first;
  484. int size = p.second.second;
  485. if (value == 0 && size == 0) {
  486. // This indicates the current closure depends on another closure for a
  487. // global in their shared module (script). In this case we don't need to
  488. // copy the value. For example, an invoke intializes a global variable
  489. // which a kernel later reads.
  490. continue;
  491. }
  492. rsAssert(p.first != nullptr);
  493. Script* script = p.first->mScript;
  494. rsAssert(script == s);
  495. RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
  496. const RsdCpuScriptImpl *cpuScript =
  497. (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
  498. int slot = p.first->mSlot;
  499. ScriptExecutable* exec = mGroup->getExecutable();
  500. if (exec != nullptr) {
  501. const char* varName = cpuScript->getFieldName(slot);
  502. void* addr = exec->getFieldAddress(varName);
  503. if (size < 0) {
  504. rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
  505. (rs_object_base*)addr, (ObjectBase*)value);
  506. } else {
  507. memcpy(addr, (const void*)&value, size);
  508. }
  509. } else {
  510. // We use -1 size to indicate an ObjectBase rather than a primitive type
  511. if (size < 0) {
  512. s->setVarObj(slot, (ObjectBase*)value);
  513. } else {
  514. s->setVar(slot, (const void*)&value, size);
  515. }
  516. }
  517. }
  518. }
  519. }
// Executes this batch by one of three paths:
//   1) Single invoke closure: call the compiled invoke (mFunc) if the group
//      was fused, otherwise dispatch through the script implementation.
//   2) Fused kernel batch (mFunc set): one launch of the fused kernel, taking
//      inputs from the first closure and writing the last closure's output.
//   3) Fallback: launch groupRoot, which runs each closure per cell.
void Batch::run() {
    if (!mClosures.front()->mClosure->mIsKernel) {
        rsAssert(mClosures.size() == 1);

        // This batch contains a single closure for an invoke function
        CPUClosure* cc = mClosures.front();
        const Closure* c = cc->mClosure;

        if (mFunc != nullptr) {
            // TODO: Need align pointers for x86_64.
            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
        } else {
            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
            rsAssert(invokeID != nullptr);
            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
        }

        return;
    }

    if (mFunc != nullptr) {
        // Fused path: launch dimensions come from the first closure's inputs;
        // the fused kernel writes the last closure's return allocation.
        MTLaunchStructForEach mtls;
        const CPUClosure* firstCpuClosure = mClosures.front();
        const CPUClosure* lastCpuClosure = mClosures.back();

        firstCpuClosure->mSi->forEachMtlsSetup(
                (const Allocation**)firstCpuClosure->mClosure->mArgs,
                firstCpuClosure->mClosure->mNumArg,
                lastCpuClosure->mClosure->mReturnValue,
                nullptr, 0, nullptr, &mtls);

        mtls.script = nullptr;
        mtls.fep.usr = nullptr;
        mtls.kernel = (ForEachFunc_t)mFunc;

        mGroup->getCpuRefImpl()->launchForEach(
                (const Allocation**)firstCpuClosure->mClosure->mArgs,
                firstCpuClosure->mClosure->mNumArg,
                lastCpuClosure->mClosure->mReturnValue,
                nullptr, &mtls);

        return;
    }

    // Fallback path: run each script's pre-launch hook, launch groupRoot over
    // the first closure's dimensions, then run the post-launch hooks.
    for (CPUClosure* cpuClosure : mClosures) {
        const Closure* closure = cpuClosure->mClosure;
        const ScriptKernelID* kernelID =
                (const ScriptKernelID*)closure->mFunctionID.get();
        cpuClosure->mSi->preLaunch(kernelID->mSlot,
                                   (const Allocation**)closure->mArgs,
                                   closure->mNumArg, closure->mReturnValue,
                                   nullptr, 0, nullptr);
    }

    const CPUClosure* cpuClosure = mClosures.front();
    const Closure* closure = cpuClosure->mClosure;

    MTLaunchStructForEach mtls;

    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
                                          closure->mNumArg,
                                          closure->mReturnValue,
                                          nullptr, 0, nullptr, &mtls)) {
        mtls.script = nullptr;
        mtls.kernel = &groupRoot;
        // groupRoot reads this batch's closure list through fep.usr.
        mtls.fep.usr = &mClosures;

        mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls);
    }

    for (CPUClosure* cpuClosure : mClosures) {
        const Closure* closure = cpuClosure->mClosure;
        const ScriptKernelID* kernelID =
                (const ScriptKernelID*)closure->mFunctionID.get();
        cpuClosure->mSi->postLaunch(kernelID->mSlot,
                                    (const Allocation**)closure->mArgs,
                                    closure->mNumArg, closure->mReturnValue,
                                    nullptr, 0, nullptr);
    }
}
  587. } // namespace renderscript
  588. } // namespace android