Tracing.h 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. /*
  2. * Copyright (C) 2018 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef ANDROID_ML_NN_COMMON_TRACING_H
  17. #define ANDROID_ML_NN_COMMON_TRACING_H
  18. #define ATRACE_TAG ATRACE_TAG_NNAPI
  19. #include "utils/Trace.h"
  20. // Neural Networks API (NNAPI) systracing
  21. //
  22. // Primary goal of the tracing is to capture and present timings for NNAPI.
// (Other uses include providing visibility into how execution is split between
// drivers and the CPU fallback, and the ability to visualize call sequences).
  25. //
  26. // The tracing has three parts:
  27. // 1 Trace macros defined in this file and used throughout the codebase,
  28. // modelled after and using atrace. These implement a naming convention for
  29. // the tracepoints, interpreted by the systrace parser.
  30. // 2 Android systrace (atrace) on-device capture and host-based analysis.
  31. // 3 A systrace parser (TODO) to summarize the timings.
  32. //
  33. // For an overview and introduction, please refer to the "NNAPI Systrace design
  34. // and HOWTO" (internal Docs for now). This header doesn't try to replicate all
  35. // the information in that document. For the contract between traces in code and
  36. // the statistics created by the systrace parser, see
  37. // tools/systrace-parser/contract-between-code-and-parser.txt.
  38. //
  39. // Glossary:
// - Phase: stage in processing (e.g., Preparation, Compilation, Execution).
//   The Overall phase nests all the other phases; Execution nests
//   Input/Output, Transformation, Computation and Results; optionally,
//   Executions can be nested in Warmup and Benchmark phases. Phases are
//   otherwise not nested (Initialization phase functions may occur inside
//   other phases but will be counted out during analysis). Nested phases
//   (other than Initialization) are analysed as a breakdown of the parent
//   phase.
  47. // - Layer: component in the stack (from top to bottom: App, Runtime, IPC,
  48. // Driver/CPU). Calls to lower layers are typically nested within calls to upper
  49. // layers.
  50. // - Bucket: unit of timing analysis, the combination of Phase and Layer (and
  51. // thus also typically nested).
  52. // - Detail: specific unit being executed, typically a function.
  53. // Convenience macros to be used in the code (phases defined below).
  54. // (Macros so that string concatenation is done at compile time).
  55. //
  56. // These exist in three variants:
  57. // - Simple (NNTRACE_<layer and potentially phase>) - to be used when only one
  58. // Phase is active within a scope
  59. // - "Switch" (NNTRACE_<...>_SWITCH) - to be used when multiple Phases
  60. // share a scope (e.g., transformation of data and computation in same
  61. // function).
  62. // - "Subtract" (NNTRACE_<...>_SUBTRACT) - to be used when nesting is violated
  63. // and the time should be subtracted from the parent scope
  64. // Arguments:
  65. // - phase: one of the NNTRACE_PHASE_* macros defined below.
  66. // - detail: free-form string constant, typically function name.
  67. // Example usage:
  68. // // Simple
  69. // int ANeuralNetworksMemory_createFromFd(...) {
  70. // NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksMemory_createFromFd");
  71. // }
  72. // // Switch
  73. // bool concatenationFloat32(...) {
  74. // NNTRACE_TRANS("concatenationFloat32"); // Transformation of data begins
  75. // ...
  76. // NNTRACE_COMP_SWITCH("optimized_ops::Concatenation"); // Transformation
  77. // // ends and computation
  78. // // begins
  79. // }
  80. // // Subtract
  81. // static int compile(...) {
  82. // NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
  83. // device->getInterface()->prepareModel(..., preparedModelCallback);
// preparedModelCallback->wait();
  85. // }
  86. // ErrorStatus VersionedIDevice::prepareModel(...) {
  87. // ... IPC work ...
  88. // {
  89. // NNTRACE_FULL_SUBTRACT(NNTRACE_LAYER_RUNTIME, NNTRACE_PHASE_COMPILATION,
  90. // "VersionedIDevice::prepareModel");
  91. // ... Runtime work ...
  92. // }
  93. // ... IPC work ...
  94. // }
  95. //
  96. // Layer Application - For native applications (e.g., unit tests)
  97. #define NNTRACE_APP(phase, detail) NNTRACE_FULL(NNTRACE_LAYER_APPLICATION, phase, detail)
  98. #define NNTRACE_APP_SWITCH(phase, detail) \
  99. NNTRACE_FULL_SWITCH(NNTRACE_LAYER_APPLICATION, phase, detail)
  100. // Layer Runtime - For the NNAPI runtime
  101. #define NNTRACE_RT(phase, detail) NNTRACE_FULL(NNTRACE_LAYER_RUNTIME, phase, detail)
  102. #define NNTRACE_RT_SWITCH(phase, detail) NNTRACE_FULL_SWITCH(NNTRACE_LAYER_RUNTIME, phase, detail)
  103. // Layer CPU - CPU executor
  104. #define NNTRACE_CPU(phase, detail) NNTRACE_FULL(NNTRACE_LAYER_CPU, phase, detail)
  105. #define NNTRACE_COMP(detail) NNTRACE_FULL(NNTRACE_LAYER_CPU, \
  106. NNTRACE_PHASE_COMPUTATION, detail)
  107. #define NNTRACE_COMP_SWITCH(detail) NNTRACE_FULL_SWITCH(NNTRACE_LAYER_CPU, \
  108. NNTRACE_PHASE_COMPUTATION, detail)
  109. #define NNTRACE_TRANS(detail) NNTRACE_FULL(NNTRACE_LAYER_CPU, \
  110. NNTRACE_PHASE_TRANSFORMATION, detail)
  111. // Fully specified macros to be used when no convenience wrapper exists for your
  112. // need.
  113. #define NNTRACE_FULL(layer, phase, detail) NNTRACE_NAME_1(("[NN_" layer "_" phase "]" detail))
  114. #define NNTRACE_FULL_SWITCH(layer, phase, detail) \
  115. NNTRACE_NAME_SWITCH(("[SW][NN_" layer "_" phase "]" detail))
  116. #define NNTRACE_FULL_SUBTRACT(layer, phase, detail) \
  117. NNTRACE_NAME_1(("[SUB][NN_" layer "_" phase "]" detail))
  118. // Raw macro without scoping requirements, for special cases
  119. #define NNTRACE_FULL_RAW(layer, phase, detail) android::ScopedTrace PASTE(___tracer, __LINE__) \
  120. (ATRACE_TAG, ("[NN_" layer "_" phase "]" detail))
  121. // Tracing buckets - for calculating timing summaries over.
  122. //
  123. // Application-only phases
  124. #define NNTRACE_PHASE_OVERALL "PO" // Overall program, e.g., one benchmark case
  125. #define NNTRACE_PHASE_WARMUP "PWU" // Warmup (nesting multiple executions)
  126. #define NNTRACE_PHASE_BENCHMARK "PBM" // Benchmark (nesting multiple executions)
  127. // Main phases, usable by all layers
  128. #define NNTRACE_PHASE_INITIALIZATION "PI" // Initialization - not related to a model
  129. #define NNTRACE_PHASE_PREPARATION "PP" // Model construction
  130. #define NNTRACE_PHASE_COMPILATION "PC" // Model compilation
  131. #define NNTRACE_PHASE_EXECUTION "PE" // Executing the model
  132. #define NNTRACE_PHASE_TERMINATION "PT" // Tearing down
  133. #define NNTRACE_PHASE_UNSPECIFIED "PU" // Helper code called from multiple phases
  134. // Subphases of execution
  135. #define NNTRACE_PHASE_INPUTS_AND_OUTPUTS "PIO" // Setting inputs/outputs and allocating buffers
  136. #define NNTRACE_PHASE_TRANSFORMATION "PTR" // Transforming data for computation
  137. #define NNTRACE_PHASE_COMPUTATION "PCO" // Computing operations' outputs
  138. #define NNTRACE_PHASE_RESULTS "PR" // Reading out results
  139. // Layers
  140. #define NNTRACE_LAYER_APPLICATION "LA"
  141. #define NNTRACE_LAYER_RUNTIME "LR"
  142. #define NNTRACE_LAYER_IPC "LI"
  143. #define NNTRACE_LAYER_DRIVER "LD"
  144. #define NNTRACE_LAYER_CPU "LC"
  145. #define NNTRACE_LAYER_OTHER "LO"
  146. #define NNTRACE_LAYER_UTILITY "LU" // Code used from multiple layers
  147. // Implementation
  148. //
  149. // Almost same as ATRACE_NAME, but enforcing explicit distinction between
  150. // phase-per-scope and switching phases.
  151. //
  152. // Basic trace, one per scope allowed to enforce disjointness
  153. #define NNTRACE_NAME_1(name) android::ScopedTrace ___tracer_1(ATRACE_TAG, name)
  154. // Switching trace, more than one per scope allowed, translated by
  155. // systrace_parser.py. This is mainly useful for tracing multiple phases through
  156. // one function / scope.
  157. #define NNTRACE_NAME_SWITCH(name) android::ScopedTrace PASTE(___tracer, __LINE__) \
  158. (ATRACE_TAG, name); \
  159. (void)___tracer_1 // ensure switch is only used after a basic trace
  160. // Disallow use of raw ATRACE macros
  161. #undef ATRACE_NAME
  162. #undef ATRACE_CALL
  163. #endif // ANDROID_ML_NN_COMMON_TRACING_H