CallChainJoiner.h 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * Copyright (C) 2017 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef SIMPLE_PERF_CALLCHAIN_JOINER_H_
  17. #define SIMPLE_PERF_CALLCHAIN_JOINER_H_
  18. #include <inttypes.h>
  19. #include <stdio.h>
  20. #include <unistd.h>
  21. #include <unordered_set>
  22. #include <vector>
  23. namespace simpleperf {
  24. namespace call_chain_joiner_impl {
  25. // Each node represents a function in a call chain, having a tuple info (tid, ip, sp).
  26. // A node remembers its parent in the call chain.
  27. struct CacheNode {
  28. uint64_t ip;
  29. uint64_t sp;
  30. uint32_t tid;
  31. // Whether the node is at the bottom of a call chain.
  32. uint32_t is_leaf : 1;
  33. // The index of the parent node in a call chain.
  34. uint32_t parent_index : 31;
  35. // When is_leaf = false, children_count remembers how many nodes have current node as parent.
  36. // When is_leaf = true, leaf_link_prev and leaf_link_next keeps current node in a LRU linked list.
  37. union {
  38. uint32_t children_count;
  39. struct {
  40. uint32_t leaf_link_prev;
  41. uint32_t leaf_link_next;
  42. };
  43. };
  44. };
  45. static_assert(sizeof(CacheNode) == 32, "");
  46. struct LRUCacheStat {
  47. size_t cache_size = 0u;
  48. size_t matched_node_count_to_extend_callchain = 0u;
  49. size_t max_node_count = 0u;
  50. size_t used_node_count = 0u;
  51. size_t recycled_node_count = 0u;
  52. };
  53. // LRUCache caches call chains in memory, and supports extending a call chain if the (tid, ip, sp)
  54. // tuples of its top [matched_node_count_to_extend_callchain] appear in the cache.
  55. class LRUCache {
  56. public:
  57. // cache_size is the bytes of memory we want to use in this cache.
  58. // matched_node_count_to_extend_callchain decides how many nodes we need to match to extend a
  59. // call chain. Higher value means more strict.
  60. LRUCache(size_t cache_size = 8 * 1024 * 1024, size_t matched_node_count_to_extend_callchain = 1);
  61. ~LRUCache();
  62. // Add a call chain in the cache, and extend it if possible.
  63. void AddCallChain(pid_t tid, std::vector<uint64_t>& ips, std::vector<uint64_t>& sps);
  64. const LRUCacheStat& Stat() {
  65. return cache_stat_;
  66. }
  67. CacheNode* FindNode(uint32_t tid, uint64_t ip, uint64_t sp) {
  68. CacheNode key;
  69. key.tid = tid;
  70. key.ip = ip;
  71. key.sp = sp;
  72. auto it = node_set_.find(&key);
  73. return it != node_set_.end() ? *it : nullptr;
  74. }
  75. private:
  76. static bool CacheNodeEqual(const CacheNode* n1, const CacheNode* n2);
  77. static size_t CacheNodeHash(const CacheNode* n);
  78. typedef std::unordered_set<CacheNode*, decltype(&CacheNodeHash), decltype(&CacheNodeEqual)>
  79. set_type;
  80. CacheNode* GetParent(CacheNode* node) {
  81. return node->parent_index == 0u ? nullptr : nodes_ + node->parent_index;
  82. }
  83. int GetNodeIndex(CacheNode* node) {
  84. return node - nodes_;
  85. }
  86. void RemoveNodeFromLRUList(CacheNode* node) {
  87. CacheNode* prev = &nodes_[node->leaf_link_prev];
  88. CacheNode* next = &nodes_[node->leaf_link_next];
  89. prev->leaf_link_next = node->leaf_link_next;
  90. next->leaf_link_prev = node->leaf_link_prev;
  91. }
  92. void AppendNodeToLRUList(CacheNode* node) {
  93. CacheNode* next = &nodes_[0];
  94. CacheNode* prev = &nodes_[next->leaf_link_prev];
  95. node->leaf_link_next = 0;
  96. node->leaf_link_prev = next->leaf_link_prev;
  97. next->leaf_link_prev = prev->leaf_link_next = GetNodeIndex(node);
  98. }
  99. void DecreaseChildCountOfNode(CacheNode* node) {
  100. if (--node->children_count == 0u) {
  101. node->is_leaf = true;
  102. AppendNodeToLRUList(node);
  103. }
  104. }
  105. CacheNode* GetNode(uint32_t tid, uint64_t ip, uint64_t sp);
  106. CacheNode* AllocNode();
  107. void LinkParent(CacheNode* child, CacheNode* new_parent);
  108. void UnlinkParent(CacheNode* child);
  109. CacheNode* nodes_;
  110. set_type node_set_;
  111. LRUCacheStat cache_stat_;
  112. };
  113. } // namespace call_chain_joiner_impl
  114. // CallChainJoiner is used to join callchains of samples in the same thread, in order to get
  115. // complete call graph. For example, if we have two samples for a thread:
  116. // sample 1: (ip A, sp A) -> (ip B, sp B) -> (ip C, sp C) -> ...
  117. // sample 2: (ip B, sp B) -> (ip C, sp C) -> ...
  118. // Because we know (ip A, sp A) calls (ip B, sp B) in sample 1, then we can guess the (ip B, sp B)
  119. // in sample 2 is also called from (ip A, sp A). So we can add (ip A, sp A) in sample 2 as below.
  120. // sample 2: (ip A, sp A) -> (ip B, sp B) -> (ip C, sp C) -> ...
  121. class CallChainJoiner {
  122. public:
  123. // The parameters are used in LRUCache.
  124. CallChainJoiner(size_t cache_size, size_t matched_node_count_to_extend_callchain,
  125. bool keep_original_callchains);
  126. ~CallChainJoiner();
  127. enum ChainType {
  128. ORIGINAL_OFFLINE,
  129. ORIGINAL_REMOTE,
  130. JOINED_OFFLINE,
  131. JOINED_REMOTE,
  132. };
  133. bool AddCallChain(pid_t pid, pid_t tid, ChainType type, const std::vector<uint64_t>& ips,
  134. const std::vector<uint64_t>& sps);
  135. bool JoinCallChains();
  136. bool GetNextCallChain(pid_t& pid, pid_t& tid, ChainType& type, std::vector<uint64_t>& ips,
  137. std::vector<uint64_t>& sps);
  138. struct Stat {
  139. size_t chain_count = 0u;
  140. size_t before_join_node_count = 0u;
  141. size_t after_join_node_count = 0u;
  142. size_t after_join_max_chain_length = 0u;
  143. };
  144. void DumpStat();
  145. const Stat& GetStat() {
  146. return stat_;
  147. }
  148. const call_chain_joiner_impl::LRUCacheStat& GetCacheStat() {
  149. return cache_stat_;
  150. }
  151. private:
  152. bool keep_original_callchains_;
  153. FILE* original_chains_fp_;
  154. FILE* joined_chains_fp_;
  155. size_t next_chain_index_;
  156. call_chain_joiner_impl::LRUCacheStat cache_stat_;
  157. Stat stat_;
  158. };
  159. } // namespace simpleperf
  160. #endif // SIMPLE_PERF_CALLCHAIN_JOINER_H_