histogram.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
  16. """Histogram generation tools."""
  17. from collections import defaultdict
  18. from update_payload import format_utils
  class Histogram(object):
    """A histogram generating object.

    This object serves the sole purpose of formatting (key, val) pairs as an
    ASCII histogram, including bars and percentage markers, and taking care of
    label alignment, scaling, etc. In addition to the standard __init__
    interface, two static methods are provided for conveniently converting data
    in different formats into a histogram. Histogram generation is exported via
    its __str__ method, and looks as follows:

      Yes |################    | 5 (83.3%)
      No  |###                 | 1 (16.6%)

    TODO(garnold) we may want to add actual methods for adding data or tweaking
    the output layout and formatting. For now, though, this is fine.
    """

    def __init__(self, data, scale=20, formatter=None):
      """Initialize a histogram object.

      Args:
        data: list of (key, count) pairs constituting the histogram
        scale: number of characters used to indicate 100%
        formatter: function used for formatting raw histogram values
          (defaults to str)
      """
      self.data = data
      self.scale = scale
      self.formatter = formatter or str
      # Fall back to a zero-width label column when there is no data; a bare
      # max() over an empty list would raise ValueError.
      self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
      self.total = sum([count for _, count in self.data])

    @staticmethod
    def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
      """Takes a dictionary of counts and returns a histogram object.

      This simply converts a mapping from names to counts into a list of (key,
      count) pairs, optionally translating keys into name strings, then
      generating and returning a histogram for them. This is a useful
      convenience call for clients that update a dictionary of counters as
      they (say) scan a data stream.

      Args:
        count_dict: dictionary mapping keys to occurrence counts
        scale: number of characters used to indicate 100%
        formatter: function used for formatting raw histogram values
        key_names: dictionary mapping keys to name strings

      Returns:
        A histogram object based on the given data.

      Raises:
        KeyError: if key_names is given but lacks an entry for some key.
      """
      if key_names:
        hist = [(key_names[key], count) for key, count in count_dict.items()]
      else:
        hist = list(count_dict.items())
      return Histogram(hist, scale, formatter)

    @staticmethod
    def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
      """Takes a list of (possibly recurring) keys and returns a histogram.

      This converts the list into a dictionary of counters, then uses
      FromCountDict() to generate the actual histogram. For example:

        ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

      Args:
        key_list: list of (possibly recurring) keys
        scale: number of characters used to indicate 100%
        formatter: function used for formatting raw histogram values
        key_names: dictionary mapping keys to name strings

      Returns:
        A histogram object based on the given data.
      """
      count_dict = defaultdict(int)  # Unset items default to zero
      for key in key_list:
        count_dict[key] += 1
      return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

    def _RenderBar(self, count):
      """Returns the bar segment of a histogram line for the given count.

      Args:
        count: the value whose share of self.total is drawn

      Returns:
        '|' followed by the proportional run of '#' padded to self.scale
        characters and a closing '|'; or a lone '|' when the total is zero,
        since no proportion can be computed.
      """
      if not self.total:
        return '|'
      # Floor division: true division yields a float under Python 3, and a
      # string cannot be repeated a fractional number of times.
      bar_len = int(count * self.scale // self.total)
      return '|%s|' % ('#' * bar_len).ljust(self.scale)

    def __str__(self):
      """Returns the histogram as newline-separated text, one line per key."""
      hist_lines = []
      for key, count in self.data:
        line = '%s %s %s' % (
            str(key).ljust(self.max_key_len),
            self._RenderBar(count),
            self.formatter(count))
        percent_str = format_utils.NumToPercent(count, self.total)
        if percent_str:
          line += ' (%s)' % percent_str
        hist_lines.append(line)
      return '\n'.join(hist_lines)

    def GetKeys(self):
      """Returns the keys of the histogram."""
      return [key for key, _ in self.data]