Add 'update-payload-extractor/' from commit '4632cf0a0a6db27cce20c25cb40fc469a2c8e9aa'

https://github.com/gmrt/update_payload_extractor git-subtree-dir: update-payload-extractor git-subtree-mainline: 9a231bd70b git-subtree-split: 4632cf0a0a Change-Id: I9ae25d32a7e9aa6664309e8b916811844d0cac50
2020-03-15 15:06:30 -06:00
parent 9a231bd70b 4632cf0a0a
commit 32a0b66b14
17 changed files with 5471 additions and 0 deletions
--- a/update-payload-extractor/update_payload/histogram.py
+++ b/update-payload-extractor/update_payload/histogram.py
@@ -0,0 +1,129 @@
+#
+# Copyright (C) 2013 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Histogram generation tools."""
+
+from collections import defaultdict
+
+from update_payload import format_utils
+
+
+class Histogram(object):
+  """A histogram generating object.
+
+  This object serves the sole purpose of formatting (key, val) pairs as an
+  ASCII histogram, including bars and percentage markers, and taking care of
+  label alignment, scaling, etc. In addition to the standard __init__
+  interface, two static methods are provided for conveniently converting data
+  in different formats into a histogram. Histogram generation is exported via
+  its __str__ method, and looks as follows:
+
+    Yes |################    | 5 (83.3%)
+    No  |###                 | 1 (16.6%)
+
+  TODO(garnold) we may want to add actual methods for adding data or tweaking
+  the output layout and formatting. For now, though, this is fine.
+
+  """
+
+  def __init__(self, data, scale=20, formatter=None):
+    """Initialize a histogram object.
+
+    Args:
+      data: list of (key, count) pairs constituting the histogram
+      scale: number of characters used to indicate 100%
+      formatter: function used for formatting raw histogram values
+
+    """
+    self.data = data
+    self.scale = scale
+    self.formatter = formatter or str
+    self.max_key_len = max([len(str(key)) for key, count in self.data])
+    self.total = sum([count for key, count in self.data])
+
+  @staticmethod
+  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
+    """Takes a dictionary of counts and returns a histogram object.
+
+    This simply converts a mapping from names to counts into a list of (key,
+    count) pairs, optionally translating keys into name strings, then
+    generating and returning a histogram for them. This is a useful convenience
+    call for clients that update a dictionary of counters as they (say) scan a
+    data stream.
+
+    Args:
+      count_dict: dictionary mapping keys to occurrence counts
+      scale: number of characters used to indicate 100%
+      formatter: function used for formatting raw histogram values
+      key_names: dictionary mapping keys to name strings
+    Returns:
+      A histogram object based on the given data.
+
+    """
+    namer = None
+    if key_names:
+      namer = lambda key: key_names[key]
+    else:
+      namer = lambda key: key
+
+    hist = [(namer(key), count) for key, count in count_dict.items()]
+    return Histogram(hist, scale, formatter)
+
+  @staticmethod
+  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
+    """Takes a list of (possibly recurring) keys and returns a histogram object.
+
+    This converts the list into a dictionary of counters, then uses
+    FromCountDict() to generate the actual histogram. For example:
+
+      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...
+
+    Args:
+      key_list: list of (possibly recurring) keys
+      scale: number of characters used to indicate 100%
+      formatter: function used for formatting raw histogram values
+      key_names: dictionary mapping keys to name strings
+    Returns:
+      A histogram object based on the given data.
+
+    """
+    count_dict = defaultdict(int)  # Unset items default to zero
+    for key in key_list:
+      count_dict[key] += 1
+    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)
+
+  def __str__(self):
+    hist_lines = []
+    hist_bar = '|'
+    for key, count in self.data:
+      if self.total:
+        bar_len = count * self.scale / self.total
+        hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
+
+      line = '%s %s %s' % (
+          str(key).ljust(self.max_key_len),
+          hist_bar,
+          self.formatter(count))
+      percent_str = format_utils.NumToPercent(count, self.total)
+      if percent_str:
+        line += ' (%s)' % percent_str
+      hist_lines.append(line)
+
+    return '\n'.join(hist_lines)
+
+  def GetKeys(self):
+    """Returns the keys of the histogram."""
+    return [key for key, _ in self.data]