From 84e8ab539592605cc236ae8e6e81d91857d84294 Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Tue, 21 Apr 2015 02:03:20 -0700 Subject: [PATCH] improve util.py and test_util.py Also updated doc to numpy style. --- src/storyboard/metadata.py | 2 +- src/storyboard/storyboard.py | 2 +- src/storyboard/util.py | 332 +++++++++++++++++++++++++++-------- tests/test_util.py | 18 ++ 4 files changed, 283 insertions(+), 71 deletions(-) diff --git a/src/storyboard/metadata.py b/src/storyboard/metadata.py index 5299d67..0ec0f2d 100755 --- a/src/storyboard/metadata.py +++ b/src/storyboard/metadata.py @@ -268,7 +268,7 @@ class Video(object): def __init__(self, video, params=None): """Initialize the Video class. - See module docstring for parameters of the constructor. + See class docstring for parameters of the constructor. """ diff --git a/src/storyboard/storyboard.py b/src/storyboard/storyboard.py index d5ba4a7..183760d 100755 --- a/src/storyboard/storyboard.py +++ b/src/storyboard/storyboard.py @@ -99,7 +99,7 @@ class Font(object): def __init__(self, font_file=None, font_size=None): """Initialize the Font class. - See module docstring for parameters of the constructor. + See class docstring for parameters of the constructor. """ diff --git a/src/storyboard/util.py b/src/storyboard/util.py index 783999a..68b4753 100755 --- a/src/storyboard/util.py +++ b/src/storyboard/util.py @@ -61,14 +61,36 @@ def read_param(params, key, default): def round_up(number, ndigits=0): - """Round a nonnegative number UPWARD to a given precision in decimal digits. + """Round a floating point number *upward* to a given precision. - Keyword arguments: - number -- nonnegative floating point number - ndigits -- number of decimal digits to round to, default is 0 + Unlike the builtin `round`, the return value of `round_up` is always + the smallest float *greater than or equal to* the given number + matching the specified precision. + + Parameters + ---------- + number : float + Number to be rounded up.
+ ndigits : int, optional + Number of decimal digits in the result. Default is 0. + + Returns + ------- + float + + Examples + -------- + >>> round_up(math.pi) + 4.0 + >>> round_up(math.pi, ndigits=1) + 3.2 + >>> round_up(math.pi, ndigits=2) + 3.15 + >>> round_up(-math.pi, ndigits=4) + -3.1415 - Returns: float """ + multiplier = 10 ** ndigits return math.ceil(number * multiplier) / multiplier @@ -81,14 +103,32 @@ def round_up(number, ndigits=0): def evaluate_ratio(ratio_str): """Evaluate ratio in the form num:den or num/den. - Note that numerator and denominator should both be positive integers. + Note that numerator and denominator should both be positive + integers. - Keyword arguments: - ratio_str: the ratio as a string (either 'num:den' or 'num/den' where num - and den are positive integers + Parameters + ---------- + ratio_str : str + The ratio as a string (either ``'num:den'`` or ``'num/den'`` + where ``num`` and ``den``, the numerator and denominator, are + positive integers). + + Returns + ------- + ratio : float + The ratio as a float, or ``None`` if `ratio_str` is malformed. + + Examples + -------- + >>> evaluate_ratio('16:9') + 1.7777777777777777 + >>> evaluate_ratio('16/9') + 1.7777777777777777 + >>> print(evaluate_ratio('0/9')) + None - Returns: the ratio as a float (or None if malformed) """ + match = _NUM_COLON_DEN.match(ratio_str) if match: numerator = int(match.group(1)) @@ -122,16 +162,56 @@ def humansize(size): def humantime(seconds, ndigits=2, one_hour_digit=False): - """Return a human readable string of the given duration in seconds. + """Format a duration as a human readable string. + + The duration in seconds (a nonnegative float) is formatted as + ``HH:MM:SS.frac``, where the number of fractional digits is + controlled by `ndigits`; if `ndigits` is 0, the decimal point is not + printed. The number of hour digits (``HH``) can be reduced to one + with the `one_hour_digit` option. - Raises ValueError if seconds is negative.
+ Parameters + ---------- + seconds : float + Duration in seconds, must be nonnegative. + ndigits : int, optional + Number of digits after the decimal point for the seconds part. + Default is 2. If 0, the decimal point is suppressed. + one_hour_digit : bool, optional + If ``True``, only print one hour digit (e.g., nine hours is + printed as 9:00:00.00). Default is ``False``, i.e., two hour + digits (nine hours is printed as 09:00:00.00). + + Returns + ------- + human_readable_duration : str + + Raises + ------ + ValueError: + If `seconds` is negative. + + Examples + -------- + >>> humantime(10.55) + '00:00:10.55' + >>> humantime(10.55, ndigits=1) + '00:00:10.6' + >>> humantime(10.55, ndigits=0) + '00:00:11' + >>> humantime(10.55, one_hour_digit=True) + '0:00:10.55' + >>> # two hour digits for >= 10 hours, even if one_hour_digit is + >>> # True + >>> humantime(86400, one_hour_digit=True) + '24:00:00.00' + >>> humantime(-1) + Traceback (most recent call last): + ... + ValueError: seconds=-1.000000 is negative, expected nonnegative value - Keyword arguments: - ndigits - number of digits after the decimal point for the seconds part, - default is 2 - one_hour_digit - if True, only print one hour digit; default is two hour - digits """ + # pylint: disable=invalid-name if seconds < 0: raise ValueError("seconds=%f is negative, " @@ -140,12 +220,14 @@ def humantime(seconds, ndigits=2, one_hour_digit=False): hh = int(seconds) // 3600 # hours mm = (int(seconds) // 60) % 60 # minutes ss = seconds - (int(seconds) // 60) * 60 # seconds - hh_format = "%01d" if one_hour_digit else "%02d" - mm_format = "%02d" - ss_format = "%02d" if ndigits == 0 else \ - "%0{0}.{1}f".format(ndigits + 3, ndigits) - format_string = "{0}:{1}:{2}".format(hh_format, mm_format, ss_format) - return format_string % (hh, mm, ss) + hh_str = "%01d" % hh if one_hour_digit else "%02d" % hh + mm_str = "%02d" % mm + if ndigits == 0: + ss_str = "%02d" % round(ss) + else: + ss_format = "%0{0}.{1}f".format(ndigits
+ 3, ndigits) + ss_str = ss_format % ss + return "%s:%s:%s" % (hh_str, mm_str, ss_str) # default progress bar update interval @@ -166,42 +248,78 @@ class ProgressBar(object): """Progress bar for file processing. To generate a progress bar, init a ProgressBar instance, then update - frequently with the update method, passing in the size of newly processed - chunk. The force_update process should only be called if you want to - overwrite the processed size which is automatically calculated - incrementally. After you finish processing the file/stream, call the finish - method to wrap it up. Any further calls after the finish method has been - called lead to undefined behavior (probably exceptions). - - Format inspired by pv(1) (pipe viewer). - - Initializer arguments: - totalsize: total size in bytes of the file/stream to be processed - interval: update interval in seconds of the progress bar, default is 1.0 - - Public instance attributes: - These attributes can be queried for informational purposes, but not meant - for manual manipulation. - - During processing: - totalsize - total size of file/stream - processed - size of processed part - start - starting time (absolute time returned by time.time()) - interval - update interval - - After processing (after finish is called): - totalsize - start - elapsed - total elapsed time, in seconds + frequently with the `update` method, passing in the size of newly + processed chunk. The `force_update` method should only be called if + you want to overwrite the processed size, which is automatically + calculated incrementally. After you finish processing the + file/stream, you must call the `finish` method to wrap it up. Any + further calls after the `finish` method has been called lead to + a ``RuntimeError``. + + Each ProgressBar instance defines several public attributes listed + below. Some are available during processing, and some after + processing. 
These attributes are meant for informational purposes, + and you should not manually tamper with them (which most likely + leads to undefined behavior). + + The progress bar format is inspired by ``pv(1)`` (pipe viewer). + + Parameters + ---------- + totalsize : int + Total size, in bytes, of the file/stream to be processed. + interval : float, optional + Update (refresh) interval of the progress bar, in + seconds. Default is 1.0. + + Attributes + ---------- + totalsize : int + Total size of file/stream, in bytes. Available throughout. + processed : int + Processed size. Available only during processing (deleted after + the `finish` call). + start : float + Starting time (an absolute time returned by + ``time.time()``). Available throughout. + interval : float + Update (refresh) interval of the progress bar, in + seconds. Available only during processing (deleted after the + `finish` call). + elapsed : float + Total elapsed time, in seconds. Only available after the + `finish` call. + + Notes + ----- + For developers: ProgressBar also defines three private attributes, + `_last`, `_last_processed` and `_barlen`, during processing (deleted + after the `finish` call). `_last` stores the absolute time of last + update (refresh), `_last_processed` stores the processed size at the + time of the last update (refresh), and `_barlen` stores the length + of the progress bar (only the bar portion). + + There is another private attribute `__finished` (bool) keeping track + of whether `finish` has been called. (Protected with double leading + underscores since no one should ever tamper with this.) + """ def __init__(self, totalsize, interval=_PROGRESS_UPDATE_INTERVAL): + """Initialize the ProgressBar class. + + See class docstring for parameters of the constructor.
+ + """ + self.totalsize = totalsize self.processed = 0 self.start = time.time() self.interval = interval self._last = self.start self._last_processed = 0 + self.__finished = False + # calculate bar length try: ncol, _ = os.get_terminal_size() @@ -215,11 +333,29 @@ def __init__(self, totalsize, interval=_PROGRESS_UPDATE_INTERVAL): self._barlen = (ncol - 48) if ncol >= 58 else 10 def update(self, chunk_size): - """Update the progress bar for the newly processed chunk. + """Update the progress bar for a newly processed chunk. + + The size of the processed chunk is registered. Whether the + progress bar is refreshed depends on whether we have reached the + refresh interval since the last refresh (handled automatically). + + Parameters + ---------- + chunk_size : int + The size of the newly processed chunk (since last update), + in bytes. This size will be added to the `processed` + attribute. + + Raises + ------ + RuntimeError: + If `finish` has been called on the ProgressBar instance. - Keyword arguments: - chunk_size: the size of the new chunk since the last update """ + + if self.__finished: + raise RuntimeError('operation on finished progress bar') + self.processed += chunk_size if self.processed > self.totalsize: self.processed = self.totalsize @@ -228,19 +364,55 @@ def update(self, chunk_size): def force_update(self, processed_size): """Force update the progress bar with a given processed size. - Keyword arguments: - processed_size: size of the processed part of the file, overwrites - existing value + The `processed` attribute is overwritten by the new value. + + Parameters + ---------- + processed_size : + Processed size of the file/stream, in bytes. Existing value + is overwritten by this value. + + Raises + ------ + RuntimeError: + If `finish` has been called on the ProgressBar instance. 
+ """ + + if self.__finished: + raise RuntimeError('operation on finished progress bar') + self.processed = processed_size if self.processed > self.totalsize: self.processed = self.totalsize self._update_output() def finish(self): - """Finish file progressing and wrap up on the progress bar.""" + """Finish file processing and wrap up on the progress bar. + + Always call this method exactly once after you finish + processing. This method adds the finishing touches to the + progress bar, deletes several attributes (`processed`, + `interval`), and adds a new attribute (`elapsed`). + + After `finish` is called on a ProgressBar instance, it enters a + read-only mode: you may read the `totalsize`, `start`, and + `elapsed` attributes, but any method call leads to a + ``RuntimeError``. + + Raises + ------ + RuntimeError: + If `finish` has already been called on the ProgressBar + instance before. + + """ + # pylint: disable=attribute-defined-outside-init - # new attribute elapsed created on the fly after processing finishes + + if self.__finished: + raise RuntimeError('operation on finished progress bar') + self.elapsed = time.time() - self.start if self.elapsed < 0.001: self.elapsed = 0.001 # avoid division by zero @@ -249,8 +421,10 @@ def finish(self): del self._last del self._last_processed + self.__finished = True + processed_s = humansize(self.totalsize) - elapsed_s = self.humantime(self.elapsed) + elapsed_s = self._humantime(self.elapsed) speed_s = humansize(self.totalsize / self.elapsed) bar_s = '=' * (self._barlen - 1) + '>' percent_s = '100' @@ -262,20 +436,40 @@ def finish(self): sys.stderr.flush() def _update_output(self): - """Update the progress bar and surrounding data as appropriate.""" - if time.time() - self._last < self.interval: + """Update the progress bar and surrounding data as appropriate. + + Whether the progress bar is refreshed depends on whether we have + reached the refresh interval since the last refresh (handled + automatically).
+ + Raises + ------ + RuntimeError: + If `finish` has already been called on the ProgressBar + instance before. + + """ + + if self.__finished: + raise RuntimeError('operation on finished progress bar') + + elapsed_since_last = time.time() - self._last + if elapsed_since_last < self.interval: return - # speed in the last second - speed = (self.processed - self._last_processed) / \ - (time.time() - self._last) # bytes per second + if elapsed_since_last < 0.001: + elapsed_since_last = 0.001 # avoid division by zero + + # speed in the last second, in bytes per second + speed = ((self.processed - self._last_processed) / elapsed_since_last) + # update last stats for the next update self._last = time.time() self._last_processed = self.processed # _s suffix stands for string processed_s = humansize(self.processed) - elapsed_s = self.humantime(time.time() - self.start) + elapsed_s = self._humantime(time.time() - self.start) speed_s = humansize(speed) percentage = self.processed / self.totalsize # absolute percent_s = str(int(percentage * 100)) @@ -290,14 +484,14 @@ def _update_output(self): remaining = self.totalsize - self.processed # estimate based on current speed eta = remaining / speed - eta_s = "ETA %s" % self.humantime(eta) + eta_s = "ETA %s" % self._humantime(eta) sys.stderr.write(_FORMAT_STRING.format( processed_s, elapsed_s, speed_s, bar_s, percent_s, eta_s )) sys.stderr.flush() - def humantime(self, seconds): - # pylint: disable=no-self-use - """Customized humantime.""" + @staticmethod + def _humantime(seconds): + """Customized humantime for ProgressBar.""" return humantime(seconds, ndigits=0, one_hour_digit=True) diff --git a/tests/test_util.py b/tests/test_util.py index cc6aaa2..2b8dc46 100755 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -55,12 +55,30 @@ def test_progress_bar(self): totalsize = chunksize * nchunks sha1 = hashlib.sha1() pbar = ProgressBar(totalsize, interval=0.001) + pbar.force_update(totalsize) + self.assertEqual(pbar.processed, 
totalsize) + pbar.force_update(0) + self.assertEqual(pbar.processed, 0) + processed = 0 for _ in range(0, nchunks): sha1.update(chunk) + processed += chunksize pbar.update(chunksize) + self.assertEqual(pbar.processed, processed) pbar.finish() self.assertEqual(sha1.hexdigest(), '3b71f43ff30f4b15b5cd85dd9e95ebc7e84eb5a3') + self.assertEqual(pbar.totalsize, totalsize) + with self.assertRaises(AttributeError): + pbar.processed + with self.assertRaises(AttributeError): + pbar.interval + with self.assertRaises(RuntimeError): + pbar.update(chunksize) + with self.assertRaises(RuntimeError): + pbar.force_update(0) + with self.assertRaises(RuntimeError): + pbar.finish() if __name__ == '__main__':