In [None]:
class Stats1:
    """Naive average tracker: stores every value and re-sums them on each query."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Forget every value recorded so far."""
        self.values = []

    def add_item(self, value):
        """Record one more observation."""
        self.values.append(value)

    def get_average(self):
        """Return the arithmetic mean of the recorded values, or None if there are none."""
        count = len(self.values)
        if count < 1:
            return None

        # Full pass over the stored values on every call.
        total = 0.0
        for item in self.values:
            total += item
        return total / float(count)

In [None]:
# Smoke test for Stats1: add the values 10.0 and 5.0, then display the average
# as the cell output.
s1 = Stats1()
s1.add_item(10.0)
s1.add_item(5.0)
s1.get_average()

In [None]:
class Stats2:
    """Streaming average tracker: keeps only the item count and the current mean."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Return to the empty state: no items seen, average undefined (None)."""
        self.average = None
        self.n = 0

    def add_item(self, value):
        """Fold one observation into the running mean."""
        self.n += 1
        if self.n != 1:
            # Incremental update: mean_n = mean_(n-1) + (x - mean_(n-1)) / n
            self.average += (value - self.average) / float(self.n)
        else:
            # The very first item is the mean.
            self.average = value

    def get_average(self):
        """Return the current mean, or None if no item has been added yet."""
        return self.average

In [None]:
# Smoke test for Stats2: add the values 10.0 and 5.0, then display the average
# as the cell output.
s2 = Stats2()
s2.add_item(10.0)
s2.add_item(5.0)
s2.get_average()

In [None]:
import sys
import time
from collections import deque
from collections.abc import Mapping, Set
from numbers import Number

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
%%javascript
// Raise the output-area auto-scroll threshold to 9999 — presumably so long
// outputs render in full instead of inside a scroll box (front-end specific; confirm).
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# Font sizes shared by every figure in this notebook.
SIZE_S, SIZE_M, SIZE_L = 16, 18, 22

# Same settings as individual plt.rc(group, key=value) calls, applied in one go.
plt.rcParams.update({
    'font.size': SIZE_S,         # default text size
    'axes.titlesize': SIZE_S,    # axes title
    'axes.labelsize': SIZE_M,    # x and y axis labels
    'xtick.labelsize': SIZE_S,   # x tick labels
    'ytick.labelsize': SIZE_S,   # y tick labels
    'legend.fontsize': SIZE_S,   # legend entries
    'figure.titlesize': SIZE_L,  # figure title
})

In [None]:
def get_size(obj_0):
    """Recursively estimate the memory footprint of an object, in bytes.

    Starts from ``sys.getsizeof(obj_0)`` and adds the size of everything
    reachable from it: elements of tuples/lists/sets/deques, keys and values
    of mappings (or of anything exposing ``items()``), the instance
    ``__dict__`` and the attributes named in ``__slots__``.

    Every object is counted at most once per call (tracked by ``id``), so
    shared references and reference cycles are handled.

    Parameters
    ----------
    obj_0 : object
        Root object to measure.

    Returns
    -------
    int
        Total size in bytes as reported by ``sys.getsizeof`` for each
        distinct reachable object.
    """
    # Types whose getsizeof already covers their content; do not iterate them.
    zero_depth_bases = (str, bytes, Number, range, bytearray)
    # Explicit per-call visited set (instead of a mutable default argument on
    # the inner function) so the "fresh for every call" intent is obvious.
    seen_ids = set()

    def inner(obj):
        obj_id = id(obj)
        if obj_id in seen_ids:
            return 0
        seen_ids.add(obj_id)
        size = sys.getsizeof(obj)
        if isinstance(obj, zero_depth_bases):
            pass
        elif isinstance(obj, (tuple, list, Set, deque)):
            size += sum(inner(item) for item in obj)
        elif isinstance(obj, Mapping) or hasattr(obj, 'items'):
            size += sum(inner(k) + inner(v) for k, v in obj.items())
        if hasattr(obj, '__dict__'):
            size += inner(vars(obj))
        if hasattr(obj, '__slots__'):
            slots = obj.__slots__
            # `__slots__ = 'name'` declares a single slot; iterating the bare
            # string would walk its characters and miss the attribute.
            if isinstance(slots, str):
                slots = (slots,)
            size += sum(inner(getattr(obj, s)) for s in slots if hasattr(obj, s))
        return size

    return inner(obj_0)

In [None]:
def use_case_1_1(stats, series):
    """Workload with as many add_item calls as get_average calls (ratio 1:1).

    Each value of ``series`` is added to ``stats`` and the average is queried
    immediately afterwards.

    Parameters
    ----------
    stats : object
        Any object exposing ``add_item(value)`` and ``get_average()``.
    series : iterable
        Values to feed into ``stats``.

    Returns
    -------
    The result of the last ``get_average()`` call, or None if ``series`` is
    empty (no query is made in that case).
    """
    avg = None  # stays None when the series is empty instead of being unbound
    for v in series:
        stats.add_item(v)
        avg = stats.get_average()
    return avg

In [None]:
# Seed the global NumPy RNG so the benchmark input (and the averages printed
# from it) is the same on every Restart & Run All.
np.random.seed(42)
series1 = np.random.random(50)
series1

In [None]:
# Benchmark Stats1 on the 1:1 use case: print its footprint when empty, the
# run time and footprint after the run, and the footprint after clear().
s1 = Stats1()
print(get_size(s1), 'B')
t0 = time.perf_counter()  # monotonic, high-resolution clock for intervals
avg = use_case_1_1(s1, series1)
# Stop the clock before printing so console I/O is not part of the measurement.
elapsed_time = time.perf_counter() - t0
print(avg)
size = get_size(s1)
print('Stats1', elapsed_time * 1000.0, 'ms', size, 'B')
s1.clear()
print(get_size(s1), 'B')

In [None]:
# Benchmark Stats2 on the 1:1 use case: print its footprint when empty, the
# run time and footprint after the run, and the footprint after clear().
s2 = Stats2()
print(get_size(s2), 'B')
t0 = time.perf_counter()  # monotonic, high-resolution clock for intervals
avg = use_case_1_1(s2, series1)
# Stop the clock before printing so console I/O is not part of the measurement.
elapsed_time = time.perf_counter() - t0
print(avg)
size = get_size(s2)
# Label the row with the implementation actually measured here.
print('Stats2', elapsed_time * 1000.0, 'ms', size, 'B')
# Clear the object under test (s2) so the last line reports its emptied size.
s2.clear()
print(get_size(s2), 'B')

In [None]:
def use_case_n_1(stats, series):
    """Workload dominated by add_item: len(series) additions, then one query.

    Every value of ``series`` is passed to ``stats.add_item`` and the result
    of a single final ``stats.get_average()`` call is returned.
    """
    for sample in series:
        stats.add_item(sample)
    return stats.get_average()

In [None]:
def use_case_1_m(stats, series, m=5):
    """Workload dominated by get_average: m queries per added item (ratio m:1).

    All values of ``series`` are added first; the average is then queried
    ``len(series) * m`` times.

    Parameters
    ----------
    stats : object
        Any object exposing ``add_item(value)`` and ``get_average()``.
    series : sized iterable
        Values to feed into ``stats``; ``len(series)`` must be defined.
    m : int, default 5
        Number of ``get_average()`` calls per item in ``series``.

    Returns
    -------
    The result of the last ``get_average()`` call, or None when no query is
    made (empty ``series`` or ``m == 0``).
    """
    avg = None  # stays None when the query loop runs zero times
    for v in series:
        stats.add_item(v)
    for _ in range(len(series) * m):
        avg = stats.get_average()
    return avg

In [None]:
class Stats3:
    """Lazy streaming average: buffers new values and folds them into the
    running mean only when the average is requested."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset the count, the pending buffer and the mean (which starts at 0.0)."""
        self.values = []
        self.n = 0
        self.average = 0.0
        self.average_is_up_to_date = True

    def add_item(self, value):
        """Buffer one observation and mark the cached mean as stale."""
        self.average_is_up_to_date = False
        self.values.append(value)

    def get_average(self):
        """Return the mean of all items added so far (0.0 if none were added)."""
        if self.average_is_up_to_date:
            return self.average

        pending = self.values
        # The k-th pending value is the (n + k)-th item overall.
        first_divisor = self.n + 1
        self.n += len(pending)
        for divisor, value in enumerate(pending, start=first_divisor):
            self.average += (value - self.average) / float(divisor)

        self.average_is_up_to_date = True
        self.values = []  # buffer consumed
        return self.average


In [None]:
# Smoke test for Stats3: add the values 10.0 and 5.0, then display the average
# as the cell output.
s3 = Stats3()
s3.add_item(10.0)
s3.add_item(5.0)
s3.get_average()

In [None]:
class Stats4:
    """Cached naive average: stores every value and re-sums them only when a
    new item has arrived since the last query."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Drop all values and the cached mean (None means 'no data')."""
        self.average = None
        self.average_is_up_to_date = True
        self.values = []

    def add_item(self, value):
        """Record one observation and invalidate the cached mean."""
        self.average_is_up_to_date = False
        self.values.append(value)

    def get_average(self):
        """Return the mean of the stored values, or None if none were added."""
        if self.average_is_up_to_date:
            return self.average

        # Cache is stale: recompute from scratch over every stored value.
        count = len(self.values)
        total = 0.0
        for item in self.values:
            total += item
        self.average = total / float(count)
        self.average_is_up_to_date = True
        return self.average

In [None]:
# Smoke test for Stats4: add the values 10.0 and 5.0, then display the average
# as the cell output.
s4 = Stats4()
s4.add_item(10.0)
s4.add_item(5.0)
s4.get_average()

In [None]:
def sim(n_runs=10):
    """Benchmark every Stats implementation on every use case.

    For each run a fresh random series of 500 values is drawn; each use case
    is then executed once per implementation. After each execution the
    object's size is measured with ``get_size`` and the object is cleared so
    it can be reused.

    Use cases are named ``'n_1'``, ``'1_1'`` or ``'1_<m>'``; the last form
    runs ``use_case_1_m`` with ``m`` parsed from the name.

    Parameters
    ----------
    n_runs : int, default 10
        Number of repetitions (each with its own random series).

    Returns
    -------
    pandas.DataFrame
        One row per (run, use case, implementation) with columns
        'Run', 'Use Case', 'Stats', 'Time (ms)', 'Size (B)' and 'Avg'.
    """
    stats = {
        's1': Stats1(),
        's2': Stats2(),
        's3': Stats3(),
        's4': Stats4()
    }

    use_cases = ['n_1', '1_1', '1_2', '1_20']
    # Workloads that take no extra parameter, keyed by use-case name.
    fixed_workloads = {'n_1': use_case_n_1, '1_1': use_case_1_1}
    columns = ['Run', 'Use Case', 'Stats', 'Time (ms)', 'Size (B)', 'Avg']

    # Collect plain records and build the frame once at the end: dtypes are
    # inferred at construction, so no post-hoc numeric conversion is needed.
    rows = []

    for i_run in range(n_runs):
        series = np.random.random(500)

        for use_case in use_cases:
            if use_case in fixed_workloads:
                workload = fixed_workloads[use_case]
                kwargs = {}
            else:
                # '1_<m>': m get_average calls per added item.
                workload = use_case_1_m
                kwargs = {'m': int(use_case.split('_')[1])}

            for s_name in sorted(stats.keys()):
                s_obj = stats[s_name]
                t0 = time.perf_counter()  # monotonic clock for intervals
                avg = workload(s_obj, series, **kwargs)
                elapsed_time = time.perf_counter() - t0
                size = get_size(s_obj)  # footprint while still holding the data
                s_obj.clear()
                rows.append((i_run, use_case, s_name, elapsed_time * 1000.0, size, avg))

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Run the benchmark with its default settings and show every measurement,
# with thousands separators (2 decimals for times, none for sizes).
r = sim()
r.style.format({'Time (ms)': '{:,.2f}', 'Size (B)': '{:,.0f}'})

In [None]:
# Average time and size over all runs, per (use case, implementation).
times = r.groupby(['Use Case', 'Stats'])['Time (ms)'].mean()
sizes = r.groupby(['Use Case', 'Stats'])['Size (B)'].mean()

# Both series share the same (Use Case, Stats) index; combine them column-wise
# and turn the index levels back into ordinary columns.
df = pd.DataFrame({'Time (ms)': times, 'Size (B)': sizes}).reset_index()
df.style.format({'Time (ms)': '{:,.2f}', 'Size (B)': '{:,.0f}'})

In [None]:
# One horizontal bar chart per (metric, use case) pair, drawn on a log x-axis.
colors = ['#af78aa', '#e55965', '#ecc96c', '#e6966f']
for metric in ['Time (ms)', 'Size (B)']:
    for use_case, group in df.groupby('Use Case'):
        # e.g. "Time — Use Case 1_1"
        title = '{} — Use Case {}'.format(metric.split()[0], use_case)
        group.plot.barh(
            x='Stats',
            y=metric,
            title=title,
            color=colors,
            legend=False,
            figsize=(8, 4),
        ).set(xscale='log', xlabel=metric)