-
Notifications
You must be signed in to change notification settings - Fork 210
/
Copy pathplot_lengths_histogram_add.py
108 lines (81 loc) · 3.16 KB
/
plot_lengths_histogram_add.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""Plot histogram of SortedList sublist lengths while items are added.
Script output is a video named `sublist-lengths.mp4`. The video displays a line
graph where the x-axis is sublist length and the y-axis is the percentage of
sublists with that length. Sublist lengths are divided into buckets as with a
histogram. A line fitting the histogram as a normal distribution is plotted
alongside in green.
A line graph was used to represent the histogram because it was easier to
animate.
Observations:
* Average sublist length cycles between `load` and `load * 2` with an
exponentially increasing period.
* As the number of elements increases, the sublist length distribution can be
approximated by a normal distribution. The fit is valid within 30% of the
mean.
* At the end of the video, the normal distribution fitting the histogram with
mean=1500 has standard deviation=102.94.
* Observing the mean and standard deviation over time shows that the standard
deviation tends to increase at a given mean over time. In this way, the curve
flattens as more values are added to it.
"""
import argparse
import itertools as it
import numpy as np
import scipy
import scipy.stats
import sortedcontainers as sc
from matplotlib import pyplot as plt
from matplotlib import animation
import random
DISPLAY_FIT = False
def background():
"Plot the background of each animated frame."
hist_line.set_data([], [])
norm_line.set_data([], [])
return hist_line, norm_line
def frame(num):
"Draw frame."
values.update(func() for func in it.repeat(random.random, LOAD))
data = np.array([len(sublist) for sublist in values._lists])
hist, bins = np.histogram(data, bins=BINS, normed=True)
hist_line.set_data(bins[:-1], hist)
norm = scipy.stats.norm
params = norm.fit(data)
norm_line.set_data(LENGTHS, norm.pdf(LENGTHS, *params))
if DISPLAY_FIT:
print('Normal', '%10.3f %10.3f' % params)
return hist_line, norm_line
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--load', type=float, default=1e3)
parser.add_argument('--show', action='store_true')
parser.add_argument('--filename', default='sublist-lengths-add.mp4')
parser.add_argument('--display-fit', action='store_true')
args = parser.parse_args()
random.seed(args.seed)
LOAD = int(args.load)
BINS = np.array(range(LOAD, LOAD * 2 + 1, LOAD // 20))
LENGTHS = np.array(range(LOAD, LOAD * 2))
DISPLAY_FIT = args.display_fit
values = sc.SortedList(load=LOAD)
# Initialize with random values.
for func in it.repeat(random.random, LOAD):
values.add(func())
fig = plt.figure()
bounds = (LOAD - 5 * LOAD // 100, LOAD * 2 - 5 * LOAD // 100)
ax = plt.axes(xlim=bounds, ylim=(0, 0.01))
(hist_line,) = ax.plot([], [])
(norm_line,) = ax.plot([], [])
animator = animation.FuncAnimation(
fig,
frame,
init_func=background,
frames=1000,
interval=10,
blit=True,
)
if args.show:
plt.show()
else:
animator.save(args.filename, fps=20, extra_args=['-vcodec', 'libx264'])