In [16]:
# Dependencies
from math import sqrt
import statistics
from scipy.stats.mstats import zscore

In [1]:
def mean(arr):
    """
    Return the arithmetic mean of a sized collection of numbers.

    The total of the elements is divided by the element count using
    true division, so the result is a float even for integer input.
    An empty collection raises ZeroDivisionError.

    Usage: mean([3, 4, 5, 6, 7]) # 5.0
    """
    total = sum(arr)
    count = len(arr)
    return total / count

In [6]:
# Sanity check: our mean() should agree with the standard library's.
tmp = [1, 2, 3, 4]
print(mean(tmp), statistics.mean(tmp), sep="\n")

2.5
2.5


In [9]:
def variance(arr):
    """
    Return the population variance of a sized collection of numbers.

    This is the average squared deviation from the mean, dividing by
    len(arr) (not len(arr) - 1).

    Usage: variance([1, 2, 3, 4]) # 1.25
    """
    center = mean(arr)
    squared_deviations = [(value - center) ** 2 for value in arr]
    return sum(squared_deviations) / len(arr)

In [11]:
# Sanity check: variance() divides by n, so compare against pvariance
# (the population variance), not statistics.variance.
tmp = [1, 2, 3, 4]
print(variance(tmp), statistics.pvariance(tmp), sep="\n")

1.25
1.25


In [12]:
def standard_deviation(arr):
    """
    Return the population standard deviation of a collection of numbers.

    Computed as the square root of variance(arr).

    Usage: standard_deviation([1, 2, 3, 4]) # 1.118033988749895
    """
    spread_squared = variance(arr)
    return sqrt(spread_squared)

In [13]:
# Sanity check: standard_deviation() should match the standard library's
# population standard deviation.
tmp = [1, 2, 3, 4]
print(standard_deviation(tmp), statistics.pstdev(tmp), sep="\n")

1.118033988749895
1.118033988749895


In [14]:
def z_score(arr, index):
    """
    Return the z-score of the element at position `index` in `arr`.

    The z-score is the element's deviation from mean(arr), expressed in
    units of standard_deviation(arr).

    Usage: z_score([1, 2, 3, 4], 0) # -1.3416407864998738
    """
    deviation = arr[index] - mean(arr)
    spread = standard_deviation(arr)
    return deviation / spread

In [17]:
# Sanity check: the z-score of the first element should match the first
# entry of scipy's zscore output.
tmp = [1, 2, 3, 4]
print(z_score(tmp, 0), zscore(tmp)[0], sep="\n")

-1.3416407864998738
-1.3416407865


In [18]:
def z_scores(arr):
    """
    Return a list with the z-score of every element of `arr`, in order.

    The mean and standard deviation are computed once and reused for all
    elements, rather than being recomputed per element, so the work is
    linear in len(arr) instead of quadratic.

    Returns an empty list when `arr` is empty. If the standard deviation
    is zero (for example, all elements equal), the division raises
    ZeroDivisionError.

    Usage: z_scores([1, 2, 3, 4])
    # [-1.3416407864998738, -0.4472135954999579,
    #  0.4472135954999579, 1.3416407864998738]
    """
    # Guard first: mean() would divide by zero on empty input, whereas an
    # empty collection simply has no z-scores.
    if len(arr) == 0:
        return []
    mu = mean(arr)
    sigma = standard_deviation(arr)
    return [(x - mu) / sigma for x in arr]

In [20]:
# Sanity check: z_scores() should match scipy's zscore element for element
# (scipy prints a NumPy array, so its display is rounded).
tmp = [1, 2, 3, 4]
print(z_scores(tmp), zscore(tmp), sep="\n")

[-1.3416407864998738, -0.4472135954999579, 0.4472135954999579, 1.3416407864998738]
[-1.34164079 -0.4472136   0.4472136   1.34164079]


In [22]:
def zipped_z_scores(arr):
    """
    Pair each element of `arr` with its z-score.

    Returns a list of (value, z_score) tuples in the same order as `arr`.

    Usage: zipped_z_scores([1, 2, 3, 4])
    # [(1, -1.3416407864998738), (2, -0.4472135954999579), ...]
    """
    return [(value, score) for value, score in zip(arr, z_scores(arr))]

In [24]:
# Show each sample value next to its z-score.
tmp = [1, 2, 3, 4]
print(zipped_z_scores(tmp))

[(1, -1.3416407864998738), (2, -0.4472135954999579), (3, 0.4472135954999579), (4, 1.3416407864998738)]


In [25]:
def summarize(title, arr):
    """
    Print a short statistical report for a collection of numbers.

    Prints a heading containing `title`, then the variance, the standard
    deviation, and the (value, z-score) pairs of `arr`, followed by a
    separator line. Returns None.
    """
    print("Summarizing {}".format(title))
    # Each statistic is computed right before it is printed, so the lines
    # above it are already emitted if a later computation fails.
    report_lines = (
        ("Variance: {}", variance),
        ("Standard Deviation: {}", standard_deviation),
        ("Z-Scores: {}", zipped_z_scores),
    )
    for template, statistic in report_lines:
        print(template.format(statistic(arr)))
    print("======")

In [26]:
# Prices of random electronics at Best Buy
prices = [4, 425, 984, 2932, 49]
summarize("Prices", prices)

# Ages of students in a bootcamp classroom
bootcamp_classroom_ages = [27, 35, 42, 52, 36, 28]
summarize("Bootcamp Ages", bootcamp_classroom_ages)

# Ages of children and parents at a child's birthday party
birthday_party_ages = [6, 5, 6, 6, 35, 34, 42]
summarize("Birthday Party Ages", birthday_party_ages)

# Test scores from a 2nd grade geography test
geo_grades = [87, 89, 91, 93, 95]
summarize("Geography Grades", geo_grades)

# Test scores from a graduate quantum mechanics midterm
quantum_grades = [63, 63, 98, 13, 58, 13, 8]
summarize("Quantum Mechanics Grades", quantum_grades)

# Prices between 30 and 41 plus a single extreme value (1000)
summarize("Prices", [30, 31, 31, 32, 32, 40, 41, 41, 1000])


Summarizing Prices
Variance: 1177294.9599999997
Standard Deviation: 1085.032239152367
Z-Scores: [(4, -0.8062433247913429), (425, -0.41823642065650596), (984, 0.09695564445364577), (2932, 1.8922940037283782), (49, -0.7647699027341751)]
Summarizing Bootcamp Ages
Variance: 72.55555555555556
Standard Deviation: 8.517954892787092
Z-Scores: [(27, -1.1348576962825065), (35, -0.19566512004870779), (42, 0.6261283841558661), (52, 1.8001191044481146), (36, -0.07826604801948295), (28, -1.0174586242532817)]
Summarizing Birthday Party Ages
Variance: 244.6938775510204
Standard Deviation: 15.64269406307687
Z-Scores: [(6, -0.8401914075580905), (5, -0.9041190146549017), (6, -0.8401914075580905), (6, -0.8401914075580905), (35, 1.0137091982494355), (34, 0.9497815911526242), (42, 1.4612024479271142)]
Summarizing Geograph Grades
Variance: 8.0
Standard Deviation: 2.8284271247461903
Z-Scores: [(87, -1.414213562373095), (89, -0.7071067811865475), (91, 0.0), (93, 0.7071067811865475), (95, 1.414213562373095)]
Su