## Descriptive and Inferential Statistics

In [1]:
import math
import numpy as np
import scipy
import sympy
import sklearn
import matplotlib.pyplot as plt

In [2]:
# mean: the sum of the observations divided by how many there are
sample = [1, 3, 2, 5, 7, 0, 2, 3]
sample_total = sum(sample)
sample_size = len(sample)
mean = sample_total / sample_size
print(mean)

2.875


In [3]:
# weighted mean: each observation contributes in proportion to its weight,
# and the total is normalised by the sum of the weights
sample = [90, 80, 63, 87]
weights = [.20, .20, .20, .40]
weighted_values = [s * w for s, w in zip(sample, weights)]
weighted_mean = sum(weighted_values) / sum(weights)
print(weighted_mean)

81.4


In [4]:
# median
sample = [0, 1, 5, 7, 9, 10, 14]


def median(values):
    """Return the median of ``values``.

    The values are sorted first. An odd-length input yields its middle
    element unchanged; an even-length input yields the mean of the two
    middle elements as a float.
    """
    ranked = sorted(values)
    half, is_odd = divmod(len(ranked), 2)
    if is_odd:
        return ranked[half]
    # Even length: average the two elements that straddle the midpoint.
    lower, upper = ranked[half - 1], ranked[half]
    return (lower + upper) / 2.0


print(median(sample))

7


In [5]:
# mode (unimodal and multimodal)
sample = [1, 3, 2, 5, 7, 0, 2, 3]


def mode(values):
    """Return a list of the most frequently occurring value(s).

    A single mode gives a one-element list; ties give every tied value.
    Empty input gives an empty list.

    Args:
        values: Any iterable of hashable items. It is copied into a list
            first, so one-shot iterators such as generators are supported.

    Returns:
        A list of the values sharing the highest count. The order comes
        from iterating a ``set`` and is therefore not guaranteed.
    """
    from collections import Counter

    # Materialise once: the data is needed both for counting and for
    # building the candidate set, and an iterator can only be read once.
    observed = list(values)
    counts = Counter(observed)
    if not counts:
        # No data means no mode; avoid calling max() on an empty sequence.
        return []
    max_count = max(counts.values())
    return [v for v in set(observed) if counts[v] == max_count]


print(mode(sample))

[2, 3]


In [6]:
# variance
data = [0, 1, 5, 7, 9, 10, 14]


def variance(values):
    """Return the population variance of ``values``.

    The squared deviations from the arithmetic mean are summed and divided
    by the number of values.
    """
    total = sum(values)
    count = len(values)
    centre = total / count
    squared_deviations = [(v - centre) ** 2 for v in values]
    return sum(squared_deviations) / count


print(variance(data))

21.387755102040813


In [7]:
# standard deviation
data = [0, 1, 5, 7, 9, 10, 14]


def std_dev(values):
    """Return the standard deviation: the square root of ``variance(values)``."""
    spread = variance(values)
    return math.sqrt(spread)


print(std_dev(data))

4.624689730353898


In [8]:
# variance and standard deviation for a sample
def variance(values, is_sample: bool = False):
    """Return the variance of ``values``.

    When ``is_sample`` is false the sum of squared deviations from the mean
    is divided by ``len(values)``; when it is true the divisor is
    ``len(values) - 1``.
    """
    centre = sum(values) / len(values)
    squared_deviations = [(v - centre) ** 2 for v in values]

    divisor = len(values)
    if is_sample:
        # Sample variance uses one fewer than the number of observations.
        divisor -= 1

    return sum(squared_deviations) / divisor

def std_dev(values, is_sample: bool = False):
    """Return the square root of ``variance(values, is_sample)``."""
    spread = variance(values, is_sample)
    return math.sqrt(spread)


print(f"VARIANCE = {variance(data, is_sample=True)}")
print(f"STD DEV = {std_dev(data, is_sample=True)}")

VARIANCE = 24.95238095238095
STD DEV = 4.99523582550223
