# #2: Statistics Calculator

Implement a statistics calculator that reads a list of numbers from the file `mydata.txt` (one number per line) and then calculates and prints their mean, median, mode, variance, and standard deviation, using the functions we wrote earlier in this chapter.

In [1]:
'''
Finding the mean (average): the sum of the numbers divided by how many numbers there are.
'''
def mean(l):
    """Return the arithmetic mean (average) of a collection of numbers.

    Args:
        l: A non-empty collection of numbers that supports both ``sum()``
            and ``len()`` (for example a list).  The short parameter name
            is kept so that existing keyword calls keep working.

    Returns:
        The sum of the values divided by how many values there are.

    Raises:
        ZeroDivisionError: If ``l`` is empty.
    """
    return sum(l) / len(l)

In [2]:
'''
To find the median, we sort the numbers in ascending order. 
If the length of the list of numbers is odd, 
the number in the middle of the list is the median. 
If the length of the list of numbers is even, 
we get the median by taking the mean of the two middle numbers.
'''
def median(numbers):
    """Return the median (middle value) of a collection of numbers.

    The values are sorted into a new list, so the caller's data keeps its
    original order.

    Args:
        numbers: An iterable of mutually comparable numbers.

    Returns:
        The middle element when the count is odd; otherwise the mean of
        the two middle elements.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty (there are no middle
            values to average).
    """
    # sorted() builds a copy; list.sort() would reorder the caller's list.
    ordered = sorted(numbers)
    length = len(ordered)
    mid = length // 2
    if length % 2 == 1:
        return ordered[mid]
    # Even count: average the two values that straddle the middle.
    middle_pair = ordered[mid - 1 : mid + 1]
    return sum(middle_pair) / len(middle_pair)

In [3]:
'''
Finding the most common element (mode).
'''
from collections import Counter

def mode(n):
    """Return the most common element (the mode) of a collection.

    Args:
        n: A non-empty iterable of hashable values.  The short parameter
            name is kept so that existing keyword calls keep working.

    Returns:
        The element with the highest count.  When several elements share
        the highest count, the one encountered first in ``n`` is returned.

    Raises:
        IndexError: If ``n`` is empty.
    """
    # Ask only for the top entry instead of ranking every distinct value.
    return Counter(n).most_common(1)[0][0]

In [4]:
'''
The variance measures how far each number in the set is from the mean. 
Variance is calculated by taking the differences between each number
in the set and the mean, squaring the differences 
(to make them positive) and dividing the sum of the squares 
by the number of values in the set.
'''
def variance(numbers):
    """Compute the population variance of a collection of numbers.

    The variance is the average squared distance of the values from their
    mean: the sum of the squared deviations divided by the number of
    values.

    Args:
        numbers: A collection of numbers that can be iterated and measured
            with ``len()``.

    Returns:
        The sum of squared deviations from the mean divided by
        ``len(numbers)``.
    """
    center = mean(numbers)
    # Square each deviation so values below and above the mean both count.
    squares = [(value - center) ** 2 for value in numbers]
    return sum(squares) / len(numbers)

In [5]:
import unittest

class StatisticsCalculatorTests(unittest.TestCase):
    """Check the statistics functions against the numbers in ``mydata.txt``.

    Every test also prints the value it computed, so running the suite
    produces the calculator's report as well as the pass/fail result.
    """

    @classmethod
    def setUpClass(cls):
        """Read ``mydata.txt`` once; one number per line, blank lines skipped."""
        with open('mydata.txt') as f:
            # float() raises on a whitespace-only line (for example a
            # trailing blank line), so filter those out.
            cls.data = [float(line) for line in f if line.strip()]

    def setUp(self):
        """Give each test its own copy so no test can reorder another's data."""
        self.numbers = list(self.data)

    def test_mean(self):
        result = mean(self.numbers)
        print(f'Mean: {result:.5f}')
        # Computed floats are compared with a tolerance, not bit-for-bit.
        self.assertAlmostEqual(result, 477.75)

    def test_median(self):
        result = median(self.numbers)
        print(f'Median: {result:.5f}')
        self.assertAlmostEqual(result, 500)

    def test_mode(self):
        result = mode(self.numbers)
        print(f'Mode: {result:.5f}')
        # The mode is one of the stored values, so exact equality is right.
        self.assertEqual(result, 100)

    def test_variance(self):
        var = variance(self.numbers)
        print(f'Variance {var:.5f}')
        self.assertAlmostEqual(var, 141047.35, places=2)

    def test_standard_deviation(self):
        # The standard deviation is the square root of the variance.
        std = variance(self.numbers)**0.5
        print(f'Standard Deviation {std:.5f}')
        self.assertAlmostEqual(std, 375.56, places=2)

In [6]:
if __name__ == '__main__':
    # Pass an explicit argv so unittest does not parse the process's own
    # command line; the first item stands in for the program name.
    # exit=False stops unittest from calling sys.exit() once the tests
    # finish, so the session keeps running.
    unittest.main(
        argv=['first-arg-is-ignored'],
        exit=False,
    )

.....

Mean: 477.75000
Median: 500.00000
Mode: 100.00000
Standard Deviation 375.56272
Variance 141047.35417



----------------------------------------------------------------------
Ran 5 tests in 0.010s

OK
