In [1]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject CSS so elements with class "container" span the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# EXAMPLE 1: bucketizing

In [3]:
import csv

# copied from https://automatetheboringstuff.com/chapter14/
def process_csv(filename):
    """Read a UTF-8 CSV file and return its contents as a list of rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of lists of strings, one inner list per CSV record. The header
        record (if the file has one) is returned as the first row.

    Raises:
        OSError: If the file cannot be opened.
    """
    # `with` guarantees the file is closed even if parsing raises.
    # newline="" is what the csv module expects: it lets the reader handle
    # line endings itself, so newlines embedded in quoted fields are preserved.
    with open(filename, encoding="utf-8", newline="") as csv_file:
        return list(csv.reader(csv_file))

In [4]:
# Load the whole table, then split the first row (column names) from the data
# rows. `rows` and `header` are module-level names that bucketize() and
# avg_per_bucket() read.
rows = process_csv("tornados.csv")
header = rows[0]
rows = rows[1:]
# Show the column names as the cell output.
header

['year', 'id', 'location', 'speed']

In [5]:
# KEY is the value found in the group_by_col column, VAL is the list of rows
# that share that value.
def bucketize(group_by_col, data=None, columns=None):
    """Group table rows by the value they hold in one column.

    Args:
        group_by_col: Name of the column whose values become the bucket keys.
        data: Optional list of rows (each a list of cell values). Defaults to
            the module-level `rows`.
        columns: Optional list of column names matching the row layout.
            Defaults to the module-level `header`.

    Returns:
        A dict mapping each distinct value of `group_by_col` to the list of
        rows containing it, in first-seen order.

    Raises:
        ValueError: If `group_by_col` is not one of the column names. This is
            checked up front, so it is raised even when there are no rows.
    """
    if columns is None:
        columns = header
    if data is None:
        data = rows

    # Resolve the column position once instead of once per row.
    col_idx = columns.index(group_by_col)

    buckets = {}
    for row in data:
        buckets.setdefault(row[col_idx], []).append(row)
    return buckets

def avg_per_bucket(buckets, avg_col, columns=None):
    """Compute the mean of one column within each bucket.

    Args:
        buckets: Dict mapping a key to a list of rows, as returned by
            bucketize().
        avg_col: Name of the column to average. Each cell is converted with
            float().
        columns: Optional list of column names matching the row layout.
            Defaults to the module-level `header`.

    Returns:
        A dict mapping each bucket key to the average of `avg_col` over the
        rows in that bucket.

    Raises:
        ValueError: If `avg_col` is not one of the column names, or a cell
            cannot be converted to float.
        ZeroDivisionError: If a bucket contains no rows.
    """
    if columns is None:
        columns = header

    # Resolve the column position once instead of once per row.
    col_idx = columns.index(avg_col)

    stats = {}
    for key, bucket in buckets.items():
        total = 0
        for row in bucket:
            total += float(row[col_idx])
        stats[key] = total / len(bucket)
    return stats

# Average of the "speed" column within each "location" group.
avg_per_bucket(bucketize("location"), "speed")

{'site B': 213.86666666666667,
 'site C': 206.7058823529412,
 'site A': 217.69230769230768}

In [6]:
# Average of the "speed" column within each "year" group.
avg_per_bucket(bucketize("year"), "speed")

{'2006': 175.0,
 '1996': 242.8,
 '2016': 225.25,
 '2014': 208.75,
 '2015': 138.5,
 '2005': 179.0,
 '2002': 221.2,
 '1995': 211.0,
 '1997': 247.0,
 '2001': 234.66666666666666,
 '2011': 236.5,
 '2010': 201.5,
 '2017': 193.0,
 '2008': 261.3333333333333,
 '2003': 197.5,
 '2004': 212.0,
 '2013': 239.5,
 '2009': 170.33333333333334,
 '1998': 240.0,
 '2007': 212.0}

In [7]:
# Average of the "year" column within each "location" group; the year strings
# are converted with float() before averaging.
avg_per_bucket(bucketize("location"), "year")

{'site B': 2006.4, 'site C': 2005.5294117647059, 'site A': 2006.1923076923076}

In [8]:
# EXAMPLE 2: table as a list of dicts

In [23]:
def load_csv_dicts(path):
    """Load a CSV file as a list of dicts keyed by column name.

    The first record of the file supplies the keys; every following record
    becomes one dict whose values are the raw cell strings.

    Args:
        path: Path of the CSV file, passed through to process_csv().

    Returns:
        A list with one dict per data row, in file order.
    """
    table = process_csv(path)
    column_names, records = table[0], table[1:]
    # Cells are looked up by header position, so a record shorter than the
    # header raises IndexError and extra trailing cells are ignored.
    return [
        {column_names[pos]: record[pos] for pos in range(len(column_names))}
        for record in records
    ]
    
# Same file as above, this time loaded as a list of dicts (one per data row).
new_data = load_csv_dicts("tornados.csv")

In [24]:
# "speed" of the last row; the value is a string because load_csv_dicts()
# stores the cells without converting them.
new_data[-1]["speed"]

'230'

In [25]:
# "location" of the first data row.
new_data[0]["location"]

'site B'

In [26]:
# Display every row of the table as the cell output.
new_data

[{'year': '2006', 'id': 'QPIQPWDP', 'location': 'site B', 'speed': '175'},
 {'year': '1996', 'id': 'MMMHKDDK', 'location': 'site B', 'speed': '290'},
 {'year': '2016', 'id': 'QSCAPJBU', 'location': 'site B', 'speed': '290'},
 {'year': '2014', 'id': 'KKGOICYZ', 'location': 'site C', 'speed': '122'},
 {'year': '2015', 'id': 'ZDMHZTXL', 'location': 'site A', 'speed': '147'},
 {'year': '2005', 'id': 'FEBIJZIF', 'location': 'site A', 'speed': '198'},
 {'year': '2002', 'id': 'EYIVKEWL', 'location': 'site B', 'speed': '199'},
 {'year': '1995', 'id': 'JDUTRHFQ', 'location': 'site A', 'speed': '281'},
 {'year': '1997', 'id': 'NSCJTEAU', 'location': 'site B', 'speed': '222'},
 {'year': '2005', 'id': 'AWLDIUCW', 'location': 'site A', 'speed': '173'},
 {'year': '1996', 'id': 'JRHLYGLS', 'location': 'site C', 'speed': '238'},
 {'year': '2001', 'id': 'QRYMLENE', 'location': 'site C', 'speed': '174'},
 {'year': '1995', 'id': 'RCAOONFD', 'location': 'site A', 'speed': '198'},
 {'year': '2002', 'id': '