### Goal 1:

Create a lazy iterator that will return a named tuple of the data in each row. The data types should be appropriate - i.e. if the column is a date, you should be storing dates in the named tuple, if the field is an integer, then it should be stored as an integer, etc.

In [1]:
from collections import namedtuple
import datetime

In [2]:
def parse_int(data):
    try:
        return int(data)
    except ValueError:
        return data
    
def parse_date(data):
    try:
        return datetime.datetime.strptime('3/28/2018', '%m/%d/%Y').date()
    except ValueError:
        return data
    
def parse_string(data):
    data.strip()
    if data != '':
        return data
    else:
        pass

In [3]:
def read_all_data(file):
    with open(file) as f:
        header = namedtuple('Ticket', [ele.lower().replace(' ', '_') for ele in f.readline().strip('\n').split(',')])
        parser = (parse_int,
                  parse_string,
                  parse_string,
                  parse_string,
                  parse_date,
                  parse_int,
                  parse_string,
                  parse_string,
                  parse_string
                  )
        for line in f:
            fields = line.strip('\n').split(',')            
            yield header(*[func(field) for func, field in zip(parser, fields)])

In [4]:
file = './nyc_parking_tickets_extract.csv'

In [5]:
read_gen = read_all_data(file)

In [6]:
for i in range(5):
    print((next(read_gen)))

Ticket(summons_number=4006478550, plate_id='VAD7274', registration_state='VA', plate_type='PAS', issue_date=datetime.date(2018, 3, 28), violation_code=5, vehicle_body_type='4D', vehicle_make='BMW', violation_description='BUS LANE VIOLATION')
Ticket(summons_number=4006462396, plate_id='22834JK', registration_state='NY', plate_type='COM', issue_date=datetime.date(2018, 3, 28), violation_code=5, vehicle_body_type='VAN', vehicle_make='CHEVR', violation_description='BUS LANE VIOLATION')
Ticket(summons_number=4007117810, plate_id='21791MG', registration_state='NY', plate_type='COM', issue_date=datetime.date(2018, 3, 28), violation_code=5, vehicle_body_type='VAN', vehicle_make='DODGE', violation_description='BUS LANE VIOLATION')
Ticket(summons_number=4006265037, plate_id='FZX9232', registration_state='NY', plate_type='PAS', issue_date=datetime.date(2018, 3, 28), violation_code=5, vehicle_body_type='SUBN', vehicle_make='FORD', violation_description='BUS LANE VIOLATION')
Ticket(summons_number=4

---

### Goal 2:

Calculate the number of violations by car make.

In [7]:
car_make_count = {}
# while True:
for data in read_all_data(file):
    if data.vehicle_make in car_make_count:
        car_make_count[data.vehicle_make] += 1
    else:
        car_make_count[data.vehicle_make] = 1

In [8]:
print(sorted(car_make_count.items(), key=lambda x:x[1], reverse=True))

[('TOYOT', 112), ('HONDA', 106), ('FORD', 104), ('CHEVR', 76), ('NISSA', 70), ('DODGE', 45), ('FRUEH', 44), ('ME/BE', 38), ('GMC', 35), ('HYUND', 35), ('BMW', 34), ('LEXUS', 26), ('INTER', 25), ('JEEP', 22), ('NS/OT', 18), ('SUBAR', 18), ('INFIN', 13), ('LINCO', 12), ('CHRYS', 12), ('ACURA', 12), ('AUDI', 12), ('VOLVO', 12), ('MITSU', 11), ('ISUZU', 10), ('CADIL', 9), ('KIA', 8), ('VOLKS', 8), ('HIN', 6), ('KENWO', 5), (None, 5), ('ROVER', 5), ('BUICK', 5), ('MAZDA', 5), ('MERCU', 4), ('JAGUA', 3), ('SMART', 3), ('PORSC', 3), ('WORKH', 2), ('SATUR', 2), ('SCION', 2), ('SAAB', 2), ('HINO', 2), ('FIR', 1), ('OLDSM', 1), ('PETER', 1), ('CITRO', 1), ('GEO', 1), ('YAMAH', 1), ('BSA', 1), ('MINI', 1), ('PONTI', 1), ('SPRI', 1), ('PLYMO', 1), ('UPS', 1), ('FIAT', 1), ('UD', 1), ('UTILI', 1), ('GMCQ', 1), ('STAR', 1), ('AM/T', 1), ('MI/F', 1)]
