In [1]:
import csv
from collections import namedtuple
from collections import Counter

In [2]:
# Read only the header line to see the column names on which the namedtuple
# fields are based. (The namedtuple field names defined in the next cell are
# shortened versions of these header names.)

# The with-block closes the file handle as soon as the header has been read.
with open('nyc_parking_tickets_extract-1.csv', 'r') as f:
    header_fields = f.readline().strip().split(',')
header_fields

['Summons Number',
 'Plate ID',
 'Registration State',
 'Plate Type',
 'Issue Date',
 'Violation Code',
 'Vehicle Body Type',
 'Vehicle Make',
 'Violation Description']

In [3]:
# Record type for a single ticket: one field per CSV column, in header order.
# The field names are shortened, identifier-safe versions of the header names.

ParkingTickets = namedtuple(
    'ParkingTickets',
    (
        'summons_no',       # Summons Number
        'plate_id',         # Plate ID
        'regn_state',       # Registration State
        'plate_type',       # Plate Type
        'issue_date',       # Issue Date
        'violation_code',   # Violation Code
        'vehicle_type',     # Vehicle Body Type
        'vehicle_make',     # Vehicle Make
        'violation_descr',  # Violation Description
    ),
)

In [4]:
# Peek at the first 10 raw lines (header included) as a reference while coding,
# e.g. to check that next() on the data generator returns the expected row.
# The file is iterated line by line instead of being loaded with readlines(),
# and the handle is closed when the with-block exits.

with open('nyc_parking_tickets_extract-1.csv', 'r') as f:
    # zip stops as soon as range(10) is exhausted, so at most 10 lines are taken
    # (fewer if the file is shorter).
    first_lines = [line for _, line in zip(range(10), f)]
first_lines

['Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Violation Description\n',
 '4006478550,VAD7274,VA,PAS,10/5/2016,5,4D,BMW,BUS LANE VIOLATION\n',
 '4006462396,22834JK,NY,COM,9/30/2016,5,VAN,CHEVR,BUS LANE VIOLATION\n',
 '4007117810,21791MG,NY,COM,4/10/2017,5,VAN,DODGE,BUS LANE VIOLATION\n',
 '4006265037,FZX9232,NY,PAS,8/23/2016,5,SUBN,FORD,BUS LANE VIOLATION\n',
 '4006535600,N203399C,NY,OMT,10/19/2016,5,SUBN,FORD,BUS LANE VIOLATION\n',
 '4007156700,92163MG,NY,COM,4/13/2017,5,VAN,FRUEH,BUS LANE VIOLATION\n',
 '4006687989,MIQ600,SC,PAS,11/21/2016,5,VN,HONDA,BUS LANE VIOLATION\n',
 '4006943052,2AE3984,MD,PAS,2/1/2017,5,SW,LINCO,BUS LANE VIOLATION\n',
 '4007306795,HLG4926,NY,PAS,5/30/2017,5,SUBN,TOYOT,BUS LANE VIOLATION\n']

In [5]:
# Define a function to yield one row at a time (leaving out first line)

def get_data(file_name='nyc_parking_tickets_extract-1.csv'):
    """Lazily yield the data rows of a parking-tickets CSV file.

    The first record of the file is treated as the header and is skipped;
    every following record is yielded as a list of strings, one per column.

    Parameters
    ----------
    file_name : str, optional
        Path of the CSV file to read. Defaults to the extract used throughout
        this notebook, so ``get_data()`` behaves as before.

    Yields
    ------
    list of str
        The fields of one data row, in file order.

    Notes
    -----
    The file stays open only while the generator is being consumed; it is
    closed when the generator is exhausted or closed.
    """
    # newline='' hands newline handling to the csv module (as the csv docs
    # recommend), so line breaks embedded in quoted fields are preserved.
    with open(file_name, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        # Skip the header through the csv reader itself; the default value
        # keeps an empty file from raising StopIteration inside the generator.
        next(reader, None)
        yield from reader

In [6]:
data_gen = get_data() # shared generator of data rows (header already skipped); gen_tkts() reads from it

Goal 1: Create a lazy iterator that returns each data row as a namedtuple whose fields come from the CSV columns.

In [7]:
# Goal 1: Lazy iterator
def gen_tkts():
    """Return the next data row as a ``ParkingTickets`` record.

    Each call advances the module-level ``data_gen`` generator by one row and
    wraps that row's fields in a ``ParkingTickets`` namedtuple.

    Returns
    -------
    ParkingTickets
        The record built from the next row of ``data_gen``.

    Raises
    ------
    StopIteration
        When ``data_gen`` has no rows left.

    Notes
    -----
    This is a plain function that returns one record per call, not a
    generator; the laziness comes from ``data_gen``.
    NOTE(review): the stated goal asks for a lazy iterator -- confirm whether
    a one-record-per-call function is what is wanted.
    """
    next_row = next(data_gen)
    return ParkingTickets(*next_row)

In [8]:
gen_tkts() # Each call takes the next row from data_gen and returns it as a ParkingTickets record (a plain function, not a generator).

ParkingTickets(summons_no='4006478550', plate_id='VAD7274', regn_state='VA', plate_type='PAS', issue_date='10/5/2016', violation_code='5', vehicle_type='4D', vehicle_make='BMW', violation_descr='BUS LANE VIOLATION')

Goal 2: Calculate the number of violations by car make

In [9]:
# Making a function to get the count of carmakes from the data

def carmake_data(func):
    """Count the number of tickets per vehicle make.

    Parameters
    ----------
    func : iterable
        Despite its name, this is an iterable of rows (for example the
        generator returned by ``get_data()``), not a callable. Each row must
        supply the ``ParkingTickets`` fields in order.

    Returns
    -------
    collections.Counter
        Maps each ``vehicle_make`` value to the number of rows carrying it.
    """
    # A generator expression feeds Counter one make at a time, so the rows are
    # consumed lazily and no intermediate list of all makes is built.
    return Counter(ParkingTickets._make(item).vehicle_make for item in func)

In [10]:
# Re-create data_gen so that counting starts again from the first data row
# (the gen_tkts() call above already consumed one row of the previous generator).

data_gen = get_data()

In [11]:
s = carmake_data(data_gen) # consumes every remaining row of data_gen and counts the rows per vehicle make

In [12]:
# s is a collections.Counter (a dict subclass) mapping each car make to its number of violations
print(s)

Counter({'TOYOT': 112, 'HONDA': 106, 'FORD': 104, 'CHEVR': 76, 'NISSA': 70, 'DODGE': 45, 'FRUEH': 44, 'ME/BE': 38, 'GMC': 35, 'HYUND': 35, 'BMW': 34, 'LEXUS': 26, 'INTER': 25, 'JEEP': 22, 'NS/OT': 18, 'SUBAR': 18, 'INFIN': 13, 'LINCO': 12, 'CHRYS': 12, 'ACURA': 12, 'AUDI': 12, 'VOLVO': 12, 'MITSU': 11, 'ISUZU': 10, 'CADIL': 9, 'KIA': 8, 'VOLKS': 8, 'HIN': 6, 'KENWO': 5, '': 5, 'ROVER': 5, 'BUICK': 5, 'MAZDA': 5, 'MERCU': 4, 'JAGUA': 3, 'SMART': 3, 'PORSC': 3, 'WORKH': 2, 'SATUR': 2, 'SCION': 2, 'SAAB': 2, 'HINO': 2, 'FIR': 1, 'OLDSM': 1, 'PETER': 1, 'CITRO': 1, 'GEO': 1, 'YAMAH': 1, 'BSA': 1, 'MINI': 1, 'PONTI': 1, 'SPRI': 1, 'PLYMO': 1, 'UPS': 1, 'FIAT': 1, 'UD': 1, 'UTILI': 1, 'GMCQ': 1, 'STAR': 1, 'AM/T': 1, 'MI/F': 1})


In [13]:
s.most_common(3) # the 3 most frequent car makes as (make, count) pairs, most common first

[('TOYOT', 112), ('HONDA', 106), ('FORD', 104)]