In [21]:
# First we must import pandas, a data analysis module for python
# See: https://pandas.pydata.org/pandas-docs/stable/10min.html
import pandas as pd

In [22]:
# "Import" data into pandas
# See: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
citations = pd.read_csv(filepath_or_buffer='citations.csv')

# Summarize the frame: column names, dtypes and non-null counts
citations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31086 entries, 0 to 31085
Data columns (total 5 columns):
ISSUE DATE                31086 non-null object
ISSUE TIME                31050 non-null object
FINE AMOUNT               31086 non-null object
TICKET LOCATION STREET    31086 non-null object
VIOLATION DESCRIPTION     31086 non-null object
dtypes: object(5)
memory usage: 1.2+ MB


In [23]:
# Preview the first 15 rows of the raw data
citations.head(n=15)

Unnamed: 0,ISSUE DATE,ISSUE TIME,FINE AMOUNT,TICKET LOCATION STREET,VIOLATION DESCRIPTION
0,1/4/2015,2:08,$45.00,0,RESERVED SPACE
1,1/5/2015,10:38,$20.00,13TH AVE METERS,OVERTIME STREET MTR
2,1/5/2015,10:39,$20.00,13TH AVE METERS,OVERTIME STREET MTR
3,1/5/2015,12:26,$30.00,15TH AVE,NO PERMIT DISPLAYED
4,1/5/2015,12:27,$30.00,15TH AVE,NO PERMIT DISPLAYED
5,1/5/2015,12:29,$30.00,15TH AVE,NO PERMIT DISPLAYED
6,1/5/2015,12:31,$30.00,15TH AVE,NO PERMIT DISPLAYED
7,1/5/2015,12:32,$35.00,15TH AVE,RESTRICTED AREA
8,1/5/2015,12:33,$30.00,15TH AVE,NO PERMIT DISPLAYED
9,1/5/2015,12:36,$20.00,15TH AVE,OVERTIME LOT METER


In [24]:
# Show a quick description of the data. Every column is still plain text
# at this point, so the summary is count / unique / top / freq per column
# rather than numeric statistics.
citations.describe()

Unnamed: 0,ISSUE DATE,ISSUE TIME,FINE AMOUNT,TICKET LOCATION STREET,VIOLATION DESCRIPTION
count,31086,31050,31086,31086,31086
unique,626,1137,15,429,47
top,10/3/2016,10:34,$20.00,UNIVERSITY ST METERS,OVERTIME STREET MTR
freq,178,113,12355,5933,7415


In [28]:
# Convert ISSUE DATE from plain text to datetime.
# The raw values look like "1/4/2015" (month/day/year), so the format is
# spelled out explicitly: pandas never has to guess between month-first and
# day-first, and a row that does not match raises instead of being
# silently misread.
# See: https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html
citations['ISSUE DATE'] = pd.to_datetime(citations['ISSUE DATE'], format='%m/%d/%Y')
# Check that the column dtype is now datetime64[ns]
citations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31086 entries, 0 to 31085
Data columns (total 5 columns):
ISSUE DATE                31086 non-null datetime64[ns]
ISSUE TIME                31050 non-null object
FINE AMOUNT               31086 non-null object
TICKET LOCATION STREET    31086 non-null object
VIOLATION DESCRIPTION     31086 non-null object
dtypes: datetime64[ns](1), object(4)
memory usage: 1.2+ MB


In [29]:
# Inspect the earliest and latest issue dates now that the column is datetime
issue_dates = citations['ISSUE DATE']
issue_dates.describe()

count                   31086
unique                    626
top       2016-10-03 00:00:00
freq                      178
first     2015-01-04 00:00:00
last      2017-03-13 00:00:00
Name: ISSUE DATE, dtype: object

In [30]:
# Keep only citations issued within FY 2015-16 (July 1, 2015 - June 30, 2016).
# One boolean mask replaces the two chained drop() calls, and the bounds are
# unambiguous ISO timestamps rather than strings pandas has to interpret.
# between() includes both endpoints; rows with a missing date (there are none
# in this data set) would be excluded.
# See: https://pandas.pydata.org/docs/reference/api/pandas.Series.between.html
fy_start = pd.Timestamp('2015-07-01')
fy_end = pd.Timestamp('2016-06-30')
# .copy() keeps the result an independent frame, as drop() did
citations = citations[citations['ISSUE DATE'].between(fy_start, fy_end)].copy()
# Check max and min now
citations['ISSUE DATE'].describe()

count                   14049
unique                    291
top       2016-04-29 00:00:00
freq                      123
first     2015-07-01 00:00:00
last      2016-06-30 00:00:00
Name: ISSUE DATE, dtype: object

In [34]:
# Count citations per location, most-ticketed first
# See: https://pandas.pydata.org/docs/reference/api/pandas.Series.value_counts.html
location_counts = citations['TICKET LOCATION STREET'].value_counts()
location_counts

UNIVERSITY ST METERS         2672
LOT 16A - FAC/STAFF LOT      1300
15TH AVE                      883
LOT 29A - EMU VIS LOT         578
LOT 12A - ONYX-LAWRENCE       560
13TH AVE METERS               558
LOT 15 - JAQUA                480
LOT 17 - HEDCO                477
LOT 56 - MILLRACE             403
LOT 42 - 12TH W OF KIN        377
LOT 18 - EDUCATION            266
LOT 44 - GRAD VILLAGE         248
LOT 36A - CARSON              244
SPENCER VIEW APTS             222
LOT 33 - BEAN EAST            218
LOT 55 - MOSS ST              216
LOT 37 - HAMILTON E           214
LOT 50 - BARNHART             214
LOT 34E - GLOBAL SCHOLARS     201
LOT 46 - MOSS CONNECT         191
LOT 19 - HEDCO/CSB            168
LOT 13 - 13TH BEECH-UNIV      163
15TH AVE METERS               137
LOT 6A - 11TH/KINCAID         135
LOT 58 - 1715 FRANKLIN        126
LOT 30 - MCCT                 126
LOT 99 - COL. GARAGE          108
LOT 52 - UOPD EAST             95
LOT 3A - AAA AREA              89
LOT 36B - STU 

In [35]:
# Summarize how the per-location citation counts are distributed
tickets_per_location = citations['TICKET LOCATION STREET'].value_counts()
tickets_per_location.describe()

count     291.000000
mean       48.278351
std       198.304559
min         1.000000
25%         1.000000
50%         2.000000
75%        12.000000
max      2672.000000
Name: TICKET LOCATION STREET, dtype: float64

In [None]:
# While we know that some of the data is messy,
# ... the top offenders are obvious. The top ten
# ... locations above each average more than one
# ... ticket per day, with University Street overwhelmingly
# ... taking the first spot with more than 7 tickets per day.

# While this is only one fiscal year's worth of data,
# ... it certainly gives you a start for a story.