In [1]:
# Liam's script for looking at basic info about the data
import pandas as pd
import numpy as np
import os

In [2]:
# Load every raw CSV export found in the data directory into a single frame.
# The first CSV column becomes the index and 'Date/Time' is parsed as datetime.
root = os.path.expanduser('../data/')

# os.listdir() returns names in arbitrary order; sorting them makes the row
# order of the concatenated frame reproducible from one run to the next.
files = [os.path.join(root, f) for f in sorted(os.listdir(root)) if f.endswith('.csv')]
if not files:
    # pd.concat([]) would raise a ValueError that does not mention the
    # directory; raise the same exception type with an actionable message.
    raise ValueError('No .csv files found in ' + os.path.abspath(root))

dfs = [pd.read_csv(f, header=0, index_col=0, parse_dates=['Date/Time']) for f in files]
df = pd.concat(dfs)

In [3]:
# Total number of rows in the combined frame
df.shape[0]

85944

In [4]:
# How many distinct values appear in 'Call Number'
# (dropna=False so a missing value, if any, is counted once, as unique() does)
df['Call Number'].nunique(dropna=False)

63304

In [5]:
# Count the rows that have a missing value in at least one column
int(df.isna().any(axis=1).sum())

0

In [6]:
# Count the rows whose 'Location' does not contain the substring "MKE"
# (rows where the match result is missing are not counted)
int(df['Location'].str.contains('MKE').eq(False).sum())

2016

In [7]:
# How many distinct (non-missing) values appear in 'Location'
df['Location'].nunique()

28954

In [8]:
# The 10 most frequent locations, most common first (police stations etc)
df['Location'].value_counts().iloc[:10]

2333 N 49TH ST,MKE              2060
749 W STATE ST,MKE              1863
245 W LINCOLN AV,MKE            1827
3626 W FOND DU LAC AV,MKE       1821
6929 W SILVER SPRING DR,MKE     1642
3006 S 27TH ST,MKE              1415
2920 N VEL R PHILLIPS AV,MKE    1134
2920 N 4TH ST,MKE                598
6680 N TEUTONIA AV,MKE           566
4715 W VLIET ST,MKE              465
Name: Location, dtype: int64

In [9]:
# The 10 most frequent 'Nature of Call' values with their row counts
df['Nature of Call'].value_counts().iloc[:10]

TRAFFIC STOP       7312
TRBL W/SUBJ        5591
BUSINESS CHECK     5434
RETURN STATION     4344
FOLLOW UP          3245
WELFARE CITIZEN    2637
REPORTS            2529
ACC PDO            2203
PATROL             2139
SPECIAL ASSIGN     2082
Name: Nature of Call, dtype: int64

In [10]:
# Row count per distinct 'Status' value, most common first
df.loc[:, 'Status'].value_counts()

Service in Progress              31843
Assignment Completed             24401
Advised                          13286
Unable to Locate Complainant      7433
To be Filed                       3460
City Citation(s) Issued           2286
Advised/Referral                  1915
Open Investigation                 907
No Prosecution                     112
Cleared by Arrest                  102
False Alarm                         78
Filed Driver Exchange Report        63
Patrol Request                      28
Referral                            23
False Alarm (Weather Related)        4
State Citation(s) Issued             3
Name: Status, dtype: int64

In [11]:
# Row count per distinct 'Police District' value, most common first
df.loc[:, 'Police District'].value_counts()

7       14742
3       14462
5       13099
4       12353
2       11812
1        9692
6        9278
CITY      188
OCOE      138
SPD       113
OUT        40
DPR        14
SCD         9
NTF         2
CIB         1
NLA         1
Name: Police District, dtype: int64

In [12]:
# Example: show the rows whose 'Police District' is exactly 'NLA'
df.loc[df['Police District'].eq('NLA')]

Unnamed: 0_level_0,Call Number,Date/Time,Location,Police District,Nature of Call,Status
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
182682688-Service in Progress,182682688,2018-09-25 21:17:22,"6500-BLK S 27TH, FRANKLIN",NLA,SPECIAL ASSIGN,Service in Progress


In [13]:
# Example: show the rows whose 'Status' is exactly 'False Alarm (Weather Related)'
df.loc[df['Status'].eq('False Alarm (Weather Related)')]

Unnamed: 0_level_0,Call Number,Date/Time,Location,Police District,Nature of Call,Status
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
182770332-False Alarm (Weather Related),182770332,2018-10-04 03:44:38,"5135 W HASSEL LA,MKE",4,ENTRY,False Alarm (Weather Related)
182640245-False Alarm (Weather Related),182640245,2018-09-21 02:33:15,"2620 W WISCONSIN AV,MKE",3,MPD MON ALARM,False Alarm (Weather Related)
182551924-False Alarm (Weather Related),182551924,2018-09-12 16:35:18,"3522 W LISBON AV,MKE",3,HOLDUP ALARM,False Alarm (Weather Related)
182840307-False Alarm (Weather Related),182840307,2018-10-11 03:02:23,"1301 N 12TH ST,MKE",3,BURG/AUD GOVT,False Alarm (Weather Related)


In [17]:
# Write the distinct 'Nature of Call' values (in order of first appearance)
# to natures.csv in the current working directory, as a single unnamed column.
pd.Series(df['Nature of Call'].unique()).to_frame().to_csv('natures.csv')

In [26]:
# Build a lookup-table skeleton: one row per distinct 'Location', plus three
# columns filled with empty strings, and write it to addresses.csv indexed by
# 'Location'.
addrDict = {'Location': df['Location'].unique()}
addrDict.update({blank_col: '' for blank_col in ('Cleaned Location', 'Latitude', 'Longitude')})
pd.DataFrame(addrDict).set_index('Location').to_csv('addresses.csv')