In [24]:
# Liam's script for looking at basic info about the data
import pandas as pd
import numpy as np
import os

In [25]:
# Read raw data in
root = os.path.expanduser('../data/')

files = [root + f for f in os.listdir(root) if f.endswith('.csv') and f != 'addresses.csv']
dfs = [pd.read_csv(f, header=0, index_col=0, parse_dates=['Date/Time']) for f in files]
df = pd.concat(dfs)

In [26]:
# Number of rows
len(df.index)

179781

In [27]:
# Number of unique call numbers
len(df['Call Number'].unique())

132526

In [28]:
# Number of rows containing missing data
len(df[df.isnull().any(axis=1)].index)

0

In [29]:
# Number of addresses not containing "MKE"
len(df[df['Location'].str.contains('MKE') == False])

4384

In [30]:
# Number of unique locations
len(df['Location'].value_counts())

49776

In [31]:
# Top 10 locations (police stations etc)
df['Location'].value_counts().head(10)

2333 N 49TH ST,MKE              4405
3626 W FOND DU LAC AV,MKE       3962
749 W STATE ST,MKE              3803
245 W LINCOLN AV,MKE            3619
6929 W SILVER SPRING DR,MKE     3291
2920 N VEL R PHILLIPS AV,MKE    3090
3006 S 27TH ST,MKE              2979
6680 N TEUTONIA AV,MKE          1281
4715 W VLIET ST,MKE              943
949 N 9TH ST,MKE                 832
Name: Location, dtype: int64

In [32]:
# Number of instances of each nature
df['Nature of Call'].value_counts().head(10)

TRAFFIC STOP       16891
BUSINESS CHECK     13806
TRBL W/SUBJ        10977
RETURN STATION      9380
FOLLOW UP           6723
REPORTS             5210
WELFARE CITIZEN     5071
PATROL              4911
ACC PDO             4580
INVESTIGATION       4358
Name: Nature of Call, dtype: int64

In [33]:
# Number of instances of each status
df['Status'].value_counts()

Service in Progress              64947
Assignment Completed             53735
Advised                          27935
Unable to Locate Complainant     14272
To be Filed                       7040
City Citation(s) Issued           5333
Advised/Referral                  3797
Open Investigation                1882
No Prosecution                     241
Cleared by Arrest                  205
False Alarm                        150
Filed Driver Exchange Report       127
Patrol Request                      54
Referral                            52
State Citation(s) Issued             6
False Alarm (Weather Related)        4
Ordered to Appear                    1
Name: Status, dtype: int64

In [34]:
# Number of instances of each police district
df['Police District'].value_counts()

7       31494
3       29386
5       27027
4       24966
2       24482
6       19768
1       19462
CITY      371
7         363
5         333
3         303
6         273
4         271
2         266
?         263
OCOE      240
SPD       220
1         163
OUT        73
DPR        20
SCD        20
ICS3        7
CIB         4
NLA         2
NTF         2
FI          2
Name: Police District, dtype: int64

In [35]:
# Items with NLA police district (example)
df[df['Police District'] == 'NLA']

Unnamed: 0_level_0,Call Number,Date/Time,Location,Police District,Nature of Call,Status
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
183020910-Service in Progress,183020910,2018-10-29 10:56:08,3128 GRAYDON AVE - EAST TROY WI,NLA,INVESTIGATION,Service in Progress
182682688-Service in Progress,182682688,2018-09-25 21:17:22,"6500-BLK S 27TH, FRANKLIN",NLA,SPECIAL ASSIGN,Service in Progress


In [36]:
# Items originating from false alarms (example)
df[df['Status'] == 'False Alarm (Weather Related)']

Unnamed: 0_level_0,Call Number,Date/Time,Location,Police District,Nature of Call,Status
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
182770332-False Alarm (Weather Related),182770332,2018-10-04 03:44:38,"5135 W HASSEL LA,MKE",4,ENTRY,False Alarm (Weather Related)
182640245-False Alarm (Weather Related),182640245,2018-09-21 02:33:15,"2620 W WISCONSIN AV,MKE",3,MPD MON ALARM,False Alarm (Weather Related)
182551924-False Alarm (Weather Related),182551924,2018-09-12 16:35:18,"3522 W LISBON AV,MKE",3,HOLDUP ALARM,False Alarm (Weather Related)
182840307-False Alarm (Weather Related),182840307,2018-10-11 03:02:23,"1301 N 12TH ST,MKE",3,BURG/AUD GOVT,False Alarm (Weather Related)


In [37]:
pd.DataFrame(df['Nature of Call'].unique()).to_csv('natures.csv')

In [38]:
addrDict = {'Location': df['Location'].unique(), 'Cleaned Location': '', 'Latitude': '', 'Longitude': ''}
pd.DataFrame(addrDict).set_index(['Location']).to_csv('addresses.csv')