This notebook takes the raw data spread across multiple files within each subfolder of the "raw" folder and combines it into one file per month. The combined data is then split into motor vehicle thefts and all other thefts, and finally geocoded so that it can be used in the map construction.

In [1]:
import pandas as pd
import os
import geocoder
import requests

In [2]:
for folder in os.listdir('./raw'):
    file_list = []
    for file in os.listdir('./raw/' + folder):
        if(file.endswith('.xls')):
            data = pd.read_excel('./raw/' + folder + '/' + file)
            file_list.append(data)
   
    df = pd.concat(file_list, ignore_index=True)
    
    columns = ['Incident Number','Date', 'Time', 'Police District','Offense 1',
          'Offense 2', 'Offense 3', 'Offense 4', 'Offense 5', 'Location']
    df.columns = columns
    
    df = df[df['Offense 1'].notnull()]
    df = df[df['Location'].notnull()]
    
    count_1 = df['Offense 1'].value_counts()
    count_2 = df['Offense 2'].value_counts()
    count_3 = df['Offense 3'].value_counts()
    count_4 = df['Offense 4'].value_counts()
    count_5 = df['Offense 5'].value_counts()

    counts = [count_1, count_2, count_3, count_4, count_5]
    for count in counts:
        if count.empty:
            df = df.drop(count.name, axis = 1)
            
    df.to_csv('./raw/' + folder + 'Complete.csv', sep=';', index = False)

In [3]:
def address_to_latlong(address):
    '''Returns the latitude and longitude for a given address'''
    address += ", Milwaukee, WI"
    location = geocoder.arcgis(address, session = session)
    
    if not location.latlng:
        print("Couldn't Parse: "  + address )
    else:
        print(f'Parsed: {address} - {location.latlng}')
    return location.latlng

In [4]:
for file in os.listdir('./raw/'):
    if(file.endswith('.csv')):
        df = pd.read_csv('./raw/'+ file, sep =';')
    
        zipped = zip(df['Offense 1'].items(), df['Offense 2'].items(), 
        df['Offense 3'].items())
        keyword = 'theft'
        
        for off_1, off_2, off_3 in zipped:
            if(not keyword in off_1[1].lower() and
              (pd.isnull(off_2[1]) or not keyword in off_2[1].lower()) and
              (pd.isnull(off_3[1]) or not keyword in off_3[1].lower())):
                df = df.drop(off_1[0])
 
        df['Address'] = df['Location'].copy()
            
        theft = df.copy()
        m_theft = df.copy()
        
        zipped = zip(df['Offense 1'].items(), df['Offense 2'].items(), 
        df['Offense 3'].items())
        keyword = 'motor vehicle theft'
        
        for off_1, off_2, off_3 in zipped:
            if(not keyword in off_1[1].lower() and
                (pd.isnull(off_2[1]) or not keyword in off_2[1].lower()) and
                (pd.isnull(off_3[1]) or not keyword in off_3[1].lower())):
                m_theft = m_theft.drop(off_1[0])
            else:
                theft = theft.drop(off_1[0])
                
        
        with requests.Session() as session:
            # This is likely to be very slow
            # depending on the geocoding service used
            theft['Location'] = theft['Location'].apply(address_to_latlong)
            m_theft['Location'] = m_theft['Location'].apply(address_to_latlong)
            
        theft.to_csv('./' + 'theft_' + file.split('Complete.csv')[0] + '.csv', sep=';', index = False)
        m_theft.to_csv('./' + 'm_theft_' + file.split('Complete.csv')[0] + '.csv', sep=';', index = False)
        print(f'Finished {file}')

Parsed: 6109 N 39TH ST, Milwaukee, WI - [43.129179586733216, -87.96094116475253]
Parsed: 4729 N 29TH ST, Milwaukee, WI - [43.103076577475576, -87.94915084744304]
Parsed: 4024 N 24TH PL, Milwaukee, WI - [43.0904342200935, -87.94344542627985]
Parsed: 4614 N TEUTONIA AV, Milwaukee, WI - [43.100993754659754, -87.94476059515867]
Parsed: 4242 N 26TH ST, Milwaukee, WI - [43.093753994171635, -87.945572930175]
Parsed: 7005 N TEUTONIA AV, Milwaukee, WI - [43.14513939494493, -87.95584821276456]
Parsed: 4847 N 22ND ST, Milwaukee, WI - [43.10512237121063, -87.9393286289432]
Parsed: 5103 N 31ST ST, Milwaukee, WI - [43.10991117346646, -87.95161063200395]
Parsed: 5256 N TEUTONIA AV, Milwaukee, WI - [43.1129616038293, -87.94999987753627]
Parsed: 5062 N 39TH ST, Milwaukee, WI - [43.10961283236193, -87.96140536078269]
Parsed: 3002 W SILVER SPRING DR, Milwaukee, WI - [43.11937143883067, -87.95184354948391]
Parsed: 4371 N 29TH ST, Milwaukee, WI - [43.096742947544584, -87.94931007372014]
Parsed: 1937 W HOPE

Parsed: 5600 W BURLEIGH ST, Milwaukee, WI - [43.0754108749512, -87.98311907123299]
Parsed: 3618 N 63RD ST, Milwaukee, WI - [43.08397171717931, -87.99085645181508]
Parsed: 2625 N 53RD ST, Milwaukee, WI - [43.06673716763805, -87.9798905809335]
Parsed: 6000 W MAIN ST, Milwaukee, WI - [43.025973504327844, -87.98765399999999]
Parsed: 1030 N 46TH ST, Milwaukee, WI - [43.04289541326679, -87.97098387131426]
Parsed: 7301 W NASH ST, Milwaukee, WI - [43.08440849913744, -88.00438193363932]
Parsed: 5120 W CENTER ST, Milwaukee, WI - [43.06800951875454, -87.97819147446477]
Parsed: 520 N 40TH ST, Milwaukee, WI - [43.03669045268045, -87.96425731845481]
Parsed: 588 S OAK PARK CT, Milwaukee, WI - [43.02646200000001, -87.98316162479058]
Parsed: 500 S 84TH ST, Milwaukee, WI - [43.02569908575227, -88.01717590230344]
Parsed: 6732 W FAIRVIEW AV, Milwaukee, WI - [43.03139646047075, -87.997122]
Parsed: 2860 N 74TH ST, Milwaukee, WI - [43.071109931444404, -88.00489937979704]
Parsed: 9250 W WISCONSIN AV, Milwauke

Parsed: 2057 S 14TH ST, Milwaukee, WI - [43.007285614338244, -87.92973109276667]
Parsed: 1670 S 11TH ST, Milwaukee, WI - [43.01274574136568, -87.92544788992879]
Parsed: 1670 S 11TH ST, Milwaukee, WI - [43.01274574136568, -87.92544788992879]
Parsed: 700 S 4TH ST, Milwaukee, WI - [43.02413665175822, -87.9153536517582]
Parsed: 100 W OREGON ST, Milwaukee, WI - [43.028412168431544, -87.91113316843153]
Parsed: 2481 S 6TH ST, Milwaukee, WI - [42.99972367055227, -87.91888955539827]
Parsed: 800 S 2ND ST, Milwaukee, WI - [43.02303266472387, -87.912606407958]
Parsed: 1209 S 18TH ST, Milwaukee, WI - [43.01878516763807, -87.93556856650677]
Parsed: 915 S 20TH ST, Milwaukee, WI - [43.021620167638076, -87.93809808482864]
Parsed: 2046 S 15TH PL, Milwaukee, WI - [43.00730013044608, -87.93223397735031]
Parsed: 1460 S MUSKEGO AV, Milwaukee, WI - [43.01600057588453, -87.93409120979305]
Parsed: 900 W HISTORIC MITCHELL ST, Milwaukee, WI - [43.012374148088995, -87.92278364808901]
Parsed: 1000 W GREENFIELD AV,

Parsed: 2460 S AUSTIN ST, Milwaukee, WI - [42.99974907799066, -87.90688394128351]
Parsed: 2309 E EUCLID AV, Milwaukee, WI - [42.987500876857695, -87.88127105743652]
Parsed: 2383 S WILLIAMS ST, Milwaukee, WI - [43.00084925541038, -87.8995131186497]


KeyboardInterrupt: 