This notebook takes the raw data found in multiple files within the "raw" folder and places them all into one file for a given month. The data is separated into motor vehicle theft and theft and finally geocoded so that it can be used in the map construction.

In [1]:
import pandas as pd
import os
import geocoder
import requests

In [2]:
for folder in os.listdir('./raw'):
    file_list = []
    for file in os.listdir('./raw/' + folder):
        if(file.endswith('.xls')):
            data = pd.read_excel('./raw/' + folder + '/' + file)
            file_list.append(data)
   
    df = pd.concat(file_list, ignore_index=True)
    
    columns = ['Incident Number','Date', 'Time', 'Police District','Offense 1',
          'Offense 2', 'Offense 3', 'Offense 4', 'Offense 5', 'Location']
    df.columns = columns
    
    df = df[df['Offense 1'].notnull()]
    df = df[df['Location'].notnull()]
    
    count_1 = df['Offense 1'].value_counts()
    count_2 = df['Offense 2'].value_counts()
    count_3 = df['Offense 3'].value_counts()
    count_4 = df['Offense 4'].value_counts()
    count_5 = df['Offense 5'].value_counts()

    counts = [count_1, count_2, count_3, count_4, count_5]
    for count in counts:
        if count.empty:
            df = df.drop(count.name, axis = 1)
            
    df.to_csv('./raw/' + folder + 'Complete.csv', sep=';', index = False)

In [3]:
def address_to_latlong(address):
    '''Returns the latitude and longitude for a given address'''
    address += ", Milwaukee, WI"
    location = geocoder.arcgis(address, session = session)
    
    if not location.latlng:
        print("Couldn't Parse: "  + address )
    else:
        print(f'Parsed: {address} - {location.latlng}')
    return location.latlng

In [4]:
for file in os.listdir('./raw/'):
    if(file.endswith('.csv')):
        df = pd.read_csv('./raw/'+ file, sep =';')
    
        zipped = zip(df['Offense 1'].items(), df['Offense 2'].items(), 
        df['Offense 3'].items())
        keyword = 'theft'
        
        for off_1, off_2, off_3 in zipped:
            if(not keyword in off_1[1].lower() and
              (pd.isnull(off_2[1]) or not keyword in off_2[1].lower()) and
              (pd.isnull(off_3[1]) or not keyword in off_3[1].lower())):
                df = df.drop(off_1[0])
 
        df['Address'] = df['Location'].copy()
            
        theft = df.copy()
        m_theft = df.copy()
        
        zipped = zip(df['Offense 1'].items(), df['Offense 2'].items(), 
        df['Offense 3'].items())
        keyword = 'motor vehicle theft'
        
        for off_1, off_2, off_3 in zipped:
            if(not keyword in off_1[1].lower() and
                (pd.isnull(off_2[1]) or not keyword in off_2[1].lower()) and
                (pd.isnull(off_3[1]) or not keyword in off_3[1].lower())):
                m_theft = m_theft.drop(off_1[0])
            else:
                theft = theft.drop(off_1[0])
                
        
        with requests.Session() as session:
            # This is likely to be very slow
            # depending on the geocoding service used
            theft['Location'] = theft['Location'].apply(address_to_latlong)
            m_theft['Location'] = m_theft['Location'].apply(address_to_latlong)
            
        theft.to_csv('./' + 'theft_' + file.split('Complete.csv')[0] + '.csv', sep=';', index = False)
        m_theft.to_csv('./' + 'm_theft_' + file.split('Complete.csv')[0] + '.csv', sep=';', index = False)
        print(f'Finished {file}')

Parsed: 5173 N 35TH ST, Milwaukee, WI - [43.11140483819031, -87.9564576503258]
Parsed: 5928 N 39TH ST, Milwaukee, WI - [43.125452161809676, -87.96091583524746]
Parsed: 4235 N 17TH ST, Milwaukee, WI - [43.09467244463039, -87.93192860646873]
Parsed: 3000 W CAPITOL DR, Milwaukee, WI - [43.0899075337582, -87.95081216472387]
Parsed: 4032 N 12TH ST, Milwaukee, WI - [43.09019857507647, -87.92577444460173]
Parsed: 6180 N TEUTONIA AV, Milwaukee, WI - [43.12936099491562, -87.9521672837547]
Parsed: 2222 W CAPITOL DR, Milwaukee, WI - [43.08981144753335, -87.94030110857977]
Parsed: 2000 W ROOSEVELT DR, Milwaukee, WI - [43.09782200279864, -87.9371658475098]
Parsed: 4951 N SHERMAN BL, Milwaukee, WI - [43.107615199001685, -87.96659907372015]
Parsed: 6024 N 42ND ST, Milwaukee, WI - [43.12704290105549, -87.96469504041275]
Parsed: 4356 N 25TH ST, Milwaukee, WI - [43.096354575624716, -87.9440838122224]
Parsed: 6852 N 42ND ST, Milwaukee, WI - [43.142444800998334, -87.96437590074464]
Parsed: 5935-A N 36TH S

Parsed: 3300 S 39TH ST, Milwaukee, WI - [42.98392741326677, -87.96300245340255]
Parsed: 3355 S 27TH ST, Milwaukee, WI - [42.98489091248832, -87.94843751889785]
Parsed: 2339 S 43RD ST, Milwaukee, WI - [43.00193722009348, -87.96776859045453]
Parsed: 1931 S 14TH ST, Milwaukee, WI - [43.009326366639726, -87.92963157372014]
Parsed: 1922 S 8TH ST, Milwaukee, WI - [43.00958713627446, -87.921336407958]
Parsed: 2557 S 12TH ST, Milwaukee, WI - [42.997905031363615, -87.92735007372015]
Parsed: 1725 S 12TH ST, Milwaukee, WI - [43.01168734836635, -87.92701084836635]
Parsed: 539 W VIRGINIA ST, Milwaukee, WI - [43.02632648845881, -87.91798066763806]
Parsed: 539 W VIRGINIA ST, Milwaukee, WI - [43.02632648845881, -87.91798066763806]
Parsed: 818 S WATER ST, Milwaukee, WI - [43.0222021306118, -87.90711142535162]
Parsed: 209 W ORCHARD ST, Milwaukee, WI - [43.01597451788916, -87.91300366763807]
Parsed: 1559 S 14TH ST, Milwaukee, WI - [43.01469150874257, -87.92938900822297]
Parsed: 1433 W BURNHAM ST, Milwauk

KeyboardInterrupt: 