## 1. Import data and libraries

In [1]:
import json
import os

import folium
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from shapely.geometry import shape

In [2]:
# This command prompts matplotlib visuals to appear in the notebook
%matplotlib inline

In [3]:
# Location of the GeoJSON file with the LAPD division boundaries.
# NOTE: absolute, machine-specific Windows path - adjust it before running elsewhere.
LAPD_geo = r'C:\Users\Yan Peng\LA Crimes\02 Data\Prepared Data\LAPD_Division_swapped.geojson'

In [4]:
# Project root folder; the crime data CSV is located relative to it.
# NOTE: absolute, machine-specific Windows path - adjust it before running elsewhere.
path = r'C:\Users\Yan Peng\LA Crimes'

In [5]:
# Read the checked crime records; the first CSV column is used as the row index
crime_csv_path = os.path.join(path, '02 Data', 'Prepared Data', 'crime_data_checked.csv')
df = pd.read_csv(crime_csv_path, index_col=0)

## 2. Data wrangling

In [6]:
# Inspect the column names of the loaded crime data
df.columns

Index(['case_number', 'date_reported', 'date_occurred', 'time_occurred',
       'area', 'area_name', 'reporting_distict', 'part_1_2_crimes',
       'crime_code', 'crime_code_description', 'Mocodes', 'victim_age',
       'victim_sex', 'victim_descent', 'premise_code', 'premise_description',
       'weapon_used_code', 'weapon_description', 'Status', 'Status Desc',
       'Crm Cd 1', 'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'location',
       'Cross Street', 'LAT', 'LON'],
      dtype='object')

In [7]:
# Count the records per division ('area_name') and turn the result into a
# two-column dataframe: 'division' and 'crime_counts'
df_div = (
    df['area_name']
    .value_counts()
    .rename_axis('division')
    .to_frame('crime_counts')
    .reset_index()
)

In [8]:
# Write out abbreviations and set all letters in uppercase to align with the names in the geojson file.
# The result is assigned back to the column explicitly: calling .replace(..., inplace=True)
# on a column selection is chained assignment, which newer pandas versions warn about and
# which no longer updates df_div once Copy-on-Write is enabled.
division_name_fixes = {
    'N Hollywood': 'North Hollywood',
    'West LA': 'West Los Angeles',
}
df_div['division'] = df_div['division'].replace(division_name_fixes).str.upper()

In [9]:
# Show the division names next to their crime counts
df_div

Unnamed: 0,division,crime_counts
0,CENTRAL,60123
1,77TH STREET,55450
2,PACIFIC,51520
3,SOUTHWEST,49598
4,HOLLYWOOD,46558
5,SOUTHEAST,44696
6,OLYMPIC,44621
7,NORTH HOLLYWOOD,44263
8,NEWTON,44214
9,WILSHIRE,42269


## 3. Plotting a choropleth

In [48]:
# Set up the folium base map.
# NOTE: the name `map` shadows Python's built-in map(); it is kept unchanged so that
# any other cell referring to this object keeps working.
center_coords = [34.33730429011874, -118.50380032313414]

map = folium.Map(location = center_coords, zoom_start = 12)

# Parse the GeoJSON once and reuse it for both the choropleth and the labels.
# The encoding is stated explicitly because GeoJSON is UTF-8, while open() would
# otherwise fall back to the platform default (e.g. cp1252 on Windows).
with open(LAPD_geo, 'r', encoding='utf-8') as f:
    geojson_data = json.load(f)

# Choropleth maps bind pandas dataframes and json geometries: every feature is coloured
# by the 'crime_counts' value of the row whose 'division' equals the feature's key.
folium.Choropleth(
    geo_data = geojson_data,
    data = df_div,
    columns = ['division', 'crime_counts'],
    key_on = 'feature.properties.APREC', # this part is very important - check your json file to see where the KEY is located
    fill_color = 'YlOrBr', fill_opacity=0.6, line_opacity=0.1,
    legend_name = "Number of crimes").add_to(map)
folium.LayerControl().add_to(map)

# Add the key label of every GeoJSON feature at the centroid of its geometry
for feature in geojson_data['features']:
    # shape() builds the matching shapely geometry from the GeoJSON geometry dict,
    # so features stored as MultiPolygon are handled as well as plain Polygons
    centroid = shape(feature['geometry']).centroid

    # GeoJSON coordinates are taken as (x, y) = (longitude, latitude); folium expects [lat, lon]
    lat, lon = centroid.y, centroid.x

    # Text-only marker: the DivIcon renders the division name directly on the map
    label = feature['properties']['APREC']
    folium.Marker([lat, lon],
                  popup=label,
                  icon=folium.DivIcon(html=f"""<div style="font-family: calibri; color: brown; font-size: 15px">{label}</div>""")
                 ).add_to(map)

map

### The map shows that the divisions Central, 77th Street, and Pacific are the areas with the highest crime counts (raw totals, not rates adjusted for population or area).