## Import all the required libraries

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import VotingClassifier,AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.utils import resample
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.inspection import DecisionBoundaryDisplay


---

The goal of the analysis is to generate a Dash app showing the year with maximum crimes for each Area

### Step 1: Read the `Crime_Data_from_2020_to_Present_20240611 clean.csv` data from the `/content/` folder into a Pandas DataFrame.

In [3]:
from google.colab import files
uploaded = files.upload()

Saving Crime_Data_from_2020_to_Present_20240611.csv to Crime_Data_from_2020_to_Present_20240611.csv


In [4]:
# Read the CSV file from the Resources folder into a Pandas DataFrame

crime_df = pd.read_csv(
    Path("/content/Crime_Data_from_2020_to_Present_20240611.csv")

)

# Review the DataFrame
crime_df.head()
# Review the DataFrame


Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,190326475,3/1/2020 0:00,3/1/2020 0:00,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,AA,Adult Arrest,510.0,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506
1,200106753,2/9/2020 0:00,2/8/2020 0:00,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,IC,Invest Cont,330.0,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628
2,200320258,11/11/2020 0:00,11/4/2020 0:00,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,IC,Invest Cont,480.0,,,,1400 W 37TH ST,,34.021,-118.3002
3,200907217,5/10/2023 0:00,3/10/2020 0:00,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,IC,Invest Cont,343.0,,,,14000 RIVERSIDE DR,,34.1576,-118.4387
4,220614831,8/18/2022 0:00,8/17/2020 0:00,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,IC,Invest Cont,354.0,,,,1900 TRANSIENT,,34.0944,-118.3277


#### Creating categorical variable list

In [5]:
#Generate our categorical variable list
crime_cat = crime_df.dtypes[crime_df.dtypes == "object"].index.tolist()

In [6]:
#Check the number of unique columns in each column
crime_df[crime_cat].nunique()

Date Rptd         1609
DATE OCC          1609
AREA NAME           21
Crm Cd Desc        139
Mocodes         306996
Vict Sex             5
Vict Descent        20
Premis Desc        306
Weapon Desc         79
Status               6
Status Desc          6
LOCATION         65757
Cross Street     10181
dtype: int64

## STEP 2: Data Cleansing - The data is cleaned, normalized, and standardized


### To handle the huge crime data and we categorized into the handful crime bins


In [7]:
from google.colab import files
uploaded = files.upload()

Saving crime_bins.csv to crime_bins.csv


In [8]:

# Read the CSV file from the Resources folder into a Pandas DataFrame

crimebin_df = pd.read_csv(
    Path("/content/crime_bins.csv")

)

# Review the DataFrame
crimebin_df.head()
# Review the DataFrame

Unnamed: 0,Crm Cd Desc,CRIMEBIN
0,ARSON,VANDALISM
1,ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER,VIOLENT
2,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",VIOLENT
3,ATTEMPTED ROBBERY,ROBBERY
4,BATTERY - SIMPLE ASSAULT,VIOLENT


In [9]:
# Merge the actual data with the crime bins.
merged_df = pd.merge(crime_df, crimebin_df, on='Crm Cd Desc', how='inner')
merged_df

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,CRIMEBIN
0,190326475,3/1/2020 0:00,3/1/2020 0:00,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,Adult Arrest,510.0,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506,VEHICLE
1,221008844,5/6/2022 0:00,11/1/2020 0:00,130,10,West Valley,1029,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,VALJEAN ST,VANOWEN AV,34.1939,-118.4859,VEHICLE
2,200412582,9/9/2020 0:00,9/9/2020 0:00,630,4,Hollenbeck,413,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,200 E AVENUE 28,,34.0820,-118.2130,VEHICLE
3,201810154,4/26/2020 0:00,4/22/2020 0:00,1900,18,Southeast,1802,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,90TH,WALL,33.9547,-118.2717,VEHICLE
4,231510293,5/27/2023 0:00,11/24/2020 0:00,200,15,N Hollywood,1504,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,7500 LAUREL CANYON BL,,34.2071,-118.3965,VEHICLE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947678,231008207,4/20/2023 0:00,11/1/2022 0:00,1900,10,West Valley,1077,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,Adult Arrest,445.0,,,,17300 VENTURA BL,,34.1608,-118.5098,theft
947679,231507120,3/16/2023 0:00,3/12/2023 0:00,2015,15,N Hollywood,1533,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,Invest Cont,445.0,,,,6000 LAUREL CANYON BL,,34.1794,-118.3965,theft
947680,241708301,4/13/2024 0:00,10/30/2023 0:00,2130,17,Devonshire,1782,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,Invest Cont,445.0,,,,8800 CORBIN AV,,34.2302,-118.5623,theft
947681,241707918,4/2/2024 0:00,1/11/2024 0:00,1959,17,Devonshire,1782,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,Invest Cont,445.0,,,,8800 CORBIN AV,,34.2302,-118.5623,theft


In [10]:
# Cleanse the time related data and add day of the week feature for improve the accuracy
merged_df['DATE OCC'] = pd.to_datetime(merged_df['DATE OCC'])
merged_df['year'] = merged_df['DATE OCC'].dt.year
merged_df['month'] = merged_df['DATE OCC'].dt.month
merged_df['day_of_week'] = merged_df['DATE OCC'].dt.day_name()
merged_df

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,CRIMEBIN,year,month,day_of_week
0,190326475,3/1/2020 0:00,2020-03-01,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,,,1900 S LONGWOOD AV,,34.0375,-118.3506,VEHICLE,2020,3,Sunday
1,221008844,5/6/2022 0:00,2020-11-01,130,10,West Valley,1029,1,510,VEHICLE - STOLEN,...,,,VALJEAN ST,VANOWEN AV,34.1939,-118.4859,VEHICLE,2020,11,Sunday
2,200412582,9/9/2020 0:00,2020-09-09,630,4,Hollenbeck,413,1,510,VEHICLE - STOLEN,...,,,200 E AVENUE 28,,34.0820,-118.2130,VEHICLE,2020,9,Wednesday
3,201810154,4/26/2020 0:00,2020-04-22,1900,18,Southeast,1802,1,510,VEHICLE - STOLEN,...,,,90TH,WALL,33.9547,-118.2717,VEHICLE,2020,4,Wednesday
4,231510293,5/27/2023 0:00,2020-11-24,200,15,N Hollywood,1504,1,510,VEHICLE - STOLEN,...,,,7500 LAUREL CANYON BL,,34.2071,-118.3965,VEHICLE,2020,11,Tuesday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947678,231008207,4/20/2023 0:00,2022-11-01,1900,10,West Valley,1077,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,,,17300 VENTURA BL,,34.1608,-118.5098,theft,2022,11,Tuesday
947679,231507120,3/16/2023 0:00,2023-03-12,2015,15,N Hollywood,1533,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,,,6000 LAUREL CANYON BL,,34.1794,-118.3965,theft,2023,3,Sunday
947680,241708301,4/13/2024 0:00,2023-10-30,2130,17,Devonshire,1782,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,,,8800 CORBIN AV,,34.2302,-118.5623,theft,2023,10,Monday
947681,241707918,4/2/2024 0:00,2024-01-11,1959,17,Devonshire,1782,1,445,DISHONEST EMPLOYEE ATTEMPTED THEFT,...,,,8800 CORBIN AV,,34.2302,-118.5623,theft,2024,1,Thursday


In [11]:
# Build data by Year Area and count the crime stats based on the bins
merged_df
filtered_df = merged_df['AREA NAME']

In [12]:
merged_df.columns

Index(['DR_NO', 'Date Rptd', 'DATE OCC', 'TIME OCC', 'AREA', 'AREA NAME',
       'Rpt Dist No', 'Part 1-2', 'Crm Cd', 'Crm Cd Desc', 'Mocodes',
       'Vict Age', 'Vict Sex', 'Vict Descent', 'Premis Cd', 'Premis Desc',
       'Weapon Used Cd', 'Weapon Desc', 'Status', 'Status Desc', 'Crm Cd 1',
       'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'LOCATION', 'Cross Street', 'LAT',
       'LON', 'CRIMEBIN', 'year', 'month', 'day_of_week'],
      dtype='object')

In [13]:
dash_df = merged_df[['year','AREA NAME','CRIMEBIN']]
dash_df

Unnamed: 0,year,AREA NAME,CRIMEBIN
0,2020,Wilshire,VEHICLE
1,2020,West Valley,VEHICLE
2,2020,Hollenbeck,VEHICLE
3,2020,Southeast,VEHICLE
4,2020,N Hollywood,VEHICLE
...,...,...,...
947678,2022,West Valley,theft
947679,2023,N Hollywood,theft
947680,2023,Devonshire,theft
947681,2024,Devonshire,theft


In [14]:
c_df = dash_df.groupby(["year","AREA NAME"]).size().reset_index(name='count')
c_df.rename(columns = {'AREA NAME': 'AREA'} ,inplace = True)

c_df

Unnamed: 0,year,AREA,count
0,2020,77th Street,13329
1,2020,Central,11599
2,2020,Devonshire,7976
3,2020,Foothill,7103
4,2020,Harbor,8868
...,...,...,...
100,2024,Topanga,3324
101,2024,Van Nuys,3431
102,2024,West LA,2921
103,2024,West Valley,3323


In [15]:
#Install Dash
!pip install Dash

Collecting Dash
  Downloading dash-2.17.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from Dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from Dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from Dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from Dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, retrying, Dash
Successfully installed Dash-2.17.1 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 retrying-1.3.4


In [16]:
#crime_req_df
from dash import Dash, html, dcc, callback, Output, Input
import plotly.express as px
import pandas as pd

app = Dash()
app.layout = [html.H1(children='Crime Graph ' ,style={'textAlign':'center'}),
             dcc.Dropdown(c_df.AREA.unique(),'Central',id='dropdown-selection'),
             dcc.Graph(id='graph-content')]
@callback(
    Output(component_id='graph-content',component_property='figure'),
    Input(component_id='dropdown-selection',component_property='value')
)
def update_graph(value):
    fig = c_df[c_df.AREA==value]
    return px.bar(fig,x='year',y='count')
if __name__ == '__main__':
    app.run(debug=True,port=8051)

<IPython.core.display.Javascript object>