# Part B - BI Dashboard and Information Visualization

In [None]:
# Download the fact and dimension tables as CSV files from the project's
# GitHub repository (raw files on the `main` branch). `-O <name>` saves each
# file under that name in the current working directory, overwriting any
# existing copy.
!wget -O CovidFactTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/CovidFactTable.csv
!wget -O CrimeFactTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/CrimeFactTable.csv
!wget -O IncidentTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/IncidentTable.csv
!wget -O LocationTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/LocationTable.csv
!wget -O NeighbourhoodTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/NeighbourhoodTable.csv
!wget -O OccurrenceTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/OccurrenceTable.csv
!wget -O PremiseTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/PremiseTable.csv
!wget -O ReportedTable.csv https://raw.githubusercontent.com/zhrmnch/CSI4142_Phase3/main/tables/ReportedTable.csv

In [53]:
import pandas as pd
# Load every table into its own DataFrame.
# The paths are relative on purpose: the download cell's `wget -O <name>.csv`
# calls save into the current working directory, so reading from there works
# both on Colab (where the working directory is /content) and anywhere else,
# instead of only where a `/content/` directory exists.

# Fact tables
crime_fact_table = pd.read_csv('CrimeFactTable.csv')
covid_fact_table = pd.read_csv('CovidFactTable.csv')

# Dimension tables
premise = pd.read_csv('PremiseTable.csv')
incident = pd.read_csv('IncidentTable.csv')
location = pd.read_csv('LocationTable.csv')
occurrence_date = pd.read_csv('OccurrenceTable.csv')
reported_date = pd.read_csv('ReportedTable.csv')
neighbourhood = pd.read_csv('NeighbourhoodTable.csv')

In [54]:
import pandas as pd
import plotly.express as px

# Inner-join (pandas' default) each crime fact row to its neighbourhood row.
merged_df = crime_fact_table.merge(neighbourhood, on='neighbourhood_PK')

# One row per neighbourhood name, with the number of joined crime rows.
grouped_df = (
    merged_df
    .groupby('Neighbourhood')
    .size()
    .reset_index(name='crime_count')
)

fig = px.bar(
    grouped_df,
    x='Neighbourhood',
    y='crime_count',
    title='Number of Crimes per Neighbourhood',
)

fig.show()

In [55]:
import pandas as pd
import plotly.express as px

# Merge the dataframes
# Inner-join (pandas' default) each crime fact row to its incident row.
merged_df = crime_fact_table.merge(incident, on='incident_PK')

# One row per MCI category, with the number of joined crime rows.
grouped_df = (
    merged_df
    .groupby('mci_category')
    .size()
    .reset_index(name='crime_count')
)

fig = px.bar(
    grouped_df,
    x='mci_category',
    y='crime_count',
    title='Number of Crimes by MCI Category',
)

fig.show()

In [56]:
import pandas as pd
import plotly.express as px

# Merge the dataframes
# Join each crime fact row to its occurrence-date row and parse the timestamp.
merged_df = pd.merge(crime_fact_table, occurrence_date, on='occurence_PK')
merged_df['occurrencedate'] = pd.to_datetime(merged_df['occurrencedate'])

# Group by month and year.
# Periods cannot carry a timezone, so calling to_period() on tz-aware
# timestamps emits "Converting to PeriodArray/Index representation will drop
# timezone information." Strip the timezone explicitly first;
# tz_localize(None) keeps the local wall-clock time, so each row still lands
# in the same calendar month. The stored 'occurrencedate' column is left as
# parsed.
occurrence_ts = merged_df['occurrencedate']
if occurrence_ts.dt.tz is not None:
    occurrence_ts = occurrence_ts.dt.tz_localize(None)
merged_df['year_month'] = occurrence_ts.dt.to_period('M')
grouped_df = merged_df.groupby('year_month').size().reset_index(name='crime_count')

# Period values are converted to plain 'YYYY-MM' strings for the x axis.
grouped_df['year_month'] = grouped_df['year_month'].astype(str)

# Create the line chart
fig = px.line(grouped_df, x='year_month', y='crime_count', title='Number of Crimes per Month over the Years')

fig.show()


Converting to PeriodArray/Index representation will drop timezone information.



In [58]:
import pandas as pd
import plotly.express as px

# Merge the dataframes
# Chain the two inner joins: crime facts -> location -> neighbourhood.
merged_df = (
    crime_fact_table
    .merge(location, on='location_PK')
    .merge(neighbourhood, on='neighbourhood_PK')
)

# Count crime rows for every distinct (neighbourhood, latitude, longitude).
grouped_df = (
    merged_df
    .groupby(['Neighbourhood', 'Latitude', 'Longitude'])
    .size()
    .reset_index(name='crime_count')
)

# Bubble map: one marker per group, with both marker size and colour driven
# by the crime count.
fig = px.scatter_mapbox(
    grouped_df,
    lat='Latitude',
    lon='Longitude',
    hover_name='Neighbourhood',
    size='crime_count',
    color='crime_count',
    color_continuous_scale='Viridis',
    mapbox_style='carto-positron',
    zoom=10,
    title='Number of Crimes by Neighborhood',
)

fig.show()

In [60]:
import pandas as pd
import plotly.express as px

# Merge the dataframes
# Join each COVID fact row to its neighbourhood row.
merged_df = pd.merge(covid_fact_table, neighbourhood, on='neighbourhood_PK')

# Total cases per neighbourhood and calendar month.
# The 'Cases' column is selected *before* summing: `.sum('Cases')` does not
# select a column -- the first positional parameter of GroupBy.sum is
# `numeric_only`, so the string is only read as a truthy flag and every
# numeric column (surrogate keys included) gets summed.
grouped_df = (
    merged_df
    .groupby(['Neighbourhood', 'year', 'month'])['Cases']
    .sum()
    .reset_index()
)

# x-axis label of the form '<year>-<month>', with the month left-padded to
# two characters.
grouped_df['month_year'] = grouped_df['year'].astype(str) + '-' + grouped_df['month'].astype(str).str.zfill(2)

# Create the bar chart (one colour per neighbourhood)
fig = px.bar(grouped_df,
             x='month_year',
             y='Cases',
             color='Neighbourhood',
             title='Number of COVID-19 Cases per Month by Neighborhood',
             labels={'month_year': 'Month-Year', 'Cases': 'COVID-19 Cases'})

fig.show()


In [61]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from sklearn.linear_model import LinearRegression

# Merge crime_fact_table with location and neighbourhood dataframes
crime_merged_df = pd.merge(crime_fact_table, location, on='location_PK')
crime_merged_df = pd.merge(crime_merged_df, neighbourhood, on='neighbourhood_PK')

# Number of crime rows per neighbourhood
crime_grouped_df = crime_merged_df.groupby(['neighbourhood_PK', 'Neighbourhood']).size().reset_index(name='crime_count')

# Total COVID-19 cases per neighbourhood.
# 'Cases' is selected before summing: `.sum('Cases')` would pass the string as
# GroupBy.sum's positional `numeric_only` flag and sum every numeric column
# instead of picking this one.
covid_merged_df = pd.merge(covid_fact_table, neighbourhood, on='neighbourhood_PK')
covid_grouped_df = (
    covid_merged_df
    .groupby(['neighbourhood_PK', 'Neighbourhood'])['Cases']
    .sum()
    .reset_index()
)

# One row per neighbourhood present in both aggregates (inner join)
merged_df = pd.merge(crime_grouped_df, covid_grouped_df, on=['neighbourhood_PK', 'Neighbourhood'])

# Linear regression: cases as a function of crime count.
# X stays a one-column DataFrame because scikit-learn expects 2-D features.
X = merged_df[['crime_count']]
y = merged_df['Cases']

model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)

fig = px.scatter(merged_df,
                 x='crime_count',
                 y='Cases',
                 hover_name='Neighbourhood',
                 title='Crime Incidents vs. COVID-19 Cases by Neighborhood')

# Add linear regression line (the fitted values evaluated at the observed x's)
fig.add_trace(go.Scatter(x=merged_df['crime_count'],
                         y=y_pred,
                         mode='lines',
                         name='Linear Regression Line',
                         line=dict(color='red', width=2)))

fig.show()

In [62]:
# Pearson correlation between crime counts and COVID-19 case totals.
# Expects `merged_df` to still hold the frame with 'crime_count' and 'Cases'
# columns from the regression cell.
correlation = merged_df['crime_count'].corr(
    merged_df['Cases'],
    method='pearson',  # pandas' default, spelled out to match the printed label
)
print(f"Pearson correlation coefficient: {correlation:.2f}")

Pearson correlation coefficient: 0.72


In [64]:
import pandas as pd
import plotly.express as px

# Chain the inner joins: crime facts -> premise -> location -> neighbourhood.
# NOTE(review): only 'premises_type' is used below; the location and
# neighbourhood joins add no column this cell reads but, being inner joins,
# they do drop crime rows without a match -- confirm that is intended.
crime_merged_df = (
    crime_fact_table
    .merge(premise, on='premise_PK')
    .merge(location, on='location_PK')
    .merge(neighbourhood, on='neighbourhood_PK')
)

# Number of joined crime rows for each premises type.
crime_by_location_type = (
    crime_merged_df
    .groupby('premises_type')
    .size()
    .reset_index(name='crime_count')
)

fig = px.bar(
    crime_by_location_type,
    x='premises_type',
    y='crime_count',
    title='Number of Crimes by Location Type',
)

fig.show()

In [65]:
import pandas as pd
import plotly.express as px

# Chain the inner joins: crime facts -> location -> neighbourhood.
crime_merged_df = (
    crime_fact_table
    .merge(location, on='location_PK')
    .merge(neighbourhood, on='neighbourhood_PK')
)

# Crime rows per neighbourhood, keyed by both surrogate key and name.
crime_by_neighbourhood = (
    crime_merged_df
    .groupby(['neighbourhood_PK', 'Neighbourhood'])
    .size()
    .reset_index(name='crime_count')
)

# Keep the ten neighbourhoods with the highest counts, largest first.
top_crime_neighbourhoods = (
    crime_by_neighbourhood
    .sort_values('crime_count', ascending=False)
    .head(10)
)

fig = px.bar(
    top_crime_neighbourhoods,
    x='Neighbourhood',
    y='crime_count',
    title='Top 10 Crime-prone Neighborhoods',
)

# Tilt the tick labels so the neighbourhood names do not overlap.
fig.update_xaxes(tickangle=-45)

fig.show()


In [66]:
import pandas as pd
import plotly.graph_objects as go

# Monthly totals: crime rows per (year, month) and COVID-19 cases per (year, month).
crime_merged_df = pd.merge(crime_fact_table, occurrence_date, on='occurence_PK')
crime_monthly = crime_merged_df.groupby(['occurrenceyear', 'occurrencemonth']).size().reset_index(name='crime_count')
covid_monthly = covid_fact_table.groupby(['year', 'month']).agg({'Cases': 'sum'}).reset_index()

# Parse '<month>-<year>' into real timestamps for the x axis.
crime_dates = pd.to_datetime(crime_monthly['occurrencemonth'].astype(str) + '-' + crime_monthly['occurrenceyear'].astype(str))
covid_dates = pd.to_datetime(covid_monthly['month'].astype(str) + '-' + covid_monthly['year'].astype(str))

# A 'lines' trace connects points in row order. The rows above are ordered by
# the raw (year, month) group keys, which is chronological only when the month
# is stored as a number (month names or digit strings sort alphabetically).
# Ordering by the parsed dates guarantees a left-to-right line either way and
# is a no-op when the rows are already chronological.
crime_order = crime_dates.sort_values().index
covid_order = covid_dates.sort_values().index

fig = go.Figure()

fig.add_trace(go.Scatter(x=crime_dates.loc[crime_order],
                         y=crime_monthly.loc[crime_order, 'crime_count'],
                         mode='lines+markers',
                         name='Crimes'))

fig.add_trace(go.Scatter(x=covid_dates.loc[covid_order],
                         y=covid_monthly.loc[covid_order, 'Cases'],
                         mode='lines+markers',
                         name='COVID-19 Cases'))

fig.update_layout(title='Monthly Crime vs. COVID-19 Cases',
                  xaxis_title='Month',
                  yaxis_title='Count',
                  legend_title='Data')

fig.show()

In [67]:
import pandas as pd
import plotly.graph_objects as go

# Chain the inner joins: crime facts -> occurrence date -> premise -> neighbourhood.
# NOTE(review): no occurrence-date column is read below; that join only
# affects the result by dropping crime rows without a match -- confirm it is
# intended.
crime_merged_df = (
    crime_fact_table
    .merge(occurrence_date, on='occurence_PK')
    .merge(premise, on='premise_PK')
    .merge(neighbourhood, on='neighbourhood_PK')
)

# Crime rows per (neighbourhood, premises type).
crime_by_location_type = (
    crime_merged_df
    .groupby(['Neighbourhood', 'premises_type'])
    .size()
    .reset_index(name='crime_count')
)

# Total COVID-19 cases per neighbourhood key, then attach the neighbourhood name.
covid_by_neighbourhood = (
    covid_fact_table
    .groupby(['neighbourhood_PK'])
    .agg({'Cases': 'sum'})
    .reset_index()
)
covid_merged_df = covid_by_neighbourhood.merge(neighbourhood, on='neighbourhood_PK')

fig = go.Figure()

# One bar trace per premises type. sort=False makes the groups come out in
# order of first appearance, so the traces are added in the same order as
# iterating over the column's unique() values.
for premises_type, premises_rows in crime_by_location_type.groupby('premises_type', sort=False):
    fig.add_trace(
        go.Bar(
            x=premises_rows['Neighbourhood'],
            y=premises_rows['crime_count'],
            name=f'Crimes ({premises_type})',
        )
    )

# COVID-19 totals as one extra, semi-transparent black series.
fig.add_trace(
    go.Bar(
        x=covid_merged_df['Neighbourhood'],
        y=covid_merged_df['Cases'],
        name='COVID-19 Cases',
        marker_color='rgba(0, 0, 0, 0.5)',
    )
)

fig.update_layout(
    title='Crime by Location Type vs. COVID-19 Cases',
    xaxis_title='Neighbourhood',
    yaxis_title='Count',
    legend_title='Data',
    barmode='group',
)

fig.show()