# Detective monkeys

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
from pathlib import Path
import json


In [None]:
# Load the precinct boundaries (GeoJSON) into a plain Python object.
# You might need to change this path depending on where and how you are running this code.
geojsonPath = Path.cwd()/'limitespdq.geojson'
# Read inside a `with` block so the file handle is closed as soon as parsing is done,
# and decode explicitly as UTF-8 (the encoding GeoJSON is specified in) instead of
# relying on the platform default encoding.
with open(geojsonPath, "r", encoding="utf-8") as geojson_file:
    geojson = json.load(geojson_file) # The GeoJSON data contains the boundaries of the different precincts.

# Load the criminal logs into a pandas dataframe.
# You will probably need to manipulate the dataframe to get something good from it :D
criminalLogsPath = Path.cwd()/'actes-criminels.csv'
df = pd.read_csv(criminalLogsPath)

In [None]:
# Read the crime log from the working directory; the annotation tells editors that
# `df` is a DataFrame.
df: pd.DataFrame = pd.read_csv(filepath_or_buffer='actes-criminels.csv')

In [None]:
# Preview the first five rows of the crime log
df.iloc[:5]

In [None]:
# Summary statistics of the data, with the quartiles spelled out explicitly
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:

# Which crime category is the most common?
# Categories are listed from the most to the least frequent.
df['CATEGORIE'].value_counts(sort=True, ascending=False)

In [None]:


# Number of recorded incidents per precinct (PDQ), busiest precinct first.
# Incidents without a PDQ are removed before counting.
crime_freq = (
    df[df['PDQ'].notna()]
    .groupby('PDQ')
    .agg({'CATEGORIE': 'count'})
    .sort_values(by='CATEGORIE', ascending=False)
    .rename(columns={'CATEGORIE': 'crime_count'})
)

# Attach each precinct's total to every incident row; the inner join keeps only
# incidents whose PDQ appears in crime_freq.
combined_table = df.merge(crime_freq, on="PDQ", how='inner')

# One row per precinct (the first incident row of each PDQ), highest PDQ number first.
crime_combined_table = (
    combined_table
    .drop_duplicates(subset=['PDQ'])
    .sort_values(by='PDQ', ascending=False)
)

crime_combined_table

In [None]:
## Crime-frequency choropleth: one coloured polygon per precinct

# Plot from the one-row-per-precinct table. `crime_count` is the same for every row of
# a given PDQ, so passing the incident-level `combined_table` would only repeat each
# precinct once per incident and inflate the figure without changing any colour.
fig = px.choropleth_mapbox(crime_combined_table, geojson=geojson,
                           color = "crime_count", # Might need to change depending on what you want to measure
                           locations="PDQ", featureidkey="properties.PDQ", # match the PDQ column to each feature's properties.PDQ
                           center={'lat': 45.508888, 'lon': -73.561668}, # Can change the coordinates to make city more centered
                           mapbox_style="carto-positron", # Another option is called "open-street-map" :o
                           zoom=9
                           )

# .update_layout() updates the layout of the figure (includes all non-data components of the visualization)
# We are just setting the plot to take up the entire available space, with no padding or spacing around the edges.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})

# Let's see our graph in action!
fig.show()

In [None]:
# Which police precincts (PDQ) received the most or the fewest crime complaints?
# Per precinct: the number of DATE entries plus the smallest and largest DATE value,
# sorted so the precincts with the most entries come first.
(
    df.groupby('PDQ')['DATE']
    .agg(['count', 'min', 'max'])
    .sort_values(by='count', ascending=False)
)


# Statistical analysis: can the precinct (PDQ) be predicted from the crime category?

In [None]:
from scipy.stats import chi2_contingency

# Incidents without a precinct cannot be placed in the CATEGORIE x PDQ table
df_nona = df[df["PDQ"].notna()]

# Contingency table: one row per crime category, one column per precinct
ct = pd.crosstab(index=df_nona["CATEGORIE"], columns=df_nona["PDQ"])

# Unpacks to: test statistic, p-value, degrees of freedom, expected frequencies
c, p, d, e = chi2_contingency(ct)
print("Chi2-statistics: ", c, sep="")
print("P-value: ", p, sep="")

In [None]:
import pyproj

# Coordinate reference systems: WGS84 (EPSG:4326) as the source,
# MTM8 with SRID 2950 (EPSG:2950) as the target
src_crs, tgt_crs = pyproj.CRS("EPSG:4326"), pyproj.CRS("EPSG:2950")

# Transformer converting source-CRS coordinates into target-CRS coordinates
transformer = pyproj.Transformer.from_crs(
    crs_from=src_crs,
    crs_to=tgt_crs,
    always_xy=True,
)

def trans(lon, lat):
    """Convert a longitude/latitude pair with the module-level ``transformer``.

    Args:
        lon: Longitude in the transformer's source CRS.
        lat: Latitude in the transformer's source CRS.

    Returns:
        Whatever ``transformer.transform(lon, lat)`` returns (the converted coordinates).
    """
    projected = transformer.transform(lon, lat)
    return projected

In [None]:
from shapely.geometry import Point, MultiPolygon, Polygon, shape
import random

# Map each precinct number (PDQ) to its boundary geometry.
# shape() builds the geometry from the complete GeoJSON geometry object, so every
# polygon of a multi-part precinct (and any interior ring) is kept. Indexing
# coordinates[0][0] kept only the outer ring of the first polygon of each feature.
d = {i['properties']['PDQ'] : shape(i["geometry"]) for i in geojson['features']}

# Geometry of the first feature, kept as a standalone sample
multipolygon = shape(geojson['features'][0]["geometry"])

def gen_random_point(multipolygon):
    """Draw a uniformly random point that lies inside ``multipolygon``.

    Candidates are sampled from the geometry's bounding box and rejected until one
    is contained in the geometry, so the call only returns once a hit is found.

    Args:
        multipolygon: Geometry exposing ``bounds`` as (minx, miny, maxx, maxy) and
            a ``contains(point)`` method.

    Returns:
        Tuple ``(x, y)`` of the accepted point.
    """
    west, south, east, north = multipolygon.bounds

    def draw():
        # x is drawn before y, one bounding-box candidate per call
        return random.uniform(west, east), random.uniform(south, north)

    lon, lat = draw()
    while not multipolygon.contains(Point(lon, lat)):
        lon, lat = draw()
    return lon, lat

def fill_na_cord(d, df):
    """Fill missing coordinates in ``df`` in place with a random point inside the row's precinct.

    Every row whose ``LATITUDE`` is missing and whose ``PDQ`` is a key of ``d`` gets a
    point drawn by ``gen_random_point``; the point is written to ``LONGITUDE`` /
    ``LATITUDE`` and its ``trans`` conversion to ``X`` / ``Y``. Rows with a missing
    ``PDQ``, or with a ``PDQ`` that is not in ``d``, are left unchanged.

    Args:
        d: Mapping from integer PDQ number to the geometry handed to ``gen_random_point``.
        df: DataFrame with ``PDQ``, ``LATITUDE``, ``LONGITUDE``, ``X`` and ``Y`` columns.
            Modified in place.

    Returns:
        None.
    """
    # Select the rows to fill once, instead of null-checking every row inside the loop.
    # Excluding missing PDQ values also keeps int() below from raising on NaN.
    needs_fill = df["LATITUDE"].isna() & df["PDQ"].notna()
    for index, raw_pdq in df.loc[needs_fill, "PDQ"].items():
        pdq = int(raw_pdq)
        if pdq not in d:
            # No boundary known for this precinct: nothing to sample from
            continue
        lon, lat = gen_random_point(d[pdq])
        x, y = trans(lon, lat)
        df.at[index, "LONGITUDE"] = lon
        df.at[index, "LATITUDE"] = lat
        df.at[index, "X"] = x
        df.at[index, "Y"] = y
                

# Incidents with a known precinct, as an explicit copy: fill_na_cord writes into the
# frame it is given, so the writes land on an independent frame.
test = df.dropna(subset=["PDQ"]).copy()
# Seed the generator so the imputed coordinates are identical on every run.
random.seed(42)
fill_na_cord(d, test)
test


In [None]:
# Number of missing values left in each column after the coordinate fill
test.isna().sum()

In [None]:
import plotly.express as px

# One marker per incident at its longitude/latitude, coloured by crime category,
# drawn on an OpenStreetMap base layer.
fig = px.scatter_mapbox(
    data_frame=test,
    lat="LATITUDE",
    lon="LONGITUDE",
    color="CATEGORIE",
    mapbox_style="open-street-map",
    zoom=8,
)
fig.show()
    