In [2]:
import os
from datetime import date, timedelta
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pymysql
from sodapy import Socrata
from sqlalchemy import create_engine, MetaData, Table, Column, Numeric, Integer, VARCHAR, update

In [44]:
# Build the SoQL `where` clause that limits the download to the last three days.
current_date = date.today()
window_start = current_date - timedelta(days=3)
# Format with millisecond precision: %f yields six digits, so the last three are trimmed.
window_start_iso = window_start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
# SoQL timestamp literals must be wrapped in single quotes; without them the
# clause is not a valid expression.
previous_date = "date > '" + window_start_iso + "'"
previous_date

'date > 2022-12-07T00:00:00.000'

# Connect to the Crime Statistics API on the City of Chicago Data Portal to download data

In [7]:
# The app token is read from the environment so it is not committed with the notebook.
# If SOCRATA_APP_TOKEN is unset, None is passed as the token.
# NOTE(review): sodapy is expected to fall back to anonymous, throttled access in that case — confirm.
client = Socrata("data.cityofchicago.org",
                 os.environ.get("SOCRATA_APP_TOKEN"))

In [45]:
# `get` returns only the first page of rows (the API applies a default row limit),
# so a busy window would be silently truncated. `get_all` pages through the whole
# result set; it is materialised into a list so later cells can reuse it.
results = list(client.get_all("ijzp-q8t2", where=previous_date))

HTTPError: 503 Server Error: Service Temporarily Unavailable

In [10]:
# Turn the downloaded records into a DataFrame and persist it as Crimes.csv.
# The default row index is written as the first CSV column.
results_df = pd.DataFrame.from_records(data=results)
results_df.to_csv(path_or_buf="Crimes.csv")

Unnamed: 0,id,case_number,date,block,iucr,primary_type,description,location_description,arrest,domestic,...,longitude,location,:@computed_region_awaf_s7ux,:@computed_region_6mkv_f3dw,:@computed_region_vrxf_vc4k,:@computed_region_bdys_3d7i,:@computed_region_43wa_7qmu,:@computed_region_rpca_8um6,:@computed_region_d9mm_jgwp,:@computed_region_d3ds_rm58
0,12910858,JF496124,2022-12-02T23:59:00.000,046XX S WOOD ST,141A,WEAPONS VIOLATION,UNLAWFUL USE - HANDGUN,RESIDENCE,False,False,...,-87.669863956,"{'latitude': '41.809773391', 'longitude': '-87...",53,14924,59,706,3,37,23,108
1,27220,JF496107,2022-12-02T23:52:00.000,002XX S CICERO AVE,0110,HOMICIDE,FIRST DEGREE MURDER,STREET,False,False,...,-87.745163462,"{'latitude': '41.877355049', 'longitude': '-87...",11,22216,26,69,23,32,25,137


### Perform data transforms and insert data into database

This step is performed by our `Database_Ingestor` Python notebook.

# Calculating Scores

### Connecting to the Database : Crime Statistics

In [3]:
# Engine for the crime_statistics database.
# NOTE(review): judging by its name, the dummy key only makes the `ssl` dict
# non-empty so that PyMySQL negotiates TLS — confirm against the driver docs.
connect_args = {'ssl': {'fake_flag_to_enable_tls': True}}
# Credentials come from the environment instead of being committed with the notebook.
# The password is mandatory (KeyError if missing); user and host keep their previous
# values as defaults. quote_plus escapes characters that are special inside a URL.
connect_string = 'mysql+pymysql://{}:{}@{}/{}'.format(
    quote_plus(os.environ.get('CRIME_STATS_DB_USER', 'ingestor')),
    quote_plus(os.environ['CRIME_STATS_DB_PASSWORD']),
    os.environ.get('CRIME_DB_HOST', 'rev.mysql.database.azure.com'),
    'crime_statistics',
)
connector = create_engine(connect_string, connect_args=connect_args)

### Querying the SQL database to get the rank sums per beat

In [4]:
# Single query template shared by the day and night aggregations; only the
# output column alias and the hour-of-day predicate differ between the two.
BEAT_RANK_SQL = """
    SELECT b.beat_id, d.district_id, SUM(i.rank) AS '{alias}'
    FROM iucr AS i
    LEFT JOIN crime_report AS cr ON i.iucr = cr.iucr
    LEFT JOIN location AS l ON cr.location_id = l.location_id
    LEFT JOIN beat AS b ON b.beat_id = l.beat_id
    LEFT JOIN district AS d ON b.district_id = d.district_id
    WHERE year(cr.date) >= 2013 AND HOUR(cr.date) {hour_filter}
    GROUP BY b.beat_id, d.district_id
"""


def load_beat_rank(engine, alias, hour_filter):
    """Return the summed IUCR rank per (beat_id, district_id) as a DataFrame.

    Parameters
    ----------
    engine : SQLAlchemy engine or connection handed to ``pd.read_sql_query``.
    alias : str
        Name of the aggregated column in the result.
    hour_filter : str
        Comparison applied to ``HOUR(cr.date)``, e.g. ``"< 16"``.
        Both strings are interpolated into the SQL text, so they must be
        trusted constants, never user input.
    """
    query = BEAT_RANK_SQL.format(alias=alias, hour_filter=hour_filter)
    # read_sql_query already returns a DataFrame; no extra wrapping is needed.
    return pd.read_sql_query(query, engine)


# Reports from 2013 onwards, split at 16:00 into a "day" and a "night" bucket.
beat_rank_day = load_beat_rank(connector, "sum_rank_day", "< 16")
beat_rank_night = load_beat_rank(connector, "sum_rank_night", ">= 16")

In [12]:
# Preview the first five rows of the day-time aggregation.
beat_rank_day.head(n=5)

Unnamed: 0,beat_id,district_id,sum_rank_day
0,1011,10,68418.0
1,321,3,57719.0
2,433,4,33149.0
3,412,4,51583.0
4,1112,11,69994.0


In [13]:
# Preview the first five rows of the night-time aggregation.
beat_rank_night.head(n=5)

Unnamed: 0,beat_id,district_id,sum_rank_night
0,1422,14,25742.0
1,725,7,34990.0
2,1023,10,20264.0
3,614,6,27233.0
4,334,3,26026.0


### Normalize Scores

In [14]:
# Min-max normalisation with a padded range: the padding widens the denominator,
# so the highest-ranked beat ends up strictly below 1.
DAY_RANGE_PADDING = 1000
NIGHT_RANGE_PADDING = 500


def _padded_min_max(values, padding):
    """Scale `values` to start at 0, dividing by (max + padding - min)."""
    lowest = values.min()
    return (values - lowest) / (values.max() + padding - lowest)


beat_rank_day['normalized_score_day'] = _padded_min_max(
    beat_rank_day['sum_rank_day'], DAY_RANGE_PADDING
)
beat_rank_night['normalized_score_night'] = _padded_min_max(
    beat_rank_night['sum_rank_night'], NIGHT_RANGE_PADDING
)

### Combine the DataFrames

In [15]:
# Outer-join the day and night frames on beat and district, so a beat that
# appears in only one of them is kept (with NaN in the other frame's columns).
beat_rank = pd.merge(
    beat_rank_day,
    beat_rank_night,
    how='outer',
    on=['beat_id', 'district_id'],
)
beat_rank.head(n=5)

Unnamed: 0,beat_id,district_id,sum_rank_day,normalized_score_day,sum_rank_night,normalized_score_night
0,1011,10,68418.0,0.69546,49451.0,0.820919
1,321,3,57719.0,0.580598,43331.0,0.716887
2,433,4,33149.0,0.316822,22583.0,0.364197
3,412,4,51583.0,0.514724,32633.0,0.535034
4,1112,11,69994.0,0.712379,51781.0,0.860526


### Calculate the Safety Score

In [16]:
# Safety score: start from 100, then subtract the weighted day (70) and night (25)
# normalised scores. A higher normalised score therefore lowers the safety score.
day_penalty = 70 * beat_rank['normalized_score_day']
night_penalty = 25 * beat_rank['normalized_score_night']
beat_rank['safety_score'] = 100 - day_penalty - night_penalty

### Connecting to the Database : Crime Scores

In [17]:
# Engine for the crime_scores database.
# NOTE(review): judging by its name, the dummy key only makes the `ssl` dict
# non-empty so that PyMySQL negotiates TLS — confirm against the driver docs.
connect_args = {'ssl': {'fake_flag_to_enable_tls': True}}
# Credentials come from the environment instead of being committed with the notebook.
# The password is mandatory (KeyError if missing); user and host keep their previous
# values as defaults. quote_plus escapes characters that are special inside a URL.
connect_string_db = 'mysql+pymysql://{}:{}@{}/{}'.format(
    quote_plus(os.environ.get('CRIME_SCORES_DB_USER', 'rootroot')),
    quote_plus(os.environ['CRIME_SCORES_DB_PASSWORD']),
    os.environ.get('CRIME_DB_HOST', 'rev.mysql.database.azure.com'),
    'crime_scores',
)
connector_db = create_engine(connect_string_db, connect_args=connect_args)

In [18]:
# Obtain a raw connection from the crime_scores engine and open a cursor on it.
# NOTE(review): neither handle is used or closed in the cells visible here —
# confirm whether they are still needed, and close them if so.
connection_db = connector_db.raw_connection()
cursor_db = connection_db.cursor()

In [19]:
# Keep only the identifying columns and the final score for publication.
score_columns = ['beat_id', 'district_id', 'safety_score']
beat_rank_scores = beat_rank.loc[:, score_columns]
beat_rank_scores.head(n=5)

Unnamed: 0,beat_id,district_id,safety_score
0,1011,10,30.794844
1,321,3,41.435948
2,433,4,68.717541
3,412,4,50.593459
4,1112,11,28.620305


In [20]:
# Publish the scores; an existing `beat_district_scores` table is replaced
# and the DataFrame index is not written.
beat_rank_scores.to_sql(
    name='beat_district_scores',
    con=connector_db,
    if_exists='replace',
    index=False,
)

274