### Importing libraries

In [1]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text

### Reading the data

In [2]:
# Load the crime extract and the three lookup CSVs, in the order listed.
# All paths are relative to the kernel's current working directory.
crime_data, community_area_mapping, district_mapping, ward_mapping = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Downloads/Crimes.csv',
        'Downloads/community_area_mapping.csv',
        'Downloads/district_mapping.csv',
        'Downloads/ward_mapping.csv',
    )
)

### Establishing the database connection

In [3]:
# Build the SQLAlchemy engine (`connector`) used by every ingestion cell below.
#
# The connection URL carries the database user and password, so it is read from
# the environment rather than being stored in the notebook. Expected shape:
#   mysql+pymysql://<user>:<password>@<host>/crime_statistics
connect_string = os.environ.get('CRIME_DB_URL')
if not connect_string:
    raise RuntimeError(
        "Set the CRIME_DB_URL environment variable to the SQLAlchemy URL of the "
        "crime_statistics database before running this notebook."
    )

# The `ssl` dict is forwarded to the DBAPI driver via `connect_args`.
# NOTE(review): the key name suggests it is only a placeholder that makes the
# dict non-empty so the driver negotiates TLS -- confirm against the PyMySQL docs.
connect_args = {'ssl': {'fake_flag_to_enable_tls': True}}
connector = create_engine(connect_string, connect_args=connect_args)

### Data Pre-processing

In [None]:
# Clean the raw extract in a single chain:
#   1. remove exact duplicate rows, then every row with a missing value in any column;
#   2. discard the records whose District equals 31;
#   3. parse the 'Date' column with pd.to_datetime.
crime_data = (
    crime_data
    .drop_duplicates()
    .dropna()
    .loc[lambda frame: frame.District != 31]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

### Ingesting Data into tables

#### 1. description

In [None]:
# Distinct values of 'Description', renamed to `description`, appended to the
# `description` table. The DataFrame index is not written.
desc_df = crime_data[['Description']].drop_duplicates()
desc_df = desc_df.rename(columns={'Description': 'description'})
desc_df = desc_df.reset_index(drop=True).dropna()
desc_df.to_sql(name="description", con=connector, if_exists='append', index=False)

#### 2. primary_type

In [None]:
# Distinct values of 'Primary Type', renamed to `primary_type_desc`, appended to
# the `primary_type` table (index not written).
primary_type_df = (
    crime_data[['Primary Type']]
    .drop_duplicates()
    .rename(columns={'Primary Type': 'primary_type_desc'})
    .reset_index(drop=True)
    .dropna()
)
primary_type_df.to_sql(name="primary_type", con=connector, if_exists='append', index=False)

#### 3. environment

In [None]:
# Distinct values of 'Location Description', renamed to `environment_description`,
# appended to the `environment` table (index not written).
unique_environments = crime_data[['Location Description']].drop_duplicates()
env_df = (
    unique_environments
    .rename(columns={'Location Description': 'environment_description'})
    .reset_index(drop=True)
    .dropna()
)
env_df.to_sql(name="environment", con=connector, if_exists='append', index=False)

#### 4. community

In [None]:
# Append the community-area lookup frame, exactly as read from CSV, to the
# `community` table; the DataFrame index is not written.
community_area_mapping.to_sql(name="community", con=connector, if_exists='append', index=False)

#### 5. district

In [None]:
# Drop incomplete rows from the district lookup, cast `district_id` to int,
# then append the result to the `district` table (index not written).
district_mapping = (
    district_mapping
    .dropna()
    .assign(district_id=lambda frame: frame.district_id.astype(int))
)
district_mapping.to_sql(name="district", con=connector, if_exists='append', index=False)

#### 6. ward

In [None]:
# Distinct (Ward, District) pairs observed in the crime data, with both ids
# cast to int.
ward_df = (
    crime_data[['Ward', 'District']]
    .drop_duplicates()
    .rename(columns={'Ward': 'ward_id', 'District': 'district_id'})
    .reset_index(drop=True)
    .dropna()
    .astype({'district_id': int, 'ward_id': int})
)

# Attach the ward_mapping columns; the left join keeps wards that have no
# entry in the mapping file.
ward_df = pd.merge(ward_df, ward_mapping, how="left", on=["ward_id"])

# Turn foreign-key enforcement off before loading. Engine.execute() with a raw
# string is not available on SQLAlchemy 2.x, so the statement is run on an
# explicit connection and wrapped in text(), which works on 1.x and 2.x alike.
# NOTE(review): FOREIGN_KEY_CHECKS is set for one session only; the inserts that
# follow are covered only if the pool hands this same connection back -- confirm.
with connector.connect() as conn:
    conn.execute(text("SET FOREIGN_KEY_CHECKS = 0"))

ward_df.to_sql(name="ward", con=connector, if_exists='append', index=False)

#### 7. beat

In [None]:
# Distinct (Beat, Ward) pairs, renamed to `beat_id` / `ward_id` with `ward_id`
# cast to int, appended to the `beat` table (index not written).
beat_df = (
    crime_data[['Beat', 'Ward']]
    .drop_duplicates()
    .rename(columns={'Beat': 'beat_id', 'Ward': 'ward_id'})
    .reset_index(drop=True)
    .dropna()
    .assign(ward_id=lambda frame: frame.ward_id.astype(int))
)
beat_df.to_sql(name="beat", con=connector, if_exists='append', index=False)

#### 8. location

In [None]:
# Distinct (block, latitude, longitude, beat, community area) combinations,
# renamed to the lower-case column names below, with `community_id` cast to int,
# appended to the `location` table (index not written).
location_columns = {
    'Block': 'block',
    'Latitude': 'latitude',
    'Longitude': 'longitude',
    'Beat': 'beat_id',
    'Community Area': 'community_id',
}
location_df = (
    crime_data[list(location_columns)]
    .drop_duplicates()
    .rename(columns=location_columns)
    .reset_index(drop=True)
    .dropna()
    .assign(community_id=lambda frame: frame.community_id.astype(int))
)
location_df.to_sql(name="location", con=connector, if_exists='append', index=False)

#### 9. iucr

In [None]:
# Distinct (IUCR, primary type, description) triples from the crime data.
iucr_df = (
    crime_data[['IUCR', 'Primary Type', 'Description']]
    .drop_duplicates()
    .rename(columns={
        'IUCR': 'iucr',
        'Primary Type': 'primary_type_desc',
        'Description': 'description',
    })
    .reset_index(drop=True)
    .dropna()
)

# Read the primary_type table back so its ids can be joined on.
sql_query = pd.read_sql_query('''
                               SELECT
                               *
                               FROM primary_type
                               ''', connector)
primary_type_sql = pd.DataFrame(sql_query, columns=['primary_type_id', 'primary_type_desc'])

# Same for the description table.
sql_query = pd.read_sql_query('''
                               SELECT
                               *
                               FROM description
                               ''', connector)
description_sql = pd.DataFrame(sql_query, columns=['description_id', 'description'])

# Inner-join on the text columns to swap them for their ids, keep only the
# three columns that are written, and append to the `iucr` table.
iucr_df = iucr_df.merge(primary_type_sql, on="primary_type_desc")
iucr_df = iucr_df.merge(description_sql, on="description")
iucr_df = iucr_df.loc[:, ['iucr', 'primary_type_id', 'description_id']]
iucr_df.to_sql(name="iucr", con=connector, if_exists='append', index=False)


#### 10. crime_report

In [None]:
# Build the fact rows: one record per distinct, fully populated combination of
# the selected columns, with the two boolean flags stored as 0/1 integers.
crime_report_df = (
    crime_data[['Case Number', 'Date', 'Block', 'Latitude', 'Longitude', 'Beat',
                'Community Area', 'IUCR', 'Arrest', 'Domestic', 'Location Description']]
    .drop_duplicates()
    .dropna()
    .astype({'Arrest': int, 'Domestic': int})
    .rename(columns={
        'Case Number': 'case_number',
        'Date': 'date',
        'Block': 'block',
        'Latitude': 'latitude',
        'Longitude': 'longitude',
        'Beat': 'beat_id',
        'Community Area': 'community_id',
        'IUCR': 'iucr',
        'Arrest': 'arrest',
        'Domestic': 'domestic',
        'Location Description': 'environment_description',
    })
)

# Read the location table back so its ids can be joined on.
sql_query = pd.read_sql_query('''
                               SELECT
                               *
                               FROM location
                               ''', connector)
location_sql = pd.DataFrame(
    sql_query,
    columns=['location_id', 'block', 'latitude', 'longitude', 'beat_id', 'community_id'],
)

# Same for the environment table.
sql_query = pd.read_sql_query('''
                               SELECT
                               *
                               FROM environment
                               ''', connector)
environment_sql = pd.DataFrame(sql_query, columns=['environment_id', 'environment_description'])

# Left-join to look up location_id and environment_id, then keep only the
# columns that are written. Rows that found no match carry NaN ids and are
# removed by the final dropna().
# NOTE(review): the location join compares latitude/longitude for exact equality
# with values read back from the database; if the column type rounds them, rows
# would be dropped here without any message -- verify row counts.
crime_report_df = pd.merge(
    crime_report_df,
    location_sql,
    on=['block', 'latitude', 'longitude', 'beat_id', 'community_id'],
    how="left",
)
crime_report_df = pd.merge(
    crime_report_df,
    environment_sql,
    on=["environment_description"],
    how="left",
)
crime_report_df = crime_report_df[
    ['case_number', 'date', 'location_id', 'iucr', 'arrest', 'domestic', 'environment_id']
].dropna()

crime_report_df.to_sql(name="crime_report", con=connector, if_exists='append', index=False)

# Turn foreign-key enforcement back on. Engine.execute() with a raw string is
# not available on SQLAlchemy 2.x, so the statement is run on an explicit
# connection and wrapped in text(), which works on 1.x and 2.x alike.
with connector.connect() as conn:
    conn.execute(text("SET FOREIGN_KEY_CHECKS = 1"))