# Exploratory Data Analysis - Crime Data Los Angeles

This notebook is used to explore the questions for the Data Scientist Exercise in July 2020.

- How many crimes were reported over the past 5 years?
- List the top five reported crimes for each year for the past 5 years.
- What are the most common MO codes? Have these changed over the past 5 years?
- How else has reported crime changed over time in the City?
- Based on your analysis, please share any changes to services, programs, or policies that the City should consider.

##  Load libraries

In [38]:
# Import libraries
import os, sys, subprocess
import json
import pandas as pd
import numpy as np
from langdetect import detect
import pickle

pd.set_option('display.max_colwidth', -1)

pd.set_option('display.max_rows', 1000)
#pd.set_option('display.max_columns', 500)

In [39]:
# load project config
terminal_call = ! git rev-parse --show-toplevel
repo_path=terminal_call[0]
project_config_path = os.path.join(repo_path,'project_config.json')

with open(project_config_path,'r') as fp: 
    project_config = json.load(fp)

In [40]:
# import custom module to look at trends
module_path = os.path.join(repo_path,project_config['project_module_relative_path'])
sys.path.append(module_path)

import trends
from trends import get_top_trends as gt
from trends import convert
from trends.convert import crime_dict # load in crime name dictionary

from importlib import reload # for updating scripts

# Load in data

In [41]:
# load in data that was collected
df = pickle.load(open("../data/crime_data.pkl", "rb"))

## How many crimes were reported over the past 5 years?

In [6]:
# Check how many crimes were reported over the past 5 years
# Calculate current date and offset by 5 years
(df["date_rptd"] > (pd.datetime.now()- pd.DateOffset(years=5))).sum()

1117864

### A: 1,117,864 crimes were reported in the past 5 years

## List the top five reported crimes for each year for the past 5 years.

In [7]:
reload(trends)

<module 'trends' (namespace)>

In [43]:
# Select the last 5 years as a dataframe
df_5 = df[df["date_rptd"] > (pd.datetime.now()- pd.DateOffset(years=5))]

In [44]:
# Find the top 5 reported crimes for each year
# Crime is listed in the data under 'crm_cd'
# Custom function 'top_trends' groups by column and selects the top 'n' 
top_crimes = gt.top_trends(df_5, column='year', variable='crm_cd', n=5)

In [45]:
# Add in the name of the crimes to dataframe
top_crimes["crm_name"] = convert.get_names(top_crimes, "crm_cd", crime_dict)

In [46]:
top_crimes

Unnamed: 0,year,crm_cd,count,crm_name
0,2015,624,8853,Battery - misdemeanor
1,2015,510,8573,Stolen Vehicle
2,2015,440,7866,Theft - $950 & under
3,2015,330,7316,Burg from Vehicle
4,2015,354,7297,Theft of Identity
131,2016,510,18353,Stolen Vehicle
132,2016,624,17942,Battery - misdemeanor
133,2016,330,16779,Burg from Vehicle
134,2016,440,14814,Theft - $950 & under
135,2016,310,14558,Burglary


## What are the most common MO codes? Have these changed over the past 5 years?

In [48]:
# Find the top 10 most common MO codes across all the years (2010-2020)
df_5["mocodes_1"].value_counts(ascending = False).head(10)

0344    220881
0329    93544 
2000    54130 
0416    48526 
1822    41157 
0400    30278 
0325    26081 
1501    23966 
1300    20486 
1402    19233 
Name: mocodes_1, dtype: int64

In [49]:
# Find the top 10 MO codes for each year
# MO code is listed in the data under 'mocodes'
# Custom function 'top_trends' groups by column and selects the top 'n' 
top_mo = gt.top_trends(df_5, column='year', variable='mocodes_1', n=10)

In [None]:
# Find the top 10 MO codes for each year

top_mo["mocodes_1"] = top_mo["mocodes"].str.split(" ").str[0]

In [51]:
import os

In [52]:
import json

In [91]:
with open("../data/mocodes.json") as f:
                    data = json.loads(f.read())

In [106]:
dict((int(k), v) for k, v in data.items())

{100: 'Suspect Impersonate',
 101: 'Aid victim',
 102: 'Blind',
 103: 'Crippled',
 104: 'Customer',
 105: 'Delivery',
 106: 'Doctor',
 107: 'God',
 108: 'Infirm',
 109: 'Inspector',
 110: 'Involved in traffic/accident',
 112: 'Police',
 113: 'Renting',
 114: 'Repair Person',
 115: 'Returning stolen property',
 116: 'Satan',
 117: 'Salesman',
 118: 'Seeking someone',
 119: 'Sent by owner',
 120: 'Social Security/Medicare',
 121: 'DWP/Gas Company/Utility worker',
 122: 'Contractor',
 123: 'Gardener/Tree Trimmer',
 200: 'Suspect wore disguise',
 201: 'Bag',
 202: 'Cap/hat',
 203: 'Cloth (with eyeholes)',
 204: 'Clothes of opposite sex',
 205: 'Earring',
 206: 'Gloves',
 207: 'Handkerchief',
 208: 'Halloween mask',
 209: 'Mask',
 210: 'Make up (males only)',
 211: 'Shoes',
 212: 'Nude/partly nude',
 213: 'Ski mask',
 214: 'Stocking',
 215: 'Unusual clothes',
 216: 'Suspect wore hood/hoodie',
 217: 'Uniform',
 218: 'Wig',
 219: 'Mustache-Fake',
 220: 'Suspect wore motorcycle helmet',
 301: 

In [92]:
data

{'100': 'Suspect Impersonate',
 '101': 'Aid victim',
 '102': 'Blind',
 '103': 'Crippled',
 '104': 'Customer',
 '105': 'Delivery',
 '106': 'Doctor',
 '107': 'God',
 '108': 'Infirm',
 '109': 'Inspector',
 '110': 'Involved in traffic/accident',
 '112': 'Police',
 '113': 'Renting',
 '114': 'Repair Person',
 '115': 'Returning stolen property',
 '116': 'Satan',
 '117': 'Salesman',
 '118': 'Seeking someone',
 '119': 'Sent by owner',
 '120': 'Social Security/Medicare',
 '121': 'DWP/Gas Company/Utility worker',
 '122': 'Contractor',
 '123': 'Gardener/Tree Trimmer',
 '200': 'Suspect wore disguise',
 '201': 'Bag',
 '202': 'Cap/hat',
 '203': 'Cloth (with eyeholes)',
 '204': 'Clothes of opposite sex',
 '205': 'Earring',
 '206': 'Gloves',
 '207': 'Handkerchief',
 '208': 'Halloween mask',
 '209': 'Mask',
 '210': 'Make up (males only)',
 '211': 'Shoes',
 '212': 'Nude/partly nude',
 '213': 'Ski mask',
 '214': 'Stocking',
 '215': 'Unusual clothes',
 '216': 'Suspect wore hood/hoodie',
 '217': 'Uniform',


In [69]:
test_df = pd.read_json("../data/mocodes.json", orient = "index").reset_index()

In [75]:
test_df.rename(columns={"index": "mocode", 0: "mocode_name"}, inplace = True)

In [56]:
pd.DataFrame(data)

ValueError: DataFrame constructor not properly called!