# Import and Read Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw crimes-and-clearances table. low_memory=False asks pandas to
# parse the file in one pass rather than in chunks.
df1 = pd.read_csv(
    "Crimes_and_Clearances_with_Arson-1985-2016.csv",
    low_memory=False,
)

In [3]:
# Preview the first five rows of the raw table.
df1.head(n=5)

Unnamed: 0,Year,County,NCICCode,Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,...,MVLARnao_sum,MVPLARnao_sum,BILARnao_sum,FBLARnao_sum,COMLARnao_sum,AOLARnao_sum,LT400nao_sum,LT200400nao_sum,LT50200nao_sum,LT50nao_sum
0,1985,Alameda County,Alameda Co. Sheriff's Department,427,3,27,166,231,3964,1483,...,930,109,205,44,11,475,753,437,440,498
1,1985,Alameda County,Alameda,405,7,15,220,163,4486,989,...,538,673,516,183,53,559,540,622,916,1159
2,1985,Alameda County,Albany,101,1,4,58,38,634,161,...,147,62,39,46,17,37,84,68,128,138
3,1985,Alameda County,Berkeley,1164,11,43,660,450,12035,2930,...,3153,508,611,1877,18,496,533,636,2793,4274
4,1985,Alameda County,Emeryville,146,0,5,82,59,971,205,...,207,153,16,85,24,169,217,122,161,164


# Check for Missing Data (NaN)

In [4]:
# Show every row that contains at least one missing value; an empty result
# means the table has no NaNs. A per-row mask lists each offending row once,
# whereas a 2-D boolean array would repeat a row once per NaN cell.
df1[df1.isnull().any(axis=1)]

Unnamed: 0,Year,County,NCICCode,Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,...,MVLARnao_sum,MVPLARnao_sum,BILARnao_sum,FBLARnao_sum,COMLARnao_sum,AOLARnao_sum,LT400nao_sum,LT200400nao_sum,LT50200nao_sum,LT50nao_sum


# Preprocess

In [5]:
# Lookup lists reused by later cells; values keep their first-appearance order.
county_list = df1["County"].drop_duplicates().tolist()
year_list = list(map(str, df1["Year"].unique()))  # year labels as strings
col_list = list(df1.columns)

In [6]:
# Store both key columns as strings so they match the string labels in
# year_list / county_list when used as index levels.
df1[["Year", "County"]] = df1[["Year", "County"]].astype(
    {"Year": str, "County": str}
)

In [7]:
# Index the rows by (Year, County) so later cells can select slices with .xs().
df1_index_setting = df1.set_index(keys=["Year", "County"])

In [8]:
# Spot check: total of all numeric columns for San Diego County in 1985,
# selecting the county first and then the year.
# numeric_only=True skips the string column NCICCode explicitly; pandas >= 2.0
# no longer drops non-numeric columns silently and would raise instead.
(
    df1_index_setting
    .xs("San Diego County", level=1)
    .xs("1985")
    .sum(axis=1, numeric_only=True)
    .sum()
)

534138

In [9]:
# Spot check: total of all numeric columns for San Diego County in 1992,
# selecting the year first and then the county.
# numeric_only=True skips the string column NCICCode explicitly; pandas >= 2.0
# no longer drops non-numeric columns silently and would raise instead.
(
    df1_index_setting
    .xs("1992", level=0)
    .xs("San Diego County")
    .sum(axis=1, numeric_only=True)
    .sum()
)

733733

# Create New DataFrame

In [10]:
# Empty year x county table (float zeros) to be filled with event totals.
df2 = pd.DataFrame(0.0, index=year_list, columns=county_list)

In [11]:
# Build the year x county table of event totals in one pass:
#   1. add up the numeric columns of every row (numeric_only=True skips the
#      string column NCICCode, which pandas >= 2.0 would otherwise choke on),
#   2. add those row totals together per (Year, County),
#   3. pivot County into columns and restore the year_list / county_list order.
# A (year, county) pair with no rows ends up as NaN instead of raising KeyError.
# NOTE(review): every numeric column is added, including aggregate columns such
# as Violent_sum and the *Clr_sum clearance counts, so one event is probably
# counted more than once - confirm this is the intended "total".
df2 = (
    df1_index_setting
    .sum(axis=1, numeric_only=True)
    .groupby(level=["Year", "County"])
    .sum()
    .unstack("County")
    .reindex(index=year_list, columns=county_list)
    .rename_axis(index=None, columns=None)  # keep the axes unnamed, as before
    .astype(float)
)

In [12]:
# Flip to county x year: one row per county, one column per year.
df3 = df2.T
df3

Unnamed: 0,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Alameda County,456887.0,461828.0,452061.0,464711.0,459545.0,419649.0,466799.0,469982.0,467652.0,447084.0,...,306348.0,296998.0,279192.0,256188.0,264354.0,304886.0,295025.0,278351.0,284656.0,283826.0
Alpine County,1217.0,968.0,813.0,690.0,640.0,603.0,334.0,540.0,735.0,885.0,...,379.0,352.0,309.0,371.0,486.0,221.0,178.0,128.0,207.0,168.0
Amador County,3079.0,3518.0,3739.0,3578.0,3509.0,3348.0,3573.0,3579.0,3910.0,3900.0,...,4529.0,5479.0,4439.0,4432.0,4965.0,4333.0,4275.0,3364.0,3233.0,3610.0
Butte County,40565.0,44965.0,41604.0,47751.0,44715.0,39787.0,40532.0,44789.0,42841.0,46424.0,...,33785.0,33041.0,31782.0,29791.0,26691.0,28646.0,30126.0,31973.0,36092.0,34704.0
Calaveras County,5194.0,6490.0,6116.0,4788.0,4517.0,4853.0,4861.0,5297.0,5181.0,5331.0,...,3838.0,4169.0,3888.0,4154.0,5581.0,5601.0,5251.0,4794.0,4063.0,3599.0
Colusa County,2984.0,2855.0,2792.0,2512.0,2219.0,2584.0,3140.0,2933.0,2943.0,2995.0,...,3105.0,2472.0,2730.0,2448.0,2431.0,2701.0,2570.0,1776.0,1815.0,1777.0
Contra Costa County,205832.0,210611.0,213378.0,224920.0,223609.0,214715.0,225596.0,224162.0,222580.0,222182.0,...,171672.0,164496.0,148190.0,146817.0,144437.0,156371.0,148070.0,149296.0,148596.0,133940.0
Del Norte County,5002.0,4801.0,4859.0,5722.0,5095.0,4894.0,4620.0,4735.0,5211.0,5996.0,...,2723.0,3263.0,3471.0,3946.0,4451.0,4062.0,3761.0,3915.0,3479.0,3553.0
El Dorado County,22841.0,24527.0,25819.0,23473.0,24104.0,23338.0,24078.0,24489.0,24353.0,25786.0,...,16199.0,16343.0,14912.0,15380.0,16344.0,15730.0,16547.0,15901.0,15523.0,15261.0
Fresno County,209746.0,234992.0,221187.0,226794.0,239613.0,243421.0,269519.0,274351.0,258925.0,277018.0,...,173521.0,180355.0,173873.0,190935.0,204072.0,202378.0,180701.0,161578.0,167309.0,161619.0


In [13]:
# Z-score each year column across counties: subtract the column mean and divide
# by the population standard deviation (ddof=0, which is what np.std uses).
# Whole-frame arithmetic replaces the per-column apply/lambda.
df3_norm = (df3 - df3.mean()) / df3.std(ddof=0)

In [14]:
# Display the normalized county x year table.
df3_norm

Unnamed: 0,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Alameda County,0.93093,0.887846,0.913489,0.908361,0.81204,0.687066,0.763745,0.783699,0.830667,0.86041,...,1.042787,1.03505,1.046854,0.9766,1.054283,1.221665,1.221259,1.204953,1.101767,1.070554
Alpine County,-0.379372,-0.383609,-0.399012,-0.397745,-0.385086,-0.378332,-0.37975,-0.384628,-0.394357,-0.410482,...,-0.473855,-0.477753,-0.482307,-0.489011,-0.493342,-0.51121,-0.511613,-0.501748,-0.488847,-0.463348
Amador County,-0.374018,-0.376574,-0.390501,-0.389616,-0.377601,-0.371353,-0.37181,-0.377065,-0.386027,-0.401894,...,-0.453284,-0.451607,-0.459661,-0.465745,-0.467072,-0.487822,-0.487534,-0.481897,-0.471926,-0.444735
Butte County,-0.266225,-0.262227,-0.280367,-0.26528,-0.270109,-0.278709,-0.281208,-0.274503,-0.283886,-0.280775,...,-0.308267,-0.311049,-0.309735,-0.320459,-0.339646,-0.349534,-0.335603,-0.306401,-0.288182,-0.276592
Calaveras County,-0.367936,-0.368375,-0.383588,-0.38621,-0.374972,-0.367527,-0.368652,-0.372789,-0.382693,-0.397818,...,-0.45671,-0.458287,-0.462683,-0.467337,-0.463459,-0.48061,-0.481798,-0.473125,-0.467285,-0.444795
Colusa County,-0.374291,-0.378403,-0.393256,-0.392616,-0.380967,-0.373295,-0.372871,-0.378673,-0.388564,-0.404472,...,-0.460343,-0.466942,-0.469032,-0.477111,-0.481935,-0.497104,-0.497555,-0.491638,-0.479856,-0.454647
Contra Costa County,0.209008,0.19477,0.219255,0.233408,0.196564,0.166034,0.172459,0.171913,0.187685,0.21983,...,0.375219,0.359331,0.328549,0.349998,0.350952,0.376941,0.357577,0.413292,0.340931,0.260034
Del Norte County,-0.368488,-0.373034,-0.387244,-0.383581,-0.373464,-0.367422,-0.369243,-0.374188,-0.382614,-0.395924,...,-0.462236,-0.462908,-0.464969,-0.468529,-0.470087,-0.489363,-0.490555,-0.478517,-0.470551,-0.445044
El Dorado County,-0.317192,-0.318613,-0.326279,-0.333616,-0.323876,-0.32053,-0.321543,-0.325025,-0.332392,-0.339557,...,-0.395438,-0.396204,-0.402236,-0.403022,-0.400333,-0.422998,-0.415409,-0.404992,-0.403202,-0.381732
Fresno County,0.220263,0.262034,0.241968,0.238683,0.238313,0.239017,0.280132,0.296821,0.283042,0.376017,...,0.384384,0.440207,0.469373,0.602756,0.70072,0.63862,0.549355,0.488633,0.445572,0.40971


In [16]:
# Write the raw and the normalized county x year tables to CSV.
df3.to_csv(path_or_buf="Total_event_num_NONE_normed.csv")
df3_norm.to_csv(path_or_buf="Total_event_num_normed.csv")

In [36]:
# Pairwise correlation between the numeric columns only. Year, County and
# NCICCode hold strings at this point, and pandas >= 2.0 raises on them instead
# of dropping them silently, so the numeric columns are selected explicitly.
df1.select_dtypes(include="number").corr()

Unnamed: 0,Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,VehicleTheft_sum,LTtotal_sum,ViolentClr_sum,...,MVLARnao_sum,MVPLARnao_sum,BILARnao_sum,FBLARnao_sum,COMLARnao_sum,AOLARnao_sum,LT400nao_sum,LT200400nao_sum,LT50200nao_sum,LT50nao_sum
Violent_sum,1.000000,0.984308,0.926076,0.984753,0.991580,0.948455,0.928185,0.954394,0.928297,0.989118,...,0.939604,0.658527,0.540242,0.924091,0.161497,0.725921,0.937978,0.920771,0.908011,0.767044
Homicide_sum,0.984308,1.000000,0.915464,0.978559,0.968284,0.935971,0.927450,0.945828,0.909798,0.965103,...,0.923110,0.642021,0.522274,0.905308,0.152738,0.720016,0.927894,0.901638,0.891672,0.743142
ForRape_sum,0.926076,0.915464,1.000000,0.905693,0.914794,0.949976,0.932241,0.909900,0.948215,0.903936,...,0.913652,0.771275,0.631723,0.927854,0.214358,0.778030,0.948035,0.930065,0.921212,0.800728
Robbery_sum,0.984753,0.978559,0.905693,1.000000,0.954258,0.943051,0.921693,0.952621,0.921941,0.959173,...,0.946214,0.627721,0.534513,0.909727,0.161197,0.703941,0.926735,0.898327,0.887167,0.780975
AggAssault_sum,0.991580,0.968284,0.914794,0.954258,1.000000,0.929687,0.910314,0.935310,0.909815,0.992636,...,0.913810,0.659358,0.526638,0.912543,0.155022,0.721789,0.923245,0.914969,0.901053,0.736816
Property_sum,0.948455,0.935971,0.949976,0.943051,0.929687,1.000000,0.980323,0.969834,0.993467,0.928453,...,0.980260,0.745679,0.697479,0.926341,0.246331,0.824550,0.963215,0.969651,0.964809,0.869256
Burglary_sum,0.928185,0.927450,0.932241,0.921693,0.910314,0.980323,1.000000,0.939433,0.963118,0.909816,...,0.948868,0.722052,0.710664,0.880977,0.248491,0.811260,0.938509,0.948930,0.953372,0.824948
VehicleTheft_sum,0.954394,0.945828,0.909900,0.952621,0.935310,0.969834,0.939433,1.000000,0.942909,0.938612,...,0.960155,0.657327,0.618614,0.896722,0.183186,0.743039,0.938471,0.936019,0.923614,0.791413
LTtotal_sum,0.928297,0.909798,0.948215,0.921941,0.909815,0.993467,0.963118,0.942909,1.000000,0.906402,...,0.974696,0.772226,0.706603,0.931852,0.265469,0.841930,0.957401,0.965824,0.960598,0.896278
ViolentClr_sum,0.989118,0.965103,0.903936,0.959173,0.992636,0.928453,0.909816,0.938612,0.906402,1.000000,...,0.918818,0.639109,0.533381,0.908236,0.144056,0.700477,0.921083,0.915407,0.902126,0.728818


In [17]:
import pickle

In [27]:
# Pickle the raw and the normalized tables. The context managers close each
# file even if pickle.dump raises, which the manual open()/close() pair did not.
with open("Crime_Raw_Data", "wb+") as f:
    pickle.dump(df3, f)
with open("Crime_Norm_Data", "wb+") as f:
    pickle.dump(df3_norm, f)

In [34]:
# Load the pickled unemployment data; the context manager closes the file
# handle, which the bare open() call left open.
# NOTE(review): pickle.load can execute arbitrary code - only load this file if
# it comes from a trusted source.
with open("unemploymentDataFile", "rb") as f:
    d = pickle.load(f)

# Load Population Data to Compute the Crime Rate

In [196]:
# Read the population table and preview its first five rows.
df_pop = pd.read_csv(filepath_or_buffer="California_Population.csv")
df_pop.head(n=5)

Unnamed: 0,Area Type,Area Name,Year,Period,Data Source,Population
0,California - Statewide,California,2014,Annual,California Dept of Finance,38499378
1,California - Statewide,California,2013,Annual,California Dept of Finance,38164011
2,California - Statewide,California,2012,Annual,California Dept of Finance,37826160
3,California - Statewide,California,2011,Annual,California Dept of Finance,37570307
4,California - Statewide,California,2010,Annual,California Dept of Finance,37309382


In [197]:
# Keep only the rows whose Data Source is the California Dept of Finance.
# NOTE: this rebinds df_pop, so the cell cannot be re-run on its own (the
# "Data Source" column is gone afterwards) - re-run the load cell first.
df_pop = df_pop.set_index(["Data Source"]).xs("California Dept of Finance")

# Area names in first-appearance order, without the statewide "California"
# entry. It is excluded by name rather than by dropping the first element, so
# the result does not depend on the row order of the CSV.
pop_county_list = [
    name for name in df_pop["Area Name"].unique().tolist() if name != "California"
]
pop_year_list = df_pop["Year"].unique().tolist()

# Index by (Area Name, Year) for per-county, per-year lookups.
df_index = df_pop.set_index(["Area Name","Year"])

In [198]:
# Drop the two descriptive columns, leaving only Population under the
# (Area Name, Year) index, then preview the result.
df_pop = df_index.drop(columns=["Area Type", "Period"])
df_pop.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population
Area Name,Year,Unnamed: 2_level_1
California,2014,38499378
California,2013,38164011
California,2012,37826160
California,2011,37570307
California,2010,37309382


In [231]:
# Events per resident for every county and year in the population table.
# Years are walked in the reverse of pop_year_list, which is also the column
# order of the resulting frame.
years_reversed = pop_year_list[::-1]
crime_rate = np.zeros((len(pop_county_list), len(pop_year_list)))
for row_pos, county_name in enumerate(pop_county_list):
    # Look up this county's rows once instead of once per year.
    county_events = df3.xs(county_name)
    county_population = df_pop.xs(county_name, level=0)
    for col_pos, pop_year in enumerate(years_reversed):
        # df3 columns are string years; the population index uses the raw year values.
        crime_rate[row_pos][col_pos] = (
            county_events.xs(str(pop_year))
            / county_population.xs(pop_year)["Population"]
        )

df_crime_rate = pd.DataFrame(crime_rate, index=pop_county_list, columns=years_reversed)
df_crime_rate.to_csv("Crime_rate.csv")
df_crime_rate

Unnamed: 0,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
Alameda County,0.363333,0.360783,0.3548,0.336798,0.320181,0.298318,0.293221,0.270389,0.232323,0.196709,...,0.199035,0.210614,0.200515,0.191543,0.177954,0.169298,0.173208,0.197876,0.188696,0.175729
Alpine County,0.301716,0.479574,0.649867,0.786667,0.568559,0.577181,0.547755,0.658436,0.486464,0.340249,...,0.363262,0.344171,0.302716,0.291391,0.261864,0.319003,0.434705,0.196096,0.160072,0.115108
Amador County,0.115076,0.112166,0.120178,0.118021,0.116721,0.109279,0.107968,0.115465,0.107603,0.104352,...,0.140256,0.116661,0.118681,0.144424,0.116927,0.117085,0.133153,0.117429,0.118658,0.093188
Butte County,0.218627,0.236813,0.223412,0.239216,0.219477,0.19706,0.210191,0.19494,0.166765,0.157167,...,0.162983,0.159131,0.154385,0.149906,0.143595,0.13542,0.121036,0.129551,0.136061,0.142797
Calaveras County,0.145191,0.150415,0.142602,0.143422,0.195582,0.146232,0.156607,0.137025,0.128154,0.114404,...,0.103761,0.098551,0.083496,0.090461,0.084597,0.091373,0.123629,0.124342,0.117074,0.106347
Colusa County,0.190338,0.1742,0.171593,0.171771,0.157285,0.154808,0.14505,0.111436,0.111081,0.104746,...,0.131371,0.144233,0.142686,0.11281,0.123574,0.113977,0.113059,0.124965,0.118625,0.082199
Contra Costa County,0.276811,0.269351,0.262295,0.258062,0.236724,0.217584,0.220294,0.217214,0.178384,0.160267,...,0.177572,0.168013,0.164843,0.155736,0.138656,0.139532,0.136085,0.146168,0.136855,0.13614
Del Norte County,0.181262,0.175924,0.189995,0.216407,0.25457,0.220321,0.210995,0.205624,0.173239,0.158828,...,0.150406,0.120676,0.093062,0.110881,0.117661,0.138243,0.156186,0.143235,0.133251,0.138736
El Dorado County,0.184956,0.181537,0.175469,0.181793,0.168411,0.13996,0.142849,0.115401,0.091346,0.097814,...,0.104008,0.104456,0.090663,0.090635,0.082154,0.085009,0.090467,0.087099,0.090442,0.086439
Fresno County,0.394752,0.388754,0.357921,0.376482,0.371279,0.329017,0.304583,0.263722,0.236246,0.255067,...,0.22794,0.20938,0.188638,0.192665,0.183231,0.204783,0.217265,0.213744,0.188827,0.167007


In [208]:
# Pickle the crime-rate table. The with-block closes the file on exit, so no
# explicit f.close() is needed afterwards.
with open("Crime_Rate","wb+") as f:
    pickle.dump(df_crime_rate,f)

In [230]:
# Read the pickled crime-rate table back to confirm the round trip; the context
# manager closes the file handle, which the bare open() call left open.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open("Crime_Rate", "rb") as f:
    read_crime_rate = pickle.load(f)
read_crime_rate

Unnamed: 0,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
Alameda County,0.363333,0.360783,0.3548,0.336798,0.320181,0.298318,0.293221,0.270389,0.232323,0.196709,...,0.199035,0.210614,0.200515,0.191543,0.177954,0.169298,0.173208,0.197876,0.188696,0.175729
Alpine County,0.301716,0.479574,0.649867,0.786667,0.568559,0.577181,0.547755,0.658436,0.486464,0.340249,...,0.363262,0.344171,0.302716,0.291391,0.261864,0.319003,0.434705,0.196096,0.160072,0.115108
Amador County,0.115076,0.112166,0.120178,0.118021,0.116721,0.109279,0.107968,0.115465,0.107603,0.104352,...,0.140256,0.116661,0.118681,0.144424,0.116927,0.117085,0.133153,0.117429,0.118658,0.093188
Butte County,0.218627,0.236813,0.223412,0.239216,0.219477,0.19706,0.210191,0.19494,0.166765,0.157167,...,0.162983,0.159131,0.154385,0.149906,0.143595,0.13542,0.121036,0.129551,0.136061,0.142797
Calaveras County,0.145191,0.150415,0.142602,0.143422,0.195582,0.146232,0.156607,0.137025,0.128154,0.114404,...,0.103761,0.098551,0.083496,0.090461,0.084597,0.091373,0.123629,0.124342,0.117074,0.106347
Colusa County,0.190338,0.1742,0.171593,0.171771,0.157285,0.154808,0.14505,0.111436,0.111081,0.104746,...,0.131371,0.144233,0.142686,0.11281,0.123574,0.113977,0.113059,0.124965,0.118625,0.082199
Contra Costa County,0.276811,0.269351,0.262295,0.258062,0.236724,0.217584,0.220294,0.217214,0.178384,0.160267,...,0.177572,0.168013,0.164843,0.155736,0.138656,0.139532,0.136085,0.146168,0.136855,0.13614
Del Norte County,0.181262,0.175924,0.189995,0.216407,0.25457,0.220321,0.210995,0.205624,0.173239,0.158828,...,0.150406,0.120676,0.093062,0.110881,0.117661,0.138243,0.156186,0.143235,0.133251,0.138736
El Dorado County,0.184956,0.181537,0.175469,0.181793,0.168411,0.13996,0.142849,0.115401,0.091346,0.097814,...,0.104008,0.104456,0.090663,0.090635,0.082154,0.085009,0.090467,0.087099,0.090442,0.086439
Fresno County,0.394752,0.388754,0.357921,0.376482,0.371279,0.329017,0.304583,0.263722,0.236246,0.255067,...,0.22794,0.20938,0.188638,0.192665,0.183231,0.204783,0.217265,0.213744,0.188827,0.167007
