In [5]:
import pandas as pd
import numpy as np
import scipy as scipy

# Data preparation for earnings

In [6]:
# Read the raw earnings table (the file is ISO-8859-1 encoded) and preview its first rows.
earnings_df_origin = pd.read_csv(
    "earnings.csv",
    encoding="ISO-8859-1",
)
earnings_df_origin.head(5)

Unnamed: 0,geo_id,fips,county,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,year
0,0500000US01001,1001,"Autauga County, Alabama",31811,31793,31957,22357,30347,44452,38203,...,31250,30306.0,34358,26839,11231,10272,11430,26279,46858,2010
1,0500000US01003,1003,"Baldwin County, Alabama",30099,33173,27342,80938,30068,41917,43444,...,23910,34506.0,37341,30968,14924,17414,14765,21440,38629,2010
2,0500000US01005,1005,"Barbour County, Alabama",23504,16167,14464,27500,20305,27630,18777,...,20179,22398.0,42264,20434,6473,10724,5972,16090,30871,2010
3,0500000US01007,1007,"Bibb County, Alabama",30453,45237,46064,34406,30288,36231,43403,...,13636,25474.0,36618,21632,16530,5114,20481,30956,33095,2010
4,0500000US01009,1009,"Blount County, Alabama",30739,26909,25610,54375,33055,32084,33198,...,27553,31931.0,36631,29958,7535,10050,7282,25326,34838,2010


Drop the `mgmt` column because it is far too sparse, drop `geo_id`, and sort the rows by county and year.

In [7]:
# Remove the `mgmt` and `geo_id` columns, then order the rows by county and, within a
# county, by year (both ascending).
# `DataFrame.sort` is deprecated (this cell used to emit that warning) and was later
# removed from pandas; `sort_values(by=...)` is the supported replacement.
earnings_df = (
    earnings_df_origin
    .drop(['mgmt', 'geo_id'], axis=1)
    .sort_values(by=['county', 'year'], ascending=[True, True])
)


sort(columns=....) is deprecated, use sort_values(by=.....)



In [8]:
# Preview the columns that sit between the first two columns and the last one,
# i.e. the slice that the next cell converts to numeric values.
earnings_df.iloc[:, 2:-1].head()

Unnamed: 0,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,retail_trade,transport_warehouse_utilities,transport_warehouse,...,prof_sci_tech,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin
2316,25653,39000,39000,,23063,32115,11667,20713,40949,37679,...,32917,21623,24720.0,30017,21711,6757,21154,5266,17226,30067
5459,26752,13938,13938,,25174,31623,35208,18554,48882,50944,...,21835,21731,24928.0,30286,23225,9638,22793,8574,32470,30099
8602,28031,33229,24167,47250.0,26436,31135,39545,16729,49813,54265,...,23438,27031,25661.0,24250,26232,9475,19297,9139,31778,22440
11745,28404,33214,26250,,25507,32891,40125,17593,52669,53665,...,33571,21295,25650.0,30995,23375,11794,7361,11956,30801,21776
14888,28382,33177,24750,50833.0,32171,33154,39438,16589,55077,56538,...,33929,13750,25838.0,26011,25521,13769,18214,13692,19358,25536


Convert the earnings columns to numeric values (entries that cannot be parsed become NaN)

In [9]:
# Coerce every earnings column (all columns from position 2 up to, but excluding, the
# last one) to a numeric dtype; values that cannot be parsed become NaN.
#
# The columns are assigned by label rather than through `.iloc[:, 2:-1] = ...`:
# recent pandas versions treat a full-column positional assignment as an in-place
# write that keeps the existing dtype, so object columns would stay object and the
# conversion would silently not take effect.
earning_cols = list(earnings_df.columns[2:-1])
earnings_df[earning_cols] = earnings_df[earning_cols].apply(pd.to_numeric, errors='coerce')
earnings_df.dtypes

fips                                 int64
county                              object
total_med                            int64
total_agri_fish_mine               float64
agri_fish_hunt                     float64
mining_quarrying_oilgas_extract    float64
construction                       float64
manufacturing                      float64
wholesale_trade                    float64
retail_trade                       float64
transport_warehouse_utilities      float64
transport_warehouse                float64
utilities                          float64
information                        float64
fin_ins_realest                    float64
fin_ins                            float64
realest_rent_lease                 float64
total_prof_sci_mgmt_admin          float64
prof_sci_tech                      float64
admin_sup                          float64
total_edu_health_social            float64
edu_serv                           float64
health_social                      float64
total_arts_

Handle the resulting NaN values through interpolation

In [10]:
# Fill missing values with pandas' default interpolation, then record how many NaNs
# each column holds before and after so the two can be compared.
# NOTE(review): the interpolation runs down the whole sorted frame, so a gap at the
# edge of one county is presumably filled using a neighbouring county's rows —
# confirm this is acceptable.
earnings_df_inter = earnings_df.interpolate()
count_nan_before_inter = earnings_df.isnull().sum()
count_nan_after_inter = earnings_df_inter.isnull().sum()

In [11]:
# Per-column NaN counts of the frame before interpolation.
print(count_nan_before_inter)

fips                                  0
county                                0
total_med                             0
total_agri_fish_mine                154
agri_fish_hunt                      381
mining_quarrying_oilgas_extract    6311
construction                         47
manufacturing                       303
wholesale_trade                     946
retail_trade                         55
transport_warehouse_utilities        85
transport_warehouse                 169
utilities                          1542
information                        1862
fin_ins_realest                     241
fin_ins                             486
realest_rent_lease                 2755
total_prof_sci_mgmt_admin           196
prof_sci_tech                       694
admin_sup                           794
total_edu_health_social               7
edu_serv                             21
health_social                        65
total_arts_ent_acc_food             181
arts_ent_rec                       2803


In [12]:
# Per-column NaN counts of the interpolated frame.
print(count_nan_after_inter)

fips                               0
county                             0
total_med                          0
total_agri_fish_mine               0
agri_fish_hunt                     0
mining_quarrying_oilgas_extract    2
construction                       0
manufacturing                      0
wholesale_trade                    0
retail_trade                       0
transport_warehouse_utilities      0
transport_warehouse                0
utilities                          0
information                        0
fin_ins_realest                    0
fin_ins                            0
realest_rent_lease                 0
total_prof_sci_mgmt_admin          0
prof_sci_tech                      0
admin_sup                          0
total_edu_health_social            0
edu_serv                           0
health_social                      0
total_arts_ent_acc_food            0
arts_ent_rec                       0
acc_food_serv                      0
other_ser                          0
p

In [13]:
# Preview the first rows of the interpolated frame.
earnings_df_inter.head()

Unnamed: 0,fips,county,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,retail_trade,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,year
2316,45001,"Abbeville County, South Carolina",25653,39000.0,39000.0,,23063.0,32115.0,11667.0,20713.0,...,21623.0,24720.0,30017.0,21711.0,6757.0,21154.0,5266.0,17226.0,30067.0,2010
5459,45001,"Abbeville County, South Carolina",26752,13938.0,13938.0,,25174.0,31623.0,35208.0,18554.0,...,21731.0,24928.0,30286.0,23225.0,9638.0,22793.0,8574.0,32470.0,30099.0,2011
8602,45001,"Abbeville County, South Carolina",28031,33229.0,24167.0,47250.0,26436.0,31135.0,39545.0,16729.0,...,27031.0,25661.0,24250.0,26232.0,9475.0,19297.0,9139.0,31778.0,22440.0,2012
11745,45001,"Abbeville County, South Carolina",28404,33214.0,26250.0,49041.5,25507.0,32891.0,40125.0,17593.0,...,21295.0,25650.0,30995.0,23375.0,11794.0,7361.0,11956.0,30801.0,21776.0,2013
14888,45001,"Abbeville County, South Carolina",28382,33177.0,24750.0,50833.0,32171.0,33154.0,39438.0,16589.0,...,13750.0,25838.0,26011.0,25521.0,13769.0,18214.0,13692.0,19358.0,25536.0,2014


Only two NaN values are left, so we may as well fill them by hand

In [14]:
# The default interpolation left two NaNs at the very top of
# `mining_quarrying_oilgas_extract` (the rows before the column's first observed value).
# Back-fill them from that first observed value — 47250.0, the same number that was
# previously typed in by hand at positions [0, 5] and [1, 5] — and address the column
# by name instead of by position so the fill survives column reordering.
mining_col = 'mining_quarrying_oilgas_extract'
earnings_df_inter[mining_col] = earnings_df_inter[mining_col].bfill()

# Show the first rows to confirm the fill. The bare `reindex()` call used before changed
# nothing; it only echoed the entire frame.
earnings_df_inter.head(10)

Unnamed: 0,fips,county,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,retail_trade,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,year
2316,45001,"Abbeville County, South Carolina",25653,39000.0,39000.0,47250.0,23063.0,32115.0,11667.00,20713.0,...,21623.0,24720.0,30017.0,21711.0,6757.0,21154.00,5266.0,17226.0,30067.0,2010
5459,45001,"Abbeville County, South Carolina",26752,13938.0,13938.0,47250.0,25174.0,31623.0,35208.00,18554.0,...,21731.0,24928.0,30286.0,23225.0,9638.0,22793.00,8574.0,32470.0,30099.0,2011
8602,45001,"Abbeville County, South Carolina",28031,33229.0,24167.0,47250.0,26436.0,31135.0,39545.00,16729.0,...,27031.0,25661.0,24250.0,26232.0,9475.0,19297.00,9139.0,31778.0,22440.0,2012
11745,45001,"Abbeville County, South Carolina",28404,33214.0,26250.0,49041.5,25507.0,32891.0,40125.00,17593.0,...,21295.0,25650.0,30995.0,23375.0,11794.0,7361.00,11956.0,30801.0,21776.0,2013
14888,45001,"Abbeville County, South Carolina",28382,33177.0,24750.0,50833.0,32171.0,33154.0,39438.00,16589.0,...,13750.0,25838.0,26011.0,25521.0,13769.0,18214.00,13692.0,19358.0,25536.0,2014
18030,45001,"Abbeville County, South Carolina",27767,32548.0,14625.0,51000.0,32436.0,32087.0,38043.00,14971.0,...,11065.0,26243.0,26264.0,26218.0,12036.0,7440.00,12071.0,17579.0,26591.0,2015
21172,45001,"Abbeville County, South Carolina",27086,47708.0,35694.0,50833.0,32091.0,33309.0,36118.00,15273.0,...,10114.0,25650.0,25182.0,26451.0,11798.0,7446.00,11823.0,13245.0,23904.0,2016
1113,22001,"Acadia Parish, Louisiana",25842,48478.0,31302.0,51315.0,26933.0,31895.0,32317.00,17076.0,...,25625.0,24471.0,29093.0,21181.0,11494.0,19732.00,10644.0,15186.0,25918.0,2010
4256,22001,"Acadia Parish, Louisiana",27026,51141.0,26966.0,55821.0,31002.0,32000.0,34816.00,20028.0,...,17269.0,24300.0,27742.0,21957.0,13735.0,21723.00,11750.0,21180.0,26110.0,2011
7399,22001,"Acadia Parish, Louisiana",26837,49839.0,31471.0,58714.0,31679.0,31250.0,35377.00,17231.0,...,16622.0,21683.0,25411.0,19127.0,14371.0,21528.00,12071.0,25015.0,26385.0,2012


Make a new table that contains the percentage change in each industry's earnings

In [15]:
# Start the change table as an independent copy of the interpolated frame, and keep
# the row count around for the loop that fills it in.
earnings_df_delta = earnings_df_inter.copy(deep=True)
total_rows = len(earnings_df_inter)
earnings_df_delta.head(5)

Unnamed: 0,fips,county,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,retail_trade,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,year
2316,45001,"Abbeville County, South Carolina",25653,39000.0,39000.0,47250.0,23063.0,32115.0,11667.0,20713.0,...,21623.0,24720.0,30017.0,21711.0,6757.0,21154.0,5266.0,17226.0,30067.0,2010
5459,45001,"Abbeville County, South Carolina",26752,13938.0,13938.0,47250.0,25174.0,31623.0,35208.0,18554.0,...,21731.0,24928.0,30286.0,23225.0,9638.0,22793.0,8574.0,32470.0,30099.0,2011
8602,45001,"Abbeville County, South Carolina",28031,33229.0,24167.0,47250.0,26436.0,31135.0,39545.0,16729.0,...,27031.0,25661.0,24250.0,26232.0,9475.0,19297.0,9139.0,31778.0,22440.0,2012
11745,45001,"Abbeville County, South Carolina",28404,33214.0,26250.0,49041.5,25507.0,32891.0,40125.0,17593.0,...,21295.0,25650.0,30995.0,23375.0,11794.0,7361.0,11956.0,30801.0,21776.0,2013
14888,45001,"Abbeville County, South Carolina",28382,33177.0,24750.0,50833.0,32171.0,33154.0,39438.0,16589.0,...,13750.0,25838.0,26011.0,25521.0,13769.0,18214.0,13692.0,19358.0,25536.0,2014


In [14]:
# Relative change of every earnings column with respect to the same county's previous
# row (rows are ordered by county, then year).
#
# The earlier row-by-row loop used `i % 7` to spot the first row of a county, which is
# only right when every county has exactly seven rows, and it wrote floats into the
# frame one row at a time through `.iloc`. Grouping by `fips` makes the county boundary
# explicit and computes every row at once.
value_cols = list(earnings_df_inter.columns[2:-1])   # columns between the first two and the last
previous_row = earnings_df_inter.groupby('fips')[value_cols].shift(1)
relative_change = (earnings_df_inter[value_cols] - previous_row) / previous_row

# The first row of each county has no predecessor; its change is defined as 0.
is_first_row = ~earnings_df_inter['fips'].duplicated()
relative_change.loc[is_first_row] = 0.0

# Rebuild the output from the interpolated frame so re-running this cell is harmless.
earnings_df_delta = earnings_df_inter.copy()
earnings_df_delta[value_cols] = relative_change
earnings_df_delta.head()

Unnamed: 0,fips,county,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,retail_trade,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,year
2316,45001,"Abbeville County, South Carolina",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2010
5459,45001,"Abbeville County, South Carolina",0.042841,-0.642615,-0.642615,0.0,0.091532,-0.01532,2.017742,-0.104234,...,0.004995,0.008414,0.008962,0.069734,0.426373,0.077479,0.628181,0.884941,0.001064,2011
8602,45001,"Abbeville County, South Carolina",0.04781,1.384058,0.733893,0.0,0.050131,-0.015432,0.123182,-0.098362,...,0.243891,0.029405,-0.1993,0.129473,-0.016912,-0.15338,0.065897,-0.021312,-0.25446,2012
11745,45001,"Abbeville County, South Carolina",0.013307,-0.000451,0.086192,0.037915,-0.035141,0.0564,0.014667,0.051647,...,-0.212201,-0.000429,0.278144,-0.108913,0.244749,-0.618542,0.308239,-0.030745,-0.02959,2013
14888,45001,"Abbeville County, South Carolina",-0.000775,-0.001114,-0.057143,0.03653,0.261262,0.007996,-0.017121,-0.057068,...,-0.354309,0.007329,-0.1608,0.091807,0.167458,1.474392,0.145199,-0.371514,0.172667,2014


In [15]:
# Show the first 20 rows of the change table.
earnings_df_delta.head(20)

Unnamed: 0,fips,county,total_med,total_agri_fish_mine,agri_fish_hunt,mining_quarrying_oilgas_extract,construction,manufacturing,wholesale_trade,retail_trade,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,year
2316,45001,"Abbeville County, South Carolina",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2010
5459,45001,"Abbeville County, South Carolina",0.042841,-0.642615,-0.642615,0.0,0.091532,-0.01532,2.017742,-0.104234,...,0.004995,0.008414,0.008962,0.069734,0.426373,0.077479,0.628181,0.884941,0.001064,2011
8602,45001,"Abbeville County, South Carolina",0.04781,1.384058,0.733893,0.0,0.050131,-0.015432,0.123182,-0.098362,...,0.243891,0.029405,-0.1993,0.129473,-0.016912,-0.15338,0.065897,-0.021312,-0.25446,2012
11745,45001,"Abbeville County, South Carolina",0.013307,-0.000451,0.086192,0.037915,-0.035141,0.0564,0.014667,0.051647,...,-0.212201,-0.000429,0.278144,-0.108913,0.244749,-0.618542,0.308239,-0.030745,-0.02959,2013
14888,45001,"Abbeville County, South Carolina",-0.000775,-0.001114,-0.057143,0.03653,0.261262,0.007996,-0.017121,-0.057068,...,-0.354309,0.007329,-0.1608,0.091807,0.167458,1.474392,0.145199,-0.371514,0.172667,2014
18030,45001,"Abbeville County, South Carolina",-0.021669,-0.018959,-0.409091,0.003285,0.008237,-0.032183,-0.035372,-0.097535,...,-0.195273,0.015675,0.009727,0.027311,-0.125862,-0.591523,-0.11839,-0.0919,0.041314,2015
21172,45001,"Abbeville County, South Carolina",-0.024526,0.465774,1.440615,-0.003275,-0.010636,0.038084,-0.050601,0.020172,...,-0.085947,-0.022597,-0.041197,0.008887,-0.019774,0.000806,-0.020545,-0.246544,-0.101049,2016
1113,22001,"Acadia Parish, Louisiana",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2010
4256,22001,"Acadia Parish, Louisiana",0.045817,0.054932,-0.138522,0.087811,0.151079,0.003292,0.077328,0.172874,...,-0.326088,-0.006988,-0.046437,0.036637,0.194971,0.100902,0.103908,0.394706,0.007408,2011
7399,22001,"Acadia Parish, Louisiana",-0.006993,-0.025459,0.167062,0.051826,0.021837,-0.023438,0.016113,-0.139654,...,-0.037466,-0.107695,-0.084024,-0.128888,0.046305,-0.008977,0.027319,0.181067,0.010532,2012


# Visualisation of Industry Earnings

In [33]:
# Reload the earnings table from "earnings_cleaned.csv", rebinding `earnings_df_inter`.
# NOTE(review): no visible cell writes this file — presumably it was exported in a cell
# that is not shown; confirm before relying on a fresh Restart & Run All.
earnings_df_inter = pd.read_csv("earnings_cleaned.csv", encoding = "ISO-8859-1")

In [34]:
import os

import plotly
import plotly.plotly as py   # `py.iplot` is called by every map cell below
import plotly.figure_factory as ff

# Plotly credentials are read from the environment so that no secret is stored in the
# notebook. Set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel; a missing
# variable raises a KeyError naming it.
# NOTE(review): the API key that used to be hard-coded here has been exposed and should
# be revoked/regenerated.
plotly.tools.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

In [125]:
# Load "thmdata.csv" into `final_data_set`; the THM and finance maps below are built from it.
final_data_set = pd.read_csv("thmdata.csv", encoding = "ISO-8859-1")

In [126]:
# Show the first 40 rows of the loaded table.
final_data_set.head(40)

Unnamed: 0.1,Unnamed: 0,fips,year,uranium_avg_excess,arsenic_avg_excess,dehp_avg_excess,nitrates_avg_excess,haa_avg_excess,thm_avg_excess,county,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,maxcontam
0,1,12001,2011,0.0,0.0,0.0,0.0,0.0,0.0,"Alachua County, Florida",...,0.05194,0.016826,0.034663,0.011383,0.000674,-0.135286,0.033977,0.004307,0.031324,0.0
1,2,12001,2012,0.0,0.0,0.0,0.0,0.0,0.008295,"Alachua County, Florida",...,-0.027928,-0.001478,-0.002884,0.002172,0.041534,-0.0152,0.048293,0.006389,0.038597,0.09125
2,3,12001,2013,0.0,0.0,0.0,0.0,0.0,0.0,"Alachua County, Florida",...,-0.000795,-0.012974,-0.007783,-0.017003,0.027083,-0.005419,0.041882,0.00487,-0.024155,0.0
3,4,12001,2014,0.0,0.0,0.0,0.0,0.038956,0.054842,"Alachua County, Florida",...,0.028974,-0.01043,-0.001818,-0.030789,-0.10532,-0.037738,-0.124231,-0.012786,0.014331,0.822625
4,5,12001,2015,0.0,0.0,0.0,0.0,0.031155,0.022393,"Alachua County, Florida",...,0.018117,0.005677,0.001911,0.015601,-0.039762,-0.17229,-0.009427,-0.064818,-0.068228,0.296167
5,6,12001,2016,0.0,0.0,0.0,0.0,0.001136,0.021511,"Alachua County, Florida",...,-0.070307,0.002379,-0.024089,0.018637,0.075792,0.365933,0.009506,0.062037,0.008131,0.236625
6,13,12005,2011,0.0,0.0,0.0,0.0,0.0,0.0,"Bay County, Florida",...,0.054243,0.021031,0.090012,-0.026663,0.045365,0.166163,0.043912,-0.00535,-0.007659,0.0
7,14,12005,2012,0.0,0.0,0.0,0.0,0.0,0.044437,"Bay County, Florida",...,-0.0798,-0.018368,0.003457,-0.045468,-0.038474,-0.184729,-0.03128,-0.006154,-0.007221,0.444375
8,15,12005,2013,0.0,0.0,0.0,0.0,0.084574,0.028125,"Bay County, Florida",...,-0.048586,-0.007067,-0.010642,0.044141,-0.007012,-0.027655,-0.00677,-0.057293,0.001858,0.500833
9,16,12005,2014,0.0,0.0,0.0,0.0,0.0,0.0,"Bay County, Florida",...,0.043604,-0.001237,-0.019727,0.005323,0.031194,0.132857,0.014851,0.076439,-0.034747,0.0


In [127]:
# Split `final_data_set` into one frame per year, 2011 through 2016.
(final_2011, final_2012, final_2013,
 final_2014, final_2015, final_2016) = (
    final_data_set.loc[final_data_set['year'] == yr]
    for yr in range(2011, 2017)
)

In [128]:
# Within each yearly frame keep only the rows whose `thm_avg_excess` is strictly positive.
(final_2011, final_2012, final_2013,
 final_2014, final_2015, final_2016) = (
    yearly.loc[yearly['thm_avg_excess'] > 0]
    for yearly in (final_2011, final_2012, final_2013,
                   final_2014, final_2015, final_2016)
)

In [140]:
# County choropleth of `thm_avg_excess` for the rows currently held in `final_2011`.
my_fips = final_2011['fips'].astype(str).tolist()
thm_avg_excess = final_2011['thm_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=thm_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Thm level mg/L',
    title='Thm level map 2011',
)
py.iplot(fig, filename='Thm level map 2011', format='png')

In [148]:
# County choropleth of `fin_ins_realest` for the rows currently held in `final_2011`.
# NOTE(review): the title string is misspelled ("Finiance"); it is reproduced unchanged here.
my_fips = final_2011['fips'].astype(str).tolist()
fin_ins_realest = final_2011['fin_ins_realest'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=fin_ins_realest,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Percentage earning increase %',
    title='Finiance and Realestate Earning 2011',
)
py.iplot(fig, filename='Fin_ins_realest map 2011', format='png')

In [143]:
# County choropleth of `thm_avg_excess` for the rows currently held in `final_2012`.
my_fips = final_2012['fips'].astype(str).tolist()
thm_avg_excess = final_2012['thm_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=thm_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Thm level mg/L',
    title='Thm level map 2012',
)
py.iplot(fig, filename='Thm level map 2012', format='png')

In [149]:
# County choropleth of `fin_ins_realest` for the rows currently held in `final_2012`.
# NOTE(review): the title string is misspelled ("Finiance"); it is reproduced unchanged here.
my_fips = final_2012['fips'].astype(str).tolist()
fin_ins_realest = final_2012['fin_ins_realest'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=fin_ins_realest,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Percentage earning increase %',
    title='Finiance and Realestate Earning 2012',
)
py.iplot(fig, filename='Fin_ins_realest map 2012', format='png')

In [144]:
# County choropleth of `thm_avg_excess` for the rows currently held in `final_2013`.
my_fips = final_2013['fips'].astype(str).tolist()
thm_avg_excess = final_2013['thm_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=thm_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Thm level mg/L',
    title='Thm level map 2013',
)
py.iplot(fig, filename='Thm level map 2013', format='png')

In [150]:
# County choropleth of `fin_ins_realest` for the rows currently held in `final_2013`.
# NOTE(review): the title string is misspelled ("Finiance"); it is reproduced unchanged here.
my_fips = final_2013['fips'].astype(str).tolist()
fin_ins_realest = final_2013['fin_ins_realest'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=fin_ins_realest,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Percentage earning increase %',
    title='Finiance and Realestate Earning 2013',
)
py.iplot(fig, filename='Fin_ins_realest map 2013', format='png')

In [145]:
# County choropleth of `thm_avg_excess` for the rows currently held in `final_2014`.
my_fips = final_2014['fips'].astype(str).tolist()
thm_avg_excess = final_2014['thm_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=thm_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Thm level mg/L',
    title='Thm level map 2014',
)
py.iplot(fig, filename='Thm level map 2014', format='png')

In [151]:
# County choropleth of `fin_ins_realest` for the rows currently held in `final_2014`.
# NOTE(review): the title string is misspelled ("Finiance"); it is reproduced unchanged here.
my_fips = final_2014['fips'].astype(str).tolist()
fin_ins_realest = final_2014['fin_ins_realest'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=fin_ins_realest,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Percentage earning increase %',
    title='Finiance and Realestate Earning 2014',
)
py.iplot(fig, filename='Fin_ins_realest map 2014', format='png')

In [146]:
# County choropleth of `thm_avg_excess` for the rows currently held in `final_2015`.
my_fips = final_2015['fips'].astype(str).tolist()
thm_avg_excess = final_2015['thm_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=thm_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Thm level mg/L',
    title='Thm level map 2015',
)
py.iplot(fig, filename='Thm level map 2015', format='png')

In [152]:
# County choropleth of `fin_ins_realest` for the rows currently held in `final_2015`.
# NOTE(review): the title string is misspelled ("Finiance"); it is reproduced unchanged here.
my_fips = final_2015['fips'].astype(str).tolist()
fin_ins_realest = final_2015['fin_ins_realest'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=fin_ins_realest,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Percentage earning increase %',
    title='Finiance and Realestate Earning 2015',
)
py.iplot(fig, filename='Fin_ins_realest map 2015', format='png')

In [147]:
# County choropleth of `thm_avg_excess` for the rows currently held in `final_2016`.
my_fips = final_2016['fips'].astype(str).tolist()
thm_avg_excess = final_2016['thm_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=thm_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Thm level mg/L',
    title='Thm level map 2016',
)
py.iplot(fig, filename='Thm level map 2016', format='png')

In [153]:
# County choropleth of `fin_ins_realest` for the rows currently held in `final_2016`.
# NOTE(review): the title string is misspelled ("Finiance"); it is reproduced unchanged here.
my_fips = final_2016['fips'].astype(str).tolist()
fin_ins_realest = final_2016['fin_ins_realest'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=fin_ins_realest,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Percentage earning increase %',
    title='Finiance and Realestate Earning 2016',
)
py.iplot(fig, filename='Fin_ins_realest map 2016', format='png')

In [168]:
# Rebind `final_data_set` to the contents of "haadata.csv"; the HAA and retail-trade
# maps below are built from it.
final_data_set = pd.read_csv("haadata.csv", encoding = "ISO-8859-1")

In [169]:
# Rebuild the per-year frames (2011 through 2016) from `final_data_set`, keeping only
# the rows whose `haa_avg_excess` is strictly positive.
(final_2011, final_2012, final_2013,
 final_2014, final_2015, final_2016) = (
    final_data_set.loc[
        (final_data_set['year'] == yr) & (final_data_set['haa_avg_excess'] > 0)
    ]
    for yr in range(2011, 2017)
)

In [170]:
# Show up to the first 40 rows of the 2016 frame.
final_2016.head(40)

Unnamed: 0.1,Unnamed: 0,fips,year,uranium_avg_excess,arsenic_avg_excess,dehp_avg_excess,nitrates_avg_excess,haa_avg_excess,thm_avg_excess,county,...,admin_sup,total_edu_health_social,edu_serv,health_social,total_arts_ent_acc_food,arts_ent_rec,acc_food_serv,other_ser,pub_admin,maxcontam
5,6,12001,2016,0.0,0.0,0.0,0.0,0.001136,0.021511,"Alachua County, Florida",...,-0.070307,0.002379,-0.024089,0.018637,0.075792,0.365933,0.009506,0.062037,0.008131,0.236625
17,30,12009,2016,0.0,0.0,0.0,0.0,0.010458,0.086619,"Brevard County, Florida",...,0.0106,-0.015773,-0.01561,-0.00672,0.014367,0.023002,0.000485,0.012593,0.026936,1.575
89,162,12055,2016,0.0,0.0,0.0,0.0,0.021611,0.003368,"Highlands County, Florida",...,-0.027558,-0.030824,-0.107237,-0.008267,-0.104785,-0.048951,-0.094139,-0.08529,0.023789,0.198833
137,252,12085,2016,0.0,0.0,0.0,0.0,0.0426,0.006175,"Martin County, Florida",...,0.000829,-0.042627,-0.022404,-0.041,-0.009557,-0.085748,-0.021896,0.075216,-0.040142,0.426
179,306,12101,2016,0.0,0.0,0.0,0.0,0.005557,0.0,"Pasco County, Florida",...,-0.01398,-0.014117,-0.01025,-0.009395,-0.001668,0.003444,-0.001918,-0.016504,-0.012506,0.261167
197,330,12109,2016,0.0,0.0,0.0,0.0,0.002922,0.0,"St. Johns County, Florida",...,0.006429,0.014886,0.00638,0.07343,0.069814,0.171265,0.045164,0.027153,0.02603,0.049667
263,954,19183,2016,0.0,0.0,0.0,0.0,0.003519,0.0,"Washington County, Iowa",...,-0.135767,0.01936,-0.016597,0.038428,0.033841,-0.094519,0.014689,-0.143901,-0.005923,0.031667
305,1044,20015,2016,0.0,0.0,0.0,0.0,0.019518,0.000987,"Butler County, Kansas",...,-0.094112,-0.009241,0.002761,-0.010058,0.05278,0.873659,-0.141589,-0.051313,-0.026768,0.370833
317,1056,20019,2016,0.0,0.0,0.0,0.0,0.105,0.02625,"Chautauqua County, Kansas",...,0.115791,0.009101,0.080779,0.213121,-0.538945,-0.082516,-0.360486,-0.099978,-0.018128,0.5
359,1146,20049,2016,0.0,0.0,0.0,0.0,0.083333,0.05625,"Elk County, Kansas",...,-0.728088,-0.015799,0.218629,-0.368574,0.082963,-0.137633,0.102196,-0.440016,-0.017674,0.483333


In [171]:
# County choropleth of `haa_avg_excess` for the rows currently held in `final_2011`.
my_fips = final_2011['fips'].astype(str).tolist()
haa_avg_excess = final_2011['haa_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=haa_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Haa level mg/L %',
    title='Haa Level map 2011',
)
py.iplot(fig, filename='Haa level map 2011', format='png')

In [179]:
# Look at the largest 2011 `retail_trade` value (useful when choosing map bin edges).
my_fips = final_2011['fips'].astype(str).tolist()
retail_trade = final_2011['retail_trade'].tolist()
max(retail_trade)

0.49146203899999996

In [181]:
# County choropleth of `retail_trade` for the rows currently held in `final_2011`.
my_fips = final_2011['fips'].astype(str).tolist()
retail_trade = final_2011['retail_trade'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=retail_trade,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Retail trade % increase',
    title='Retail trade earning map 2011',
)
py.iplot(fig, filename='retail trade 2011', format='png')

In [158]:
# County choropleth of `haa_avg_excess` for the rows currently held in `final_2012`.
my_fips = final_2012['fips'].astype(str).tolist()
haa_avg_excess = final_2012['haa_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=haa_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Haa level mg/L %',
    title='Haa Level map 2012',
)
py.iplot(fig, filename='Haa level map 2012', format='png')

In [182]:
# County choropleth of `retail_trade` for the rows currently held in `final_2012`.
#
# The bin edges now match the 2011 retail-trade map. The previous edges
# ([0, 0.025, ..., 0.125]) were the ones used for the contaminant-level maps; with
# them every negative change fell into a single "< 0" bin and the map could not be
# compared with the 2011 one.
my_fips = final_2012['fips'].astype(str).values.tolist()
retail_trade = final_2012['retail_trade'].values.tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=retail_trade,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Retail trade % increase',
    title='Retail trade earning map 2012',
)
py.iplot(fig, filename='retail trade 2012', format='png')

In [159]:
# County choropleth of `haa_avg_excess` for the rows currently held in `final_2013`.
my_fips = final_2013['fips'].astype(str).tolist()
haa_avg_excess = final_2013['haa_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=haa_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Haa level mg/L %',
    title='Haa Level map 2013',
)
py.iplot(fig, filename='Haa level map 2013', format='png')

In [183]:
# County choropleth of `retail_trade` for the rows currently held in `final_2013`.
#
# The bin edges now match the 2011 retail-trade map. The previous edges
# ([0, 0.025, ..., 0.125]) were the ones used for the contaminant-level maps; with
# them every negative change fell into a single "< 0" bin and the map could not be
# compared with the 2011 one.
my_fips = final_2013['fips'].astype(str).values.tolist()
retail_trade = final_2013['retail_trade'].values.tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=retail_trade,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Retail trade % increase',
    title='Retail trade earning map 2013',
)
py.iplot(fig, filename='retail trade 2013', format='png')

In [160]:
# County choropleth of `haa_avg_excess` for the rows currently held in `final_2014`.
my_fips = final_2014['fips'].astype(str).tolist()
haa_avg_excess = final_2014['haa_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=haa_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Haa level mg/L %',
    title='Haa Level map 2014',
)
py.iplot(fig, filename='Haa level map 2014', format='png')

In [184]:
# County choropleth of `retail_trade` for the rows currently held in `final_2014`.
#
# The bin edges now match the 2011 retail-trade map. The previous edges
# ([0, 0.025, ..., 0.125]) were the ones used for the contaminant-level maps; with
# them every negative change fell into a single "< 0" bin and the map could not be
# compared with the 2011 one.
my_fips = final_2014['fips'].astype(str).values.tolist()
retail_trade = final_2014['retail_trade'].values.tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=retail_trade,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Retail trade % increase',
    title='Retail trade earning map 2014',
)
py.iplot(fig, filename='retail trade 2014', format='png')

In [161]:
# County choropleth of `haa_avg_excess` for the rows currently held in `final_2015`.
my_fips = final_2015['fips'].astype(str).tolist()
haa_avg_excess = final_2015['haa_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=haa_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Haa level mg/L %',
    title='Haa Level map 2015',
)
py.iplot(fig, filename='Haa level map 2015', format='png')

In [185]:
# County choropleth of `retail_trade` for the rows currently held in `final_2015`.
#
# The bin edges now match the 2011 retail-trade map. The previous edges
# ([0, 0.025, ..., 0.125]) were the ones used for the contaminant-level maps; with
# them every negative change fell into a single "< 0" bin and the map could not be
# compared with the 2011 one.
my_fips = final_2015['fips'].astype(str).values.tolist()
retail_trade = final_2015['retail_trade'].values.tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=retail_trade,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Retail trade % increase',
    title='Retail trade earning map 2015',
)
py.iplot(fig, filename='retail trade 2015', format='png')

In [162]:
# County choropleth of `haa_avg_excess` for the rows currently held in `final_2016`.
my_fips = final_2016['fips'].astype(str).tolist()
haa_avg_excess = final_2016['haa_avg_excess'].tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=haa_avg_excess,
    binning_endpoints=[0, 0.025, 0.05, 0.075, 0.1, 0.125],
    legend_title='Haa level mg/L %',
    title='Haa Level map 2016',
)
py.iplot(fig, filename='Haa level map 2016', format='png')

In [186]:
# County choropleth of `retail_trade` for the rows currently held in `final_2016`.
#
# The bin edges now match the 2011 retail-trade map. The previous edges
# ([0, 0.025, ..., 0.125]) were the ones used for the contaminant-level maps; with
# them every negative change fell into a single "< 0" bin and the map could not be
# compared with the 2011 one.
my_fips = final_2016['fips'].astype(str).values.tolist()
retail_trade = final_2016['retail_trade'].values.tolist()
fig = ff.create_choropleth(
    fips=my_fips,
    values=retail_trade,
    binning_endpoints=[-0.1, -0.05, 0, 0.05, 0.1, 0.15],
    legend_title='Retail trade % increase',
    title='Retail trade earning map 2016',
)
py.iplot(fig, filename='retail trade 2016', format='png')