In [5]:
'''
File name: project.ipynb
Author: ..., Mohamed Ndoye, Raphael Strebel
Date created: 03/11/2019
Date last modified: ...
Python Version: 3.7.4
''';

<a id="up"></a>
# Food Inspections in Chicago

 - [Load Databases](#load-databases)
 - [Basic Statistics](#basic-stats)

In [1]:
# Useful reference: https://www.sustainabilist.com/blog/chicago-data-analysis-a-internship-project

import pandas as pd
from utils import constants
from utils import clean_database

# Set auto-reload 
%load_ext autoreload
%autoreload 2

<a id = 'load-databases'></a>
## Load Databases

In this section we load and clean the databases.

[Table of Contents](#up)

In [6]:
# Read the food-inspections CSV. The file's own header row (row 0) is discarded
# and the columns are labelled with the names listed in `constants`.
# NOTE(review): `error_bad_lines=False` makes pandas drop malformed rows instead
# of raising. The keyword is deprecated since pandas 1.3 (replaced by
# `on_bad_lines`) and removed in 2.0 -- revisit when upgrading pandas.
food_inspections_DF = pd.read_csv(
    constants.FOOD_INSPECTIONS_PATH,
    sep=',',
    header=0,
    names=constants.FOOD_INSPECTIONS_COL_NAMES,
    index_col=None,
    error_bad_lines=False,
)

In [7]:
# Preview the first five rows to sanity-check the loaded columns.
food_inspections_DF.head(n=5)

Unnamed: 0,inspection_id,DBA_name,AKA_name,license_num,facility_type,risk,address,city,state,zip,...,result,violations,lat,lng,location,historical_wards,zip_codes,community_areas,census_tracts,wards
0,2320830,"THE HOXTON, CHICAGO","THE HOXTON, CHICAGO",2694640.0,Restaurant,Risk 2 (Medium),200 N GREEN ST,CHICAGO,IL,60607.0,...,Pass,36. THERMOMETERS PROVIDED & ACCURATE - Comment...,41.885699,-87.648789,"{'latitude': '-87.64878908937915', 'longitude'...",,,,,
1,2320831,OGDEN PLAZA INC.,OGDEN PLAZA INC.,2475982.0,Grocery Store,Risk 3 (Low),3459 W OGDEN AVE,CHICAGO,IL,60623.0,...,Out of Business,,41.855266,-87.712402,"{'latitude': '-87.71240156240032', 'longitude'...",,,,,
2,2320829,PLAZA FOOD AND LIQUOR,PLAZA FOOD AND LIQUOR,2689756.0,Grocery Store,Risk 3 (Low),3455-3459 S OGDEN AVE,CHICAGO,IL,60623.0,...,Not Ready,,,,,,,,,
3,2320813,PLAZA FOOD AND LIQUOR,PLAZA FOOD AND LIQUOR,2689757.0,Grocery Store,Risk 3 (Low),3455-3459 S OGDEN AVE,CHICAGO,IL,60623.0,...,Fail,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,,,,,,,,
4,2320757,GADS HILL CENTER,GADS HILL CENTER,2698627.0,Daycare Above and Under 2 Years,Risk 1 (High),4255-4259 S ARCHER AVE,CHICAGO,IL,60632.0,...,Fail,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,41.816005,-87.700893,"{'latitude': '-87.70089338917239', 'longitude'...",,,,,


In [8]:
# In the preview above, the last five columns are NaN in every displayed row.
# Run the frame through the project helper, which (judging by its name) removes
# columns containing only NaN -- TODO confirm against `utils.clean_database`.
food_inspections_DF = food_inspections_DF.pipe(clean_database.drop_all_nan_col)

In [9]:
# Read the socio-economic indicators CSV. The file's own header row (row 0) is
# discarded and the columns are labelled with the names listed in `constants`.
# NOTE(review): `error_bad_lines=False` makes pandas drop malformed rows instead
# of raising. The keyword is deprecated since pandas 1.3 (replaced by
# `on_bad_lines`) and removed in 2.0 -- revisit when upgrading pandas.
socio_economic_DF = pd.read_csv(
    constants.SOCIO_ECONOMIC_INDICATORS_PATH,
    sep=',',
    header=0,
    names=constants.SOCIO_ECONOMIC_COL_NAMES,
    index_col=None,
    error_bad_lines=False,
)

In [10]:
# Preview the first five rows to sanity-check the loaded columns.
socio_economic_DF.head(n=5)

Unnamed: 0,community_area_num,community_area_name,housing_crowded_perc,housholds_below_poverty_perc,aged_16_or_more_unemployed_perc,aged_25_or_more_without_high_school_diploma_perc,aged_under_18_or_over_64_perc,per_capita_income,hardship_idx
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0
2,3.0,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20.0
3,4.0,Lincoln Square,3.4,10.9,8.2,13.4,25.5,37524,17.0
4,5.0,North Center,0.3,7.5,5.2,4.5,26.2,57123,6.0


In [11]:
# Read the life-expectancy CSV. The file's own header row (row 0) is discarded
# and the columns are labelled with the names listed in `constants`.
# NOTE(review): `error_bad_lines=False` makes pandas drop malformed rows instead
# of raising. The keyword is deprecated since pandas 1.3 (replaced by
# `on_bad_lines`) and removed in 2.0 -- revisit when upgrading pandas.
life_expectancy_DF = pd.read_csv(
    constants.LIFE_EXPECTANCY_PATH,
    sep=',',
    header=0,
    names=constants.LIFE_EXPECTANCY_COL_NAMES,
    index_col=None,
    error_bad_lines=False,
)

In [12]:
# Preview the first five rows to sanity-check the loaded columns.
life_expectancy_DF.head(n=5)

Unnamed: 0,community_area_num,community_area_name,life_exp_1990,lower_95_perc_CI_1990,upper_95_perc_CI_1990,life_exp_2000,lower_95_perc_CI_2000,upper_95_perc_CI_2000,life_exp_2010,lower_95_perc_CI_2010,upper_95_perc_CI_2010
0,1.0,Rogers Park,70.9,69.9,71.9,73.1,72.2,74.1,77.3,76.3,78.2
1,2.0,West Ridge,76.9,76.1,77.8,78.1,77.3,78.8,80.3,79.5,81.1
2,3.0,Uptown,64.0,63.1,64.9,71.7,70.8,72.7,76.0,75.1,76.9
3,4.0,Lincoln Square,74.2,73.1,75.4,76.8,75.8,77.8,80.5,79.3,81.6
4,5.0,North Center,73.4,72.1,74.7,77.9,76.6,79.1,81.5,80.1,82.8


<a id = 'basic-stats'></a>
## Basic Statistics

We report some statistics on the various dataframes.

[Table of Contents](#up)