# Analysis of Eviction in US Counties & ML Model to Predict Eviction.

Importing Libraries
----

In [64]:
import pandas as pd


# Optional: raise the pandas column display limit to 25 so that every column of a wide frame is shown.
#pd.options.display.max_columns = 25
#pd.reset_option("^display")

Importing Dataset - counties.csv
--------------------------------

Data Source :
>In April 2018, the first-ever data set about evictions across 48 states and the District of Columbia was released by Eviction Lab—Desmond’s research group at Princeton University. To date, the Lab had collected 83 million records and it shows that in 2016

In [65]:
# Load the county-level eviction records; the relative path means the CSV must sit in the working directory.
evic_data = pd.read_csv("counties.csv")

Understanding the dimensions and details of the dataset
---------

In [66]:
# Preview the first five records as a quick sanity check of the load.

evic_data.head()

Unnamed: 0,GEOID,year,name,parent-location,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,...,pct-hispanic,pct-am-ind,pct-asian,pct-nh-pi,pct-multiple,pct-other,eviction-filings,evictions,eviction-rate,eviction-filing-rate
0,54001,2000,Barbour County,West Virginia,15557,22.56,1308,21.36,330,24729,...,0.47,0.71,0.26,0.01,1.01,0.01,3.0,3.0,0.23,0.23
1,54001,2001,Barbour County,West Virginia,15557,22.56,1331,21.36,330,24729,...,0.47,0.71,0.26,0.01,1.01,0.01,3.0,2.0,0.15,0.23
2,54001,2002,Barbour County,West Virginia,15557,22.56,1354,21.36,330,24729,...,0.47,0.71,0.26,0.01,1.01,0.01,0.0,0.0,0.0,0.0
3,54001,2003,Barbour County,West Virginia,15557,22.56,1377,21.36,330,24729,...,0.47,0.71,0.26,0.01,1.01,0.01,1.0,1.0,0.07,0.07
4,54001,2004,Barbour County,West Virginia,15557,22.56,1400,21.36,330,24729,...,0.47,0.71,0.26,0.01,1.01,0.01,10.0,10.0,0.71,0.71


In [63]:
# Show the current column display limit used when pandas renders a frame.
print(pd.get_option("display.max_columns"))

20


In [58]:
# Report the dataset dimensions: rows are observations, columns are attributes.
print('Total no. of. records/observations:', len(evic_data.index))
print('Total no. of. attributes/variables:', len(evic_data.columns))

Total no. of. records/observations: 935
Total no. of. attributes/variables: 24


>>Renaming the columns
>>-------------------
>>>Replacing '-' with '_'

In [40]:
# Map every column label to the same label with hyphens swapped for underscores,
# then apply the mapping in a single rename.
columns = evic_data.columns
renames = {col: col.replace('-', '_') for col in columns}

evic_data = evic_data.rename(columns=renames)

In [41]:
# Keep the column labels as a plain Python list for later positional look-ups.
Col_list = list(evic_data.columns)

Col_list

['GEOID',
 'year',
 'name',
 'parent_location',
 'population',
 'poverty_rate',
 'renter_occupied_households',
 'pct_renter_occupied',
 'median_gross_rent',
 'median_household_income',
 'median_property_value',
 'rent_burden',
 'pct_white',
 'pct_af_am',
 'pct_hispanic',
 'pct_am_ind',
 'pct_asian',
 'pct_nh_pi',
 'pct_multiple',
 'pct_other',
 'eviction_filings',
 'evictions',
 'eviction_rate',
 'eviction_filing_rate']

In [42]:
# Data type of every column,
# sorted by dtype so that columns of the same type are listed together.

evic_data.dtypes.sort_values()

GEOID                           int64
year                            int64
population                      int64
renter_occupied_households      int64
median_gross_rent               int64
median_household_income         int64
median_property_value           int64
evictions                     float64
eviction_filings              float64
pct_other                     float64
pct_multiple                  float64
pct_nh_pi                     float64
pct_asian                     float64
pct_am_ind                    float64
rent_burden                   float64
pct_af_am                     float64
pct_white                     float64
eviction_rate                 float64
pct_renter_occupied           float64
poverty_rate                  float64
pct_hispanic                  float64
eviction_filing_rate          float64
parent_location                object
name                           object
dtype: object

>We have 7 int type, 15 float type and 2 object type variables.

Data Dictionary:
----

In [43]:
# Human-readable description of every column, in the same order as the
# DataFrame's columns. The entries are paired with column names by position,
# so the order here must not change; each entry is labelled with its column.
col_meaning_list = [
    # GEOID
    "Census FIPS code",
    # year (fixed: the original text was missing the word "year")
    "The year to which the recorded information belongs.",
    # name
    "Census location name (modified for display on map)",
    # parent_location
    "Parent location of given geography.- USA for states, state for counties and cities, counties for tracts and block groups",
    # population
    "Total population of the county for the corresponding year.",
    # poverty_rate
    "% of the population with income in the past 12 months below the poverty level",
    # renter_occupied_households
    "Interpolated count of renter-occupied households - Based on Census and ESRI Business Analyst demographic estimates",
    # pct_renter_occupied
    "% of occupied housing units that are renter-occupied. - NOTE: This is not based off of the interpolated renter-occupied-households variable",
    # median_gross_rent
    "Median gross rent",
    # median_household_income
    "Median household income",
    # median_property_value
    "Median property value",
    # rent_burden
    "Median gross rent as a percentage of household income, max is 50% representing >= 50%",
    # pct_white
    "% population that is White alone and not Hispanic or Latino",
    # pct_af_am
    "% population that is Black or African American alone and not Hispanic or Latino",
    # pct_hispanic
    "% population that is of Hispanic or Latino origin",
    # pct_am_ind
    "% population that is American Indian and Alaska Native alone and not Hispanic or Latino",
    # pct_asian
    "% population that is Asian alone and not Hispanic or Latino",
    # pct_nh_pi
    "% population that is Native Hawaiian and Other Pacific Islander alone and not Hispanic or Latino",
    # pct_multiple
    "% population that is two or more races and not Hispanic or Latino",
    # pct_other
    "% population that is other race alone and not Hispanic or Latino",
    # eviction_filings
    "All eviction cases filed in an area, including multiple cases filed against the same address in the same year",
    # evictions
    "Number of eviction judgments in which renters were ordered to leave in a given area and year  - Only counts a single address which received an eviction judgment per year",
    # eviction_rate
    "Ratio of the number of renter-occupied households in an area that received an eviction judgement in which renters were ordered to leave  - Only counts a single address per year which received an eviction judgment",
    # eviction_filing_rate
    "Ratio of the number of evictions filed in an area over the number of renter-occupied homes in that area  - Counts all eviction cases filed in an area, including multiple cases filed against the same address in the same year",
]

In [45]:
# Build the data dictionary: column name -> human-readable description.
# Names and descriptions are paired purely by position, so a length mismatch
# would attach descriptions to the wrong columns; fail loudly instead of
# silently ignoring surplus entries.
if len(Col_list) != len(col_meaning_list):
    raise ValueError(
        "Col_list has {} entries but col_meaning_list has {}".format(
            len(Col_list), len(col_meaning_list)
        )
    )

data_dict = dict(zip(Col_list, col_meaning_list))

data_dict

{'GEOID': 'Census FIPS code',
 'year': 'The of which the recorded information belongs to.',
 'name': 'Census location name (modified for display on map)',
 'parent_location': 'Parent location of given geography.- USA for states, state for counties and cities, counties for tracts and block groups',
 'population': 'Total population of the county for the corresponding year.',
 'poverty_rate': '% of the population with income in the past 12 months below the poverty level',
 'renter_occupied_households': 'Interpolated count of renter-occupied households - Based on Census and ESRI Business Analyst demographic estimates',
 'pct_renter_occupied': '% of occupied housing units that are renter-occupied. - NOTE: This is not based off of the interpolated renter-occupied-households variable',
 'median_gross_rent': 'Median gross rent',
 'median_household_income': 'Median household income',
 'median_property_value': 'Median property value',
 'rent_burden': 'Median gross rent as a percentage of househo

In [38]:
# Example look-up: what does the GEOID column mean?

data_dict['GEOID']

'Census FIPS code'

In [39]:
# Example look-up: what does the evictions column mean?

data_dict['evictions']

'Number of eviction judgments in which renters were ordered to leave in a given area and year  - Only counts a single address which received an eviction judgment per year'

Summary Statistics Of Data
----

In [49]:
# Per-column summary statistics; a count below the total row count reveals missing values.
evic_data.describe()

Unnamed: 0,GEOID,year,population,poverty_rate,renter_occupied_households,pct_renter_occupied,median_gross_rent,median_household_income,median_property_value,rent_burden,pct_white,pct_af_am,pct_hispanic,pct_am_ind,pct_asian,pct_nh_pi,pct_multiple,pct_other,eviction_filings,evictions,eviction_rate,eviction_filing_rate
count,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,935.0,771.0,771.0,771.0,771.0
mean,54055.0,2008.0,33219.517647,15.446267,3695.742246,23.491947,493.596791,34871.987166,87007.486631,27.688663,95.275979,2.144588,0.867701,0.175102,0.357979,0.018299,1.107947,0.052128,127.159533,111.473411,2.100246,2.325136
std,31.766007,4.901601,32578.616261,5.289984,4688.826566,5.321703,117.273135,7679.546343,30293.058045,3.769726,3.733745,2.576346,0.82442,0.15808,0.44388,0.039925,0.764011,0.082047,262.324602,222.160303,1.652173,1.959052
min,54001.0,2000.0,5696.0,4.38,386.0,14.23,260.0,16931.0,22600.0,19.1,77.27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,54027.0,2004.0,13217.0,11.77,1077.5,20.46,390.0,29291.0,69400.0,24.9,94.16,0.47,0.47,0.08,0.09,0.0,0.66,0.0,10.0,10.0,0.97,1.075
50%,54055.0,2008.0,23801.0,14.63,2122.0,22.77,496.0,34625.0,80900.0,27.8,96.62,0.97,0.67,0.15,0.25,0.0,0.93,0.03,31.0,28.0,1.82,1.94
75%,54083.0,2012.0,41496.0,18.19,3929.0,25.97,567.0,39280.0,99300.0,29.7,97.68,3.31,1.015,0.23,0.45,0.02,1.35,0.07,95.0,89.0,2.86,3.065
max,54109.0,2016.0,200073.0,37.69,29677.0,45.63,919.0,66677.0,259400.0,40.7,99.96,12.48,5.67,1.13,3.15,0.27,5.2,0.69,1659.0,1421.0,10.23,13.64


In [46]:
# Index range plus the non-null count and dtype of every column.
evic_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 935 entries, 0 to 934
Data columns (total 24 columns):
GEOID                         935 non-null int64
year                          935 non-null int64
name                          935 non-null object
parent_location               935 non-null object
population                    935 non-null int64
poverty_rate                  935 non-null float64
renter_occupied_households    935 non-null int64
pct_renter_occupied           935 non-null float64
median_gross_rent             935 non-null int64
median_household_income       935 non-null int64
median_property_value         935 non-null int64
rent_burden                   935 non-null float64
pct_white                     935 non-null float64
pct_af_am                     935 non-null float64
pct_hispanic                  935 non-null float64
pct_am_ind                    935 non-null float64
pct_asian                     935 non-null float64
pct_nh_pi                     935 non-null f

>RangeIndex: 935 entries, 0 to 934 -> We have 935 observations.

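>We can see that some data is missing in the columns eviction_filings, evictions, eviction_rate and eviction_filing_rate: the summary statistics above report a count of 771 for each of them, versus 935 for every other numeric column (164 missing values each).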

In [60]:
# Repeat of the earlier GEOID look-up (reminder of what the identifier column holds).
data_dict['GEOID']

'Census FIPS code'