# Predicting Gentrification using Feed Forward Neural Network

### Importing the Median House Income Data 

In [93]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt



### - Rent
DATA_Rent = pd.read_csv('sub-borougharea-medianhouseholdincome2017.csv',na_values=[''])

dt_num_rent = DATA_Rent.loc[:,['2000', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',
               '2016']]



### Massaging the data and Feature engineering

In [94]:
dt_chg_rent = dt_num_rent.divide(dt_num_rent.loc[:, '2000'], axis=0)

dt_chg_rent_melt = pd.melt(dt_chg_rent, var_name='Year', value_name='Rent_Pct').drop('Year', 1)
dt_rent = pd.melt(DATA_Rent, id_vars=['short_name', 'long_name', 'Sub-Borough Area'],var_name='Year', value_name='Rent')
dt_rent = pd.concat([dt_rent, dt_chg_rent_melt], axis=1)
yr_chg_mean = dt_chg_rent.mean(0).reset_index()
yr_chg_mean.columns = ['Year', 'yr_mean']
dt_rent = pd.merge(dt_rent, yr_chg_mean, on='Year')
dt_rent['delta'] = dt_rent.loc[:, 'Rent_Pct'] - dt_rent.loc[:, 'yr_mean']
dt_rent['growth_flag'] = (dt_rent['delta'] > 0).astype(int)
sum_consec = 0
for br in dt_rent.loc[:, 'Sub-Borough Area'].unique():
    for yr in ['2000', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015','2016']:
        if float(dt_rent.loc[(dt_rent['Year'] == yr) & (dt_rent['Sub-Borough Area'] == br), 'delta']) > 0:
            sum_consec += 1
        else:
            sum_consec = 0
        dt_rent.loc[(dt_rent['Year'] == yr) & (dt_rent['Sub-Borough Area'] == br), 'cumm_growth_yr'] = sum_consec


### Gentrification Function

In [95]:
def paint(ls):
    rtn = []
    yr = 0
    yr2 = 0
    flag = True
    while len(ls) != 0:
        if ls[-1] >= 4:
            rtn = rtn + list(zip(['Gentrifying'] * int(ls[-1]), range(ls[-1], 0, -1)))
        
            flag = False
            ls = ls[:len(ls) - ls[-1]]
        elif ls[-1] < 4 and flag == True:
            rtn.append(['Gentrified', yr])
            yr += 1
            ls.pop()
        elif ls[-1] < 4 and flag == False:
            rtn.append(['Non-Gentrified', yr2])
            yr2 += 1
            ls.pop()
    rtn.reverse()
    rtn = pd.DataFrame(np.array(rtn), columns=['gentrify_flag', 'passed_years'])
    rtn.passed_years = rtn.passed_years.astype(int)
    rtn.loc[rtn['gentrify_flag'] == 'Non-Gentrified', 'passed_years'] = \
        yr2 - rtn.loc[rtn['gentrify_flag'] == 'Non-Gentrified', 'passed_years']
    rtn.loc[rtn['gentrify_flag'] == 'Gentrified', 'passed_years'] = \
        yr - rtn.loc[rtn['gentrify_flag'] == 'Gentrified', 'passed_years']
    return rtn

range(ls[-1], 0, -1)


range(12, 0, -1)

In [96]:
dt_rent = dt_rent.sort_values(by=['Sub-Borough Area', 'Year']).reset_index(drop=True)
dt_rent.head()

Unnamed: 0,short_name,long_name,Sub-Borough Area,Year,Rent,Rent_Pct,yr_mean,delta,growth_flag,cumm_growth_yr
0,hh_inc_med_adj,Median household income ($2017),Astoria,2000,53924.70137,1.0,1.0,0.0,0,0.0
1,hh_inc_med_adj,Median household income ($2017),Astoria,2005,46660.0,0.865281,0.9418,-0.076519,0,0.0
2,hh_inc_med_adj,Median household income ($2017),Astoria,2006,51237.801359,0.950173,0.987463,-0.03729,0,0.0
3,hh_inc_med_adj,Median household income ($2017),Astoria,2007,53910.0,0.999727,0.993594,0.006133,1,1.0
4,hh_inc_med_adj,Median household income ($2017),Astoria,2008,59294.152225,1.099573,1.024123,0.07545,1,2.0


In [97]:
dt_rent = dt_rent.sort_values(by=['Sub-Borough Area', 'Year']).reset_index(drop=True)
tmp = pd.DataFrame([])
for br in dt_rent.loc[:, 'Sub-Borough Area'].unique():
    ls = dt_rent.loc[dt_rent['Sub-Borough Area'] == br, 'cumm_growth_yr'].astype(int).tolist()
    tmp = pd.concat([tmp, paint(ls)])

dt_rent = pd.concat([dt_rent, tmp.reset_index(drop=True)], axis=1)
dt_rent.head()

Unnamed: 0,short_name,long_name,Sub-Borough Area,Year,Rent,Rent_Pct,yr_mean,delta,growth_flag,cumm_growth_yr,gentrify_flag,passed_years
0,hh_inc_med_adj,Median household income ($2017),Astoria,2000,53924.70137,1.0,1.0,0.0,0,0.0,Non-Gentrified,1
1,hh_inc_med_adj,Median household income ($2017),Astoria,2005,46660.0,0.865281,0.9418,-0.076519,0,0.0,Non-Gentrified,2
2,hh_inc_med_adj,Median household income ($2017),Astoria,2006,51237.801359,0.950173,0.987463,-0.03729,0,0.0,Non-Gentrified,3
3,hh_inc_med_adj,Median household income ($2017),Astoria,2007,53910.0,0.999727,0.993594,0.006133,1,1.0,Non-Gentrified,4
4,hh_inc_med_adj,Median household income ($2017),Astoria,2008,59294.152225,1.099573,1.024123,0.07545,1,2.0,Non-Gentrified,5


### Importing and cleaning Median Household Income data

In [105]:
### - Income
DATA_Income = pd.read_csv('sub-borougharea-medianhouseholdincome2017.csv',
                          na_values=[''],
                          )

dt_num_income = DATA_Income.loc[:,
                ['2000', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',
                 '2016']]


dt_pvt_flag = pd.concat([DATA_Income.loc[:, 'Sub-Borough Area'],
                         dt_num_income.loc[:, '2000'] <= dt_num_income.loc[:, '2000'].quantile(0.4)
                         ], axis=1)
dt_pvt_flag.columns = ['Sub-Borough Area', 'Poverty_flag']

dt_income = pd.melt(DATA_Income, id_vars=['short_name', 'long_name', 'Sub-Borough Area'],
                    var_name='Year', value_name='Income')
dt_income = pd.merge(dt_income, dt_pvt_flag, on='Sub-Borough Area')
# dt_income = pd.concat([dt_income, dt_chg_rent_melt], axis=1)
dt_income = dt_income.sort_values(by=['Sub-Borough Area', 'Year']).reset_index(drop=True)

dt_income_rent = pd.concat([dt_rent, dt_income], axis=1)
dt_income_rent = dt_income_rent.iloc[:, list(range(2, 12)) + [16, 17]]
# dt_income_rent = pd.merge(dt_income_rent, dt_pvt_flag, on='Sub-Borough Area')

for br in dt_income_rent.loc[dt_income_rent['Poverty_flag'] == True, 'Sub-Borough Area'].unique():
    if len(dt_income_rent.loc[(dt_income_rent['Sub-Borough Area'] == br) &
                              (dt_income_rent['gentrify_flag'] == 'Gentrified'), :]) == 13:
        dt_income_rent.loc[dt_income_rent['Sub-Borough Area'] == br, 'gentrify_flag'] = 'Non-Gentrified'



DATA_Income.head()

Unnamed: 0,short_name,long_name,Sub-Borough Area,2000,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,hh_inc_med_adj,Median household income ($2017),Greenwich Village/Financial District,101669.764384,115100,115427.099773,121820,120389.962932,115417.396305,113844.301681,129931.689744,109362.782397,131013.186467,124018.824886,125884.659231,139895.69305
1,hh_inc_med_adj,Median household income ($2017),Lower East Side/Chinatown,42668.931507,39690,44894.061985,37180,41611.250053,46775.146543,47172.492693,42450.610129,43401.101873,41959.223153,42683.773277,43559.604234,42013.569153
2,hh_inc_med_adj,Median household income ($2017),Chelsea/Clinton/Midtown,80923.835616,87400,88461.949071,92130,90348.27578,92710.122833,86783.161452,90002.404347,91717.862765,105214.322926,101700.80206,105401.298444,103537.698631
3,hh_inc_med_adj,Median household income ($2017),Stuyvesant Town/Turtle Bay,100051.287671,106650,109152.710829,112480,122805.460807,111910.450839,101582.396705,110075.444497,98260.026605,112994.965756,108989.835914,110117.129545,112561.944526
4,hh_inc_med_adj,Median household income ($2017),Upper West Side,97108.60274,104140,110270.834436,100740,117308.238627,119920.980978,100334.913146,99899.091709,97609.424676,103790.3461,109563.548092,104126.498822,113447.95398


### Merging all the the CSV files

In [None]:
DATA_Features = pd.read_csv(data_dir + '/datF.csv', )
dt_feat = DATA_Features.drop(['y'], axis=1)
dt_feat.loc[:, 'variable'] = dt_feat.loc[:, 'variable'].str.replace('X', '')

dt_mrg = pd.merge(dt_income_rent, dt_feat, left_on=['Sub-Borough Area', 'Year'],
                  right_on=['Sub.Borough.Area', 'variable'])
dt_mrg = dt_mrg.drop(dt_mrg.columns[12:15], 1)

### Implementation of Feed Forward Neural network 

In [85]:

dt_mrg['gentrify_flag'].replace(['Gentrified', 'Gentrifing', 'Non-Gentrified'], [2,1,0],inplace=True)
dt_mrg['Poverty_flag'].replace([False, True], [0,1],inplace=True)

location = [i for i in range(len(sorted(list(set(dt_mrg['Sub-Borough Area'])))))]
dt_mrg['Sub-Borough Area'].replace(sorted(list(set(dt_mrg['Sub-Borough Area']))),location,inplace=True)

In [86]:
#y_label = list(dt_mrg['gentrify_flag'])
onehot = pd.get_dummies(dt_mrg['gentrify_flag'])
#target_labels = onehot.columns
y_label = onehot.as_matrix()
# y_label = dt_mrg['gentrify_flag']

dt_mrg.drop(['gentrify_flag'],axis=1,inplace=True)
dt_mrg.drop(dt_mrg.columns[[0]],axis=1,inplace=True)


  after removing the cwd from sys.path.


In [87]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from sklearn.model_selection import train_test_split

y_encoded = to_categorical(y_label)

x_train, x_test, y_train, y_test = train_test_split(dt_mrg, y_label, test_size=0.2, random_state=0)
model = Sequential()
model.add(Dense(48, input_dim=24, init='uniform', activation='relu'))
#model.add(Dropout(0.2))
model.add(Dense(24, init='uniform', activation='relu'))
#model.add(Dropout(0.2))
model.add(Dense(3, init='uniform', activation='softmax'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, nb_epoch=500, batch_size=50)
train_acc = model.evaluate(x_train, y_train)
print("%s: %.2f%%" % (model.metrics_names[1], train_acc[1]*100))
    
test_acc = model.evaluate(x_test, y_test)
print("%s: %.2f%%" % (model.metrics_names[1], test_acc[1]*100))

  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':
  


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 1

Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 

Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 

Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 

Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 

Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500
acc: 85.57%
acc: 79.45%


## Accuracy 85.57% and 79.45%