# Does California Carbon Investment Intensify Green Gentrification? 

PART 5: Regression

Haoyu Yue, Department of Urban Design and Planning, University of Washington

# Preparation

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import contextily as ctx
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.patches import Patch

In [3]:
sf_demo_gen_pattern = gpd.read_file('data/regression/sf_demo_gen_pattern.geojson')
sf_invest = gpd.read_file('data/regression/sf_invest.geojson')

In [5]:
invest_types = ['research and planning', 'fire prevention', 'climate action',
       'public transportation', 'housing', 'green space', 'others', 'vehicle',
       'building', 'utilities', 'agriculture', 'clean air']

In [11]:
logistic_data = sf_invest.groupby('GEOID')[invest_types].sum().merge(sf_demo_gen_pattern[['GEOID','gentrified']],left_index=True,right_on='GEOID')

In [26]:
logistic_data.gentrified = logistic_data.gentrified.replace(['Gentrified','Non-Gentrified'],[1,0])

In [32]:
logistic_data = logistic_data.set_index('GEOID')

In [33]:
logistic_data

Unnamed: 0_level_0,research and planning,fire prevention,climate action,public transportation,housing,green space,others,vehicle,building,utilities,agriculture,clean air,gentrified
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
060014001001,0.0,9420.740132,4723.739169,559124.921267,0.0,0.000000,0.000000,329769.516251,0.0,0.000000,0.0,0.0,0
060014002001,0.0,0.000000,67.945656,10743.453670,0.0,0.000000,0.000000,104846.473926,0.0,1545.374463,0.0,0.0,0
060014002002,0.0,0.000000,74.169245,12843.971633,0.0,0.000000,0.000000,107718.305706,0.0,10300.538804,0.0,0.0,0
060014003001,0.0,0.000000,72.415163,12163.683424,0.0,0.000000,0.000000,121166.013456,0.0,2186.621960,0.0,0.0,0
060014003002,0.0,0.000000,85.752059,18102.618113,0.0,0.000000,0.000000,132196.706037,0.0,12830.675457,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
060952521021,0.0,0.000000,28.340224,1187.268435,0.0,0.000000,269.787619,205813.033430,0.0,0.000000,0.0,0.0,1
060971507023,0.0,0.000000,0.000000,22274.105336,0.0,1371.103458,4.529945,157021.825768,0.0,0.000000,0.0,0.0,0
060971508004,0.0,0.000000,0.000000,42773.446431,0.0,2632.959638,5.481248,280205.547988,0.0,0.000000,0.0,0.0,1
060971511001,0.0,0.000000,0.000000,0.000000,0.0,2136.836676,0.165814,244715.301563,0.0,0.000000,0.0,0.0,0


In [86]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = logistic_data.drop("gentrified",1)/1000000
y = logistic_data["gentrified"]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state = 0)


  X = logistic_data.drop("gentrified",1)/1000000


In [80]:
from numpy import random
random.seed(3)

clf = linear_model.LogisticRegressionCV(cv = 5, Cs=[0.001,0.005,0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50],max_iter=1000)
LogisticCV = clf.fit(X=X_train,y=y_train)

y_pred_train = LogisticCV.predict(X_train)

print('Intercept:', LogisticCV.intercept_)
print('Coefficients:',LogisticCV.coef_)
print('Penalty value', LogisticCV.C_)
print('Training Accuracy Score',accuracy_score(y_train,y_pred_train))

Intercept: [-1.30036933]
Coefficients: [[-5.22027381e-04 -1.55010255e-03 -1.23515894e-04  1.04406833e-02
   2.10179152e-02 -2.14922944e-04 -2.28442718e-04 -3.15058949e-03
   1.38635458e-02  6.19549544e-04 -1.39172614e-04 -8.22723136e-06]]
Penalty value [0.001]
Training Accuracy Score 0.7855308219178082


In [106]:
X_train_log = sm.add_constant(X_train)
log_reg = sm.Logit(y_train, X_train).fit()
print(log_reg.summary())

Optimization terminated successfully.
         Current function value: 0.547143
         Iterations 18
                           Logit Regression Results                           
Dep. Variable:             gentrified   No. Observations:                 2336
Model:                          Logit   Df Residuals:                     2324
Method:                           MLE   Df Model:                           11
Date:                Mon, 09 May 2022   Pseudo R-squ.:                -0.05257
Time:                        18:35:26   Log-Likelihood:                -1278.1
converged:                       True   LL-Null:                       -1214.3
Covariance Type:            nonrobust   LLR p-value:                     1.000
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
research and planning    -0.2730      0.558     -0.490      0.624      -1.366       0

  x = pd.concat(x[::order], 1)
  return 1/(1+np.exp(-X))
  return 1/(1+np.exp(-X))


In [90]:
y_hat = log_reg.predict(X_test)
prediction = list(map(round, y_hat))
 
# comparing original and predicted values of y
print('Actual values', list(y_test.values))
print('Predictions :', prediction)

Actual values [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0

In [95]:
pre = pd.DataFrame(y_test.values,prediction).reset_index()

In [100]:
pre['test'] = pre['index'] - pre[0]

In [102]:
pre['test'].value_counts()

 0    453
-1    131
Name: test, dtype: int64

In [62]:
sf_gen_index = sf_demo_gen_pattern[sf_demo_gen_pattern.gentrified=='Gentrified']

In [68]:
sf_gen_index['edu_rate'] = (sf_gen_index['edu_2020']/sf_gen_index['edu_2015'])

TypeError: unsupported operand type(s) for /: 'str' and 'float'

In [70]:
sf_gen_index['index'] = (sf_gen_index['Median HH Income (in 2020 dollars)']/sf_gen_index['Median HH Income (in 2015 dollars)'])

TypeError: unsupported operand type(s) for /: 'str' and 'float'

In [66]:
sf_gen_index

Unnamed: 0,GEOID,area,popu_2015,edu_2015,Median HH Income (in 2015 dollars),Median Gross Rent 2015,white_15,popu_2020,edu_2020,Median HH Income (in 2020 dollars),Median Gross Rent 2020,white_20,gentrified,pattern,color,geometry
0,060750452002,187145.689267,1550,15.565567,66875.0,1310.0,802,1943,16.25190839694656,120921,2510,1370,Gentrified,0.0,Pattern A,"POLYGON ((-13632869.903 4547990.267, -13632749..."
3,060750479013,251690.139309,1392,14.104061,61333.0,1172.0,908,2185,13.58311688311688,130038,2946,956,Gentrified,0.0,Pattern A,"POLYGON ((-13637188.876 4548004.773, -13637069..."
6,060750476003,209279.793757,1032,14.755297,73848.0,2188.0,590,1447,15.07080504364694,170270,4000,858,Gentrified,0.0,Pattern A,"POLYGON ((-13634192.267 4547914.073, -13634073..."
17,060750451001,245768.845768,2336,15.352914,76331.0,1818.0,1156,2325,16.3764832793959,191169,3098,1677,Gentrified,5.0,Pattern F,"POLYGON ((-13632185.399 4548574.063, -13632059..."
19,060750477012,195058.411359,1471,13.194159,60795.0,1281.0,478,1851,15.08475783475783,88625,1780,866,Gentrified,1.0,Pattern B,"POLYGON ((-13634580.772 4548438.428, -13634461..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2901,060750119011,50449.913673,1027,15.498927,76806.0,1356.0,656,1502,16.07555178268251,99639,2030,816,Gentrified,0.0,Pattern A,"POLYGON ((-13627076.057 4550001.649, -13626892..."
2902,060750119022,75544.750673,763,14.375731,71750.0,1125.0,506,1378,15.95911111111111,91667,1892,1020,Gentrified,1.0,Pattern B,"POLYGON ((-13626809.447 4550044.755, -13626741..."
2907,060014059021,320632.620197,1614,9.570167,28203.0,938.0,179,1559,11.79178338001867,49931,1551,280,Gentrified,1.0,Pattern B,"POLYGON ((-13607903.613 4550322.267, -13607883..."
2914,060750118001,90734.196966,1622,9.842311,20478.0,598.0,173,1590,12.22094691535151,37164,888,197,Gentrified,1.0,Pattern B,"POLYGON ((-13626317.637 4550385.096, -13626264..."


In [58]:
sf_invest

Unnamed: 0,GEOID,TotalPopu,area,year,research and planning,fire prevention,climate action,public transportation,housing,green space,others,vehicle,building,utilities,agriculture,clean air,Total Invest,Invest per land,Invest per land per popu,geometry
0,060750452002,1838,1.871457e+05,2015,0.0,0.000000,0.000000,9003.260968,0.0,0.0,0.0,24364.574128,0.0,0.0,0.0,0.0,33367.835096,178298.710627,0.097007,"POLYGON ((-13632869.903 4547990.267, -13632749..."
1,060750164001,2053,3.005107e+05,2015,0.0,0.000000,0.000000,491.241908,0.0,0.0,0.0,27214.619523,0.0,0.0,0.0,0.0,27705.861431,92195.917868,0.044908,"POLYGON ((-13629569.614 4547980.690, -13629386..."
2,060750477021,1124,1.884071e+05,2015,0.0,0.000000,0.000000,105.240863,0.0,0.0,0.0,1644.952632,0.0,0.0,0.0,0.0,1750.193494,9289.425139,0.008265,"POLYGON ((-13635264.942 4547852.104, -13635146..."
3,060750479013,2092,2.516901e+05,2015,0.0,0.000000,0.000000,235.339390,0.0,0.0,0.0,27731.604502,0.0,0.0,0.0,0.0,27966.943892,111116.565665,0.053115,"POLYGON ((-13637188.876 4548004.773, -13637069..."
4,060750451002,1354,1.759849e+05,2015,0.0,0.000000,0.000000,6239.788622,0.0,0.0,0.0,17948.657981,0.0,0.0,0.0,0.0,24188.446603,137446.186732,0.101511,"POLYGON ((-13632512.233 4548010.970, -13632392..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17515,060014058002,1268,1.931100e+05,2020,0.0,0.000000,19.148698,1166.024856,0.0,0.0,0.0,12210.840761,0.0,0.0,0.0,0.0,13396.014316,69369.866849,0.054708,"POLYGON ((-13607124.710 4550198.865, -13607109..."
17516,060014033001,1887,1.340623e+06,2020,0.0,0.000000,4470.664870,146935.292499,0.0,0.0,0.0,15488.288779,0.0,0.0,0.0,0.0,166894.246147,124490.069493,0.065972,"POLYGON ((-13611454.927 4550088.565, -13611443..."
17517,060750113002,1234,8.266409e+04,2020,0.0,100.577850,450.886421,6800.941166,0.0,0.0,0.0,9990.600568,0.0,0.0,0.0,0.0,17343.006005,209800.975747,0.170017,"POLYGON ((-13626545.286 4550476.240, -13626525..."
17518,060750112001,1192,7.836443e+04,2020,0.0,85.756580,385.032271,7145.750593,0.0,0.0,0.0,9650.563919,0.0,0.0,0.0,0.0,17267.103364,220343.644869,0.184852,"POLYGON ((-13627136.281 4550381.151, -13627117..."


In [56]:
import statsmodels.api as sm

X_train_ols = sm.add_constant(X_train)
results = sm.OLS(y_train,X_train_ols).fit() 
print(results.summary())

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


                            OLS Regression Results                            
Dep. Variable:             gentrified   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                     1.896
Date:                Mon, 09 May 2022   Prob (F-statistic):             0.0304
Time:                        17:48:21   Log-Likelihood:                -1223.1
No. Observations:                2336   AIC:                             2472.
Df Residuals:                    2323   BIC:                             2547.
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                     0.21

  x = pd.concat(x[::order], 1)
