### Import libs

In [8]:
# import libs
import numpy as np
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
import matplotlib.pyplot as plt
import pandas as pd 
from pyproj import CRS
import constant as c
from shapely.wkt import loads
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import warnings; warnings.filterwarnings("ignore")
from sklearn.ensemble import RandomForestRegressor  # or RandomForestClassifier

In [9]:
# Load the preprocessed yearly tables, wrap them as GeoDataFrames, then slice per CBD.
crs = 'EPSG:5179'  # Specify the coordinate reference system


def _read_preprocessed(csv_path):
    """Read one preprocessed CSV and parse its WKT `geometry_grids` column into geometries."""
    table = pd.read_csv(csv_path, index_col=0)
    table['geometry_grids'] = table['geometry_grids'].apply(loads)
    return table


def _as_geoframe(table):
    """Wrap `table` in a GeoDataFrame whose geometry is taken from `geometry_grids`."""
    return gpd.GeoDataFrame(table, geometry=table['geometry_grids'], crs=crs)


def _rows_in(gdf, names):
    """Return the rows of `gdf` whose `sigungunm` value is one of `names`."""
    return gdf[gdf['sigungunm'].isin(names)]


# One plain DataFrame per year (geometry already parsed from WKT)
df_2020 = _read_preprocessed('../asset/preprocess/df_2020.csv')
df_2021 = _read_preprocessed('../asset/preprocess/df_2021.csv')
df_2022 = _read_preprocessed('../asset/preprocess/df_2022.csv')

# The same tables as GeoDataFrames in `crs`
gdf_2020 = _as_geoframe(df_2020)
gdf_2021 = _as_geoframe(df_2021)
gdf_2022 = _as_geoframe(df_2022)

# Per-year subsets, selected with the name lists defined in the `constant` module
gdf_2020_scbd = _rows_in(gdf_2020, c.SCBD_NMS)
gdf_2020_gbd = _rows_in(gdf_2020, c.GBD_NMS)
gdf_2020_ybd = _rows_in(gdf_2020, c.YBD_NMS)
gdf_2021_scbd = _rows_in(gdf_2021, c.SCBD_NMS)
gdf_2021_gbd = _rows_in(gdf_2021, c.GBD_NMS)
gdf_2021_ybd = _rows_in(gdf_2021, c.YBD_NMS)
gdf_2022_scbd = _rows_in(gdf_2022, c.SCBD_NMS)
gdf_2022_gbd = _rows_in(gdf_2022, c.GBD_NMS)
gdf_2022_ybd = _rows_in(gdf_2022, c.YBD_NMS)


In [10]:
# Column groups are defined in the `constant` module (imported as `c`);
# bind them to notebook-level names so later cells can use them directly.
SIM_CAL_COLS, STANDARDIZE_COLS = c.SIM_CAL_COLS, c.STANDARDIZE_COLS
PP_COLS, BS_COLS, BD_COLS = c.PP_COLS, c.BS_COLS, c.BD_COLS

# Explanatory variables X1..X12, grouped by column-name prefix; order matters
# because it fixes the column order of the design matrix.
_PP_VARS = ['pp_pop', 'pp_od']                    # X1-X2
_BS_VARS = ['bs_ebit', 'bs_gas', 'bs_elct']       # X3-X5
_BD_VARS = [
    'bd_platarea', 'bd_archarea', 'bd_totarea',   # X6-X8
    'bd_totflrcnt', 'bd_elvtent', 'bd_height',    # X9-X11
    'bd_vintage',                                 # X12
]
VAR_COLS = _PP_VARS + _BS_VARS + _BD_VARS

TARGET_COL = ['bd_ilp'] # y is individual land price

### Experiment Settings

In [11]:
# Experiment settings: the year and the CBD short name to analyse
YEAR, CBD_NM = 2021, "gbd"

In [12]:
# Load the CBD-index table for the selected CBD/year and wrap it as a GeoDataFrame.
fpath = f"../asset/experiment/cbdindex/cbdi_{CBD_NM}_{YEAR}.csv"
cbdi_df = pd.read_csv(fpath).assign(
    # WKT strings -> geometries
    geometry_grids=lambda table: table['geometry_grids'].apply(loads)
)
cbdi_df = gpd.GeoDataFrame(cbdi_df, geometry=cbdi_df['geometry_grids'], crs=crs)

In [13]:
# Baseline GWR: regress the target on VAR_COLS only (no 'cbdi' term).
feature_cols = VAR_COLS

# Fill any data point that is NaN with the mean value of its column, so that
# missing values do not propagate into the fitted estimates. This is a no-op
# when the selected columns contain no NaN.
model_df = cbdi_df[feature_cols + TARGET_COL]
model_df = model_df.fillna(model_df.mean(numeric_only=True))

X = model_df[feature_cols].values
y = model_df[TARGET_COL].values.reshape((-1,1))
print(X.shape)
print(y.shape)

# cbdi_df was built with the projected CRS EPSG:5179, so centroid.x / centroid.y
# are planar coordinates, not degrees. `lat` / `lng` keep their existing names
# but hold x and y respectively; `coords` is therefore a list of (x, y) pairs.
centroids = cbdi_df.centroid  # compute the centroids once instead of twice
lat = centroids.x
lng = centroids.y
coords = list(zip(lat,lng))

# calibrate GWR MODEL
gwr_selector = Sel_BW(coords, y, X)
gwr_bw = gwr_selector.search(bw_min=2)
gwr_results = GWR(coords, y, X, gwr_bw).fit()

# # append to df (params[:,0] is taken as the constant term, then one column per feature)
# cbdi_df['gwr_const'] = gwr_results.params[:,0]
# for i, col_nm in enumerate(feature_cols):
#     cbdi_df[f'gwr_{col_nm}'] = gwr_results.params[:,i+1]
# cbdi_df['gwr_r2'] = gwr_results.localR2

print(f'{YEAR}_{CBD_NM}')
# https://mgwr.readthedocs.io/en/latest/_modules/mgwr/gwr.html#GWRResults.summary
gwr_results.summary()

(210, 12)
(210, 1)
2021_gbd
Model type                                                         Gaussian
Number of observations:                                                 210
Number of covariates:                                                    13

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             88.935
Log-likelihood:                                                    -207.761
AIC:                                                                441.522
AICc:                                                               445.676
BIC:                                                               -964.445
R2:                                                                   0.800
Adj. R2:                                                              0.787

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- 

In [14]:
# GWR with the CBD index: same specification as the baseline plus the 'cbdi' column.
feature_cols = VAR_COLS + ['cbdi']

# Fill any data point that is NaN with the mean value of its column, so that
# missing values do not propagate into the fitted estimates. This is a no-op
# when the selected columns contain no NaN.
model_df = cbdi_df[feature_cols + TARGET_COL]
model_df = model_df.fillna(model_df.mean(numeric_only=True))

X = model_df[feature_cols].values
y = model_df[TARGET_COL].values.reshape((-1,1))
print(X.shape)
print(y.shape)

# cbdi_df was built with the projected CRS EPSG:5179, so centroid.x / centroid.y
# are planar coordinates, not degrees. `lat` / `lng` keep their existing names
# but hold x and y respectively; `coords` is therefore a list of (x, y) pairs.
centroids = cbdi_df.centroid  # compute the centroids once instead of twice
lat = centroids.x
lng = centroids.y
coords = list(zip(lat,lng))

# calibrate GWR MODEL
gwr_selector = Sel_BW(coords, y, X)
gwr_bw = gwr_selector.search(bw_min=2)
gwr_results = GWR(coords, y, X, gwr_bw).fit()

# # append to df (params[:,0] is taken as the constant term, then one column per feature)
# cbdi_df['gwr_const'] = gwr_results.params[:,0]
# for i, col_nm in enumerate(feature_cols):
#     cbdi_df[f'gwr_{col_nm}'] = gwr_results.params[:,i+1]
# cbdi_df['gwr_r2'] = gwr_results.localR2

print(f'{YEAR}_{CBD_NM}')
# https://mgwr.readthedocs.io/en/latest/_modules/mgwr/gwr.html#GWRResults.summary
gwr_results.summary()

(210, 13)
(210, 1)
2021_gbd
Model type                                                         Gaussian
Number of observations:                                                 210
Number of covariates:                                                    14

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             83.925
Log-likelihood:                                                    -201.672
AIC:                                                                431.344
AICc:                                                               435.819
BIC:                                                               -964.109
R2:                                                                   0.811
Adj. R2:                                                              0.798

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- 