## ECON 570 Final Project
### Instructor: Ida Johnsson
### Group Members: Mingyu Zhao, Shang Gao, Yantong Li

In [28]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [3]:
data_source = "https://raw.githubusercontent.com/yantonglll/ECON570_Final_Project/main/ALL%20Prices%202019-2021%20mar.csv"
data = pd.read_csv(data_source)
data.head()

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,...,price_psm_yearly,Core CPI,price cpi_adj,price_psm cpi_adj,bala lease pct,price lease_adj implied,price_psm lease_adj implied,price cpi_lease_adj implied,price_psm cpi_lease_adj implied,year_gni
0,2019-01,ANG MO KIO,2,5 ROOM,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,...,86.16086,99.961,794109.7028,7154.141466,92.2,826516.26898,7446.092513,826838.736104,7448.997622,78847
1,2019-01,ANG MO KIO,2,5 ROOM,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,...,81.395349,99.961,770300.4172,7002.731065,93.3,792282.958199,7202.572347,792592.069145,7205.382446,78847
2,2019-01,ANG MO KIO,2,4 ROOM,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,...,84.918478,99.961,750292.6141,7815.548064,95.0,757894.736842,7894.736842,758190.431091,7897.816991,78847
3,2019-01,ANG MO KIO,2,5 ROOM,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,...,76.955603,99.961,728284.0308,6620.763916,93.3,749067.524116,6809.704765,749359.774457,6812.361586,78847
4,2019-01,ANG MO KIO,2,5 ROOM,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,...,81.705948,99.961,728284.0308,6620.763916,91.4,764638.949672,6951.263179,764937.275239,6953.975229,78847


## I. Introduction

## II. Data

## III. Model

### III.1 Model 1:
We first investigate a model where Y, the dependent variable, is price cpi_adj. Independent variables, or the covariates, are town_dummy, covid dummy, area_sqm, lease_rem, storey, and flat_type. The model should look like this:

$ $price cpi_adj$_i = c + \beta_1*$town_dummy$_i+\beta_2*$covid_dummy$_i+\beta_3*$area_sqm$_i+\beta_4*$lease_rem$_i+\beta_5*storey_i+\beta_6*$flat_type$_i+e_i$,  

where $e_i \sim N(0,\sigma^2)$

In [4]:
data_sum=data

In [5]:
# create a dummy for covid
data_sum["covid_dum"] = (data_sum.month >= "2020-01").astype(int)
data_sum

# rename "price cpi_adj"
data_sum = data_sum.rename(columns = {"price cpi_adj":"price_cpi_adj"})

data_sum.head()

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,...,Core CPI,price_cpi_adj,price_psm cpi_adj,bala lease pct,price lease_adj implied,price_psm lease_adj implied,price cpi_lease_adj implied,price_psm cpi_lease_adj implied,year_gni,covid_dum
0,2019-01,ANG MO KIO,2,5 ROOM,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,...,99.961,794109.7028,7154.141466,92.2,826516.26898,7446.092513,826838.736104,7448.997622,78847,0
1,2019-01,ANG MO KIO,2,5 ROOM,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,...,99.961,770300.4172,7002.731065,93.3,792282.958199,7202.572347,792592.069145,7205.382446,78847,0
2,2019-01,ANG MO KIO,2,4 ROOM,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,...,99.961,750292.6141,7815.548064,95.0,757894.736842,7894.736842,758190.431091,7897.816991,78847,0
3,2019-01,ANG MO KIO,2,5 ROOM,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,...,99.961,728284.0308,6620.763916,93.3,749067.524116,6809.704765,749359.774457,6812.361586,78847,0
4,2019-01,ANG MO KIO,2,5 ROOM,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,...,99.961,728284.0308,6620.763916,91.4,764638.949672,6951.263179,764937.275239,6953.975229,78847,0


Now, we create dummies for town_dummy, it is important to keep in mind that town_dummy is from 1-6, with 1 being the most prime area and 6 being the least prime area

In [7]:
# create dummies for town_dummy
town_dum = pd.get_dummies(data_sum['town_dummy'])
town_dum

# Attach these dummies to dataframe
data_c = pd.concat([data_sum,town_dum], axis=1)
data_c

# Rename columns
data_rn1 = data_c.rename(columns = {1: "town_1",2: "town_2",3: "town_3",4: "town_4",5: "town_5",6: "town_6"})
data_rn1.head()

#data_rn.columns

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,...,price cpi_lease_adj implied,price_psm cpi_lease_adj implied,year_gni,covid_dum,town_1,town_2,town_3,town_4,town_5,town_6
0,2019-01,ANG MO KIO,2,5 ROOM,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,...,826838.736104,7448.997622,78847,0,0,1,0,0,0,0
1,2019-01,ANG MO KIO,2,5 ROOM,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,...,792592.069145,7205.382446,78847,0,0,1,0,0,0,0
2,2019-01,ANG MO KIO,2,4 ROOM,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,...,758190.431091,7897.816991,78847,0,0,1,0,0,0,0
3,2019-01,ANG MO KIO,2,5 ROOM,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,...,749359.774457,6812.361586,78847,0,0,1,0,0,0,0
4,2019-01,ANG MO KIO,2,5 ROOM,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,...,764937.275239,6953.975229,78847,0,0,1,0,0,0,0


In [8]:
# set variable "flat_type" to a categorical variable
data_rn1["flat_type"].describe()
data_rn1["flat_type"] = data_rn1["flat_type"].astype("category")
data_rn1.dtypes

month                                object
town                                 object
town_dummy                            int64
flat_type                          category
block                                object
street_name                          object
address                              object
latitude                            float64
longitude                           float64
storey_range                         object
storey                                int64
area_sqm                            float64
flat_model                           object
lease_start                           int64
lease_rem                             int64
resale_price                        float64
price_psm                           float64
price_psm_yearly                    float64
Core CPI                            float64
price_cpi_adj                       float64
price_psm cpi_adj                   float64
bala lease pct                      float64
price lease_adj implied         

In [9]:
data_rn1["flat_type"] = data_rn1["flat_type"].cat.codes
data_rn1.head()

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,...,price cpi_lease_adj implied,price_psm cpi_lease_adj implied,year_gni,covid_dum,town_1,town_2,town_3,town_4,town_5,town_6
0,2019-01,ANG MO KIO,2,4,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,...,826838.736104,7448.997622,78847,0,0,1,0,0,0,0
1,2019-01,ANG MO KIO,2,4,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,...,792592.069145,7205.382446,78847,0,0,1,0,0,0,0
2,2019-01,ANG MO KIO,2,3,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,...,758190.431091,7897.816991,78847,0,0,1,0,0,0,0
3,2019-01,ANG MO KIO,2,4,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,...,749359.774457,6812.361586,78847,0,0,1,0,0,0,0
4,2019-01,ANG MO KIO,2,4,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,...,764937.275239,6953.975229,78847,0,0,1,0,0,0,0


#### Now let's run the regression with model 1:

In [10]:
# regression 1
est1 = smf.ols(formula="price_cpi_adj ~ covid_dum + town_2 + town_3 + town_4 + town_5 + town_6 + lease_rem + area_sqm + flat_type + storey", data=data_rn1).fit()

est1.summary()

0,1,2,3
Dep. Variable:,price_cpi_adj,R-squared:,0.775
Model:,OLS,Adj. R-squared:,0.775
Method:,Least Squares,F-statistic:,18140.0
Date:,"Mon, 02 May 2022",Prob (F-statistic):,0.0
Time:,15:34:14,Log-Likelihood:,-665020.0
No. Observations:,52641,AIC:,1330000.0
Df Residuals:,52630,BIC:,1330000.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-1.467e+05,3070.423,-47.771,0.000,-1.53e+05,-1.41e+05
covid_dum,1.836e+04,657.696,27.916,0.000,1.71e+04,1.96e+04
town_2,-7.309e+04,2019.519,-36.193,0.000,-7.71e+04,-6.91e+04
town_3,-6.765e+04,2324.524,-29.104,0.000,-7.22e+04,-6.31e+04
town_4,-2.018e+05,1965.077,-102.707,0.000,-2.06e+05,-1.98e+05
town_5,-2.571e+05,1990.389,-129.190,0.000,-2.61e+05,-2.53e+05
town_6,-2.558e+05,2100.351,-121.784,0.000,-2.6e+05,-2.52e+05
lease_rem,3949.6076,27.677,142.703,0.000,3895.360,4003.855
area_sqm,3676.5236,45.953,80.006,0.000,3586.455,3766.592

0,1,2,3
Omnibus:,4850.771,Durbin-Watson:,0.722
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7319.515
Skew:,0.711,Prob(JB):,0.0
Kurtosis:,4.147,Cond. No.,1850.0


In [11]:
# regression 2
est2 = smf.ols(formula="price_cpi_adj ~ covid_dum + town_dummy + lease_rem + area_sqm + flat_type + storey", data=data_rn1).fit()

est2.summary()

0,1,2,3
Dep. Variable:,price_cpi_adj,R-squared:,0.752
Model:,OLS,Adj. R-squared:,0.752
Method:,Least Squares,F-statistic:,26660.0
Date:,"Mon, 02 May 2022",Prob (F-statistic):,0.0
Time:,15:34:23,Log-Likelihood:,-667560.0
No. Observations:,52641,AIC:,1335000.0
Df Residuals:,52634,BIC:,1335000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-8.341e+04,2451.944,-34.019,0.000,-8.82e+04,-7.86e+04
covid_dum,1.826e+04,688.088,26.542,0.000,1.69e+04,1.96e+04
town_dummy,-5.382e+04,274.722,-195.892,0.000,-5.44e+04,-5.33e+04
lease_rem,3515.9028,27.762,126.643,0.000,3461.488,3570.317
area_sqm,3563.6814,47.895,74.406,0.000,3469.806,3657.556
flat_type,2.32e+04,1259.950,18.410,0.000,2.07e+04,2.57e+04
storey,6129.4330,62.180,98.575,0.000,6007.559,6251.307

0,1,2,3
Omnibus:,5624.206,Durbin-Watson:,0.68
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9227.155
Skew:,0.763,Prob(JB):,0.0
Kurtosis:,4.369,Cond. No.,929.0


### III.2. Model 2

In [14]:
dummies = pd.get_dummies(data['town_dummy'])

In [15]:
dummies.rename(columns={1:'d1',2:'d2',3:'d3',4:'d4',5:'d5',6:'d6'},inplace = True)

In [16]:
dataTown=pd.concat([data,dummies.reindex(data.index)],axis=1)

In [17]:
dataTown['Covid']=dataTown.month.between('2020-02','2021-03').astype(int)

In [18]:
dataTown['Covid2']=dataTown['d2']*dataTown['Covid']
dataTown['Covid3']=dataTown['d3']*dataTown['Covid']
dataTown['Covid4']=dataTown['d4']*dataTown['Covid']
dataTown['Covid5']=dataTown['d5']*dataTown['Covid']
dataTown['Covid6']=dataTown['d6']*dataTown['Covid']

In [19]:
dataTown['flat_type'].replace('ROOM','',regex=True,inplace=True)

In [20]:
dataTown.head()

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,...,d3,d4,d5,d6,Covid,Covid2,Covid3,Covid4,Covid5,Covid6
0,2019-01,ANG MO KIO,2,5,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,...,0,0,0,0,0,0,0,0,0,0
1,2019-01,ANG MO KIO,2,5,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,...,0,0,0,0,0,0,0,0,0,0
2,2019-01,ANG MO KIO,2,4,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,...,0,0,0,0,0,0,0,0,0,0
3,2019-01,ANG MO KIO,2,5,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,...,0,0,0,0,0,0,0,0,0,0
4,2019-01,ANG MO KIO,2,5,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,...,0,0,0,0,0,0,0,0,0,0


In [21]:
for col in dataTown.columns:
    print(col)

month
town
town_dummy
flat_type
block
street_name
address
latitude
longitude
storey_range
storey
area_sqm
flat_model
lease_start
lease_rem
resale_price
price_psm
price_psm_yearly
Core CPI
price cpi_adj
price_psm cpi_adj
bala lease pct
price lease_adj implied
price_psm lease_adj implied
price cpi_lease_adj implied
price_psm cpi_lease_adj implied
year_gni
covid_dum
d1
d2
d3
d4
d5
d6
Covid
Covid2
Covid3
Covid4
Covid5
Covid6


In [22]:
dataTown['Y']=dataTown['price cpi_adj']

In [24]:
est1 = smf.ols(formula="Y ~ flat_type + storey + area_sqm + lease_rem + d2 + d3 + d4 + d5 + d6 + Covid + Covid2 + Covid3 + Covid4 + Covid5 + Covid6", data=dataTown).fit()

In [25]:
print(est1.summary())

                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.778
Model:                            OLS   Adj. R-squared:                  0.778
Method:                 Least Squares   F-statistic:                     9205.
Date:                Mon, 02 May 2022   Prob (F-statistic):               0.00
Time:                        15:39:57   Log-Likelihood:            -6.6472e+05
No. Observations:               52641   AIC:                         1.329e+06
Df Residuals:                   52620   BIC:                         1.330e+06
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
Intercept     

In [26]:
est2 = smf.ols(formula="Y ~ flat_type + storey + area_sqm + lease_rem + d2 + d3 + d4 + d5 + d6 + Covid", data=dataTown).fit()

In [27]:
print (est2.summary())

                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.777
Model:                            OLS   Adj. R-squared:                  0.777
Method:                 Least Squares   F-statistic:                 1.225e+04
Date:                Mon, 02 May 2022   Prob (F-statistic):               0.00
Time:                        15:40:14   Log-Likelihood:            -6.6475e+05
No. Observations:               52641   AIC:                         1.330e+06
Df Residuals:                   52625   BIC:                         1.330e+06
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
Intercept     

### III.3 Model 3

In [29]:
data_source = "https://raw.githubusercontent.com/yantonglll/ECON570_Final_Project/main/ALL%20Prices%202019-2021%20mar.csv"
df= pd.read_csv(data_source)

In [31]:
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,storey,area_sqm,flat_model,lease_start,lease_rem,resale_price,price_psm,price_psm_yearly,Core CPI,price cpi_adj,price_psm cpi_adj,bala lease pct,price lease_adj implied,price_psm lease_adj implied,price cpi_lease_adj implied,price_psm cpi_lease_adj implied,year_gni
0,2019-01,ANG MO KIO,2,5 ROOM,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,20,111.0,Improved,2003,83,793800.0,7151.351351,86.16086,99.961,794109.7028,7154.141466,92.2,826516.26898,7446.092513,826838.736104,7448.997622,78847
1,2019-01,ANG MO KIO,2,5 ROOM,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,20,110.0,Improved,2006,86,770000.0,7000.0,81.395349,99.961,770300.4172,7002.731065,93.3,792282.958199,7202.572347,792592.069145,7205.382446,78847
2,2019-01,ANG MO KIO,2,4 ROOM,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,26,96.0,Model A,2012,92,750000.0,7812.5,84.918478,99.961,750292.6141,7815.548064,95.0,757894.736842,7894.736842,758190.431091,7897.816991,78847
3,2019-01,ANG MO KIO,2,5 ROOM,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,14,110.0,Improved,2006,86,728000.0,6618.181818,76.955603,99.961,728284.0308,6620.763916,93.3,749067.524116,6809.704765,749359.774457,6812.361586,78847
4,2019-01,ANG MO KIO,2,5 ROOM,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,17,110.0,Improved,2001,81,728000.0,6618.181818,81.705948,99.961,728284.0308,6620.763916,91.4,764638.949672,6951.263179,764937.275239,6953.975229,78847


##### convert flat type to cateogry variables

In [34]:
df['flat_type'].replace('ROOM','',regex=True,inplace=True)

In [35]:
df['covid_dummy'] = (df.month >'2020-01').astype(int)

In [36]:
df['flat_model'].unique()

array(['Improved', 'Model A', 'DBSS', 'Standard', 'New Generation',
       'Apartment', 'Maisonette', 'Premium Apartment', 'Simplified',
       'Type S2', 'Type S1', 'Adjoined flat', 'Model A2', 'Terrace',
       'Premium Apartment Loft', 'Model A-Maisonette', 'Multi Generation',
       'Improved-Maisonette', 'Premium Maisonette', '2-room'],
      dtype=object)

In [37]:
len(df['flat_model'].unique())

20

In [41]:
flat_model_dummy = pd.get_dummies(df['flat_model'],drop_first = True)
#flat_model_dummy
df['flat_model'].replace(' ','-',regex=True,inplace=True)

In [42]:
df['flat_model'].replace('','-',regex=True,inplace=True)
df = df.join(flat_model_dummy)

In [43]:
df['covid_flatmodel'] = df['covid_dummy']
df.head()

Unnamed: 0,month,town,town_dummy,flat_type,block,street_name,address,latitude,longitude,storey_range,storey,area_sqm,flat_model,lease_start,lease_rem,resale_price,price_psm,price_psm_yearly,Core CPI,price cpi_adj,price_psm cpi_adj,bala lease pct,price lease_adj implied,price_psm lease_adj implied,price cpi_lease_adj implied,price_psm cpi_lease_adj implied,year_gni,covid_dummy,Adjoined-flat,Apartment,DBSS,Improved,Improved-Maisonette,Maisonette,Model-A,Model-A-Maisonette,Model-A2,Multi-Generation,New-Generation,Premium-Apartment,Premium-Apartment-Loft,Premium-Maisonette,Simplified,Standard,Terrace,Type-S1,Type-S2,covid_flatmodel
0,2019-01,ANG MO KIO,2,5,700B,ANG MO KIO AVE 6,700B ANG MO KIO AVE 6 SINGAPORE,1.369457,103.846276,19 TO 21,20,111.0,Improved,2003,83,793800.0,7151.351351,86.16086,99.961,794109.7028,7154.141466,92.2,826516.26898,7446.092513,826838.736104,7448.997622,78847,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2019-01,ANG MO KIO,2,5,316A,ANG MO KIO ST 31,316A ANG MO KIO ST 31 SINGAPORE,1.364621,103.84708,19 TO 21,20,110.0,Improved,2006,86,770000.0,7000.0,81.395349,99.961,770300.4172,7002.731065,93.3,792282.958199,7202.572347,792592.069145,7205.382446,78847,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2019-01,ANG MO KIO,2,4,310B,ANG MO KIO AVE 1,310B ANG MO KIO AVE 1 SINGAPORE,1.364778,103.844221,25 TO 27,26,96.0,Model-A,2012,92,750000.0,7812.5,84.918478,99.961,750292.6141,7815.548064,95.0,757894.736842,7894.736842,758190.431091,7897.816991,78847,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2019-01,ANG MO KIO,2,5,315B,ANG MO KIO ST 31,315B ANG MO KIO ST 31 SINGAPORE,1.364079,103.847476,13 TO 15,14,110.0,Improved,2006,86,728000.0,6618.181818,76.955603,99.961,728284.0308,6620.763916,93.3,749067.524116,6809.704765,749359.774457,6812.361586,78847,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2019-01,ANG MO KIO,2,5,353,ANG MO KIO ST 32,353 ANG MO KIO ST 32 SINGAPORE,1.364015,103.851622,16 TO 18,17,110.0,Improved,2001,81,728000.0,6618.181818,81.705948,99.961,728284.0308,6620.763916,91.4,764638.949672,6951.263179,764937.275239,6953.975229,78847,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [44]:
y = df['price cpi_adj']

In [45]:
result = smf.ols(formula = 'y~flat_type+area_sqm+covid_dummy+storey+lease_rem+flat_model+covid_flatmodel',data = df).fit()
result.summary()
#dataTown['Covid2']=dataTown['d2']*dataTown['Covid']

0,1,2,3
Dep. Variable:,y,R-squared:,0.641
Model:,OLS,Adj. R-squared:,0.641
Method:,Least Squares,F-statistic:,3362.0
Date:,"Mon, 02 May 2022",Prob (F-statistic):,0.0
Time:,15:44:28,Log-Likelihood:,-677300.0
No. Observations:,52641,AIC:,1355000.0
Df Residuals:,52612,BIC:,1355000.0
Df Model:,28,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-9.826e+04,4.62e+04,-2.127,0.033,-1.89e+05,-7731.702
flat_type[T.2 ],-4.751e+04,1.87e+04,-2.537,0.011,-8.42e+04,-1.08e+04
flat_type[T.3 ],9197.9452,1.86e+04,0.494,0.622,-2.73e+04,4.57e+04
flat_type[T.4 ],4.887e+04,1.9e+04,2.566,0.010,1.15e+04,8.62e+04
flat_type[T.5 ],6.86e+04,1.97e+04,3.484,0.000,3e+04,1.07e+05
flat_type[T.EXECUTIVE],5.712e+04,2.06e+04,2.779,0.005,1.68e+04,9.74e+04
flat_type[T.MULTI GENERATION],1.419e+05,2.51e+04,5.655,0.000,9.27e+04,1.91e+05
flat_model[T.Adjoined-flat],1.365e+05,4.3e+04,3.170,0.002,5.21e+04,2.21e+05
flat_model[T.Apartment],6.759e+04,4.23e+04,1.597,0.110,-1.54e+04,1.51e+05

0,1,2,3
Omnibus:,10778.572,Durbin-Watson:,0.584
Prob(Omnibus):,0.0,Jarque-Bera (JB):,22931.775
Skew:,1.2,Prob(JB):,0.0
Kurtosis:,5.166,Cond. No.,1.02e+16


## IV Findings and Conclusion 