In [None]:
# Linear Regression Model
# Fits an ordinary least-squares model of 'Actual GDP' on X1..X5 with scikit-learn,
# prints the fitted parameters, and stores rounded in-sample predictions on `df`.
import pandas as pd
from sklearn.linear_model import LinearRegression

# 1. IMPORT DATA FROM "Data.xlsx"
file_path = "Data.xlsx" # uploaded in Colab
df = pd.read_excel(file_path)

# 2. SELECT FEATURES AND TARGET
feature_columns = ['X1', 'X2', 'X3', 'X4', 'X5']
target_column = 'Actual GDP'
label_column = 'City'

# Fail early with a readable message instead of an opaque pandas KeyError
# when the workbook does not have the layout this cell relies on.
missing_columns = [
    column
    for column in (*feature_columns, target_column, label_column)
    if column not in df.columns
]
if missing_columns:
    raise KeyError(f"{file_path} is missing expected column(s): {missing_columns}")

X = df[feature_columns]
y = df[target_column]

# 3. FIT THE LINREG MODEL
model = LinearRegression()
model.fit(X, y)

intercept = model.intercept_
coefficients = model.coef_

# 4. PRINT INTERCEPT AND COEFFICIENTS
print(f"Intercept (β₀): {intercept:.2f}")
print("Coefficients:")
for feature, coef in zip(X.columns, coefficients):
    print(f"- {feature}: {coef:.4f}")

# 5. PREDICT GDP USING LINREG MODEL
# Computed once; predictions are rounded to whole numbers and stored as integers.
df['Predicted GDP'] = model.predict(X).round(0).astype(int)

# 6. PRINT PREDICTED GDP VS ACTUAL GDP
print("=" * 50)
print("Model Predictions vs Actual GDP")
print("=" * 50)
print(df[[label_column, 'Predicted GDP', target_column]])


Intercept (β₀): -178.43
Coefficients:
- β1: 0.3080
- β2: -1.3994
- β3: 11.3470
- β4: 2.2110
- β5: 109.1649
Model Predictions vs Actual GDP
              City  Predicted GDP  Actual GDP
0    New York City           1821        1870
1      Los Angeles            951        1060
2          Chicago            584         706
3           Dallas            350         592
4          Houston            265         513
5          Atlanta            275         455
6   Washington, DC            484         581
7     Philadelphia            596         455
8            Miami            480         385
9          Phoenix            205         308
10          Boston            597         504
11       Riverside            138         196
12   San Francisco            701         655
13         Detroit            323         270
14         Seattle            382         462
15     Minneapolis            186         278
16           Tampa            412         179
17       San Diego            412

In [None]:
# OLS
# Fits the same regression with statsmodels to obtain the full inference table
# (standard errors, t-statistics, p-values, R-squared).
import pandas as pd
import statsmodels.api as sm

# Read the workbook into a fresh DataFrame.
file_path = "Data.xlsx" # Import data
df = pd.read_excel(file_path)

# Response first, then the five regressors.
predictor_names = ['X1', 'X2', 'X3', 'X4', 'X5']
y = df['Actual GDP']
X = df[predictor_names]

# Add an explicit constant column so the fitted model includes an intercept term.
X = sm.add_constant(X)

# Build the model object and fit it as two separate steps.
ols = sm.OLS(y, X)
model = ols.fit()

print(model.summary())


                            OLS Regression Results                            
Dep. Variable:             Actual GDP   R-squared:                       0.879
Model:                            OLS   Adj. R-squared:                  0.854
Method:                 Least Squares   F-statistic:                     35.00
Date:                Mon, 25 Nov 2024   Prob (F-statistic):           2.87e-10
Time:                        19:44:56   Log-Likelihood:                -186.34
No. Observations:                  30   AIC:                             384.7
Df Residuals:                      24   BIC:                             393.1
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                 -178.4337 

In [None]:
#Collinearity Test
# Prints the Pearson correlation for every pair of the five predictors, then
# projects the standardized predictors onto their leading principal components.

from itertools import combinations

from scipy.stats import pearsonr
from numpy import array
from scipy.linalg import svd
import numpy as np

# Five predictor series with 30 observations each, in the same row order.
walkabiliityScore = [88, 70, 77, 46, 48, 48, 77, 75, 77, 41, 83, 43, 89, 51, 74, 71, 50,
                      53, 61, 64, 41, 26, 66, 37, 67, 42, 62, 49, 42, 49]
landDensity = [3176, 2637, 1338, 934, 850, 718, 1045, 1357, 1220, 348, 1411, 172,
               1849, 1116, 689, 527, 1329, 777, 360, 1090, 807, 501, 356, 370,
               375, 586, 430, 475, 296, 518]
publicTransportRidership = [86.41, 14.78, 20.77, 4.62, 6.27, 7.58,
                            14.37, 16.96, 8.34, 7.94, 25.32, 3.74,
                            21.94, 2.78, 23.02, 10.04, 6.64, 13.41,
                            15.23, 15.03, 4.45, 3.43, 6.49, 9.34, 18.09,
                            7.33, 9.47, 3.6, 15.10, 5.25]
percentOfBuildingsNearCityCenter= [9.0, 37.9, 36.3, 53.7, 17.5, 34.6,
                                   67.2, 60.8, 67.7, 50.1, 10.8, 63.3, 10.8,
                                   30.8, 14.8, 22.4, 19.9, 59.0, 31.4, 10.3,
                                   2.8, 30.2, 32.2, 42.5, 21.8, 59.1, 34.6,
                                   21.7, 47.0, 30.4]
trafficCongestionIndex = [1.32, 1.50, 1.30, 1.23, 1.27, 1.25, 1.25, 1.23, 1.34, 1.22,
                          1.34, 1.29, 1.48, 1.19, 1.45, 1.26, 1.21, 1.31, 1.37, 1.27,
                          1.21, 1.18, 1.02, 1.22, 1.45, 1.37, 1.15, 1.27, 1.17, 1.22]

# Label -> series. Insertion order fixes both the order in which pairs are
# printed and the column order of the matrix used for the PCA below.
feature_values = {
    "walkabiliityScore": walkabiliityScore,
    "landDensity": landDensity,
    "publicTransportRidership": publicTransportRidership,
    "percentOfBuildingsNearCityCenter": percentOfBuildingsNearCityCenter,
    "trafficCongestionIndex": trafficCongestionIndex,
}

# Pairwise Pearson correlations, each unordered pair exactly once.
# Keyed by (first label, second label) so individual values stay addressable.
correlations = {}
for (name_a, values_a), (name_b, values_b) in combinations(feature_values.items(), 2):
    r, _ = pearsonr(values_a, values_b)
    correlations[(name_a, name_b)] = r
    print(f"{name_a}, {name_b}")
    print('Pearsons correlation: %.3f' % r)

# --- Principal component projection ---------------------------------------
n_components = 4

# Rows are observations, columns are the five predictors.
cities = np.array(list(feature_values.values())).T

# Standardize each column separately (axis=0). A single global mean/std would
# leave the columns on their original, very different scales, and the
# largest-magnitude predictor would dominate the covariance matrix.
cities_standardized = (cities - cities.mean(axis=0)) / cities.std(axis=0)

covariance_matrix = np.cov(cities_standardized, rowvar=False)

# The covariance matrix is symmetric, so use eigh: it returns real eigenvalues
# and orthonormal eigenvectors, whereas eig may return complex values.
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

# Sort eigenvalues and eigenvectors together, largest variance first, so the
# two arrays stay aligned.
order = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[order]
eigenvectors = eigenvectors[:, order]

cities_transformed = np.dot(cities_standardized, eigenvectors[:, :n_components])
print(cities_transformed.T)

walkabiliityScore, landDensity
Pearsons correlation: 0.611
walkabiliityScore, publicTransportRidership
Pearsons correlation: 0.597
walkabiliityScore, percentOfBuildingsNearCityCenter
Pearsons correlation: -0.190
walkabiliityScore, trafficCongestionIndex
Pearsons correlation: 0.453
landDensity, publicTransportRidership
Pearsons correlation: 0.695
landDensity, percentOfBuildingsNearCityCenter
Pearsons correlation: -0.241
landDensity, trafficCongestionIndex
Pearsons correlation: 0.412
publicTransportRidership, percentOfBuildingsNearCityCenter
Pearsons correlation: -0.305
publicTransportRidership, trafficCongestionIndex
Pearsons correlation: 0.312
percentOfBuildingsNearCityCenter, trafficCongestionIndex
Pearsons correlation: -0.128
walkabiliityScore, landDensity
Pearsons correlation: 0.611
walkabiliityScore, publicTransportRidership
Pearsons correlation: 0.597
walkabiliityScore, percentOfBuildingsNearCityCenter
Pearsons correlation: -0.190
walkabiliityScore, trafficCongestionIndex
Pearsons