In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load the dataset; the path is relative to the notebook's working directory.
file_path = "Civil_Engineering_Regression_Dataset.csv"
df = pd.read_csv(file_path)

# Predictors and response for the multiple regression.
X = df[['Building_Height', 'Material_Quality_Index', 'Labor_Cost', 'Concrete_Strength', 'Foundation_Depth']]
y = df['Construction_Cost']

# statsmodels' OLS does not add an intercept on its own; add_constant
# prepends a 'const' column so the model estimates one.
X = sm.add_constant(X)

model = sm.OLS(y, X).fit()

regression_coefficients = model.params

# Raw slopes are expressed in "cost units per one unit of the predictor", so
# their magnitudes depend on each predictor's measurement scale and cannot be
# compared with each other directly. Rescale every slope by std(x) / std(y)
# (standardized coefficients) so the ranking reflects the effect of a
# one-standard-deviation change in each predictor.
# The intercept is removed by label rather than by position so the ranking
# never silently depends on where 'const' sits in the design matrix.
slope_coefficients = regression_coefficients.drop("const")
standardized_coefficients = slope_coefficients * X[slope_coefficients.index].std() / y.std()
highest_impact_variable = standardized_coefficients.abs().idxmax()

# Compute R-squared and Adjusted R-squared for multiple regression
r_squared_multiple = model.rsquared
adjusted_r_squared_multiple = model.rsquared_adj

# Baseline for comparison: regress the same response on 'Building_Height'
# alone (plus an intercept) and keep its R-squared.
X_simple = sm.add_constant(df.loc[:, ['Building_Height']])
model_simple = sm.OLS(endog=y, exog=X_simple).fit()
r_squared_simple = model_simple.rsquared

# Variance inflation factor for every column of the design matrix X.
# X still contains the 'const' column added by sm.add_constant, so the table
# has a row for it too.
# NOTE(review): the 'const' row is not a collinearity diagnostic for a
# predictor; interpret only the predictor rows.
design_matrix = X.values
vif_data = pd.DataFrame(
    {
        "Variable": X.columns,
        "VIF": [
            variance_inflation_factor(design_matrix, column_position)
            for column_position in range(len(X.columns))
        ],
    }
)

# Text report: coefficients, the top-ranked predictor, goodness-of-fit for
# both models, and the VIF table.
print("Regression Coefficients:", regression_coefficients, sep="\n")
print("\nVariable with Highest Impact:", highest_impact_variable)

# One "label value" line per fit statistic; the leading "\n" on the first
# label produces the blank separator line.
for stat_label, stat_value in (
    ("\nR-squared (Simple Regression):", r_squared_simple),
    ("R-squared (Multiple Regression):", r_squared_multiple),
    ("Adjusted R-squared (Multiple Regression):", adjusted_r_squared_multiple),
):
    print(stat_label, stat_value)

print("\nVariance Inflation Factor (VIF):", vif_data, sep="\n")

Regression Coefficients:
const                    -15.279975
Building_Height           49.889811
Material_Quality_Index    10.656039
Labor_Cost                 0.519088
Concrete_Strength         20.308442
Foundation_Depth          30.004214
dtype: float64

Variable with Highest Impact: Building_Height

R-squared (Simple Regression): 0.9154177373112963
R-squared (Multiple Regression): 0.9997946519351985
Adjusted R-squared (Multiple Regression): 0.9997837291657942

Variance Inflation Factor (VIF):
                 Variable        VIF
0                   const  36.217244
1         Building_Height   1.047164
2  Material_Quality_Index   1.048067
3              Labor_Cost   1.054086
4       Concrete_Strength   1.019701
5        Foundation_Depth   1.040594
