In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures

# Load the data
data = pd.read_csv('insurance.csv')

# Check for missing data and remove rows with missing data
data = data.dropna()

# Remove outliers (optional)
# You can use any outlier detection technique to identify and remove outliers from the data

# Split the data into features (X) and target (y)
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Convert categorical variables to numerical using one-hot encoding
X = pd.get_dummies(X, drop_first=True)

# Scale the data using Min-Max scalar
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Linear Regression
regressor_lr = LinearRegression()
regressor_lr.fit(X_train, y_train)
y_pred_lr = regressor_lr.predict(X_test)

# Polynomial Regression
degrees = [2, 3, 4, 5]
for degree in degrees:
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X_scaled)
    X_train_poly, X_test_poly, y_train_poly, y_test_poly = train_test_split(X_poly, y, test_size=0.2, random_state=42)
    regressor_poly = LinearRegression()
    regressor_poly.fit(X_train_poly, y_train_poly)
    y_pred_poly = regressor_poly.predict(X_test_poly)
    mse_poly = mean_squared_error(y_test_poly, y_pred_poly)
    print(f"Polynomial Regression (Degree {degree}): Mean Squared Error: {mse_poly}")

# Decision Tree Regression
regressor_dt = DecisionTreeRegressor(random_state=42)
regressor_dt.fit(X_train, y_train)
y_pred_dt = regressor_dt.predict(X_test)
mse_dt = mean_squared_error(y_test, y_pred_dt)
print(f"Decision Tree Regression: Mean Squared Error: {mse_dt}")

# Random Forest Regression
regressor_rf = RandomForestRegressor(random_state=42)
regressor_rf.fit(X_train, y_train)
y_pred_rf = regressor_rf.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest Regression: Mean Squared Error: {mse_rf}")

# AdaBoost Regression
regressor_ab = AdaBoostRegressor(random_state=42)
regressor_ab.fit(X_train, y_train)
y_pred_ab = regressor_ab.predict(X_test)
mse_ab = mean_squared_error(y_test, y_pred_ab)
print(f"AdaBoost Regression: Mean Squared Error: {mse_ab}")

Polynomial Regression (Degree 2): Mean Squared Error: 20712805.987918362
Polynomial Regression (Degree 3): Mean Squared Error: 23498217.99893398
Polynomial Regression (Degree 4): Mean Squared Error: 38158498.332725324
Polynomial Regression (Degree 5): Mean Squared Error: 194410024.56154895
Decision Tree Regression: Mean Squared Error: 42446748.91927925
Random Forest Regression: Mean Squared Error: 20997250.35426495
AdaBoost Regression: Mean Squared Error: 27764201.0378857
