In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Read the preprocessed student records into a DataFrame.
file_path = "C:\\Users\\HP\\Downloads\\assignment3\\Processed_Data_Set.csv"
preprocessed_data = pd.read_csv(file_path)

# Predictor columns: Matric and Intermediate percentages plus the SGPA of
# BS semesters one through four.
features = [
    'Matric percentage',
    'Intermediate percentage',
    'SGPA in BS First semester',
    'SGPA in BS Second semester',
    'SGPA in BS Third semester',
    'SGPA in BS Fourth semester',
]

# Feature matrix and the two fifth-semester regression targets.
X = preprocessed_data.loc[:, features]
y_sgpa = preprocessed_data.loc[:, 'SGPA in BS Fifth semester']
y_cgpa = preprocessed_data.loc[:, 'CGPA in BS Fifth semester']

# A single seeded 80/20 split is applied to the features and both targets
# together, so the SGPA and CGPA models train and test on the same rows.
(
    X_train, X_test,
    y_sgpa_train, y_sgpa_test,
    y_cgpa_train, y_cgpa_test,
) = train_test_split(X, y_sgpa, y_cgpa, test_size=0.2, random_state=42)

# Model selection
# Candidate regressors, keyed by the display name that is used both in the
# results table and in the saved-model file names. Every hyperparameter is
# left at its scikit-learn default except random_state.
# Decision Tree and Random Forest are randomised estimators; seeding them
# (with the same seed as the train/test split) makes their reported metrics
# reproducible from run to run.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Support Vector Regression': SVR(),
}

# Training and evaluation
# Each estimator is fitted twice on the same training rows: once to predict
# fifth-semester SGPA and once to predict fifth-semester CGPA. Calling fit()
# again on the same estimator object replaces the previous fit, so the
# estimator is written to disk immediately after each fit; otherwise only
# the CGPA fit would survive.
results = []

for model_name, model in models.items():
    # File-name stem, e.g. 'Linear Regression' -> 'Linear_Regression'.
    file_stem = model_name.replace(' ', '_')

    # Train and evaluate for SGPA, then persist before the estimator is refitted.
    model.fit(X_train, y_sgpa_train)
    sgpa_predictions = model.predict(X_test)
    sgpa_mse = mean_squared_error(y_sgpa_test, sgpa_predictions)
    sgpa_r2 = r2_score(y_sgpa_test, sgpa_predictions)
    joblib.dump(model, f"{file_stem}_SGPA_model.joblib")

    # Train and evaluate for CGPA (this discards the SGPA fit held in `model`).
    model.fit(X_train, y_cgpa_train)
    cgpa_predictions = model.predict(X_test)
    cgpa_mse = mean_squared_error(y_cgpa_test, cgpa_predictions)
    cgpa_r2 = r2_score(y_cgpa_test, cgpa_predictions)
    joblib.dump(model, f"{file_stem}_CGPA_model.joblib")

    # Backward compatibility: the original script wrote a single file under
    # this name, and it always contained the CGPA-fitted estimator. Keep
    # producing it so existing loaders continue to work.
    joblib.dump(model, f"{file_stem}_model.joblib")

    # Store the test-set metrics for this model (one row of the results table).
    results.append({
        'Model': model_name,
        'SGPA MSE': sgpa_mse,
        'SGPA R2': sgpa_r2,
        'CGPA MSE': cgpa_mse,
        'CGPA R2': cgpa_r2,
    })

# Displaying the results
results_df = pd.DataFrame(results)
print(results_df)


                       Model  SGPA MSE   SGPA R2  CGPA MSE   CGPA R2
0          Linear Regression  0.055989  0.798984  0.807278  0.319302
1           Ridge Regression  0.055999  0.798950  0.807010  0.319528
2           Lasso Regression  0.361542 -0.298028  1.187117 -0.000979
3              Decision Tree  0.122033  0.561869  1.096620  0.075328
4              Random Forest  0.092558  0.667692  1.034075  0.128066
5  Support Vector Regression  0.130602  0.531106  0.900969  0.240301
