In [14]:
# Logistic Regression with L1, L2, ElasticNet on Heart Disease Dataset
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import joblib
import numpy as np

# reproducibility
SEED = 42
np.random.seed(SEED)

# load dataset (replace with your uploaded heart_disease_v1.csv)
df = pd.read_csv("heart.csv")

X = df.drop("target", axis=1)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# try L1, L2, ElasticNet
penalties = ["l1","l2","elasticnet"]
for p in penalties:
    if p == "elasticnet":
        model = LogisticRegression(penalty=p, solver="saga", l1_ratio=0.5, max_iter=1000, random_state=SEED)
    else:
        model = LogisticRegression(penalty=p, solver="saga", max_iter=1000, random_state=SEED)

    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\nPenalty: {p.upper()}")
    print("Accuracy:", accuracy_score(y_test, preds))
    print(classification_report(y_test, preds))

    # save model for reproducibility (MLOps practice)
    joblib.dump(model, f"log_reg_{p}.pkl")



Penalty: L1
Accuracy: 0.8524590163934426
              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.85      0.88      0.86        32

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61


Penalty: L2
Accuracy: 0.8524590163934426
              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.85      0.88      0.86        32

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61


Penalty: ELASTICNET
Accuracy: 0.8524590163934426
              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.85      0.88      0.86        32

    accuracy                           0.85     



In [10]:
# Polynomial Regression on Student Scores
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# reproducibility
SEED = 42

# load dataset
df = pd.read_csv("student_scores_v1.csv")
X = df[["Hours"]]
y = df["Scores"]

# polynomial features
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

# model
model = LinearRegression()
model.fit(X_poly, y)

# predictions
y_pred = model.predict(X_poly)
print("MSE:", mean_squared_error(y, y_pred))
print("R2:", r2_score(y, y_pred))

# save model & preprocessor
joblib.dump(model, "poly_reg.pkl")
joblib.dump(poly, "poly_features.pkl")


MSE: 1.6743053095438258
R2: 0.9973916008824817


['poly_features.pkl']

In [15]:
import pandas as pd
import numpy as np

# Generate synthetic student dataset
np.random.seed(42)
data = {
    "hours_study": np.random.randint(1, 10, 50),   # hours of study
    "hours_sleep": np.random.randint(4, 9, 50),    # hours of sleep
    "attendance": np.random.randint(60, 100, 50),  # attendance %
}

# Assume target (score) is a combination of features + noise
data["score"] = (
    5*data["hours_study"]
    + 2*data["hours_sleep"]
    + 0.5*data["attendance"]
    + np.random.randint(-5, 5, 50)
)

df = pd.DataFrame(data)
df.to_csv("student_scores_v1.csv", index=False)

print("✅ student_scores_v1.csv created with", len(df), "rows")
df.head()


✅ student_scores_v1.csv created with 50 rows


Unnamed: 0,hours_study,hours_sleep,attendance,score
0,7,7,99,101.5
1,4,5,81,67.5
2,8,5,86,94.0
3,5,7,94,86.0
4,7,8,60,83.0


In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Load dataset
df = pd.read_csv("student_scores_v1.csv")

# Features and target
X = df[["hours_study", "hours_sleep", "attendance"]]
y = df["score"]

# Train-test split (ensure multiple test samples)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("✅ Multiple Linear Regression Results")
print("MSE:", mse)
print("R²:", r2)
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# Save model
joblib.dump(model, "multi_linear.pkl")
print("✅ Model saved as multi_linear.pkl")


✅ Multiple Linear Regression Results
MSE: 9.220212333988759
R²: 0.9439364445215326
Coefficients: [4.7928358  1.99138636 0.50799905]
Intercept: -1.2109548425206782
✅ Model saved as multi_linear.pkl
