In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

# Apply seaborn's "whitegrid" style; this is a global setting, so it affects
# every matplotlib figure created later in this script.
sns.set(style="whitegrid")

# --- Multiple linear regression: house price from size, bedrooms and age ---
# Small hand-written dataset of ten houses; 'Price' is the regression target.
house_data = pd.DataFrame({
    'Size': [1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400],
    'Bedrooms': [3, 3, 2, 4, 3, 3, 4, 5, 4, 5],
    'Age': [10, 15, 20, 8, 12, 10, 5, 7, 6, 4],
    'Price': [300000, 320000, 310000, 360000, 340000, 400000, 420000, 450000, 440000, 480000]
})

X = house_data[['Size', 'Bedrooms', 'Age']]
y = house_data['Price']

# Fit an ordinary least-squares model on all ten rows (no train/test split).
model1 = LinearRegression()
model1.fit(X, y)

# Query the model with a DataFrame that carries the training column names.
# The model was fitted on a DataFrame, so passing a bare nested list would
# make scikit-learn warn that X has no valid feature names and would rely on
# the values happening to be in the right column order.
new_house = pd.DataFrame([[2000, 3, 10]], columns=X.columns)
predicted_price = model1.predict(new_house)
print("Predicted Price for Size=2000, Bedrooms=3, Age=10:", predicted_price[0])


# One coefficient per feature, in the column order of X, plus the intercept.
print("Coefficients:", model1.coef_)
print("Intercept:", model1.intercept_)


# --- Multiple linear regression: marks from study, sleep and attendance ---
# Ten hand-written student records; 'Marks' is the regression target.
student_data = pd.DataFrame(
    {
        'Hours_Study': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'Hours_Sleep': [8, 7, 7, 6, 6, 5, 5, 4, 4, 3],
        'Attendance': [80, 82, 85, 88, 90, 92, 94, 96, 98, 100],
        'Marks': [55, 60, 63, 67, 70, 75, 78, 83, 87, 90],
    }
)

mark_features = ['Hours_Study', 'Hours_Sleep', 'Attendance']
X2 = student_data[mark_features]
y2 = student_data['Marks']

# fit() returns the estimator itself, so construction and fitting can be chained.
model2 = LinearRegression().fit(X2, y2)

# Predictions are made on the same rows the model was fitted on.
y_pred2 = model2.predict(X2)

# End-points of the dashed y = x reference line (a perfect prediction would
# put every point on it).
lowest_mark = y2.min()
highest_mark = y2.max()
diagonal = [lowest_mark, highest_mark]

plt.figure(figsize=(6, 5))
plt.scatter(y2, y_pred2, color='blue')
plt.plot(diagonal, diagonal, 'r--')
plt.title('Actual vs Predicted Marks')
plt.xlabel('Actual Marks')
plt.ylabel('Predicted Marks')
plt.show()

# Goodness-of-fit on the training rows.
r2_marks = r2_score(y2, y_pred2)
print("R² Score:", r2_marks)
mse_marks = mean_squared_error(y2, y_pred2)
print("Mean Squared Error:", mse_marks)

# --- Logistic regression: pass/fail from study hours and sleep hours ---
# Ten hand-written student records; 'Pass' is the binary target (1 = pass).
student_class = pd.DataFrame({
    'Hours_Study': [10,9,8,7,6,5,4,3,2,1],
    'Hours_Sleep': [4,5,5,6,6,7,7,8,8,9],
    'Pass': [1,1,1,1,1,0,0,0,0,0]
})

X3 = student_class[['Hours_Study', 'Hours_Sleep']]
y3 = student_class['Pass']

log_model1 = LogisticRegression()
log_model1.fit(X3, y3)

# Query with a DataFrame that has the training column names; a bare list on a
# model fitted from a DataFrame triggers scikit-learn's "X does not have
# valid feature names" warning and depends on positional column order.
# NOTE(review): 30 study hours lies far outside the training range (1-10), so
# this probability is an extrapolation - confirm the value is intended.
new_student = pd.DataFrame([[30, 6]], columns=X3.columns)
prob = log_model1.predict_proba(new_student)[0][1]  # column 1 = P(Pass == 1)
print("Predicted probability of passing (30 study hrs, 6 sleep hrs):", prob)


# Build a 100 x 100 grid covering the data range padded by one unit per side,
# classify every grid point, and reshape the labels back to the grid shape so
# they can be drawn as filled contours.
x_min, x_max = X3['Hours_Study'].min()-1, X3['Hours_Study'].max()+1
y_min, y_max = X3['Hours_Sleep'].min()-1, X3['Hours_Sleep'].max()+1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
grid_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X3.columns)
Z = log_model1.predict(grid_points)
Z = Z.reshape(xx.shape)

# Start a fresh figure explicitly: on backends where plt.show() does not close
# the current figure, the contour would otherwise be drawn on top of whatever
# was plotted previously.
plt.figure()
plt.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')
plt.scatter(X3['Hours_Study'], X3['Hours_Sleep'], c=y3, edgecolor='k')
plt.title('Decision Boundary: Pass (1) vs Fail (0)')
plt.xlabel('Hours Study')
plt.ylabel('Hours Sleep')
plt.show()


# --- Logistic regression: diabetic / not diabetic from BMI, age and glucose ---
# Ten hand-written patient records; 'Diabetic' is the binary target (1 = diabetic).
diabetes_data = pd.DataFrame({
    'BMI': [25,28,32,30,26,35,29,31,24,33],
    'Age': [30,45,50,40,35,55,38,42,28,48],
    'Glucose': [100,150,160,130,110,180,140,155,95,170],
    'Diabetic': [0,1,1,0,0,1,0,1,0,1]
})

X4 = diabetes_data[['BMI', 'Age', 'Glucose']]
y4 = diabetes_data['Diabetic']

log_model2 = LogisticRegression()
log_model2.fit(X4, y4)

# NOTE: these metrics are computed on the same rows the model was fitted on,
# so they describe the fit to the training data rather than performance on
# unseen patients.
y_pred4 = log_model2.predict(X4)
print("Accuracy:", accuracy_score(y4, y_pred4))
print("Precision:", precision_score(y4, y_pred4))
print("Recall:", recall_score(y4, y_pred4))


# Classify a new patient. The query is a DataFrame with the training column
# names; a bare list on a model fitted from a DataFrame triggers
# scikit-learn's "X does not have valid feature names" warning and depends on
# positional column order.
new_patient = pd.DataFrame([[28, 45, 150]], columns=X4.columns)
new_pred = log_model2.predict(new_patient)
print("Prediction for (BMI=28, Age=45, Glucose=150):", "Diabetic" if new_pred[0]==1 else "Not Diabetic")

# --- Side-by-side comparison: linear vs logistic regression on study hours ---
# One predictor ('Hours_Study') and two targets: a continuous exam score and a
# binary pass flag.
comparison_data = pd.DataFrame(
    {
        'Hours_Study': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'Exam_Score': [45, 50, 55, 60, 65, 70, 75, 80, 85, 90],
        'Pass': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
    }
)

# Double brackets keep the predictor as a one-column DataFrame, which is the
# 2-D shape the estimators are fitted and queried with; the 1-D Series is
# used for the plot x-axes.
study_hours_frame = comparison_data[['Hours_Study']]
study_hours = comparison_data['Hours_Study']


# Linear regression: continuous exam score as a function of study hours.
lin_model = LinearRegression().fit(study_hours_frame, comparison_data['Exam_Score'])
pred_lin = lin_model.predict(study_hours_frame)

plt.figure(figsize=(6, 4))
plt.scatter(study_hours, comparison_data['Exam_Score'], label='Actual')
plt.plot(study_hours, pred_lin, color='red', label='Linear Fit')
plt.title('Linear Regression: Predicting Exam Score')
plt.xlabel('Hours Study')
plt.ylabel('Exam Score')
plt.legend()
plt.show()


# Logistic regression: probability of the positive class (Pass == 1).
log_model3 = LogisticRegression().fit(study_hours_frame, comparison_data['Pass'])
class_probabilities = log_model3.predict_proba(study_hours_frame)
prob_log = class_probabilities[:, 1]

plt.figure(figsize=(6, 4))
plt.scatter(study_hours, comparison_data['Pass'], label='Actual Pass/Fail')
plt.plot(study_hours, prob_log, color='green', label='Logistic Fit')
plt.title('Logistic Regression: Pass/Fail Prediction')
plt.xlabel('Hours Study')
plt.ylabel('Probability of Passing')
plt.legend()
plt.show()

# Closing summary, printed one message per line.
for summary_line in (
    "✅ Comparison Done:",
    "Linear Regression predicts continuous values (not ideal for classification).",
    "Logistic Regression predicts probabilities between 0 and 1 — ideal for binary classification.",
):
    print(summary_line)


: 