<a href="https://colab.research.google.com/github/umamahesh7991/ML-/blob/main/logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data_path = '/content/result.csv'
data = pd.read_csv(data_path)

# Feature engineering: create a binary column 'IsGreaterThan100'
data['IsGreaterThan100'] = (data['Result'] > 100).astype(int)

# Display first few rows of the dataset (optional, for understanding the data)
print("Dataset preview:")
print(data.head())

# Extract features (Value1 and Value2) and the target (IsGreaterThan100)
X = data[['Value1', 'Value2']]  # Independent variables
y = data['IsGreaterThan100']    # Dependent variable (binary target)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the Logistic Regression model
model = LogisticRegression()

# Train the model on the training data
model.fit(X_train, y_train)

# Predict probabilities on the testing set (these are continuous values between 0 and 1)
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Calculate MSE (on predicted probabilities vs actual binary labels)
mse = mean_squared_error(y_test, y_pred_prob)
print(f"Mean Squared Error (MSE): {mse}")

# Calculate R² (on predicted probabilities vs actual binary labels)
r2 = r2_score(y_test, y_pred_prob)
print(f"R² Score: {r2}")

# Display the actual vs predicted probabilities (optional)
comparison = pd.DataFrame({'Actual': y_test, 'Predicted Probability': y_pred_prob})
print("\nComparison of actual labels and predicted probabilities:")
print(comparison.head())

# If you want to make predictions on new data (optional)
new_data = pd.DataFrame({'Value1': [3, 10], 'Value2': [4, 20]})
predicted_prob = model.predict_proba(new_data)[:, 1]
print(f"\nPredictions for new data:\n{new_data}\nPredicted Probabilities: {predicted_prob}")


Dataset preview:
   Value1  Value2  Result  IsGreaterThan100
0      14       0       0                 0
1      10       7      70                 0
2       0       9       0                 0
3      19       1      19                 0
4      12       5      60                 0
Mean Squared Error (MSE): 0.11116230868976126
R² Score: 0.5264129906512847

Comparison of actual labels and predicted probabilities:
     Actual  Predicted Probability
280       0               0.670844
434       1               0.453385
39        0               0.325983
417       1               0.389070
584       0               0.825804

Predictions for new data:
   Value1  Value2
0       3       4
1      10      20
Predicted Probabilities: [0.11059062 0.59170311]
