<a href="https://colab.research.google.com/github/yochana4/MLProject/blob/main/MLPROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Problem Statement:** Predicting Pregnancy Complications.

**Naive Bayes Technique**

In [2]:
 import pandas as pd
 from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import accuracy_score
 # Load the generated dataset
 df = pd.read_csv("maternal_health_dataset.csv")
 # Map categorical data (Yes/No and Risk_Level) to numerical values for the classifier
 df["Smoking_Status"] = df["Smoking_Status"].map({"Yes": 1, "No": 0})
 df["Alcohol_Use"] = df["Alcohol_Use"].map({"Yes": 1, "No": 0})
 df["Risk_Level"] = df["Risk_Level"].map({"Low Risk": 0, "Mid Risk": 1, "High Risk": 2})
 # Define features and target
 X = df.drop(columns=["Patient_ID", "Risk_Level"])  # Drop Patient_ID as it's not a feature
 y = df["Risk_Level"]
 # Define split ratios and initialize list for results
 split_ratios = [0.5, 0.6, 0.7, 0.8]
 results = []
 # Train and test with exact split ratios
 for ratio in split_ratios:
    # Calculate exact number of training samples
    train_size = int(ratio * len(df))

    # Split data manually for exact ratios
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    # Initialize and train the Naive Bayes classifier
    model = GaussianNB()
    model.fit(X_train, y_train)
    # Make predictions and calculate accuracy
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred) * 100  # Convert to percentage
    # Append results for the table output
    train_ratio = int(ratio * 100)
    test_ratio = 100 - train_ratio
    results.append({"Train-Test Ratio": f"{train_ratio}:{test_ratio}", "Accuracy (%)": f"{accuracy:.2f}"})
 # Display results in a table
 results_df = pd.DataFrame(results)
 print(results_df)

  Train-Test Ratio Accuracy (%)
0            50:50        85.77
1            60:40        85.72
2            70:30        86.03
3            80:20        86.20


**KNN Technique**

In [4]:
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.metrics import accuracy_score
 # Load the generated dataset
 df = pd.read_csv("maternal_health_dataset.csv")
 # Map categorical data (Yes/No and Risk_Level) to numerical values for the classifier
 df["Smoking_Status"] = df["Smoking_Status"].map({"Yes": 1, "No": 0})
 df["Alcohol_Use"] = df["Alcohol_Use"].map({"Yes": 1, "No": 0})
 df["Risk_Level"] = df["Risk_Level"].map({"Low Risk": 0, "Mid Risk": 1, "High Risk": 2})
 # Define features and target
 X = df.drop(columns=["Patient_ID", "Risk_Level"])  # Drop Patient_ID as it's not a feature
 y = df["Risk_Level"]
 # Define split ratios and initialize list for results
 split_ratios = [0.5, 0.6, 0.7, 0.8]
 results = []
 # Train and test with exact split ratios
 for ratio in split_ratios:
    # Calculate exact number of training samples
    train_size = int(ratio * len(df))

    # Split data manually for exact ratios
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    # Initialize and train the KNN classifier with a chosen number of neighbors (e.g., 5)
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train, y_train)
    # Make predictions and calculate accuracy
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred) * 100  # Convert to percentage
    # Append results for the table output
    train_ratio = int(ratio * 100)
    test_ratio = 100 - train_ratio
    results.append({"Train-Test Ratio": f"{train_ratio}:{test_ratio}", "Accuracy (%)": f"{accuracy:.2f}"})
 # Display results in a table
 results_df = pd.DataFrame(results)
 print(results_df)

  Train-Test Ratio Accuracy (%)
0            50:50        94.44
1            60:40        94.55
2            70:30        94.57
3            80:20        94.67


**Linear Regression Technique**

In [6]:
 import numpy as np
 import pandas as pd
 from sklearn.metrics import accuracy_score
 # Load the generated dataset
 df = pd.read_csv("maternal_health_dataset.csv")
 # Map categorical data (Yes/No and Risk_Level) to numerical values for the model
 df["Smoking_Status"] = df["Smoking_Status"].map({"Yes": 1, "No": 0})
 df["Alcohol_Use"] = df["Alcohol_Use"].map({"Yes": 1, "No": 0})
 # Map Risk_Level to numerical values for classification (0 = Low, 1 = Mid, 2 = High)
 df["Risk_Level"] = df["Risk_Level"].map({"Low Risk": 0, "Mid Risk": 1, "High Risk": 2})
 # Define features and target
 X = df.drop(columns=["Patient_ID", "Risk_Level"])  # Drop Patient_ID as it's not a feature
 y = df["Risk_Level"]
 # Add a column of ones to X for the intercept term (bias)
 X = np.c_[np.ones(X.shape[0]), X]  # Add a column of ones as the first column in X
 # Define split ratios and initialize list for results
 split_ratios = [0.5, 0.6, 0.7, 0.8]
 results = []
 # Train and test with exact split ratios
 for ratio in split_ratios:
    # Calculate exact number of training samples
    train_size = int(ratio * len(df))
    test_size = len(df) - train_size  # Remaining samples for testing
    # Split data manually for exact ratios
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    # Calculate the coefficients (beta) using the Normal Equation
    # β = (X^T * X)^(-1) * X^T * y
    X_train_T = X_train.T
    beta = np.linalg.inv(X_train_T.dot(X_train)).dot(X_train_T).dot(y_train)
    # Make predictions on the test set
    y_pred = X_test.dot(beta)  # Here beta already includes the intercept
    # Round predictions to nearest integer and calculate accuracy
    y_pred_class = np.round(y_pred)  # Convert predictions to nearest class (0, 1, or 2)

    # Ensure that predicted classes stay within valid range (0, 1, 2)
    y_pred_class = np.clip(y_pred_class, 0, 2)
    # Calculate accuracy as percentage
    accuracy = accuracy_score(y_test, y_pred_class) * 100  # Multiply by 100 for percentage

    # Append the result for the current ratio
    results.append({
        "Train-Test Ratio": f"{int(ratio*100)}:{100 - int(ratio*100)}",  # Display ratio as exact integers
        "Accuracy (%)": f"{accuracy:.2f}"  # Format accuracy to 2 decimal places
    })
 # Display results in a table
 results_df = pd.DataFrame(results)
 print(results_df)

  Train-Test Ratio Accuracy (%)
0            50:50        94.69
1            60:40        94.77
2            70:30        94.83
3            80:20        94.91


**Logistic Regression Technique**

In [7]:
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 # Load the generated dataset (1 Lakh records)
 df = pd.read_csv("maternal_health_dataset.csv")
 # Map categorical data (Yes/No and Risk_Level) to numerical values for the model
 df["Smoking_Status"] = df["Smoking_Status"].map({"Yes": 1, "No": 0})
 df["Alcohol_Use"] = df["Alcohol_Use"].map({"Yes": 1, "No": 0})
 # Map Risk_Level to numerical values for classification (0 = Low, 1 = Mid, 2 = High)
 df["Risk_Level"] = df["Risk_Level"].map({"Low Risk": 0, "Mid Risk": 1, "High Risk": 2})
 # Define features and target
 X = df.drop(columns=["Patient_ID", "Risk_Level"])  # Drop Patient_ID as it's not a feature
 y = df["Risk_Level"]
 # Define split ratios and initialize list for results
 split_ratios = [0.5, 0.6, 0.7, 0.8]
 results = []
 # Train and test with exact split ratios
 for ratio in split_ratios:
    # Calculate the number of training and testing samples to ensure exact ratios
    train_size = int(ratio * len(df))
    test_size = len(df) - train_size
    # Manually split data for exact ratios with train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, test_size=test_size, shuffle=True, random_state=42
    )

    # Initialize Logistic Regression model with a high iteration count for large data
    model = LogisticRegression(max_iter=2000, solver='lbfgs', multi_class='ovr')
    # Train the model
    model.fit(X_train, y_train)
    # Make predictions on the test set
    y_pred = model.predict(X_test)
    # Calculate accuracy as a percentage
    accuracy = accuracy_score(y_test, y_pred) * 100  # Multiply by 100 for percentage

    # Append results for the current ratio
    results.append({
        "Train-Test Ratio": f"{int(ratio*100)}:{100 - int(ratio*100)}",
        "Accuracy (%)": f"{accuracy:.2f}"
    })
 # Display results in a table
 results_df = pd.DataFrame(results)
 print(results_df)



  Train-Test Ratio Accuracy (%)
0            50:50        95.51
1            60:40        95.54
2            70:30        95.60
3            80:20        95.61
