In [2]:
import os
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from pathlib import Path

In [2]:
%pwd

'e:\\AI DA Portfolio\\Multiple-Disease-Prediction\\research'

In [4]:
os.chdir("../")

In [5]:
%pwd

'e:\\AI DA Portfolio\\Multiple-Disease-Prediction'

In [6]:
artifacts_root = Path("artifacts")
heart_train_data = artifacts_root / "data_ingestion/heart_disease/heart_train_data.csv"
parkinsons_train_data = artifacts_root / "data_ingestion/parkinsons/parkinsons_train_data.csv"
diabetes_train_data = artifacts_root / "data_ingestion/diabetes/diab_train_data.csv"

In [7]:
heart_df = pd.read_csv(heart_train_data)
parkinsons_df = pd.read_csv(parkinsons_train_data)
diabetes_df = pd.read_csv(diabetes_train_data)

In [11]:
def train_and_save_model(X, y, model, model_name):
    # Train the model
    model.fit(X, y)

    # Save the model as a pickle file
    model_file_path = artifacts_root / f"{model_name.lower().replace(' ', '_')}.pkl"
    with open(model_file_path, 'wb') as f:
        pickle.dump(model, f)
    print(f"{model_name} saved as {model_file_path}")

# Train model for heart disease
X_heart = heart_df.drop('target', axis=1)  # Replace 'target_column' with actual target column name
y_heart = heart_df['target']  # Replace 'target_column' with actual target column name

# Preprocessing for heart disease
# Encode categorical variables if necessary
X_heart_encoded = pd.get_dummies(X_heart)

# Scale the features
scaler_heart = StandardScaler()
X_heart_scaled = scaler_heart.fit_transform(X_heart_encoded)

logistic_model = LogisticRegression()
train_and_save_model(X_heart_scaled, y_heart, logistic_model, "Heart Disease Logistic Regression")

# Train model for Parkinson's
X_parkinsons = parkinsons_df.drop('status', axis=1)  # Replace 'status' with actual target column name
y_parkinsons = parkinsons_df['status']  # Replace 'status' with actual target column name

# Preprocessing for Parkinson's
X_parkinsons_encoded = pd.get_dummies(X_parkinsons)

# Scale the features
scaler_parkinsons = StandardScaler()
X_parkinsons_scaled = scaler_parkinsons.fit_transform(X_parkinsons_encoded)

svm_model_parkinsons = SVC()
train_and_save_model(X_parkinsons_scaled, y_parkinsons, svm_model_parkinsons, "Parkinson's SVM")

# Train model for Diabetes
X_diabetes = diabetes_df.drop('Outcome', axis=1)  # Replace 'target_column' with actual target column name
y_diabetes = diabetes_df['Outcome']  # Replace 'target_column' with actual target column name

# Preprocessing for Diabetes
X_diabetes_encoded = pd.get_dummies(X_diabetes)

# Scale the features
scaler_diabetes = StandardScaler()
X_diabetes_scaled = scaler_diabetes.fit_transform(X_diabetes_encoded)

svm_model_diabetes = SVC()
train_and_save_model(X_diabetes_scaled, y_diabetes, svm_model_diabetes, "Diabetes SVM")

print("Model training completed and models saved!")

Heart Disease Logistic Regression saved as artifacts\heart_disease_logistic_regression.pkl
Parkinson's SVM saved as artifacts\parkinson's_svm.pkl
Diabetes SVM saved as artifacts\diabetes_svm.pkl
Model training completed and models saved!


In [13]:
import pandas as pd
from sklearn.metrics import accuracy_score
from pathlib import Path
import pickle

# Define paths to datasets and model files
artifacts_root = Path("artifacts")
heart_train_data = artifacts_root / "data_ingestion/heart_disease/heart_train_data.csv"
heart_val_data = artifacts_root / "data_ingestion/heart_disease/heart_valid_data.csv"
parkinsons_train_data = artifacts_root / "data_ingestion/parkinsons/parkinsons_train_data.csv"
parkinsons_val_data = artifacts_root / "data_ingestion/parkinsons/parkinsons_valid_data.csv"
diabetes_train_data = artifacts_root / "data_ingestion/diabetes/diab_train_data.csv"
diabetes_val_data = artifacts_root / "data_ingestion/diabetes/diab_valid_data.csv"

# Load datasets
heart_train_df = pd.read_csv(heart_train_data)
heart_val_df = pd.read_csv(heart_val_data)

parkinsons_train_df = pd.read_csv(parkinsons_train_data)
parkinsons_val_df = pd.read_csv(parkinsons_val_data)

diabetes_train_df = pd.read_csv(diabetes_train_data)
diabetes_val_df = pd.read_csv(diabetes_val_data)

# Load models
with open(artifacts_root / "heart_disease_logistic_regression.pkl", 'rb') as f:
    heart_model = pickle.load(f)

with open(artifacts_root / "parkinson's_svm.pkl", 'rb') as f:
    parkinsons_model = pickle.load(f)

with open(artifacts_root / "diabetes_svm.pkl", 'rb') as f:
    diabetes_model = pickle.load(f)

# Function to evaluate model accuracy
def evaluate_model_accuracy(model, X_train, y_train, X_val, y_val, model_name):
    # Make predictions
    train_predictions = model.predict(X_train)
    val_predictions = model.predict(X_val)

    # Calculate accuracy
    train_accuracy = accuracy_score(y_train, train_predictions)
    val_accuracy = accuracy_score(y_val, val_predictions)

    print(f"{model_name} - Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")

# Evaluate Heart Disease model
X_heart_train = heart_train_df.drop('target', axis=1)  # Replace with actual target column name
y_heart_train = heart_train_df['target']  # Replace with actual target column name
X_heart_val = heart_val_df.drop('target', axis=1)  # Replace with actual target column name
y_heart_val = heart_val_df['target']  # Replace with actual target column name

evaluate_model_accuracy(heart_model, X_heart_train, y_heart_train, X_heart_val, y_heart_val, "Heart Disease Logistic Regression")

# Evaluate Parkinson's model
X_parkinsons_train = parkinsons_train_df.drop('status', axis=1)  # Replace 'status' with actual target column name
y_parkinsons_train = parkinsons_train_df['status']  # Replace 'status' with actual target column name
X_parkinsons_val = parkinsons_val_df.drop('status', axis=1)  # Replace 'status' with actual target column name
y_parkinsons_val = parkinsons_val_df['status']  # Replace 'status' with actual target column name

evaluate_model_accuracy(parkinsons_model, X_parkinsons_train, y_parkinsons_train, X_parkinsons_val, y_parkinsons_val, "Parkinson's SVM")

# Evaluate Diabetes model
X_diabetes_train = diabetes_train_df.drop('Outcome', axis=1)  # Replace with actual target column name
y_diabetes_train = diabetes_train_df['Outcome']  # Replace with actual target column name
X_diabetes_val = diabetes_val_df.drop('Outcome', axis=1)  # Replace with actual target column name
y_diabetes_val = diabetes_val_df['Outcome']  # Replace with actual target column name

evaluate_model_accuracy(diabetes_model, X_diabetes_train, y_diabetes_train, X_diabetes_val, y_diabetes_val, "Diabetes SVM")

Heart Disease Logistic Regression - Train Accuracy: 0.6557, Validation Accuracy: 0.7778




ValueError: could not convert string to float: 'phon_R01_S08_3'