In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv('/content/parkinson_data.csv')

# Display dataset preview
print("Dataset Preview:")
print(df.head())

# Handling missing values
df = df.dropna()

# Keep only numeric columns
df = df.select_dtypes(include=[np.number])

# Check if 'status' exists in the dataset
if 'status' not in df.columns:
    raise ValueError("Dataset does not contain a 'status' column. Check column names.")

# Separating features and target variable
X = df.drop(columns=['status'])
y = df['status']

# Splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the feature values
scaler_parkinson = StandardScaler()
X_train = scaler_parkinson.fit_transform(X_train)
X_test = scaler_parkinson.transform(X_test)

# Training the model
model_parkinson = RandomForestClassifier(n_estimators=200, random_state=42)
model_parkinson.fit(X_train, y_train)

# Evaluating the model
y_pred = model_parkinson.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'\nParkinson Model Accuracy: {accuracy:.2f}')

# Save the trained model and scaler
joblib.dump(model_parkinson, 'parkinson_model.pkl')
joblib.dump(scaler_parkinson, 'scaler_parkinson.pkl')

print("\nParkinson Model and Scaler saved successfully!")


Dataset Preview:
             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01_1      119.992       157.302        74.997         0.00784   
1  phon_R01_S01_2      122.400       148.650       113.819         0.00968   
2  phon_R01_S01_3      116.682       131.111       111.555         0.01050   
3  phon_R01_S01_4      116.676       137.871       111.366         0.00997   
4  phon_R01_S01_5      116.014       141.781       110.655         0.01284   

   MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \
0           0.00007   0.00370   0.00554     0.01109       0.04374  ...   
1           0.00008   0.00465   0.00696     0.01394       0.06134  ...   
2           0.00009   0.00544   0.00781     0.01633       0.05233  ...   
3           0.00009   0.00502   0.00698     0.01505       0.05492  ...   
4           0.00011   0.00655   0.00908     0.01966       0.06425  ...   

   Shimmer:DDA      NHR     HNR  status      RPDE       DFA   spread1

In [2]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv('/content/heart_disease_data.csv')

# Display dataset preview
print("Dataset Preview:")
print(df.head())

# Handling missing values
df = df.dropna()

# Keep only numeric columns
df = df.select_dtypes(include=[np.number])

# Check if 'target' exists in the dataset
if 'target' not in df.columns:
    raise ValueError("Dataset does not contain a 'target' column. Check column names.")

# Separating features and target variable
X = df.drop(columns=['target'])
y = df['target']

# Splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the feature values
scaler_heart = StandardScaler()
X_train = scaler_heart.fit_transform(X_train)
X_test = scaler_heart.transform(X_test)

# Training the model
model_heart = RandomForestClassifier(n_estimators=200, random_state=42)
model_heart.fit(X_train, y_train)

# Evaluating the model
y_pred = model_heart.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'\nHeart Disease Model Accuracy: {accuracy:.2f}')

# Save the trained model and scaler
joblib.dump(model_heart, 'heart_disease_model.pkl')
joblib.dump(scaler_heart, 'scaler_heart.pkl')

print("\nHeart Disease Model and Scaler saved successfully!")


Dataset Preview:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

Heart Disease Model Accuracy: 0.84

Heart Disease Model and Scaler saved successfully!


In [3]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv('/content/diabetes_data.csv')

# Display dataset preview
print("Dataset Preview:")
print(df.head())

# Handling missing values
df = df.dropna()

# Keep only numeric columns
df = df.select_dtypes(include=[np.number])

# Check if 'Outcome' exists in the dataset
if 'Outcome' not in df.columns:
    raise ValueError("Dataset does not contain an 'Outcome' column. Check column names.")

# Separating features and target variable
X = df.drop(columns=['Outcome'])
y = df['Outcome']

# Splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the feature values
scaler_diabetes = StandardScaler()
X_train = scaler_diabetes.fit_transform(X_train)
X_test = scaler_diabetes.transform(X_test)

# Training the model
model_diabetes = RandomForestClassifier(n_estimators=200, random_state=42)
model_diabetes.fit(X_train, y_train)

# Evaluating the model
y_pred = model_diabetes.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'\nDiabetes Model Accuracy: {accuracy:.2f}')

# Save the trained model and scaler
joblib.dump(model_diabetes, 'diabetes_model.pkl')
joblib.dump(scaler_diabetes, 'scaler_diabetes.pkl')

print("\nDiabetes Model and Scaler saved successfully!")


Dataset Preview:
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

Diabetes Model Accuracy: 0.73

Diabetes Model and Scaler saved successfully!
