In [1]:
import os

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Load the dataset
# The CSV location defaults to the original local path, but it can be overridden
# by setting the DIABETES_CSV environment variable, so the notebook can be re-run
# on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "DIABETES_CSV",
    '/Users/astha/Desktop/Diabetes Prediction/Datasets/Dataset1/diabetes.csv',
)
print("Loading the dataset from the specified path.")
df = pd.read_csv(DATA_PATH)

# Display the first few rows of the dataset
print("\nDisplaying the first 5 rows of the dataset to understand its structure.")
print(df.head())

# Separate the predictor columns from the target column (selected by position)
print("\nSeparating the dataset into features (first 8 columns) and labels (last column).")
features = df.iloc[:, :8]
labels = df.iloc[:, 8]

# Report the dimensions of both pieces
print("\nDisplaying the shape of the features (rows, columns) and labels (rows).")
print(f"Features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")

# Show the underlying arrays of features and labels
print("\nDisplaying the feature values.", features.values, sep="\n")
print("\nDisplaying the label values.", labels.values, sep="\n")

# Tally how many rows fall into each Outcome value
print("\nCounting the unique values in the labels (Outcome column).")
print(df["Outcome"].value_counts())

# Hold out 20% of the rows for evaluation; random_state is fixed at 0
print("\nSplitting the dataset into training and testing sets (80% training, 20% testing).")
split_parts = train_test_split(features, labels, test_size=0.2, random_state=0)
features_train, features_test, label_train, label_test = split_parts

# Report the size of every partition, in the same order as before
print("\nDisplaying the shapes of training and testing sets for features and labels.")
for caption, subset in (
    ("Features training set shape:", features_train),
    ("Labels training set shape:", label_train),
    ("Features testing set shape:", features_test),
    ("Labels testing set shape:", label_test),
):
    print(caption, subset.shape)

# Fit a Gaussian Naive Bayes model using explicitly supplied class priors
print("\nTraining the Naive Bayes classifier with specified priors (80% class 0, 20% class 1).")
naive_class = GaussianNB(priors=[0.8, 0.2]).fit(features_train, label_train)

# Run the fitted model back over the rows it was trained on
print("\nPredicting the labels for the training dataset.")
labels_train_pred = naive_class.predict(features_train)
print("Predicted labels for the training dataset:", labels_train_pred, sep="\n")

# Score the training predictions and report the result as a percentage
print("\nCalculating the accuracy of the model on the training dataset.")
train_accuracy = accuracy_score(label_train, labels_train_pred)
print(f"Training Accuracy of the model: {round(train_accuracy * 100, 2)}%")


# Run the fitted model over the held-out rows
print("\nPredicting the labels for the test dataset.")
labels_test_pred = naive_class.predict(features_test)
print("Predicted labels for the test dataset:", labels_test_pred, sep="\n")

# Score the held-out predictions and report the result as a percentage
print("\nCalculating the accuracy of the model on the test dataset.")
test_accuracy = accuracy_score(label_test, labels_test_pred)
print(f"Test Accuracy of the model: {round(test_accuracy * 100, 2)}%")

Loading the dataset from the specified path.

Displaying the first 5 rows of the dataset to understand its structure.
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

Separating the dataset into features (first 8 columns) and labels (last column).

Displaying the shape of the features (rows, columns) and labels (rows).
Features shape: (768, 8)
Label