# **Import SKLearn functions to perform Machine Learning**

In [None]:
#imports the pandas library and assigns it the alias pd.
import pandas as pd

#imports the train_test_split function from scikit-learn's model_selection module to split data into training and testing sets.
from sklearn.model_selection import train_test_split

#imports the GaussianNB class from scikit-learn's naive_bayes module to create a Gaussian Naive Bayes classifier.
from sklearn.naive_bayes import GaussianNB

#imports the accuracy_score function from scikit-learn's metrics module to evaluate the performance of a classifier by computing the accuracy of its predictions.
from sklearn.metrics import accuracy_score


**Read and Print the dataset**

In [None]:
# Load the diabetes dataset from 'Naive-Bayes-Classification-Data.csv' into the DataFrame df.
df = pd.read_csv(filepath_or_buffer='Naive-Bayes-Classification-Data.csv')

# A bare name on the last line of a notebook cell displays the DataFrame.
df

Unnamed: 0,Glucose,BloodPressure,Diabetes
0,148,72,1
1,85,66,0
2,183,64,1
3,89,66,0
4,137,40,1
...,...,...,...
990,113,80,0
991,138,82,0
992,108,68,0
993,99,70,0


**Feature Values and Target Value**

In [None]:
# Split the table into model inputs and the label to predict:
# y is the 'Diabetes' column alone; x keeps every column except 'Diabetes'.
y = df['Diabetes']
x = df.drop(columns='Diabetes')

NameError: ignored

**Set the Training and Testing Data for Validation**

In [None]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state fixes the shuffle so the same rows land in each subset on every
# run; without it each execution yields a different split, and every metric
# computed from that split changes from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

**Training features: 80% of the data (796 rows)**

In [None]:
#x_train holds the feature rows of x that train_test_split assigned to the training set; the model is fitted on these rows.
x_train

Unnamed: 0,Glucose,BloodPressure
672,68,106
704,110,76
899,139,80
296,146,70
251,129,84
...,...,...
761,170,74
701,125,78
441,83,66
208,96,64


**Testing features: 20% of the data (199 rows)**

In [None]:
#x_test holds the feature rows of x that train_test_split held out for testing; the model makes its predictions on these rows.
x_test

Unnamed: 0,Glucose,BloodPressure
657,120,80
364,147,74
927,125,68
804,75,64
406,115,72
...,...,...
241,91,70
852,119,0
185,194,68
746,147,94


**Training labels: 80% of the data (796 rows)**

In [None]:
#y_train holds the 'Diabetes' labels (the target variable) that belong to the training rows.
y_train

111    1
744    0
622    0
864    0
80     0
      ..
157    0
206    1
628    0
876    0
218    1
Name: Diabetes, Length: 796, dtype: int64

**Testing labels: 20% of the data (199 rows)**

In [None]:
#y_test holds the 'Diabetes' labels (the target variable) that belong to the held-out testing rows; predictions are scored against these.
y_test

428    0
672    0
748    1
617    0
611    1
      ..
674    0
318    0
446    0
131    1
910    0
Name: Diabetes, Length: 199, dtype: int64

**Create and Train the Machine Learning Model**

In [None]:
# Build a Gaussian Naive Bayes classifier and train it in one step:
# fit() learns from the training features (x_train) and labels (y_train) and
# returns the fitted estimator itself, which is bound to model.
model = GaussianNB().fit(x_train, y_train)

# Show the fitted estimator as the cell's output.
model

**Predicted Values of x_test**

In [None]:
# Ask the trained model for a class label for every row of the test features.
predicted_value = model.predict(X=x_test)

# Leaving the name on the last line shows the array of predictions.
predicted_value

array([0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,
       1])

**Accuracy Score**

In [None]:
# Percentage of test rows whose predicted label equals the true label.
# The true labels are passed first and the predictions second, matching the
# (y_true, y_pred) order of accuracy_score; the score is the same either way
# because it only counts matching pairs.
100 * accuracy_score(y_test, predicted_value)

76.88442211055276

# **Generate the Confusion Matrix**

In [None]:
#confusion_matrix is used to create a matrix that shows the number of true positives, true negatives, false positives, and false negatives for a set of predictions.
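#accuracy_score computes the share of correct predictions directly from the true and predicted labels (it does not use the confusion matrix); it was already imported above and is re-imported here only for completeness.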
from sklearn.metrics import confusion_matrix, accuracy_score

**Compute and Print the Confusion Matrix**

In [None]:
# Cross-tabulate the model's predictions against the true test labels.
# confusion_matrix(y_true, y_pred) puts the true labels on the rows, so the
# result is transposed here: cf[i, j] counts the test rows predicted as
# class i whose actual class is j (rows = predicted, columns = actual).
cf = confusion_matrix(y_test, predicted_value).T
cf

array([[111,  26],
       [ 20,  42]])

**Computation of Recall**

In [None]:
# Recall of the diabetic class (label 1): of all test rows that are actually
# diabetic, the share the model also predicted as diabetic, as a percentage.
# cf is laid out with the predicted class on the rows and the actual class on
# the columns, so:
#   cf[1, 1] = predicted 1, actual 1 -> true positives
#   cf[0, 1] = predicted 0, actual 1 -> false negatives
# (cf[0, 0] / (cf[0, 0] + cf[1, 0]) would be the recall of the non-diabetic
# class instead.)
recall = cf[1, 1] / (cf[1, 1] + cf[0, 1]) * 100

# Print the recall as a percentage with two decimal places.
print('Recall: {:.2f}%'.format(recall))

Recall: 88.00%


**Computation of Precision**

In [None]:
# Precision of the diabetic class (label 1): of all test rows the model
# predicted as diabetic, the share that are actually diabetic, as a percentage.
# cf is laid out with the predicted class on the rows and the actual class on
# the columns, so:
#   cf[1, 1] = predicted 1, actual 1 -> true positives
#   cf[1, 0] = predicted 1, actual 0 -> false positives
# (cf[0, 0] / (cf[0, 0] + cf[0, 1]) would be the precision of the
# non-diabetic class instead.)
precision = cf[1, 1] / (cf[1, 1] + cf[1, 0]) * 100

# Print the precision as a percentage with two decimal places.
print('Precision: {:.2f}%'.format(precision))

Precision: 81.02%


**Computation of Accuracy**

In [None]:
# Number of test rows, i.e. how many predictions were made.
total = x_test.shape[0]

# Correct predictions sit on the diagonal of the confusion matrix
# (cf[0, 0] and cf[1, 1]); their share of all predictions, as a percentage,
# is the accuracy.
accuracy = cf.trace() / total * 100

# Report the accuracy with two decimal places.
print('Accuracy: {:.2f}%'.format(accuracy))

Accuracy: 73.87%


**Extract the Values from the Confusion Matrix**

In [None]:
# Flatten the 2x2 confusion matrix row by row and name each cell, treating
# diabetic (label 1) as the positive class. cf has the predicted class on the
# rows and the actual class on the columns, so ravel() yields, in order:
#   cf[0, 0] predicted 0, actual 0 -> true negative
#   cf[0, 1] predicted 0, actual 1 -> false negative
#   cf[1, 0] predicted 1, actual 0 -> false positive
#   cf[1, 1] predicted 1, actual 1 -> true positive
tn, fn, fp, tp = cf.ravel()

# Diabetic rows the model correctly predicted as diabetic.
print('True Positive', tp)

# Non-diabetic rows the model wrongly predicted as diabetic.
print('False Positive', fp)

# Non-diabetic rows the model correctly predicted as non-diabetic.
print('True Negative', tn)

# Diabetic rows the model wrongly predicted as non-diabetic.
print('False Negative', fn)

True Positive 110
False Positive 37
Tre Negative 15
False Negative 37


**Computation of Type I and Type II Error**

In [None]:
# Misclassified rows are the two off-diagonal cells of the confusion matrix
# (cf[1, 0] and cf[0, 1]), which together hold the Type I and Type II errors.
# Their combined share of all test predictions is expressed as a percentage.
type_er = (cf[1, 0] + cf[0, 1]) / total * 100

# Report the combined error rate with two decimal places.
print('Type I and Type 2 Error: {:.2f}%'.format(type_er))

Type I and Type 2 Error: 26.13%


# **Test Case #1**

In [None]:
# Program to input the glucose and blood pressure.

# input() returns text, so each reading is converted to a number right away:
# the model then receives numeric values, and a non-numeric entry fails here
# with a ValueError instead of later inside predict().
glucose = float(input('Enter Glucose:'))
bloodpressure = float(input('Enter Blood Pressure:'))

# One-row DataFrame (row index 0) holding the sample under the
# 'Glucose' and 'BloodPressure' column names.
df_sample = pd.DataFrame({'Glucose': [glucose], 'BloodPressure': [bloodpressure]})


Enter Glucose:92
Enter Blood Pressure:62


In [None]:
#Show the one-row sample DataFrame built from the values entered above.
df_sample

Unnamed: 0,Glucose,BloodPressure
0,92,62


In [None]:
#Predicted class label for the sample; predict() returns an array with one entry per row of df_sample.
model.predict(df_sample)

array([0])

In [None]:
# Turn the model's numeric label for the sample into a readable verdict:
# label 0 prints 'Not Diabetic', any other label prints 'Diabetic'.
prediction = model.predict(df_sample)
print('Not Diabetic' if prediction == 0 else 'Diabetic')


Not Diabetic



# **Test Case #2** - False Negative

In [None]:
# Program to input the glucose and blood pressure.
# input() returns text, so each reading is converted to a number before it
# reaches the model; a non-numeric entry fails here with a ValueError.
glucose = float(input('Enter Glucose:'))
bloodpressure = float(input('Enter Blood Pressure:'))
df_sample = pd.DataFrame({'Glucose': [glucose], 'BloodPressure': [bloodpressure]})

# Showing Sample Data. print() is required because a bare expression is only
# displayed when it is the last statement of a notebook cell.
print(df_sample)

# Predict once and reuse the result. predict() returns an array with one label
# per row, so element 0 is the label of the single sample:
# 0 means not diabetic, anything else means diabetic.
prediction = model.predict(df_sample)
if prediction[0] == 0:
    print('Not Diabetic')
else:
    print('Diabetic')


Enter Glucose:134
Enter Blood Pressure:72
Not Diabetic



# **Test Case #3**

In [None]:
# Program to input the glucose and blood pressure.
# input() returns text, so each reading is converted to a number before it
# reaches the model; a non-numeric entry fails here with a ValueError.
glucose = float(input('Enter Glucose:'))
bloodpressure = float(input('Enter Blood Pressure:'))
df_sample = pd.DataFrame({'Glucose': [glucose], 'BloodPressure': [bloodpressure]})

# Showing Sample Data. print() is required because a bare expression is only
# displayed when it is the last statement of a notebook cell.
print(df_sample)

# Predict once and reuse the result. predict() returns an array with one label
# per row, so element 0 is the label of the single sample:
# 0 means not diabetic, anything else means diabetic.
prediction = model.predict(df_sample)
if prediction[0] == 0:
    print('Not Diabetic')
else:
    print('Diabetic')


Enter Glucose:117
Enter Blood Pressure:92
Not Diabetic



# **Test Case #4** - False Positive

In [None]:
# Program to input the glucose and blood pressure.
# input() returns text, so each reading is converted to a number before it
# reaches the model; a non-numeric entry fails here with a ValueError.
glucose = float(input('Enter Glucose:'))
bloodpressure = float(input('Enter Blood Pressure:'))
df_sample = pd.DataFrame({'Glucose': [glucose], 'BloodPressure': [bloodpressure]})

# Showing Sample Data. print() is required because a bare expression is only
# displayed when it is the last statement of a notebook cell.
print(df_sample)

# Predict once and reuse the result. predict() returns an array with one label
# per row, so element 0 is the label of the single sample:
# 0 means not diabetic, anything else means diabetic.
prediction = model.predict(df_sample)
if prediction[0] == 0:
    print('Not Diabetic')
else:
    print('Diabetic')


Enter Glucose:145
Enter Blood Pressure:82
Diabetic



# **Test Case #5** - False Negative

In [None]:
# Program to input the glucose and blood pressure.
# input() returns text, so each reading is converted to a number before it
# reaches the model; a non-numeric entry fails here with a ValueError.
glucose = float(input('Enter Glucose:'))
bloodpressure = float(input('Enter Blood Pressure:'))
df_sample = pd.DataFrame({'Glucose': [glucose], 'BloodPressure': [bloodpressure]})

# Showing Sample Data. print() is required because a bare expression is only
# displayed when it is the last statement of a notebook cell.
print(df_sample)

# Predict once and reuse the result. predict() returns an array with one label
# per row, so element 0 is the label of the single sample:
# 0 means not diabetic, anything else means diabetic.
prediction = model.predict(df_sample)
if prediction[0] == 0:
    print('Not Diabetic')
else:
    print('Diabetic')


Enter Glucose:114
Enter Blood Pressure:66
Not Diabetic


# **Conclusion of Test Cases**

Based on the test cases presented above, there appears to be a relationship between high glucose levels, high blood pressure, and the likelihood of being diagnosed with diabetes. It is important to note, however, that a diagnosis of diabetes should only be made by a healthcare professional after performing appropriate tests and considering the individual's medical history and symptoms. Nonetheless, the results suggest a pattern wherein high glucose levels are strongly associated with a diagnosis of diabetes.

# **Recommendations for Diabetic and Non-Diabetic Persons**
Aside from daily monitoring of blood glucose levels and blood pressure, it is important for people with diabetes to follow a healthy lifestyle that includes regular exercise, a balanced diet, and medication management as prescribed by their healthcare provider. Regular medical check-ups, including eye exams and foot exams, are also important for detecting and managing any complications of diabetes.

To avoid diabetes, maintain a healthy diet, exercise regularly, maintain a healthy weight, quit smoking, and manage stress levels. While it may not be possible to eliminate the risk of diabetes completely, making these lifestyle modifications can help to reduce the risk and improve overall health. Consult with a healthcare professional for personalized advice.