In [314]:
#Let's start with importing necessary libraries
import pandas as pd 
import numpy as np 
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model  import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [315]:
df=pd.read_csv('CleanData.csv')

In [316]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,23,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [317]:
# make a prediction with a multinomial logistic regression model
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

In [318]:
## Independent input feature
X=df.drop('Outcome',axis=1)

In [319]:
## Dependent output feature
y=df['Outcome']

In [320]:
# Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [321]:
X_train.shape,y_train.shape,X_test.shape,y_test.shape

((576, 8), (576,), (192, 8), (192,))

In [322]:
import pickle
##standard Scaling- Standardization
def scaler_standard(X_train, X_test):
    #scaling the data
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    #saving the model
    file = open('standardScalar.pkl','wb')
    pickle.dump(scaler,file)
    file.close()
    
    return X_train_scaled, X_test_scaled

In [323]:
X_train_scaled, X_test_scaled = scaler_standard(X_train, X_test)

In [324]:
log_reg = LogisticRegression()
log_reg.fit(X_train_scaled,y_train)

In [325]:
## Hyperparameter Tuning
## GridSearch CV
from sklearn.model_selection import GridSearchCV
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# parameter grid
parameters = {
    'penalty' : ['l1','l2'], 
    'C'       : np.logspace(-3,3,7),
    'solver'  : ['newton-cg', 'lbfgs', 'liblinear'],
}


In [345]:
logreg = LogisticRegression()
clf = GridSearchCV(logreg,                    # model
                    param_grid = parameters,   # hyperparameters
                   scoring='accuracy',        # metric for scoring
                   cv=10)                     # number of folds

clf.fit(X_train_scaled,y_train)

In [346]:
clf.best_params_

{'C': 1.0, 'penalty': 'l2', 'solver': 'newton-cg'}

In [347]:
clf.best_score_

0.763762855414398

### let's see how well our model performs on the test data set.

In [329]:
y_pred = clf.predict(X_test_scaled)

## Confusion Matrix, Accuracy, ClassificationReport

In [330]:
conf_mat = confusion_matrix(y_test,y_pred)
conf_mat

array([[118,  12],
       [ 26,  36]])

In [331]:
true_positive = conf_mat[0][0]
false_positive = conf_mat[0][1]
false_negative = conf_mat[1][0]
true_negative = conf_mat[1][1]

In [332]:
Accuracy = (true_positive + true_negative) / (true_positive +false_positive + false_negative + true_negative)
Accuracy

0.8020833333333334

In [333]:
Precision = true_positive/(true_positive+false_positive)
Precision


0.9076923076923077

In [334]:
Recall = true_positive/(true_positive+false_negative)
Recall

0.8194444444444444

In [335]:
F1_Score = 2*(Recall * Precision) / (Recall + Precision)
F1_Score

0.8613138686131386

## New Data From User

In [336]:
import pickle
file = open('modelForPrediction.pkl','wb')
pickle.dump(log_reg,file)
file.close()

In [337]:
data=pd.DataFrame({"Pregnancies":[1],	"Glucose":[85],	"BloodPressure":[66],	"SkinThickness":[28],	"Insulin":[0],	"BMI":[26.6],	"DiabetesPedigreeFunction":[0.351],	"Age":[31]})

In [338]:
standarize_input=pickle.load(open('standardScalar.pkl','rb'))

In [339]:
train,inpt= scaler_standard(X_train,data)

In [340]:
model=pickle.load(open('modelForPrediction.pkl','rb'))

In [341]:
Result=model.predict(inpt)

In [342]:
result=Result[0]

In [343]:
def func(result):
    if(result==0):
        return ('Person Has No Diabetes')
    else:
        return('Person Has Diabetes')

In [344]:
func(result)

'Person Has No Diabetes'