In [1]:
import pandas as pd

In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

In [3]:
#loading data
data = pd.read_csv('diabetes.csv')

In [4]:
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [5]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age']

In [7]:
X = data[features]

In [8]:
y = data['Outcome']

In [9]:
trainx,valx,trainy,valy = train_test_split(X,y,test_size=0.2,random_state=1)

In [10]:
pipeline=Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')),
    ('model',XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42))])

In [11]:
pipeline.fit(trainx,trainy)

In [12]:
pred=pipeline.predict(valx)


In [13]:
print(mean_absolute_error(valy, pred))


0.29394495487213135


In [14]:
pipeline.predict(valx)

array([ 4.52309400e-01,  2.14719176e-01,  1.03778414e-01,  3.28687280e-01,
        2.43146628e-01,  3.18028301e-01,  3.52065474e-01,  1.85042582e-02,
        2.01427594e-01,  1.48892313e-01,  6.05069876e-01, -2.17963085e-02,
        6.96414888e-01,  7.09740341e-01,  3.93345952e-03,  6.62775099e-01,
        2.88184762e-01,  4.43300575e-01, -8.74687433e-02,  3.37152213e-01,
        5.49265683e-01,  3.79458368e-01,  9.82872367e-01,  1.78817168e-01,
       -1.04538426e-02,  4.47683096e-01,  2.83928782e-01,  9.27209616e-01,
        2.99819469e-01,  3.25106829e-01,  2.30391026e-01,  2.31671527e-01,
        7.67527744e-02,  8.58360529e-01,  3.60742092e-01,  8.38633716e-01,
        7.27995411e-02,  3.21997702e-01,  1.39587820e-01,  3.85291189e-01,
        1.66881084e-01,  6.35864735e-02,  3.75234615e-03,  6.91334605e-01,
       -4.99495305e-04,  7.25507736e-03,  8.92427385e-01,  8.66644263e-01,
       -1.28540136e-02,  1.83099180e-01, -5.84518611e-02,  3.10873628e-01,
        9.68808651e-01, -

In [15]:
import numpy as np

In [16]:
arr = np.array([pred])

In [17]:
binary_outcome = (arr > 0.0).astype(int)

In [18]:
#our model is complete
print(binary_outcome)

[[1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
  1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1]]


In [19]:
#lets check test it by using a different dataseyt as testing dataset

In [20]:
testingdata = pd.read_csv('testdiabetes.csv')
testingdata.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [21]:
features=['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age' ]
Testingx = testingdata[features]
Testingy =testingdata['Outcome']

In [22]:
test = pipeline.predict(Testingx)

In [23]:
binarized = np.array([test])

In [24]:
testbinary_outcome = (binarized > 0.0).astype(int)

In [25]:
print(testbinary_outcome)

[[1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
  1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 0 1
  1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
  1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1
  0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0
  0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

In [26]:
#done