In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
%matplotlib inline

In [33]:
# Load the BMI dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('bmi.csv')
# Preview the first rows: Gender, Height, Weight and the integer Index label.
data.head()

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3


In [34]:
# Summary statistics (count, mean, std, min/max, quartiles) for the numeric columns.
data.describe()

Unnamed: 0,Height,Weight,Index
count,500.0,500.0,500.0
mean,169.944,106.0,3.748
std,16.375261,32.382607,1.355053
min,140.0,50.0,0.0
25%,156.0,80.0,3.0
50%,170.5,106.0,4.0
75%,184.0,136.0,5.0
max,199.0,160.0,5.0


In [35]:
def give_names_to_indices(ind):
    """Translate a numeric BMI index code (0-5) into its category name.

    Codes map, in order, to 'Extremely Weak', 'Weak', 'Normal',
    'OverWeight', 'Obesity' and 'Extremely Obese'.  A value that does not
    compare equal to any of those codes yields None.
    """
    category_names = (
        'Extremely Weak',
        'Weak',
        'Normal',
        'OverWeight',
        'Obesity',
        'Extremely Obese',
    )
    # Compare with == against each code in turn so the matching rules stay
    # those of a plain equality test (e.g. 3.0 matches code 3).
    for code, name in enumerate(category_names):
        if ind == code:
            return name
    return None


In [36]:
# Replace the integer Index codes with readable category names.
# Guarded so that re-running this cell is a no-op: once the column holds
# strings, applying give_names_to_indices again would turn every value into
# None, because the function only recognises the codes 0-5.
if pd.api.types.is_numeric_dtype(data['Index']):
    data['Index'] = data['Index'].apply(give_names_to_indices)

In [37]:
# Inspect the frame after relabelling; Index now holds category names.
data

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,Obesity
1,Male,189,87,Normal
2,Female,185,110,Obesity
3,Female,195,104,OverWeight
4,Male,149,61,OverWeight
...,...,...,...,...
495,Female,150,153,Extremely Obese
496,Female,184,121,Obesity
497,Female,141,136,Extremely Obese
498,Male,150,95,Extremely Obese


In [50]:
# NOTE(review): this cell's execution count (50) is higher than that of the
# cells that follow it, and the saved output is a RangeIndex rather than the
# named columns, so it appears to have been run after `data` was rebuilt
# further down. Restart the kernel and run all cells to refresh the output.
print(data.columns)


RangeIndex(start=0, stop=4, step=1)


In [39]:
# Number of records per gender.
people = data['Gender'].value_counts()
people

Gender
Female    255
Male      245
Name: count, dtype: int64

In [40]:
# Number of records in each BMI category.
categories = data['Index'].value_counts()
categories

Index
Extremely Obese    198
Obesity            130
Normal              69
OverWeight          68
Weak                22
Extremely Weak      13
Name: count, dtype: int64

In [41]:
# BMI category counts for the male records only
is_male = data['Gender'] == 'Male'
data.loc[is_male, 'Index'].value_counts()

Index
Extremely Obese    105
Obesity             59
OverWeight          32
Normal              28
Weak                15
Extremely Weak       6
Name: count, dtype: int64

In [42]:
# BMI category counts for the female records only
is_female = data['Gender'] == 'Female'
data.loc[is_female, 'Index'].value_counts()

Index
Extremely Obese    93
Obesity            71
Normal             41
OverWeight         36
Weak                7
Extremely Weak      7
Name: count, dtype: int64

In [43]:
# One-hot encode Gender into 'Female' and 'Male' indicator columns and swap
# them in for the original text column.
data2 = pd.get_dummies(data['Gender'])
# Build a new frame rather than dropping in place, so the object that earlier
# cells displayed is not mutated behind their backs.
data = pd.concat([data.drop(columns='Gender'), data2], axis=1)
data.head()

Unnamed: 0,Height,Weight,Index,Female,Male
0,174,96,Obesity,False,True
1,189,87,Normal,False,True
2,185,110,Obesity,True,False
3,195,104,OverWeight,True,False
4,149,61,OverWeight,False,True


In [44]:
# Separate the target (BMI category labels) from the predictor columns.
y = data.loc[:, 'Index']
data = data.drop(columns='Index')

In [45]:
# Feature columns left after removing Index: Height, Weight, Female, Male.
data.head()

Unnamed: 0,Height,Weight,Female,Male
0,174,96,False,True
1,189,87,False,True
2,185,110,True,False
3,195,104,True,False
4,149,61,False,True


In [46]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column with a StandardScaler.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split done in a later cell, so test rows influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(data)
# Wrapping the returned array in a fresh DataFrame discards the column names;
# the features are addressed by position (0-3) from here on.
data = pd.DataFrame(scaler.transform(data))
data


Unnamed: 0,0,1,2,3
0,0.247939,-0.309117,-1.020204,1.020204
1,1.164872,-0.587322,-1.020204,1.020204
2,0.920357,0.123647,0.980196,-0.980196
3,1.531645,-0.061823,0.980196,-0.980196
4,-1.280283,-1.391027,-1.020204,1.020204
...,...,...,...,...
495,-1.219155,1.452850,0.980196,-0.980196
496,0.859228,0.463676,0.980196,-0.980196
497,-1.769315,0.927351,0.980196,-0.980196
498,-1.219155,-0.340029,-1.020204,1.020204


In [47]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    y,
    random_state=101,
    test_size=0.3,
)

In [48]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [51]:
# Forest sizes to compare: 100 through 800 in steps of 100, plus 1000.
param_grid = {'n_estimators': [*range(100, 900, 100), 1000]}
# verbose=3 makes the search log the score and timing of every fold fit.
grid_cv = GridSearchCV(
    estimator=RandomForestClassifier(random_state=101),
    param_grid=param_grid,
    verbose=3,
)

In [52]:
# Run the search on the training split: 5 folds for each of the 9 candidates (45 fits).
grid_cv.fit(X_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ..................n_estimators=100;, score=0.857 total time=   0.1s
[CV 2/5] END ..................n_estimators=100;, score=0.886 total time=   0.1s
[CV 3/5] END ..................n_estimators=100;, score=0.829 total time=   0.1s
[CV 4/5] END ..................n_estimators=100;, score=0.786 total time=   0.1s
[CV 5/5] END ..................n_estimators=100;, score=0.900 total time=   0.1s
[CV 1/5] END ..................n_estimators=200;, score=0.843 total time=   0.2s
[CV 2/5] END ..................n_estimators=200;, score=0.900 total time=   0.2s
[CV 3/5] END ..................n_estimators=200;, score=0.843 total time=   0.2s
[CV 4/5] END ..................n_estimators=200;, score=0.829 total time=   0.2s
[CV 5/5] END ..................n_estimators=200;, score=0.886 total time=   0.3s
[CV 1/5] END ..................n_estimators=300;, score=0.843 total time=   0.4s
[CV 2/5] END ..................n_estimators=300;,

In [53]:
# Parameter setting that scored best in the search.
grid_cv.best_params_

{'n_estimators': 200}

In [54]:
# Predict BMI categories for the held-out test rows.
pred = grid_cv.predict(X_test)

In [55]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

# Per-class precision, recall and F1 on the held-out test rows.
print(classification_report(y_test, pred))
print('\n')
# Confusion matrix of true labels (y_test) against predictions (pred).
print(confusion_matrix(y_test, pred))
print('\n')
# Overall accuracy as a percentage; the label previously read "Acuuracy".
print('Accuracy is --> ', accuracy_score(y_test, pred) * 100)
print('\n')

                 precision    recall  f1-score   support

Extremely Obese       0.91      0.97      0.94        63
 Extremely Weak       1.00      1.00      1.00         1
         Normal       0.92      0.96      0.94        23
        Obesity       0.78      0.82      0.79        38
     OverWeight       0.92      0.58      0.71        19
           Weak       0.83      0.83      0.83         6

       accuracy                           0.87       150
      macro avg       0.89      0.86      0.87       150
   weighted avg       0.88      0.87      0.87       150



[[61  0  0  2  0  0]
 [ 0  1  0  0  0  0]
 [ 0  0 22  0  0  1]
 [ 6  0  0 31  1  0]
 [ 0  0  1  7 11  0]
 [ 0  0  1  0  0  5]]


Acuuracy is -->  87.33333333333333




In [60]:
def lp(details):
    """Predict the BMI category for a single person.

    Parameters
    ----------
    details : sequence
        ``[gender, height, weight]`` where gender is ``'Male'`` or
        ``'Female'`` and height/weight are convertible to ``float``.

    Returns
    -------
    The first element of ``grid_cv.predict`` for the single feature row
    built from ``details``.

    Raises
    ------
    ValueError
        If gender is neither ``'Male'`` nor ``'Female'``.
    """
    gender = details[0]
    height = details[1]
    weight = details[2]

    # Row layout is [height, weight, female flag, male flag].
    if gender == 'Male':
        features = np.array([[float(height), float(weight), 0.0, 1.0]])
    elif gender == 'Female':
        features = np.array([[float(height), float(weight), 1.0, 0.0]])
    else:
        # Fail early with a clear message instead of handing the raw list
        # (which still contains the gender string) to the scaler.
        raise ValueError(
            "gender must be 'Male' or 'Female', got {!r}".format(gender)
        )

    # Apply the fitted scaler to the raw values before predicting.
    y_pred = grid_cv.predict(scaler.transform(features))
    return y_pred[0]

# Live predictor: edit the [gender, height, weight] values below and re-run the cell.
your_details = ['Male', 175, 80]
print(lp(your_details))


OverWeight




In [61]:
# Three sample inputs, each given as [gender, height, weight]
details_1 = ['Male', 180, 85]
details_2 = ['Female', 160, 55]
details_3 = ['Female', 170, 65]

# Print one prediction per example, in order
for sample in (details_1, details_2, details_3):
    print(lp(sample))


OverWeight
Normal
Normal


