In [30]:
import pandas as pd

### Load the iris dataset from sklearn 

In [31]:
from sklearn.datasets import load_iris

In [32]:
# load_iris() returns a container object (its type is shown in the output below);
# later cells read its data, target, feature_names and target_names attributes.
iris = load_iris()
type(iris)

sklearn.utils.Bunch

In [33]:
# Feature names; used further down as the column labels of the feature DataFrame.
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [34]:
# Species names; a later cell maps predicted class index i to target_names[i].
print(iris.target_names)

['setosa' 'versicolor' 'virginica']


In [35]:
# Feature matrix goes in "X", target response vector in "y"
X, y = iris.data, iris.target

### Get the basic statistics for the features

In [36]:
# Wrap the feature matrix in a DataFrame with named columns for inspection,
# then preview the first three rows.
X_df = pd.DataFrame(X, columns=iris.feature_names)
X_df.head(n=3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for each feature column.
X_df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


### Split the dataset into training and testing sets

In [38]:
from sklearn.model_selection import train_test_split

In [39]:
# Hold out 20% of the samples for testing. stratify=y requests that the class
# proportions of y are preserved in both splits; random_state pins the shuffle
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
    stratify=y,
)

### Use the Random Forest Classifier

In [40]:
from sklearn.ensemble import RandomForestClassifier

In [41]:
# Random forest with 10 trees; random_state=1 pins the seed for reproducibility
# and n_jobs=-1 asks scikit-learn to use all available CPU cores.
clf_rf = RandomForestClassifier(random_state = 1, n_estimators = 10, n_jobs = -1)
# estimator_rf is a second name for the same object, not a copy.
estimator_rf = clf_rf

In [42]:
# Train the forest on the training split only.
estimator_rf.fit(X_train, y_train)

RandomForestClassifier(n_estimators=10, n_jobs=-1, random_state=1)

### Testing Accuracy

In [43]:
# Accuracy of the fitted forest on the held-out test split.
estimator_rf.score(X_test,y_test)

0.9666666666666667

### Cross-validated accuracy

In [44]:
from sklearn.model_selection import cross_val_score

In [45]:
# estimator_cv is another name for clf_rf, i.e. the same object as estimator_rf.
# NOTE(review): cross_val_score is documented to fit clones of the estimator,
# so the already-fitted estimator_rf should be left untouched — confirm.
estimator_cv = clf_rf
# 5-fold cross-validation over the full dataset (X, y), scored by accuracy.
scores = cross_val_score(estimator_cv, X, y, cv = 5, scoring = 'accuracy')
# Average accuracy across the folds.
scores.mean()

0.9666666666666668

## Serialize the model

In [46]:
from joblib import dump, load

In [47]:
from pathlib import Path

# Destination of the serialized classifier.
model_path = 'APIProjectFolder/Prediction/classifier/IRISRandomForestClassifier.joblib'

# dump() does not create missing directories, so make sure the target folder
# exists; otherwise a fresh checkout fails here with FileNotFoundError.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

# Persist the fitted forest; dump() returns the list of files it wrote.
dump(estimator_rf, model_path)

['APIProjectFolder/Prediction/classifier/IRISRandomForestClassifier.joblib']

### Load back the saved model and check accuracy

In [48]:
# Read the serialized classifier back from disk.
# NOTE(review): joblib artifacts are pickle-based, so loading one can execute
# arbitrary code — only load files from a trusted source.
loaded_classifier = load('APIProjectFolder/Prediction/classifier/IRISRandomForestClassifier.joblib') 

In [49]:
# Sanity check: the reloaded model should score the same on the test split
# as estimator_rf did before serialization.
loaded_classifier.score(X_test,y_test)

0.9666666666666667

In [50]:
# Lookup table from integer class index to species name, e.g. 0 -> 'setosa'.
# enumerate() supplies the index, so no manual counter is needed and no loop
# variables are left behind in the notebook namespace.
target_map = dict(enumerate(iris.target_names))

def iris_predictor(X):
    """Predict iris species names for the given samples.

    Parameters
    ----------
    X : array-like or DataFrame
        Samples to classify; passed unchanged to ``loaded_classifier.predict``.

    Returns
    -------
    numpy.ndarray
        One entry per prediction: the predicted class index looked up in the
        module-level ``target_map``.
    """
    class_indices = pd.Series(loaded_classifier.predict(X))
    return class_indices.map(target_map).to_numpy()

In [51]:
# Reshape the first test sample into a single-row 2-D array, wrap it in a
# DataFrame, and run it through the predictor.
first_sample = pd.DataFrame(X_test[0].reshape(1, -1))
y_pred = iris_predictor(first_sample)

In [52]:
# Show the predicted species name for that single sample.
print(y_pred[0])

virginica


In [53]:
# Per-class probability estimates for every row of X (one column per class,
# as shown in the output).
# NOTE(review): this displays the whole array; consider slicing the result
# (e.g. [:5]) to keep the notebook output short.
loaded_classifier.predict_proba(X)

array([[1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [0.9, 0.1, 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. , 0. ],
       [1. , 0. 

In [55]:
# Display the single-row frame built from the first test sample (the same
# input that was passed to iris_predictor above).
pd.DataFrame(X_test[0].reshape(1, -1))

Unnamed: 0,0,1,2,3
0,7.3,2.9,6.3,1.8
