In [1]:
import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
import time
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import pickle

# Package for generating classification report
from sklearn.metrics import classification_report

# Suppress warnings
# NOTE(review): the "ignore" filter is installed with no category or module
# restriction, so it is not limited to a particular library — confirm that
# blanket suppression is intended for this notebook.
import warnings
warnings.filterwarnings("ignore")

In [2]:


# Load the dataset; every column except 'fetal_health' is used as a feature.
# The frame is named `fetal_df` (not `input`) so the built-in input() function
# is not shadowed for the rest of the kernel session.
fetal_df = pd.read_csv('fetal_health.csv')
X = fetal_df.drop(columns = 'fetal_health')
y = fetal_df['fetal_health']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical across Restart & Run All.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=1)

# Fit a seeded random forest on the training portion.
rfclassifier = RandomForestClassifier(random_state = 1)
rfclassifier.fit(train_X,train_y)

# Predictions for the held-out rows.
ypred = rfclassifier.predict(test_X)


In [3]:
# Confusion matrix for the held-out predictions, with rows/columns ordered by
# the fitted model's class labels.
rfcm = confusion_matrix(test_y, ypred, labels = rfclassifier.classes_)
rfdisp = ConfusionMatrixDisplay(confusion_matrix = rfcm, display_labels = rfclassifier.classes_)

# Set the font size BEFORE creating the figure: rcParams are read when text
# artists are created, so updating them after plt.subplots() leaves the
# axes' own text at the previous size on a fresh kernel and makes the saved
# figure differ between the first run and a re-run of this cell.
plt.rcParams.update({'font.size': 12})
fig, ax = plt.subplots(figsize = (10, 8))

# Draw onto the explicit axes and save through the figure handle.
rfdisp.plot(cmap = 'PuRd', ax = ax);
fig.savefig('rfcm.svg')

In [4]:
# Build the classification report for the held-out predictions as a dict,
# wrap it in a DataFrame, and write that table to 'rf_report.csv'.
rfreport = classification_report(
    y_true = test_y,
    y_pred = ypred,
    output_dict = True,
)
rfreport_df = pd.DataFrame(data = rfreport)
rfreport_df.to_csv(path_or_buf = 'rf_report.csv')

In [5]:
# Importance score of each input feature, taken from the fitted forest
importance = rfclassifier.feature_importances_

# Storing feature importance as a dataframe (one row per training column)
feature_imp = pd.DataFrame(list(zip(train_X.columns, importance)),
               columns = ['Feature', 'Importance'])

# Highest importance first, with a clean 0..n-1 index
feature_imp = feature_imp.sort_values('Importance', ascending = False).reset_index(drop = True)

# Horizontal bar plot on explicit figure/axes handles; the two colors
# alternate bar by bar purely for readability.
imp_fig, imp_ax = plt.subplots(figsize=(10, 5))
imp_ax.barh(feature_imp['Feature'], feature_imp['Importance'], color = ['purple', 'pink'])

imp_ax.set_xlabel("Importance")
imp_ax.set_ylabel("Input Feature")
# Title corrected: the previous text ("price of mobiles") was left over from
# an unrelated project; this model predicts the 'fetal_health' column.
imp_ax.set_title('Which features affect fetal health classification')
imp_fig.tight_layout()
imp_fig.savefig("rfimp.svg");


In [6]:
# Persist the fitted random forest to 'random_forest.pickle'.
# The context manager closes the file even if pickle.dump raises, so no
# handle is leaked and no partially written file is left open.
with open('random_forest.pickle', 'wb') as rf_pickle:
    # Write RF model to the file
    pickle.dump(rfclassifier, rf_pickle)