In [9]:
import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import time
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import pickle

# Package for generating classification report
from sklearn.metrics import classification_report

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

In [7]:
# Load the dataset and separate the predictors from the 'fetal_health' target.
# The frame is named `fetal_df` (not `input`) so the built-in input() function
# is not shadowed for the rest of the kernel session.
fetal_df = pd.read_csv('fetal_health.csv')
X = fetal_df.drop(columns='fetal_health')
y = fetal_df['fetal_health']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=1)


In [2]:
# Unpickle the object stored in random_forest.pickle into rf_model.
# The `with` block guarantees the file is closed even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('random_forest.pickle', 'rb') as rf_pickle:
    rf_model = pickle.load(rf_pickle)

In [3]:
# Unpickle the object stored in adaboost.pickle into ada_model.
# The `with` block guarantees the file is closed even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('adaboost.pickle', 'rb') as ada_pickle:
    ada_model = pickle.load(ada_pickle)

In [10]:
# Unpickle the object stored in decision_tree.pickle into dt_model.
# The `with` block guarantees the file is closed even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('decision_tree.pickle', 'rb') as dt_pickle:
    dt_model = pickle.load(dt_pickle)

In [11]:
# Voting weights (approach taken from the 372 notebook): each base model's
# weight is its macro-averaged F1 score divided by the sum of all three scores,
# so the weights add up to 1. Order is decision tree, random forest, AdaBoost.
#
# Every model is scored exactly once and the result reused; the earlier version
# ran predict() and f1_score() twice per model.
#
# NOTE(review): the scores are computed on the test split, which is also used
# below to evaluate the ensemble -- consider deriving weights from a separate
# validation split instead.
model_f1 = [
    f1_score(test_y, model.predict(test_X), average='macro')
    for model in (dt_model, rf_model, ada_model)
]
f1_sum = model_f1[0] + model_f1[1] + model_f1[2]
weights = [score / f1_sum for score in model_f1]

In [12]:
# Soft-voting ensemble: class probabilities from the three base models are
# combined using the F1-derived `weights` (same order as the estimator list).
# Per the scikit-learn documentation, fit() trains clones of the listed
# estimators on the training split rather than reusing the loaded objects.
base_estimators = [('dt', dt_model), ('rf', rf_model), ('ada', ada_model)]
soft_voting_clf = VotingClassifier(
    estimators=base_estimators,
    voting='soft',
    weights=weights,
)
soft_voting_clf.fit(train_X, train_y)

In [13]:
# Score the held-out rows with the ensemble: per-class probabilities and the
# corresponding hard class labels.
probabilities = soft_voting_clf.predict_proba(test_X)
predictions = soft_voting_clf.predict(test_X)

In [14]:
# Confusion matrix of the ensemble on the test split, with rows/columns in the
# order of the classifier's own class labels.
class_labels = soft_voting_clf.classes_
votingcm = confusion_matrix(test_y, predictions, labels=class_labels)
votingdisp = ConfusionMatrixDisplay(confusion_matrix=votingcm, display_labels=class_labels)

# Draw onto an explicit 10x8 figure and write it out as SVG.
fig, ax = plt.subplots(figsize=(10, 8))
plt.rcParams['font.size'] = 12

votingdisp.plot(cmap='PuRd', ax=ax)
fig.savefig('votingcm.svg')

In [16]:
# Classification report for the ensemble as a nested dict, converted to a
# DataFrame and written to voting_report.csv.
votingreport = classification_report(y_true=test_y, y_pred=predictions, output_dict=True)
votingreport_df = pd.DataFrame(votingreport)
votingreport_df.to_csv('voting_report.csv')

In [26]:

# Aggregated feature importance for the ensemble: the weighted sum of each base
# model's feature_importances_, using the same F1-derived `weights`.
# NOTE(review): these importances come from the loaded dt/rf/ada objects, not
# from the refit copies held inside soft_voting_clf -- confirm this is intended.
base_models = [dt_model, rf_model, ada_model]  # must stay in the same order as `weights`
feature_importances = [model.feature_importances_ for model in base_models]
feature_names = train_X.columns

# Scale every importance vector by its model weight, then add them element-wise
# (axis=0 sums across models, leaving one value per feature).
aggregated_importance = np.sum([w * imp for w, imp in zip(weights, feature_importances)], axis=0)

feature_imp = pd.DataFrame({
    'Feature': feature_names,
    'Importance': aggregated_importance
}).sort_values(by='Importance', ascending=False)

# Use a dedicated figure/axes so the bars never land on a previously active
# figure, and save BEFORE plt.show(): once a figure has been shown it may be
# released, in which case a later savefig would write an empty image.
imp_fig, imp_ax = plt.subplots()
imp_ax.barh(feature_imp['Feature'], feature_imp['Importance'], color='skyblue')
imp_ax.set_xlabel('Aggregated Importance')
imp_ax.set_ylabel('Features')
imp_ax.set_title('Aggregated Feature Importance')
imp_fig.tight_layout()
imp_fig.savefig('votingimp.svg')
plt.show()


In [27]:
# Persist the fitted soft-voting ensemble to voting.pickle.
# The `with` block flushes and closes the file even if pickle.dump raises,
# so a failed dump cannot leave an open handle behind.
with open('voting.pickle', 'wb') as voting_pickle:
    pickle.dump(soft_voting_clf, voting_pickle)