<a href="https://colab.research.google.com/github/wall456/CTG/blob/main/ALL_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
# Load the CTG table; the last three rows are discarded before any processing.
raw_data = pd.read_csv("CTG.csv")
raw_data = raw_data.iloc[:-3]
# Feature matrix: every column except the ones dropped here. The dropped set
# includes NSP, which is used as the target below.
cleaned_data = raw_data.drop(columns=['LBE', 'SegFile', 'FileName', 'Date', 'DR', 'b', 'e', 'A', 'B', 'C', 'D', 'E', 'AD', 'DE', 'LD', 'FS', 'SUSP', 'CLASS', 'NSP'])
X = cleaned_data
y = raw_data['NSP']
n, d = X.shape  # n rows, d feature columns; d bounds the search grids in later cells


# Stratified 80/20 split: X_m / y_m feed the grid searches, X_h / y_h are held out.
holdout_frac = 0.2
holdout_splitter = StratifiedShuffleSplit(n_splits=1, test_size=holdout_frac, random_state=42)
model_idx, holdout_idx = next(holdout_splitter.split(X, y))
# split() yields positional row numbers, so select with .iloc. Using .loc would
# treat them as index labels and only agrees while the index is exactly 0..n-1.
X_m, y_m = X.iloc[model_idx], y.iloc[model_idx]
X_h, y_h = X.iloc[holdout_idx], y.iloc[holdout_idx]

In [None]:
from sklearn.pipeline import Pipeline 
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
# KNN on standardized, PCA-reduced features, tuned by exhaustive 10-fold search on macro-F1.
pipe1 = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('dim_reduce', PCA()),
    ('classify', KNeighborsClassifier()),
])
# Grid axes: PCA components kept (1..d), neighbour count (1..10),
# vote weighting, and the Minkowski power p (1..5).
hyperparams1 = dict(
    dim_reduce__n_components=np.arange(1, d + 1),
    classify__n_neighbors=np.arange(1, 11),
    classify__weights=['uniform', 'distance'],
    classify__p=np.arange(1, 6),
)
grid1 = GridSearchCV(estimator=pipe1, param_grid=hyperparams1, scoring='f1_macro', cv=10)
grid1.fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results1 = grid1.cv_results_
df1 = pd.DataFrame(results1['params']).assign(score=results1['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M1 = df1['score'].max()
mask1 = df1['score'] >= M1 * 0.95
df1.loc[mask1].sort_values('score', ascending=False)

Unnamed: 0,classify__n_neighbors,classify__p,classify__weights,dim_reduce__n_components,score
662,4,1,distance,12,0.857422
664,4,1,distance,14,0.853038
663,4,1,distance,13,0.851158
1084,6,1,distance,14,0.850854
1083,6,1,distance,13,0.850526
...,...,...,...,...,...
769,4,4,uniform,14,0.814757
348,2,4,uniform,13,0.814747
349,2,4,uniform,14,0.814747
645,4,1,uniform,16,0.814632


In [None]:
# KNN on min-max-scaled, PCA-reduced features, tuned by exhaustive 10-fold search on macro-F1.
pipe2 = Pipeline(steps=[
    ('scale', MinMaxScaler()),
    ('dim_reduce', PCA()),
    ('classify', KNeighborsClassifier()),
])
# Grid axes: PCA components kept (1..d), neighbour count (1..10),
# vote weighting, and the Minkowski power p (1..5).
hyperparams2 = dict(
    dim_reduce__n_components=np.arange(1, d + 1),
    classify__n_neighbors=np.arange(1, 11),
    classify__weights=['uniform', 'distance'],
    classify__p=np.arange(1, 6),
)
grid2 = GridSearchCV(estimator=pipe2, param_grid=hyperparams2, scoring='f1_macro', cv=10)
grid2.fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results2 = grid2.cv_results_
df2 = pd.DataFrame(results2['params']).assign(score=results2['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M2 = df2['score'].max()
mask2 = df2['score'] >= M2 * 0.95
df2.loc[mask2].sort_values('score', ascending=False)

Unnamed: 0,classify__n_neighbors,classify__p,classify__weights,dim_reduce__n_components,score
712,4,2,distance,20,0.852068
711,4,2,distance,19,0.852068
713,4,2,distance,21,0.852068
710,4,2,distance,18,0.851882
709,4,2,distance,17,0.851073
...,...,...,...,...,...
364,2,4,distance,8,0.809633
133,1,4,uniform,8,0.809633
154,1,4,distance,8,0.809633
890,5,2,uniform,9,0.809612


In [None]:
# KNN on robust-scaled, PCA-reduced features, tuned by exhaustive 10-fold search on macro-F1.
pipe3 = Pipeline(steps=[
    ('scale', RobustScaler()),
    ('dim_reduce', PCA()),
    ('classify', KNeighborsClassifier()),
])
# Grid axes: PCA components kept (1..d), neighbour count (1..10),
# vote weighting, and the Minkowski power p (1..5).
hyperparams3 = dict(
    dim_reduce__n_components=np.arange(1, d + 1),
    classify__n_neighbors=np.arange(1, 11),
    classify__weights=['uniform', 'distance'],
    classify__p=np.arange(1, 6),
)
grid3 = GridSearchCV(estimator=pipe3, param_grid=hyperparams3, scoring='f1_macro', cv=10)
grid3.fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results3 = grid3.cv_results_
df3 = pd.DataFrame(results3['params']).assign(score=results3['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M3 = df3['score'].max()
mask3 = df3['score'] >= M3 * 0.95
df3.loc[mask3].sort_values('score', ascending=False)

Unnamed: 0,classify__n_neighbors,classify__p,classify__weights,dim_reduce__n_components,score
876,5,1,distance,16,0.852448
666,4,1,distance,16,0.851236
668,4,1,distance,18,0.850321
458,3,1,distance,18,0.850020
667,4,1,distance,17,0.849627
...,...,...,...,...,...
326,2,3,distance,12,0.810059
116,1,3,distance,12,0.810059
642,4,1,uniform,13,0.810049
558,3,4,uniform,13,0.809959


In [None]:
# Gaussian naive Bayes on standardized, PCA-reduced features.
pipe4 = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('dim_reduce', PCA()),
    ('classify', GaussianNB()),
])
# Only the number of PCA components (1..d) is searched; the classifier keeps its defaults.
hyperparams4 = dict(dim_reduce__n_components=np.arange(1, d + 1))
grid4 = GridSearchCV(estimator=pipe4, param_grid=hyperparams4, scoring='f1_macro', cv=10)
grid4.fit(X_m, y_m)

# One row per component count together with its mean cross-validated macro-F1.
results4 = grid4.cv_results_
df4 = pd.DataFrame(results4['params']).assign(score=results4['mean_test_score'])
# Show only the settings scoring at least 95% of the best score, best first.
M4 = df4['score'].max()
mask4 = df4['score'] >= M4 * 0.95
df4.loc[mask4].sort_values('score', ascending=False)

Unnamed: 0,dim_reduce__n_components,score
3,4,0.759535
4,5,0.749736
6,7,0.740804
5,6,0.735842
9,10,0.734726
7,8,0.73156
11,12,0.729698
10,11,0.729212
8,9,0.729147
2,3,0.727695


In [None]:
# Gaussian naive Bayes on min-max-scaled, PCA-reduced features.
pipe5 = Pipeline(steps=[
    ('scale', MinMaxScaler()),
    ('dim_reduce', PCA()),
    ('classify', GaussianNB()),
])
# Only the number of PCA components (1..d) is searched; the classifier keeps its defaults.
hyperparams5 = dict(dim_reduce__n_components=np.arange(1, d + 1))
grid5 = GridSearchCV(estimator=pipe5, param_grid=hyperparams5, scoring='f1_macro', cv=10)
grid5.fit(X_m, y_m)

# One row per component count together with its mean cross-validated macro-F1.
results5 = grid5.cv_results_
df5 = pd.DataFrame(results5['params']).assign(score=results5['mean_test_score'])
# Show only the settings scoring at least 95% of the best score, best first.
M5 = df5['score'].max()
mask5 = df5['score'] >= M5 * 0.95
df5.loc[mask5].sort_values('score', ascending=False)

Unnamed: 0,dim_reduce__n_components,score
8,9,0.747123
7,8,0.746602
10,11,0.743803
9,10,0.743361
11,12,0.736351
12,13,0.733714
6,7,0.733501
13,14,0.719451


In [None]:
# Gaussian naive Bayes on robust-scaled, PCA-reduced features.
pipe6 = Pipeline(steps=[
    ('scale', RobustScaler()),
    ('dim_reduce', PCA()),
    ('classify', GaussianNB()),
])
# Only the number of PCA components (1..d) is searched; the classifier keeps its defaults.
hyperparams6 = dict(dim_reduce__n_components=np.arange(1, d + 1))
grid6 = GridSearchCV(estimator=pipe6, param_grid=hyperparams6, scoring='f1_macro', cv=10)
grid6.fit(X_m, y_m)

# One row per component count together with its mean cross-validated macro-F1.
results6 = grid6.cv_results_
df6 = pd.DataFrame(results6['params']).assign(score=results6['mean_test_score'])
# Show only the settings scoring at least 95% of the best score, best first.
M6 = df6['score'].max()
mask6 = df6['score'] >= M6 * 0.95
df6.loc[mask6].sort_values('score', ascending=False)

Unnamed: 0,dim_reduce__n_components,score
12,13,0.731232
14,15,0.729974
13,14,0.728527
11,12,0.724014
10,11,0.723625
9,10,0.719501
16,17,0.717862
15,16,0.716146
8,9,0.712386
17,18,0.709503


In [None]:
# Decision tree on standardized, PCA-reduced features, tuned by 10-fold search on macro-F1.
# The tree is seeded: DecisionTreeClassifier permutes features at every split, so
# without a fixed random_state the scores (and the ranking below) change per run.
pipe7 = Pipeline([('scale', StandardScaler()),
                 ('dim_reduce', PCA()),
                 ('classify', DecisionTreeClassifier(random_state=42))
                 ])
# Grid axes: PCA components kept (1..d), split criterion, and maximum depth (1..d).
hyperparams7 = {'dim_reduce__n_components': np.arange(1, d+1),
               'classify__criterion': ['gini', 'entropy'],
               'classify__max_depth': np.arange(1, d+1)
               }
grid7 = GridSearchCV(pipe7, hyperparams7, cv=10, scoring='f1_macro').fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results7 = grid7.cv_results_
df7 = pd.DataFrame(results7['params'])
df7['score'] = results7['mean_test_score']
# Show only the combinations scoring at least 95% of the best score, best first.
M7 = df7['score'].max()
mask7 = df7['score'] >= M7 * 0.95
df7[mask7].sort_values(by='score', ascending=False)

Unnamed: 0,classify__criterion,classify__max_depth,dim_reduce__n_components,score
837,entropy,19,19,0.802396
752,entropy,15,18,0.800152
710,entropy,13,18,0.799974
606,entropy,8,19,0.799381
857,entropy,20,18,0.799369
...,...,...,...,...
88,gini,5,5,0.762339
389,gini,19,12,0.762336
760,entropy,16,5,0.762325
550,entropy,6,5,0.762290


In [None]:
# Decision tree on min-max-scaled, PCA-reduced features, tuned by 10-fold search on macro-F1.
# The tree is seeded: DecisionTreeClassifier permutes features at every split, so
# without a fixed random_state the scores (and the ranking below) change per run.
pipe8 = Pipeline([('scale', MinMaxScaler()),
                 ('dim_reduce', PCA()),
                 ('classify', DecisionTreeClassifier(random_state=42))
                 ])
hyperparams8 = {'dim_reduce__n_components': np.arange(1, d+1),
               'classify__criterion': ['gini', 'entropy'],
               # Every depth from 1 to d. A bare tuple (1, d+1) is read by the
               # grid search as just two candidate depths, 1 and d+1.
               'classify__max_depth': np.arange(1, d+1)
               }
grid8 = GridSearchCV(pipe8, hyperparams8, cv=10, scoring='f1_macro').fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results8 = grid8.cv_results_
df8 = pd.DataFrame(results8['params'])
df8['score'] = results8['mean_test_score']
# Show only the combinations scoring at least 95% of the best score, best first.
M8 = df8['score'].max()
mask8 = df8['score'] >= M8 * 0.95
df8[mask8].sort_values(by='score', ascending=False)

Unnamed: 0,classify__criterion,classify__max_depth,dim_reduce__n_components,score
70,entropy,22,8,0.801432
79,entropy,22,17,0.801396
71,entropy,22,9,0.798538
78,entropy,22,16,0.797248
76,entropy,22,14,0.795773
75,entropy,22,13,0.794744
80,entropy,22,18,0.794332
77,entropy,22,15,0.793923
81,entropy,22,19,0.793728
82,entropy,22,20,0.790575


In [None]:
# Decision tree on robust-scaled, PCA-reduced features, tuned by 10-fold search on macro-F1.
# The tree is seeded: DecisionTreeClassifier permutes features at every split, so
# without a fixed random_state the scores (and the ranking below) change per run.
pipe9 = Pipeline([('scale', RobustScaler()),
                 ('dim_reduce', PCA()),
                 ('classify', DecisionTreeClassifier(random_state=42))
                 ])
# Grid axes: PCA components kept (1..d), split criterion, and maximum depth (1..d).
hyperparams9 = {'dim_reduce__n_components': np.arange(1, d+1),
               'classify__criterion': ['gini', 'entropy'],
               'classify__max_depth': np.arange(1, d+1)
               }
grid9 = GridSearchCV(pipe9, hyperparams9, cv=10, scoring='f1_macro').fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results9 = grid9.cv_results_
df9 = pd.DataFrame(results9['params'])
df9['score'] = results9['mean_test_score']
# Show only the combinations scoring at least 95% of the best score, best first.
M9 = df9['score'].max()
mask9 = df9['score'] >= M9 * 0.95
df9[mask9].sort_values(by='score', ascending=False)

Unnamed: 0,classify__criterion,classify__max_depth,dim_reduce__n_components,score
878,entropy,21,18,0.816229
667,entropy,11,17,0.814773
665,entropy,11,15,0.812248
836,entropy,19,18,0.811687
748,entropy,15,14,0.810777
...,...,...,...,...
177,gini,9,10,0.775962
394,gini,19,17,0.775939
619,entropy,9,11,0.775813
573,entropy,7,7,0.775762


In [None]:
# Random forest on standardized, PCA-reduced features, tuned by 10-fold search on macro-F1.
# The forest is seeded: its bootstrap samples and per-split feature subsets are
# random, so without a fixed random_state the scores below change per run.
pipe10 = Pipeline([('scale', StandardScaler()),
                  ('dim_reduce', PCA()),
                  ('classify', RandomForestClassifier(random_state=42))
                  ])
# Grid axes: PCA components kept (1..d), split criterion, maximum depth (1..9),
# and min_samples_split (2 or 3).
hyperparams10 = {'dim_reduce__n_components': np.arange(1, d+1),
                'classify__criterion': ['gini', 'entropy'],
                'classify__max_depth': np.arange(1, 10),
                'classify__min_samples_split': np.arange(2, 4)
                }
grid10 = GridSearchCV(pipe10, hyperparams10, cv=10, scoring='f1_macro').fit(X_m, y_m)

# One row per parameter combination together with its mean cross-validated score.
results10 = grid10.cv_results_
df10 = pd.DataFrame(results10['params'])
df10['score'] = results10['mean_test_score']
# Show only the combinations scoring at least 95% of the best score, best first.
M10 = df10['score'].max()
mask10 = df10['score'] >= M10 * 0.95
df10[mask10].sort_values(by='score', ascending=False)

Unnamed: 0,classify__criterion,classify__max_depth,classify__min_samples_split,dim_reduce__n_components,score
683,entropy,8,2,12,0.830076
731,entropy,9,2,18,0.828497
684,entropy,8,2,13,0.828461
726,entropy,9,2,13,0.827928
722,entropy,9,2,9,0.827850
...,...,...,...,...,...
612,entropy,6,3,4,0.789925
285,gini,7,3,13,0.789611
625,entropy,6,3,17,0.789526
291,gini,7,3,19,0.789079


In [None]:
# Random forest on min-max-scaled, PCA-reduced features, tuned by 10-fold search on macro-F1.
# The forest is seeded: its bootstrap samples and per-split feature subsets are
# random, so without a fixed random_state the search results change per run.
pipe11 = Pipeline([('scale', MinMaxScaler()),
                  ('dim_reduce', PCA()),
                  ('classify', RandomForestClassifier(random_state=42))
                  ])
# Grid axes: PCA components kept (1..d), split criterion, maximum depth (1..9),
# and min_samples_split (2 or 3).
hyperparams11 = {'dim_reduce__n_components': np.arange(1, d+1),
                'classify__criterion': ['gini', 'entropy'],
                'classify__max_depth': np.arange(1, 10),
                'classify__min_samples_split': np.arange(2, 4)
                }
grid11 = GridSearchCV(pipe11, hyperparams11, cv=10, scoring='f1_macro').fit(X_m, y_m)

In [None]:
# Tabulate grid11: one row per parameter combination with its mean cross-validated score.
results11 = grid11.cv_results_
df11 = pd.DataFrame(results11['params']).assign(score=results11['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M11 = df11['score'].max()
mask11 = df11['score'] >= M11 * 0.95
df11.loc[mask11].sort_values('score', ascending=False)

Unnamed: 0,classify__criterion,classify__max_depth,classify__min_samples_split,dim_reduce__n_components,score
743,entropy,9,3,9,0.839355
722,entropy,9,2,9,0.838500
730,entropy,9,2,17,0.836073
342,gini,9,2,7,0.835137
751,entropy,9,3,17,0.833839
...,...,...,...,...,...
335,gini,8,3,21,0.798684
650,entropy,7,2,21,0.798527
624,entropy,6,3,16,0.798484
237,gini,6,3,7,0.798080


In [None]:
# Random forest on robust-scaled, PCA-reduced features, tuned by 10-fold search on macro-F1.
# The forest is seeded: its bootstrap samples and per-split feature subsets are
# random, so without a fixed random_state the search results change per run.
pipe12 = Pipeline([('scale', RobustScaler()),
                  ('dim_reduce', PCA()),
                  ('classify', RandomForestClassifier(random_state=42))
                  ])
# Grid axes: PCA components kept (1..d), split criterion, maximum depth (1..9),
# and min_samples_split (2 or 3).
hyperparams12 = {'dim_reduce__n_components': np.arange(1, d+1),
                'classify__criterion': ['gini', 'entropy'],
                'classify__max_depth': np.arange(1, 10),
                'classify__min_samples_split': np.arange(2, 4)
                }
grid12 = GridSearchCV(pipe12, hyperparams12, cv=10, scoring='f1_macro').fit(X_m, y_m)

In [None]:
# Tabulate grid12: one row per parameter combination with its mean cross-validated score.
results12 = grid12.cv_results_
df12 = pd.DataFrame(results12['params']).assign(score=results12['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M12 = df12['score'].max()
mask12 = df12['score'] >= M12 * 0.95
df12.loc[mask12].sort_values('score', ascending=False)

Unnamed: 0,classify__criterion,classify__max_depth,classify__min_samples_split,dim_reduce__n_components,score
727,entropy,9,2,14,0.842002
728,entropy,9,2,15,0.840037
724,entropy,9,2,11,0.839759
708,entropy,8,3,16,0.838961
730,entropy,9,2,17,0.837806
...,...,...,...,...,...
262,gini,7,2,11,0.800976
636,entropy,7,2,7,0.800708
340,gini,9,2,5,0.800245
223,gini,6,2,14,0.800185


In [None]:
# Support vector classifier on standardized, PCA-reduced features,
# tuned by exhaustive 10-fold search on macro-F1.
pipe13 = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('dim_reduce', PCA()),
    ('classify', SVC()),
])
# Grid axes: PCA components kept (1..d), regularization strength C, and kernel type.
hyperparams13 = dict(
    dim_reduce__n_components=np.arange(1, d + 1),
    classify__C=[1, 10, 100],
    classify__kernel=['linear', 'poly', 'rbf', 'sigmoid'],
)
grid13 = GridSearchCV(estimator=pipe13, param_grid=hyperparams13, scoring='f1_macro', cv=10)
# fit() returns the fitted search object; rebinding keeps the cell free of output.
grid13 = grid13.fit(X_m, y_m)

In [None]:
# Tabulate grid13: one row per parameter combination with its mean cross-validated score.
results13 = grid13.cv_results_
df13 = pd.DataFrame(results13['params']).assign(score=results13['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M13 = df13['score'].max()
mask13 = df13['score'] >= M13 * 0.95
df13.loc[mask13].sort_values('score', ascending=False)

Unnamed: 0,classify__C,classify__kernel,dim_reduce__n_components,score
228,100,rbf,19,0.876844
226,100,rbf,17,0.876297
230,100,rbf,21,0.875782
229,100,rbf,20,0.875782
142,10,rbf,17,0.875714
227,100,rbf,18,0.875172
143,10,rbf,18,0.874656
223,100,rbf,14,0.872897
224,100,rbf,15,0.872181
222,100,rbf,13,0.871267


In [None]:
# Support vector classifier on min-max-scaled, PCA-reduced features,
# tuned by exhaustive 10-fold search on macro-F1.
pipe14 = Pipeline(steps=[
    ('scale', MinMaxScaler()),
    ('dim_reduce', PCA()),
    ('classify', SVC()),
])
# Grid axes: PCA components kept (1..d), regularization strength C, and kernel type.
hyperparams14 = dict(
    dim_reduce__n_components=np.arange(1, d + 1),
    classify__C=[1, 10, 100],
    classify__kernel=['linear', 'poly', 'rbf', 'sigmoid'],
)
grid14 = GridSearchCV(estimator=pipe14, param_grid=hyperparams14, scoring='f1_macro', cv=10)
# fit() returns the fitted search object; rebinding keeps the cell free of output.
grid14 = grid14.fit(X_m, y_m)

In [None]:
# Tabulate grid14: one row per parameter combination with its mean cross-validated score.
results14 = grid14.cv_results_
df14 = pd.DataFrame(results14['params']).assign(score=results14['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M14 = df14['score'].max()
mask14 = df14['score'] >= M14 * 0.95
df14.loc[mask14].sort_values('score', ascending=False)

Unnamed: 0,classify__C,classify__kernel,dim_reduce__n_components,score
136,10,rbf,11,0.875746
135,10,rbf,10,0.875043
222,100,rbf,13,0.872971
141,10,rbf,16,0.872654
221,100,rbf,12,0.872648
...,...,...,...,...
57,1,rbf,16,0.833929
54,1,rbf,13,0.833482
60,1,rbf,19,0.832670
58,1,rbf,17,0.832554


In [None]:
# Support vector classifier on robust-scaled, PCA-reduced features,
# tuned by exhaustive 10-fold search on macro-F1.
pipe15 = Pipeline(steps=[
    ('scale', RobustScaler()),
    ('dim_reduce', PCA()),
    ('classify', SVC()),
])
# Grid axes: PCA components kept (1..d), regularization strength C, and kernel type.
hyperparams15 = dict(
    dim_reduce__n_components=np.arange(1, d + 1),
    classify__C=[1, 10, 100],
    classify__kernel=['linear', 'poly', 'rbf', 'sigmoid'],
)
grid15 = GridSearchCV(estimator=pipe15, param_grid=hyperparams15, scoring='f1_macro', cv=10)
# fit() returns the fitted search object; rebinding keeps the cell free of output.
grid15 = grid15.fit(X_m, y_m)

In [None]:
# Tabulate grid15: one row per parameter combination with its mean cross-validated score.
results15 = grid15.cv_results_
df15 = pd.DataFrame(results15['params']).assign(score=results15['mean_test_score'])
# Show only the combinations scoring at least 95% of the best score, best first.
M15 = df15['score'].max()
mask15 = df15['score'] >= M15 * 0.95
df15.loc[mask15].sort_values('score', ascending=False)

Unnamed: 0,classify__C,classify__kernel,dim_reduce__n_components,score
230,100,rbf,21,0.834287
229,100,rbf,20,0.834287
225,100,rbf,16,0.833796
228,100,rbf,19,0.833313
222,100,rbf,13,0.832677
226,100,rbf,17,0.832495
221,100,rbf,12,0.831757
223,100,rbf,14,0.831523
227,100,rbf,18,0.831144
220,100,rbf,11,0.830875
