```bash
## TO RUN THIS NOTEBOOK FROM THE TERMINAL
$ jupyter nbconvert --to script x10_classifier_pipeline.ipynb
$ python x10_classifier_pipeline.py
```

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from xgboost import XGBClassifier

from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.over_sampling import BorderlineSMOTE

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import kernels, GaussianProcessClassifier

### Common libraries

In [4]:
import pipeline as pl
from model import plotlib

In [6]:
from model.AdData import *
from model.AdFeatures import *
from model.AdClassify import TestPerformance
from model import hyperparams

In [7]:
# Re-execute the local `pipeline` package (imported as `pl` in an earlier cell)
# so that edits made to it on disk are picked up without restarting the kernel.
# `reload` is imported here and reused by the cell that runs the grid.
# NOTE(review): importlib.reload() re-executes only the module object it is
# given; presumably submodules of the package are not refreshed — confirm.
from importlib import reload
reload(pl)

<module 'pipeline' from '/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/__init__.py'>

### Initialize

In [8]:
# --- Plot appearance -------------------------------------------------------
# Hand the shared fonts directory to plotlib, then apply the local style sheet.
plotlib.load_fonts("../../../common/fonts/")
plt.style.use("matplotlib.mplstyle")

# --- Data location ---------------------------------------------------------
# CSV passed to the loader and to the grid runner.
inputCsv = "../Data/imputed_data.mice.csv"

# --- Columns excluded from the feature matrix -------------------------------
# Passed as `ignoreCols` to the feature-selection stages of the grid.
ignoreXCols = [
    'imp',
    'id',
    'quality',
    'lobe',
    'full',
    'other',
    'coatingId',
]

In [9]:
# Build the data-loading stage and run it once on the input CSV.
# The same `loader` instance is reused as the first stage of `grid`.
# NOTE(review): the test IDs printed by Execute() look like a random hold-out
# selection; if so, no seed is set here — confirm inside LoadData.
loader = LoadData()
loader.Execute(inputCsv)

 -- LoadData() ... 
	 Add coating classes: ['full', 'other', 'lobe']
	 Test IDs: [18, 17, 21, 14, 26, 3]
	 Test classes: ['full', 'other', 'full', 'full', 'lobe', 'lobe']



xCols: []

### Define grid pipeline

In [10]:
# Seed shared by every sampler/estimator constructed in this cell, so that
# repeated runs of the grid resample and fit the same way.
# NOTE(review): stages such as AugmentByQuality and SearchHyperParams may draw
# their own random numbers; this seed does not reach them — confirm.
RANDOM_SEED = 42

# Grid definition handed to the GridLine runner.
#   * a plain entry is a single stage that every pipeline goes through;
#   * a tuple lists the alternatives tried at that position.
# NOTE(review): presumably one pipeline is generated per combination of
# alternatives (2 x 5 x 2 x 1 x 2 x 1 = 40 here) and `None` means "skip this
# stage" — confirm against the GridLine implementation.
grid = [
    loader,
    # Data source alternatives.
    (
        ObservedData(),
        ImputedData()
    ),
    pl.SetYCol('coatingId'),
    pl.Set(scoring='f1_weighted'),
    pl.DropCol('coating'),
    pl.AugmentByQuality(F=2, scale=0.3, qcol='quality'),
    # Class-imbalance handling alternatives; every sampler is seeded so the
    # synthetic/oversampled rows are the same on each run.
    (
        None,
        pl.AugmentImb(RandomOverSampler(random_state=RANDOM_SEED)),
        pl.AugmentImb(BorderlineSMOTE(random_state=RANDOM_SEED)),
        pl.AugmentImb(SMOTE(random_state=RANDOM_SEED)),
        pl.AugmentImb(ADASYN(random_state=RANDOM_SEED)),
    ),
    AggregateFeatures(show=False),
    # Feature-set alternatives.
    (
        pl.AllValidFeatures(ignoreCols=ignoreXCols),
        pl.NonCollinearFeatures(keepCols=['teosVolPct', 'teosVolume'],
                                ignoreCols=ignoreXCols, show=False),
    ),
    pl.ScaleX(allColumns=False),
    # Model set before the optional RFE stage (single alternative; the
    # trailing comma keeps this a one-element tuple).
    (
        # pl.SetModel(RandomForestClassifier(random_state=RANDOM_SEED)),
        pl.SetModel(DecisionTreeClassifier(random_state=RANDOM_SEED)),
    ),
    # Recursive feature elimination: off / on.
    (
        None,
        pl.SelectFeaturesRFE(show=True)
    ),
    # Model set before hyper-parameter search (single alternative; the
    # trailing comma keeps this a one-element tuple).
    (
        # pl.SetModel(XGBClassifier(random_state=RANDOM_SEED)),
        pl.SetModel(KNeighborsClassifier()),
        # pl.SetModel(SVC()),
        # pl.SetModel(GaussianProcessClassifier()),
        # pl.SetModel(RandomForestClassifier(random_state=RANDOM_SEED)),
    ),
    pl.SearchHyperParams(hyperparams.space),
    TestPerformance(show=True)
]

In [11]:
# Refresh the pipeline package, then build and run every pipeline in `grid`.
# The package is imported as `pl` (`import pipeline as pl`); no explicit import
# in this notebook binds the bare name `pipeline`, so referring to it raises
# NameError on a fresh kernel. Use the alias consistently instead.
reload(pl)
pipe = pl.GridLine(grid)
pipe.Execute(inputCsv)

Pipeline 01:
 -- LoadData() ... ok
 -- ObservedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F=2 scale=0.30 ... L01 FAILED: at least one array or dtype is required
Done 01.

Pipeline 02:
 -- LoadData() ... ok
 -- ObservedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F=2 scale=0.30 ... L02 FAILED: at least one array or dtype is required
Done 02.

Pipeline 03:
 -- LoadData() ... ok
 -- ObservedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F=2 scale=0.30 ... L03 FAILED: at least one array or dtype is required
Done 03.

Pipeline 04:
 -- LoadData() ... ok
 -- ObservedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F

Traceback (most recent call last):
  File "/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/pipeline.py", line 119, in _pipeline
    X = adapter.Execute(X, i+1, self.muted)
  File "/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/pipeline.py", line 78, in Execute
    X = self.Process(X)
  File "/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/AdAugment.py", line 47, in Process
    sclr = StandardScaler().fit(df)
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_data.py", line 824, in fit
    return self.partial_fit(X, y, sample_weight)
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_data.py", line 861, in partial_fit
    X = self._validate_data(
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/base.py", line 546, in _validate_data
    X = check_array(X, input_name="X", **check_params)
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/utils/validation.py", lin

L36 FAILED: at least one array or dtype is required
Done 36.

Pipeline 37:
 -- LoadData() ... ok
 -- ImputedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F=2 scale=0.30 ... L37 FAILED: at least one array or dtype is required
Done 37.

Pipeline 38:
 -- LoadData() ... ok
 -- ImputedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F=2 scale=0.30 ... L38 FAILED: at least one array or dtype is required
Done 38.

Pipeline 39:
 -- LoadData() ... ok
 -- ImputedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f1_weighted'}

 -- DropCol() ... ok
 -- AugmentByQuality: F=2 scale=0.30 ... L39 FAILED: at least one array or dtype is required
Done 39.

Pipeline 40:
 -- LoadData() ... ok
 -- ImputedData() ... ok
 -- SetYCol() ... 'coatingId' ok
 -- Set: scoring ... 
	 {'scoring': 'f

Traceback (most recent call last):
  File "/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/pipeline.py", line 119, in _pipeline
    X = adapter.Execute(X, i+1, self.muted)
  File "/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/pipeline.py", line 78, in Execute
    X = self.Process(X)
  File "/home/hdd/Dropbox/work-Proj/Proj-UV-GNR-ML/code/pipeline/AdAugment.py", line 47, in Process
    sclr = StandardScaler().fit(df)
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_data.py", line 824, in fit
    return self.partial_fit(X, y, sample_weight)
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_data.py", line 861, in partial_fit
    X = self._validate_data(
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/base.py", line 546, in _validate_data
    X = check_array(X, input_name="X", **check_params)
  File "/home/akhlak/miniconda3/lib/python3.9/site-packages/sklearn/utils/validation.py", lin

In [10]:
# Collect the per-pipeline results and persist them next to the notebook.
res = pipe.Summarize()
print(res)
try:
    res.to_csv("gridline_results.csv")
except PermissionError:
    # The retry is only meaningful when the target file cannot be written
    # (e.g. it is held open by a spreadsheet program). Any other failure, and
    # Ctrl-C, now propagates immediately instead of being hidden behind a prompt.
    input("Please close the excel file if open and press enter ...")
    res.to_csv("gridline_results.csv")
    print("Saved")

    score model xcols       S01           S02      S03           S04      S05  \
L01     0  None        LoadData  ObservedData  SetYCol  Set: scoring  DropCol   
L02     0  None        LoadData  ObservedData  SetYCol  Set: scoring  DropCol   
L23     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L24     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L25     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L26     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L27     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L28     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L29     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L30     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L31     0  None        LoadData   ImputedData  SetYCol  Set: scoring  DropCol   
L32     0  None        LoadD

In [None]:
class get_ipython:
    """Do-nothing stand-in for IPython's ``get_ipython`` entry point.

    Calling ``get_ipython()`` creates an instance of this class, and its
    ``system`` method accepts any arguments and ignores them.

    NOTE(review): presumably this exists so that ``get_ipython().system(...)``
    lines in the nbconvert-generated script become no-ops under plain
    ``python`` — confirm. Running this cell inside a live kernel rebinds the
    name ``get_ipython`` in the notebook namespace.
    """

    def system(*args):
        """Accept and discard every argument (including the instance); return None."""
        return None

```bash
## RUN THIS NOTEBOOK FROM THE TERMINAL
$ jupyter nbconvert --to script x10_classifier_pipeline.ipynb
$ python x10_classifier_pipeline.py
```