In [3]:
# Import the needed packages
import json
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

In [4]:
from ydata.connectors import LocalConnector
from ydata.connectors.filetype import FileType

  from distributed.utils import LoopRunner, format_bytes


In [5]:
# Create the connector used to read the input files
connector = LocalConnector()

# Both inputs are CSV files; each is read through the connector and then
# converted with to_pandas(). Augmented data first, then the original data.
csv_type = FileType.CSV
aug_df = connector.read_file('data_augmented.csv', file_type=csv_type).to_pandas()
orig_df = connector.read_file('data_processed.csv', file_type=csv_type).to_pandas()

Index(['PRBUsageUL', 'PRBUsageDL', 'meanThr_DL', 'meanThr_UL', 'maxThr_DL',
       'maxThr_UL', 'meanUE_UL', 'maxUE_UL+DL', 'Unusual', '14.147', '2.627',
       '0.907', '0.081', '32.678', '1.104', '1.152', '9', '0'],
      dtype='object')


In [None]:
# Fixed seed shared by the split and both trees. Without it the train/test
# partition and the fitted trees differ on every run, so the reported metrics
# are not reproducible after a kernel restart.
SEED = 42

# Create the DecisionTreeClassifier. One for the normal data and another for the augmented data.
orig_tree_clf = DecisionTreeClassifier(random_state=SEED)
aug_tree_clf = DecisionTreeClassifier(random_state=SEED)

# Split the original data into train and test (rows are shuffled deterministically via SEED)
x_train, x_test = train_test_split(orig_df, random_state=SEED)

# Training set for the augmented model: the augmented frame minus every row
# whose index label appears in the test set (presumably to keep the test rows
# out of the augmented training data).
# NOTE(review): this assumes aug_df keeps orig_df's index labels for the
# original rows -- confirm; drop() raises KeyError if a test label is missing.
x_train_augmented = aug_df.drop(x_test.index)

In [None]:
# Separate the feature columns from the 'Unusual' target for each training set
orig_features = x_train.drop('Unusual', axis=1)
orig_labels = x_train['Unusual']
aug_features = x_train_augmented.drop('Unusual', axis=1)
aug_labels = x_train_augmented['Unusual']

# Train one tree per dataset
orig_tree_clf.fit(orig_features, orig_labels)
aug_tree_clf.fit(aug_features, aug_labels)

# Both models are evaluated on the same held-out features
test_features = x_test.drop('Unusual', axis=1)
preds = orig_tree_clf.predict(test_features)
aug_preds = aug_tree_clf.predict(test_features)

In [None]:
# Accuracy and F1 for both models, measured against the same test labels
y_true = x_test['Unusual'].values

# Model trained on the original data
acc, f1 = accuracy_score(y_true, preds), f1_score(y_true, preds)

# Model trained on the augmented data
aug_acc, aug_f1 = accuracy_score(y_true, aug_preds), f1_score(y_true, aug_preds)

In [None]:
# Build the metrics artifact. The table with the metrics will be shown on the
# "Run Output" section of the "Runs".
# Every entry has the same shape, so the list is generated from (name, value) pairs.
scores = [
    ('accuracy-score', acc),
    ('f1-score', f1),
    ('augmented-accuracy-score', aug_acc),
    ('augmented-f1-score', aug_f1),
]
metrics = {
    'metrics': [
        {'name': label, 'numberValue': value, 'format': 'PERCENTAGE'}
        for label, value in scores
    ]
}

# Serialize the artifact to mlpipeline-metrics.json in the working directory
with open("mlpipeline-metrics.json", 'w') as f:
    f.write(json.dumps(metrics))