In [1]:
import os
from pathlib import Path
import time
import logging
import itertools

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from capstone.config import CapstoneConfig
from capstone.data_access import DataClass
from capstone.features import Features
from capstone.evaluation import CustomEvaluation
from capstone.utils.constants import (
    TEXT,
    TARGET,
    ORIGINAL_TEXT,
    SPLIT,
    DEVELOP,
    TEST,
    PREDICTION,
    CORRECTED
)

# Use seaborn's dark grid theme for figures drawn in this notebook.
sns.set_style("darkgrid")

# Absolute path of the directory one level above the current working directory.
PARENT_PATH = Path.cwd().parent.absolute()

# Log lines carry timestamp, logger name, level and message; INFO and above are shown.
FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)

In [2]:
# Create the project configuration and set its CURRENT_PATH to PARENT_PATH
# (the directory above the notebook's working directory).
config = CapstoneConfig()
config.CURRENT_PATH = PARENT_PATH

# Build the raw data frame through the project's data-access class.
data = DataClass(config)
df = data.build()
# Preview the first three rows as the cell output.
df.head(3)

Unnamed: 0,Text,Standard mapping?
0,General Quality System Requirements […] Manage...,"[quality management, quality planning]"
1,General Quality System Requirements […] Manage...,[quality management]
2,General Quality System Requirements […] Manage...,[quality management]


In [3]:
# Build the feature frame from the raw data. Later cells read its SPLIT column
# and the label columns named in `features.mlb.classes_`.
features = Features(config)
df_features = features.build(df)

2022-12-01 04:22:50,834 - capstone.utils.utils - INFO - func:clean took: 3.05 sec
2022-12-01 04:22:51,450 - capstone.utils.utils - INFO - func:fit took: 3.67 sec
2022-12-01 04:22:53,098 - capstone.utils.utils - INFO - func:clean took: 1.65 sec
2022-12-01 04:22:53,325 - capstone.utils.utils - INFO - func:transform took: 1.87 sec
2022-12-01 04:22:53,325 - capstone.utils.utils - INFO - func:fit_transform took: 5.54 sec
2022-12-01 04:22:53,510 - capstone.utils.utils - INFO - func:clean took: 0.18 sec
2022-12-01 04:22:53,536 - capstone.utils.utils - INFO - func:transform took: 0.21 sec
2022-12-01 04:22:53,563 - capstone.utils.utils - INFO - func:build took: 5.79 sec


In [4]:
# Development partition: rows whose SPLIT value is DEVELOP, re-indexed from 0.
dev = df_features.loc[df_features[SPLIT].isin([DEVELOP])].reset_index(drop=True)
# Label columns only (one column per class known to the fitted binarizer).
Y_dev = dev.loc[:, features.mlb.classes_].copy()

# Test partition: rows whose SPLIT value is TEST, re-indexed from 0.
test = df_features.loc[df_features[SPLIT].isin([TEST])].reset_index(drop=True)
Y_test = test.loc[:, features.mlb.classes_].copy()

In [5]:
# Load each model's saved evaluation frame from the reports directory and give
# it a fresh 0..n-1 index so the frames can be combined row by row later.
# NOTE(review): pickle files execute code on load — only read files this
# project produced itself.
bert, ada, curie, davinci = (
    pd.read_pickle(os.path.join(data.reports_path, filename)).reset_index(drop=True)
    for filename in (
        "bert-evaluation.pkl",
        "ada-evaluation.pkl",
        "curie-evaluation.pkl",
        "davinci-evaluation.pkl",
    )
)

In [6]:
# NOTE(review): `eval` shadows the Python builtin of the same name; the name is
# kept because the metric cells call `eval.compute_*`.
eval = CustomEvaluation()

# Series `+` aligns on index labels and silently produces NaN for labels that
# are missing on either side, so fail loudly if the voters do not line up.
assert bert.index.equals(ada.index) and bert.index.equals(curie.index), (
    "bert, ada and curie prediction frames must share the same index before voting"
)

# Row-wise `+` of the three PREDICTION columns (presumably list concatenation
# of each model's predicted labels — TODO confirm), then keep the labels that
# receive at least two votes.
# NOTE(review): `davinci` is loaded but does not take part in the vote — confirm
# this is intentional.
combined = (bert[PREDICTION] + ada[PREDICTION] + curie[PREDICTION])
combined = combined.apply(eval.majority_vote, minimum_votes=2)

# The voted rows are scored against Y_test position by position, so the two
# must have the same number of rows.
assert len(combined) == len(Y_test), (
    f"{len(combined)} voted predictions but {len(Y_test)} test rows"
)

# Encode the voted label sets with the binarizer fitted during feature building.
pred = features.mlb.transform(combined)

In [7]:
# Sample-wise metrics of `pred` against the test label matrix.
eval.compute_sample_wise_metrics(y_true=Y_test.to_numpy(), y_pred=pred)

Hamming Loss    0.011914
Accuracy        0.752604
Precision       0.881771
Recall          0.841611
F1 Score        0.846606
F2 Score        0.840392
dtype: float64

In [8]:
# Per-label metrics of `pred` against the test label matrix.
# `dev_samples` is the column-wise sum of the development label matrix, i.e.
# one value per label in `features.mlb.classes_`.
eval.compute_label_wise_metrics(
    y_true=Y_test.to_numpy(),
    y_pred=pred,
    labels=features.mlb.classes_,
    dev_samples=Y_dev.to_numpy().sum(axis=0).tolist()
)

Unnamed: 0,Precision,Recall,F1 Score,F2 Score,Development Samples,Test Samples
audit inspection,0.777778,0.777778,0.777778,0.777778,86,9
calibration,0.8,0.666667,0.727273,0.689655,78,6
capa,0.842105,0.8,0.820513,0.808081,202,20
change controls,0.875,0.777778,0.823529,0.795455,71,9
clinical research,0.965517,0.965517,0.965517,0.965517,269,29
controled substances,1.0,0.833333,0.909091,0.862069,40,6
csv,0.0,0.0,0.0,0.0,37,3
data management,0.764706,1.0,0.866667,0.942029,150,13
design controls,1.0,0.909091,0.952381,0.925926,132,11
documentation management,0.818182,0.692308,0.75,0.714286,117,13
