# Experiment Notebook

This notebook contains the steps to run the experiments for this project.

In [101]:
import logging
import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
import random

from dataset_handling import book_train_test_split, load_dataset

# Name passed to logging.getLogger() so that every Model instance shares the same logger.
LOGGER_NAME = "proj2_logger"

class Model:
    """Common interface for the models compared in the experiment.

    Subclasses are expected to override ``create_features``, ``fit`` and
    ``predict``; the versions defined here only raise ``NotImplementedError``.
    """

    def __init__(self, df: "pd.DataFrame | None" = None):
        """Attach the shared project logger to the instance.

        Args:
            df: Optional dataset. The base class does not use it; it is kept
                so existing ``Model(df)`` calls keep working, and it defaults
                to ``None`` so a model can also be built without arguments.
        """
        self.logger = logging.getLogger(LOGGER_NAME)

    def create_features(self, df: pd.DataFrame):
        """Build model-specific features from ``df`` (must be overridden).

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError("Function was not implemented in subclass")

    def fit(self) -> None:
        """Train the model (must be overridden).

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError("Function was not implemented in subclass")

    def predict(self) -> list:
        """Evaluate the model and return its metrics as a list (must be overridden).

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError("Function was not implemented in subclass")

class TransformerModel(Model):
    """Placeholder for the transformer-based model; every step is still a stub."""

    def create_features(self, df: pd.DataFrame):
        """Stub: feature creation is not implemented yet, so nothing is built."""
        # TODO: perform train-test-split
        return None

    def fit(self):
        """Stub: training is not implemented yet."""
        # TODO: fit transformer
        return None

    def predict(self) -> list:
        """Stub: return the metric rows produced by this model.

        Returns:
            An empty list until evaluation is implemented. A list (rather
            than ``None``) is returned so callers can safely concatenate the
            result onto their own metrics list.
        """
        # TODO: run tests, return metrics
        return []

class ClassicalModels(Model):
    """Placeholder for the classical models; every step is still a stub."""

    def create_features(self, df: pd.DataFrame):
        """Stub: feature creation is not implemented yet, so nothing is built."""
        # TODO: create tf-idf and word embedding datasets
        return None

    def fit(self):
        """Stub: training is not implemented yet."""
        # TODO: fit models
        return None

    def predict(self) -> list:
        """Stub: return the metric rows produced by the models.

        Returns:
            An empty list until evaluation is implemented. A list (rather
            than ``None``) is returned so callers can safely concatenate the
            result onto their own metrics list.
        """
        # TODO: run all models and return metrics
        return []

def experiment(datafile_path: str = 'data/dataset.parquet') -> pd.DataFrame:
    """Run every model on the dataset and collect the metrics they report.

    Args:
        datafile_path: Path to the dataset file.
            NOTE(review): this value is currently not forwarded to
            ``load_dataset``; confirm that function's signature before
            wiring it through.

    Returns:
        A DataFrame with the columns ``model_name``, ``data_type``, ``time``
        and ``accuracy``, holding the rows returned by each model's
        ``predict``. It is empty when no model reports any metrics.
    """
    # load dataset
    # run train-test-split on df (will produce label column)
    df = book_train_test_split(load_dataset())

    # Model.__init__ accepts the dataset, so pass it to each constructor
    # instead of calling the constructors with no arguments.
    models = [TransformerModel(df), ClassicalModels(df)]

    metrics = []
    for model in models:
        model.create_features(df)
        model.fit()
        rows = model.predict()
        # Models that are still stubs may return None; treat that as
        # "no metric rows" rather than crashing on list concatenation.
        if rows is not None:
            metrics.extend(rows)

    metrics_df = pd.DataFrame(metrics, columns=['model_name', 'data_type', 'time', 'accuracy'])
    # Hand the collected metrics back; previously the frame was built and dropped.
    return metrics_df
        