In [None]:
import os
import sys

# Make the project packages importable from this notebook's working directory.
# project_root is two directory levels above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# NOTE(review): the trailing '..' cancels the 'app' component, so this resolves
# to two levels above project_root rather than to an 'app' folder -- confirm
# that this is the intended directory.
app_root = os.path.abspath(os.path.join(project_root, '../../app', '..'))

# Check each entry on its own: previously app_root was only added when
# project_root happened to be missing, and was added without a duplicate check.
for extra_path in (project_root, app_root):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [None]:
from train.datasets_preprocessing.datasets_preprocessing import load_json_data, make_pipeline
import pandas as pd

# Preprocessing pipeline configured for the math subject.
math_pipeline = make_pipeline('math')

# Run the raw JSON records through the pipeline. The return value is not kept;
# the CSV read below is presumably written by the pipeline as a side effect
# -- TODO confirm.
X_json_raw = load_json_data('../datasets_preprocessing/datasets/math')
math_pipeline.fit_transform(X_json_raw)

# Load the math question file and preview its first ten rows.
math_csv_path = os.path.join('../datasets_preprocessing/csv_question_files', 'math.csv')
math_df = pd.read_csv(math_csv_path)
math_df.head(10)

In [None]:
# Preprocessing pipeline configured for the bio subject.
bio_pipeline = make_pipeline('bio')

# Run the raw JSON records through the pipeline. The return value is not kept;
# the CSV read below is presumably written by the pipeline as a side effect
# -- TODO confirm.
X_json_raw = load_json_data('../datasets_preprocessing/datasets/bio')
bio_pipeline.fit_transform(X_json_raw)

# Load the bio question file and preview its first ten rows.
bio_csv_path = os.path.join('../datasets_preprocessing/csv_question_files', 'bio.csv')
bio_df = pd.read_csv(bio_csv_path)
bio_df.head(10)

In [None]:
# Preprocessing pipeline configured for the code subject.
code_pipeline = make_pipeline('code')

# Run the raw JSON records through the pipeline. The return value is not kept;
# the CSV read below is presumably written by the pipeline as a side effect
# -- TODO confirm.
X_json_raw = load_json_data('../datasets_preprocessing/datasets/code')
code_pipeline.fit_transform(X_json_raw)

# Load the code question file and preview its first ten rows.
code_csv_path = os.path.join('../datasets_preprocessing/csv_question_files', 'code.csv')
code_df = pd.read_csv(code_csv_path)
code_df.head(10)

In [None]:
# Stack the three per-subject frames row-wise under a fresh 0..n-1 index.
subject_frames = [math_df, bio_df, code_df]
full_df = pd.concat(subject_frames, axis=0, ignore_index=True)

# Keep only the first occurrence of every question text; the index is not
# renumbered afterwards, so it may contain gaps.
full_df = full_df.drop_duplicates(subset=["question"], keep="first")
full_df

In [None]:
from train.reporting.model_interface import ModelInterface # IMPORTANT
from train.reporting.text_svm_wrapper import TextSVMWrapper # IMPORTANT cannot load models without
from typing import Tuple
import pickle

import pandas as pd

def import_model_and_its_test_set(path: str) -> Tuple[ModelInterface, pd.DataFrame]:
    """Load a pickled model and its test set from a single directory.

    Args:
        path: Directory that contains ``model.pkl`` and ``test_set.csv``.

    Returns:
        A ``(model, test_set)`` tuple: the unpickled object and the CSV parsed
        with its first column used as the index.

    Raises:
        FileNotFoundError: If either file is missing from ``path``.
    """
    # SECURITY: unpickling can execute arbitrary code -- only load model files
    # that come from a trusted source.
    with open(os.path.join(path, "model.pkl"), "rb") as model_file:
        model = pickle.load(model_file)

    test_set = pd.read_csv(os.path.join(path, "test_set.csv"), index_col=0)
    return model, test_set



# Load the three saved per-subject models. Each saved test set is returned as
# well but is discarded here (bound to `_`).
math_model, _ = import_model_and_its_test_set("../saved_models/math")
bio_model, _ = import_model_and_its_test_set("../saved_models/bio")
code_model, _ = import_model_and_its_test_set("../saved_models/code")

# Select the test set number here

In [None]:
# Number of the evaluation split to load (selects the test_<n>.csv file).
test_set_number = 0  # allowed: 0, 1, 2

# Fail here with a clear message instead of later with a missing-file error.
assert test_set_number in (0, 1, 2), "test_set_number must be 0, 1 or 2"

In [None]:
# Load the selected evaluation split; the first CSV column is used as the index.
test_df = pd.read_csv(f"../datasets_preprocessing/test_all_models/test_{test_set_number}.csv", index_col=0)

# drop_duplicates returns a new frame rather than modifying in place, so the
# result must be assigned back; otherwise duplicate questions stay in the set.
test_df = test_df.drop_duplicates(subset=["question"], keep="first")
test_df

In [None]:
# Work on an independent copy so the columns added in later cells do not
# silently modify test_df as well (a plain assignment only creates an alias).
# The label columns are read directly from the test split; the earlier
# merge-based alternative is kept for reference:
#   test_df.merge(full_df.drop(columns="tags_str"), on="question", how="left")
test_df_with_labels = test_df.copy()

test_df_with_labels

In [None]:
# Collapse the three label columns into one class id: math -> 0, bio -> 1, code -> 2.
# NOTE(review): this presumes exactly one of the columns is 1 per row; a row
# with no label would come out as 0 (math) -- confirm against the data.
math_part = test_df_with_labels["math"] * 0
bio_part = test_df_with_labels["bio"] * 1
code_part = test_df_with_labels["code"] * 2

test_df_with_labels["real_class"] = math_part + bio_part + code_part
test_df_with_labels

In [None]:
# Score every question with each subject model and store column 1 of
# predict_proba -- presumably the positive-class probability; TODO confirm
# against the models' class order.
questions = test_df_with_labels["question"]
model_by_column = {
    "math_preds": math_model,
    "bio_preds": bio_model,
    "code_preds": code_model,
}
for preds_column, subject_model in model_by_column.items():
    test_df_with_labels[preds_column] = subject_model.predict_proba(questions)[:, 1]

In [None]:
# Show the frame again now that the three *_preds columns have been added.
test_df_with_labels

In [None]:
import numpy as np

# Score columns listed in class-id order: position i belongs to class i.
cols = ['math_preds', 'bio_preds', 'code_preds']
class_mapping = {column: class_id for class_id, column in enumerate(cols)}

model_scores = test_df_with_labels[cols]
max_values = model_scores.max(axis=1)      # highest score in each row
max_names = model_scores.idxmax(axis=1)    # column that produced that score
predicted_class = max_names.map(class_mapping)

# Rows whose best score does not exceed 0.5 get the placeholder class -1.
test_df_with_labels['predicted_class'] = np.where(max_values > 0.5, predicted_class, -1)
test_df_with_labels


In [None]:
from sklearn.metrics import accuracy_score


# Share of rows whose predicted class equals the true class; rows assigned the
# -1 placeholder never match a true class and therefore count as errors.
accuracy_score(
    y_true=test_df_with_labels["real_class"],
    y_pred=test_df_with_labels["predicted_class"],
)

## Sample for analyzing errors based on wrong label or ambiguity

In [None]:
# All rows where the combined prediction differs from the true class.
misclassified = test_df_with_labels[
    test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']
]

# Cap the sample size at the number of available rows (sample() raises when
# asked for more than exist) and fix the seed so reruns show the same rows.
wrong_sample = misclassified.sample(n=min(10, len(misclassified)), random_state=42)

wrong_sample

In [None]:
# Print every sampled question in full, one per line.
for question in wrong_sample['question']:
    print(question)


## Other models winning over wrong code model test

In [None]:
# Correctly classified rows whose true class is not code although the code
# model alone scored them at 0.5 or above: another model's score took
# precedence over the code model's wrong vote.
prediction_is_right = test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']
label_is_not_code = test_df_with_labels['real_class'] != 2
code_model_fires = test_df_with_labels['code_preds'] >= 0.5

test_df_with_labels[prediction_is_right & label_is_not_code & code_model_fires]

In [None]:
# All rows the code model scores at 0.5 or above although the true class is
# not code, regardless of the combined prediction.
label_is_not_code = test_df_with_labels['real_class'] != 2
code_model_fires = test_df_with_labels['code_preds'] >= 0.5

test_df_with_labels[label_is_not_code & code_model_fires]

## Other models winning over wrong math model test

In [None]:
# Correctly classified rows whose true class is not math although the math
# model alone scored them at 0.5 or above: another model's score took
# precedence over the math model's wrong vote.
prediction_is_right = test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']
label_is_not_math = test_df_with_labels['real_class'] != 0
math_model_fires = test_df_with_labels['math_preds'] >= 0.5

test_df_with_labels[prediction_is_right & label_is_not_math & math_model_fires]

In [None]:
# All rows the math model scores at 0.5 or above although the true class is
# not math, i.e. every wrong positive vote of the math model.
label_is_not_math = test_df_with_labels['real_class'] != 0
math_model_fires = test_df_with_labels['math_preds'] >= 0.5

test_df_with_labels[label_is_not_math & math_model_fires]

## Other models winning over wrong bio model test

In [None]:
# Correctly classified rows whose true class is not bio although the bio
# model alone scored them at 0.5 or above: another model's score took
# precedence over the bio model's wrong vote.
prediction_is_right = test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']
label_is_not_bio = test_df_with_labels['real_class'] != 1
bio_model_fires = test_df_with_labels['bio_preds'] >= 0.5

test_df_with_labels[prediction_is_right & label_is_not_bio & bio_model_fires]

In [None]:
# All rows the bio model scores at 0.5 or above although the true class is
# not bio, i.e. every wrong positive vote of the bio model.
label_is_not_bio = test_df_with_labels['real_class'] != 1
bio_model_fires = test_df_with_labels['bio_preds'] >= 0.5

test_df_with_labels[label_is_not_bio & bio_model_fires]

## Other models winning over right code model test

In [None]:
# Misclassified code rows for which the code model alone scored 0.5 or above:
# its correct vote did not end up as the combined prediction.
prediction_is_wrong = test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']
label_is_code = test_df_with_labels['real_class'] == 2
code_model_fires = test_df_with_labels['code_preds'] >= 0.5

test_df_with_labels[prediction_is_wrong & label_is_code & code_model_fires]

In [None]:
# All code rows that the code model scores at 0.5 or above, regardless of the
# combined prediction.
label_is_code = test_df_with_labels['real_class'] == 2
code_model_fires = test_df_with_labels['code_preds'] >= 0.5

test_df_with_labels[label_is_code & code_model_fires]

## Other models winning over right math model

In [None]:
# Misclassified math rows for which the math model alone scored 0.5 or above:
# its correct vote did not end up as the combined prediction.
prediction_is_wrong = test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']
label_is_math = test_df_with_labels['real_class'] == 0
math_model_fires = test_df_with_labels['math_preds'] >= 0.5

test_df_with_labels[prediction_is_wrong & label_is_math & math_model_fires]

In [None]:
# All math rows that the math model scores at 0.5 or above, regardless of the
# combined prediction.
label_is_math = test_df_with_labels['real_class'] == 0
math_model_fires = test_df_with_labels['math_preds'] >= 0.5

test_df_with_labels[label_is_math & math_model_fires]

## Other models winning over right bio model

In [None]:
# Misclassified bio rows for which the bio model alone scored 0.5 or above:
# its correct vote did not end up as the combined prediction.
prediction_is_wrong = test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']
label_is_bio = test_df_with_labels['real_class'] == 1
bio_model_fires = test_df_with_labels['bio_preds'] >= 0.5

test_df_with_labels[prediction_is_wrong & label_is_bio & bio_model_fires]

In [None]:
# All bio rows that the bio model scores at 0.5 or above, regardless of the
# combined prediction.
label_is_bio = test_df_with_labels['real_class'] == 1
bio_model_fires = test_df_with_labels['bio_preds'] >= 0.5

test_df_with_labels[label_is_bio & bio_model_fires]