In [1]:
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
app_root = os.path.abspath(os.path.join(project_root, '../../app', '..'))

if project_root not in sys.path:
    sys.path.append(project_root)
    sys.path.append(app_root)

In [2]:
from train.datasets_preprocessing.datasets_preprocessing import load_json_data, make_pipeline
import pandas as pd

math_pipeline = make_pipeline('math')

X_json_raw = load_json_data('../datasets_preprocessing/datasets/math')
math_pipeline.fit_transform(X_json_raw)

math_df = pd.read_csv(os.path.join('../datasets_preprocessing/csv_question_files', 'math.csv'))
math_df.head(10)

Unnamed: 0,question,tags_str,math,bio,code
0,Function on the unit circle and exponential,"complex-analysis,continuity",1,0,0
1,What does $\sum_{n=0}^\infty 1/n^n$ converge to?,"sequences-and-series,number-theory,limits",1,0,0
2,Random variable measurable with respect to sto...,"measure-theory,random-variables,stopping-times...",1,0,0
3,What is the distribution of 2 consecutive Bino...,"probability,binomial-coefficients,binomial-dis...",1,0,0
4,Determine lines intersecting four skew lines i...,"projective-geometry,projective-space,cross-ratio",1,0,0
5,How do you deal with absolute values in a func...,"calculus,solid-of-revolution",1,0,0
6,Aren't $ f’(xy) $ and $ f’(x/y)$ ambiguous not...,"multivariable-calculus,functions",1,0,0
7,Why do counits go that way?,"soft-question,category-theory,education,adjoin...",1,0,0
8,Not understanding a proof about coherent sheav...,"algebraic-geometry,proof-explanation,schemes,s...",1,0,0
9,Model theory of the naturals with a multiplica...,"model-theory,first-order-logic,nonstandard-models",1,0,0


In [3]:
bio_pipeline = make_pipeline('bio')

X_json_raw = load_json_data('../datasets_preprocessing/datasets/bio')
bio_pipeline.fit_transform(X_json_raw)

bio_df = pd.read_csv(os.path.join('../datasets_preprocessing/csv_question_files', 'bio.csv'))
bio_df.head(10)

Unnamed: 0,question,tags_str,math,bio,code
0,How many kg of seed can one expect from 230 kg...,agriculture,0,1,0
1,Adaptive Optics in Microscopy: what are the fa...,"biophysics,microscopy,fluorescent-microscopy,o...",0,1,0
2,Which part of the reflex arc takes the longest...,"human-biology,reflexes",0,1,0
3,Is wiping with RNAse Zap enough to destroy RNA...,"molecular-biology,lab-techniques,rna,lab-reagents",0,1,0
4,When there is incomplete dominance of one alle...,"genetics,terminology",0,1,0
5,Does drinking dry water have same effect as dr...,"human-biology,food",0,1,0
6,Do non-migratory canada geese still exhibit mi...,"ornithology,migration",0,1,0
7,"If life is discovered on another planet, will ...","taxonomy,astrobiology",0,1,0
8,Why do toenails grow much slower than fingerna...,"human-biology,human-anatomy",0,1,0
9,"Why do, humans, like many birds,tend to stand ...","brain,muscles,balance",0,1,0


In [4]:
code_pipeline = make_pipeline('code')

X_json_raw = load_json_data('../datasets_preprocessing/datasets/code')
code_pipeline.fit_transform(X_json_raw)

code_df = pd.read_csv(os.path.join('../datasets_preprocessing/csv_question_files', 'code.csv'))
code_df.head(10)

Unnamed: 0,question,tags_str,math,bio,code
0,WorkGroup Data Service with JSON / Web based API,"c#,linq,json",0,0,1
1,Design pattern for logger implementation,"object-oriented,design-patterns,salesforce-apex",0,0,1
2,Temperature calculator in Rust,"beginner,rust,unit-conversion",0,0,1
3,Read binary serial data and parse integers,"c#,serial-port",0,0,1
4,Brain-flak interpreter,"parsing,go,interpreter",0,0,1
5,Implement bash auto completion in Python,"python,python-3.x,bash,autocomplete",0,0,1
6,How well or poorly structured are my routes in...,"javascript,node.js",0,0,1
7,“Proper” Asynchronous implementation,"c#,asynchronous",0,0,1
8,Determining whether a loop iterated at least o...,"python,python-3.x,generator",0,0,1
9,Output JavaScript object into HTML table of ke...,"javascript,jquery",0,0,1


In [5]:
full_df = pd.concat(
	[
		math_df,
		bio_df,
		code_df
	],
    ignore_index=True,
	axis=0
)

full_df = full_df.drop_duplicates(subset=["question"], keep="first")
full_df

Unnamed: 0,question,tags_str,math,bio,code
0,Function on the unit circle and exponential,"complex-analysis,continuity",1,0,0
1,What does $\sum_{n=0}^\infty 1/n^n$ converge to?,"sequences-and-series,number-theory,limits",1,0,0
2,Random variable measurable with respect to sto...,"measure-theory,random-variables,stopping-times...",1,0,0
3,What is the distribution of 2 consecutive Bino...,"probability,binomial-coefficients,binomial-dis...",1,0,0
4,Determine lines intersecting four skew lines i...,"projective-geometry,projective-space,cross-ratio",1,0,0
...,...,...,...,...,...
1445782,Virtual Memory - Non-contiguous Memory Allocation,"operating-systems,memory-management,virtual-me...",0,0,1
1445783,Residence time in multi server system,"algorithm-analysis,distributed-systems,queuein...",0,0,1
1445784,How can I improve my algorithm for finding opt...,"algorithms,partitions",0,0,1
1445785,When can I use dynamic programming to reduce t...,"algorithms,dynamic-programming,efficiency,algo...",0,0,1


In [6]:
from train.reporting.model_interface import ModelInterface # IMPORTANT
from train.reporting.text_svm_wrapper import TextSVMWrapper # IMPORTANT cannot load models without
from typing import Tuple
import pickle

import pandas as pd

def import_model_and_its_test_set(path: str) -> Tuple[ModelInterface, pd.DataFrame]:
        with open(path + "/model.pkl", "rb") as f:
            model = pickle.load(f)


        test_set = pd.read_csv(
            path + "/test_set.csv", index_col=0)
        return model, test_set



math_model, _ = import_model_and_its_test_set("saved_models/math")
bio_model, _ = import_model_and_its_test_set("saved_models/bio")
code_model, _ = import_model_and_its_test_set("saved_models/code")

# Tutaj wybieramy numer zbioru testowego

In [7]:
test_set_number = 2 # allowed 0 1 2

In [8]:
test_df = pd.read_csv(f"../datasets_preprocessing/datasets/test_all_models/test_{test_set_number}.csv", index_col=0)

test_df.drop_duplicates(subset=["question"], keep="first")
test_df

Unnamed: 0,question,tags_str,math,bio,code
100560,Multiplicative property for the coefficients o...,"number-theory,modular-forms",1,0,0
264249,Standard deviation of binned sample,standard-deviation,1,0,0
847180,Show that $\int_0^1x^4f_\theta(x)dx$ is strict...,"calculus,integration,definite-integrals",1,0,0
765320,slope of level curve,"multivariable-calculus,partial-derivative",1,0,0
432718,slice up a slice of a triangle into n areas of...,"geometry,triangles",1,0,0
...,...,...,...,...,...
73741,reduce k-colorable to 3-colorable graph problem,algorithms,0,0,1
25886,Using exceptions inside CakePHP controller act...,"php,cakephp",0,0,1
29832,Make an Array Reactive (Observable),"javascript,array,observer-pattern",0,0,1
51865,Algorithm: Calculate every number in a given l...,algorithms,0,0,1


In [9]:
test_df_with_labels = test_df  # test_df.merge(full_df.drop(columns="tags_str"), on="question", how="left")

test_df_with_labels

Unnamed: 0,question,tags_str,math,bio,code
100560,Multiplicative property for the coefficients o...,"number-theory,modular-forms",1,0,0
264249,Standard deviation of binned sample,standard-deviation,1,0,0
847180,Show that $\int_0^1x^4f_\theta(x)dx$ is strict...,"calculus,integration,definite-integrals",1,0,0
765320,slope of level curve,"multivariable-calculus,partial-derivative",1,0,0
432718,slice up a slice of a triangle into n areas of...,"geometry,triangles",1,0,0
...,...,...,...,...,...
73741,reduce k-colorable to 3-colorable graph problem,algorithms,0,0,1
25886,Using exceptions inside CakePHP controller act...,"php,cakephp",0,0,1
29832,Make an Array Reactive (Observable),"javascript,array,observer-pattern",0,0,1
51865,Algorithm: Calculate every number in a given l...,algorithms,0,0,1


In [10]:
test_df_with_labels["real_class"] = (test_df_with_labels["math"] * 0 +  test_df_with_labels["bio"] * 1 +
                                 test_df_with_labels["code"]
                                * 2)
test_df_with_labels

Unnamed: 0,question,tags_str,math,bio,code,real_class
100560,Multiplicative property for the coefficients o...,"number-theory,modular-forms",1,0,0,0
264249,Standard deviation of binned sample,standard-deviation,1,0,0,0
847180,Show that $\int_0^1x^4f_\theta(x)dx$ is strict...,"calculus,integration,definite-integrals",1,0,0,0
765320,slope of level curve,"multivariable-calculus,partial-derivative",1,0,0,0
432718,slice up a slice of a triangle into n areas of...,"geometry,triangles",1,0,0,0
...,...,...,...,...,...,...
73741,reduce k-colorable to 3-colorable graph problem,algorithms,0,0,1,2
25886,Using exceptions inside CakePHP controller act...,"php,cakephp",0,0,1,2
29832,Make an Array Reactive (Observable),"javascript,array,observer-pattern",0,0,1,2
51865,Algorithm: Calculate every number in a given l...,algorithms,0,0,1,2


In [11]:
test_df_with_labels["math_preds"] = math_model.predict_proba(test_df_with_labels["question"])[:, 1]
test_df_with_labels["bio_preds"] = bio_model.predict_proba(test_df_with_labels["question"])[:, 1]
test_df_with_labels["code_preds"] = code_model.predict_proba(test_df_with_labels["question"])[:, 1]

In [12]:
test_df_with_labels

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds
100560,Multiplicative property for the coefficients o...,"number-theory,modular-forms",1,0,0,0,0.962205,0.012471,0.013200
264249,Standard deviation of binned sample,standard-deviation,1,0,0,0,0.927190,0.506328,0.265348
847180,Show that $\int_0^1x^4f_\theta(x)dx$ is strict...,"calculus,integration,definite-integrals",1,0,0,0,0.974111,0.006833,0.068694
765320,slope of level curve,"multivariable-calculus,partial-derivative",1,0,0,0,0.953620,0.068730,0.029274
432718,slice up a slice of a triangle into n areas of...,"geometry,triangles",1,0,0,0,0.398257,0.125646,0.352884
...,...,...,...,...,...,...,...,...,...
73741,reduce k-colorable to 3-colorable graph problem,algorithms,0,0,1,2,0.487244,0.000111,0.986329
25886,Using exceptions inside CakePHP controller act...,"php,cakephp",0,0,1,2,0.146509,0.006185,0.971435
29832,Make an Array Reactive (Observable),"javascript,array,observer-pattern",0,0,1,2,0.004393,0.020554,0.985668
51865,Algorithm: Calculate every number in a given l...,algorithms,0,0,1,2,0.081242,0.000001,0.999996


In [13]:
import numpy as np

cols = ['math_preds', 'bio_preds', 'code_preds']

max_values = test_df_with_labels[cols].max(axis=1)
max_names = test_df_with_labels[cols].idxmax(axis=1)

class_mapping = {'math_preds': 0, 'bio_preds': 1, 'code_preds': 2}

predicted_class = max_names.map(class_mapping)

test_df_with_labels['predicted_class'] = np.where(max_values > 0.5, predicted_class, -1)
test_df_with_labels


Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
100560,Multiplicative property for the coefficients o...,"number-theory,modular-forms",1,0,0,0,0.962205,0.012471,0.013200,0
264249,Standard deviation of binned sample,standard-deviation,1,0,0,0,0.927190,0.506328,0.265348,0
847180,Show that $\int_0^1x^4f_\theta(x)dx$ is strict...,"calculus,integration,definite-integrals",1,0,0,0,0.974111,0.006833,0.068694,0
765320,slope of level curve,"multivariable-calculus,partial-derivative",1,0,0,0,0.953620,0.068730,0.029274,0
432718,slice up a slice of a triangle into n areas of...,"geometry,triangles",1,0,0,0,0.398257,0.125646,0.352884,-1
...,...,...,...,...,...,...,...,...,...,...
73741,reduce k-colorable to 3-colorable graph problem,algorithms,0,0,1,2,0.487244,0.000111,0.986329,2
25886,Using exceptions inside CakePHP controller act...,"php,cakephp",0,0,1,2,0.146509,0.006185,0.971435,2
29832,Make an Array Reactive (Observable),"javascript,array,observer-pattern",0,0,1,2,0.004393,0.020554,0.985668,2
51865,Algorithm: Calculate every number in a given l...,algorithms,0,0,1,2,0.081242,0.000001,0.999996,2


In [14]:
from sklearn.metrics import accuracy_score


accuracy_score(test_df_with_labels["real_class"], test_df_with_labels["predicted_class"])

0.8803333333333333

## Sample for analyzing errors based on wrong label or ambiguity

In [15]:
wrong_sample = test_df_with_labels[test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']].sample(n=10)

wrong_sample

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
48399,What happens at the decode phase of the instru...,computer-architecture,0,0,1,2,0.015741,0.687458,0.535602,1
56333,If $n^{\log n}$ is not polynomial or exponenti...,"algorithms,terminology,polynomial-time",0,0,1,2,0.985254,2.2e-05,0.066486,0
6605,Is Asteraceae and Compositae the same family?,"botany,classification",0,1,0,1,0.761965,0.675498,0.081119,0
81854,data structures/ Classes for RNA & DNA,"object-oriented,bioinformatics",0,0,1,2,0.002765,0.999998,0.058086,1
9377,What limits chromosomal length?,"chromosome,genomes,dna-helix",0,1,0,1,0.917717,0.174256,0.069568,0
1051387,counting numbers with specific Quality,"combinatorics,discrete-mathematics",1,0,0,0,0.126777,0.294129,0.695429,2
16636,How to search NCBI in bulk for a list of acces...,"bioinformatics,database,ncbi",0,1,0,1,0.007589,0.358909,0.745176,2
28854,Calculating numbers in the Collatz sequence,"java,performance,algorithm,comparative-review,...",0,0,1,2,0.94791,0.00127,0.543748,0
64845,Choosing an element from a set satisfying a pr...,"algorithms,randomized-algorithms,streaming-alg...",0,0,1,2,0.955677,7e-06,0.742735,0
11302,Image Processing Suite for bacterial microscop...,"molecular-biology,cell-biology,microbiology,ba...",0,1,0,1,0.060203,0.693979,0.867039,2


In [16]:
for question in wrong_sample['question']:
	print(question)


What happens at the decode phase of the instruction cycle?
If $n^{\log n}$ is not polynomial or exponential, then what this function is called?
Is Asteraceae and Compositae the same family?
data structures/ Classes for RNA & DNA
What limits chromosomal length?
counting numbers with specific Quality
How to search NCBI in bulk for a list of accession numbers?
Calculating numbers in the Collatz sequence
Choosing an element from a set satisfying a predicate uniformly at random in $O(1)$ space
Image Processing Suite for bacterial microscopy: Schnitzcells or MicrobeTracker?


## Other models winning over wrong code model test

In [17]:
test_df_with_labels[(test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']) & # But other
                    # models overcame te wrong one which resulted in a correct prediction
                    (test_df_with_labels['real_class'] != 2) & # but real_class is not code
                    (test_df_with_labels['code_preds'] >= 0.5)] # would predict code

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
963611,"""Number of Decompositions into $k$ Powers of $...","combinatorics,number-theory",1,0,0,0,0.953502,0.000051,0.694741,0
505609,Finding the area on a graph bounded by four cu...,calculus,1,0,0,0,0.985296,0.000075,0.723187,0
821056,Help Solving Recurrence Relation: $a_n = n^3a_...,"combinatorics,recurrence-relations,generating-...",1,0,0,0,0.954870,0.004365,0.765295,0
793048,Summation of all j-combinations (Expanding com...,combinatorics,1,0,0,0,0.909016,0.000034,0.730463,0
873619,Is the product of two objects in a concrete ca...,category-theory,1,0,0,0,0.949238,0.000001,0.881882,0
...,...,...,...,...,...,...,...,...,...,...
13993,Is there a free alternative to Gelcompar for c...,"microbiology,gel-electrophoresis,software",0,1,0,1,0.082877,0.937540,0.811903,1
14245,Are there any known rules that neurons always ...,"neuroscience,brain,neurophysiology,neuroanatom...",0,1,0,1,0.017326,0.991625,0.552264,1
1354,Is there a season for hummingbird moths (US)?,entomology,0,1,0,1,0.235324,0.999992,0.672929,1
4999,Are there any open plant databases (database/l...,"bioinformatics,botany,species-identification,t...",0,1,0,1,0.005023,0.992310,0.556298,1


In [18]:
test_df_with_labels[(test_df_with_labels['real_class'] != 2) &
                    (test_df_with_labels['code_preds'] >= 0.5)]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
1058554,Is there any intuition why this relation holds?,"functions,derivatives",1,0,0,0,0.684196,0.125422,0.828946,2
963611,"""Number of Decompositions into $k$ Powers of $...","combinatorics,number-theory",1,0,0,0,0.953502,0.000051,0.694741,0
505609,Finding the area on a graph bounded by four cu...,calculus,1,0,0,0,0.985296,0.000075,0.723187,0
821056,Help Solving Recurrence Relation: $a_n = n^3a_...,"combinatorics,recurrence-relations,generating-...",1,0,0,0,0.954870,0.004365,0.765295,0
793048,Summation of all j-combinations (Expanding com...,combinatorics,1,0,0,0,0.909016,0.000034,0.730463,0
...,...,...,...,...,...,...,...,...,...,...
890,Are chromosomal microdeletions passed on?,"genetics,human-genetics",0,1,0,1,0.073591,0.108738,0.667529,2
3955,Why is the Tm defined as the temperature at wh...,"molecular-biology,pcr",0,1,0,1,0.132023,0.521228,0.620853,2
12390,While preparing Jam we use sugar.Why? (can you...,osmosis,0,1,0,1,0.003918,0.556170,0.881399,2
17620,Confusion related to the use of PCA to determi...,"bioinformatics,genetics",0,1,0,1,0.059075,0.373012,0.595708,2


## Other models winning over wrong math model test

In [19]:
test_df_with_labels[(test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']) &  # But other
                    # models overcame te wrong one which resulted in a correct prediction
                    (test_df_with_labels['real_class'] != 0) &  # but real_class is not math
                    (test_df_with_labels['math_preds'] >= 0.5)]  # would predict math

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
15902,"Is a ""Fact"" any theory for which there is over...","evolution,terminology,philosophy-of-science",0,1,0,1,0.785820,0.920705,0.002773,1
3895,Is there a complete connectome of a cortical m...,"neuroscience,brain",0,1,0,1,0.743552,0.890825,0.062202,1
20053,Is organ transplantation race dependent?,transplantation,0,1,0,1,0.656302,0.942753,0.158925,1
3904,What are negative and positive after potentials?,"neurophysiology,action-potential",0,1,0,1,0.545652,0.968302,0.022165,1
14322,A question on glycolysis,"cell-biology,glucose",0,1,0,1,0.610167,0.873713,0.010629,1
...,...,...,...,...,...,...,...,...,...,...
66454,"Is {xy | x, y ∈ Σ∗ and x contains more a’s tha...","formal-languages,regular-languages,finite-auto...",0,0,1,2,0.737727,0.003068,0.881474,2
83775,Not able to prove non regularity using pumping...,"formal-languages,regular-languages,pumping-lemma",0,0,1,2,0.804660,0.005420,0.833150,2
83551,Factorial usage within proof using the pumping...,pumping-lemma,0,0,1,2,0.874542,0.000176,0.956319,2
20598,Function to get constants of range,"vba,excel",0,0,1,2,0.536967,0.025127,0.639589,2


In [20]:
test_df_with_labels[(test_df_with_labels['real_class'] != 0) &
                    (test_df_with_labels['math_preds'] >= 0.5)] # all wrong predictions

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
11671,How to derive the Equilibrium value $\hat F$ e...,population-genetics,0,1,0,1,0.970388,0.070252,0.017027,0
15902,"Is a ""Fact"" any theory for which there is over...","evolution,terminology,philosophy-of-science",0,1,0,1,0.785820,0.920705,0.002773,1
994,What is the focus distance of a 3D screen?,"eyes,vision,psychology",0,1,0,1,0.801499,0.512721,0.360632,0
12208,What is positive and negative supercoiling?,"genetics,dna,molecular-genetics,human-genetics...",0,1,0,1,0.861753,0.470861,0.024798,0
5294,Determining sequence of oligoribonucleotide,"molecular-biology,homework,rna,enzymes",0,1,0,1,0.892218,0.416034,0.518754,0
...,...,...,...,...,...,...,...,...,...,...
55092,Is this an abuse big O notation as a power of ...,time-complexity,0,0,1,2,0.696020,0.000592,0.761329,2
73464,Maximize points of K dance moves,dynamic-programming,0,0,1,2,0.783544,0.762593,0.671716,0
33114,Finding the middle permutation,"performance,ruby,array,combinatorics",0,0,1,2,0.652304,0.010534,0.153859,0
42944,Increasing speed of BWT inverse,"python,performance,strings,compression",0,0,1,2,0.696257,0.007405,0.058712,0


## Other models winning over wrong bio model test

In [21]:
test_df_with_labels[(test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']) &  # But other
                    # models overcame te wrong one which resulted in a correct prediction
                    (test_df_with_labels['real_class'] != 1) &  # but real_class is not bio
                    (test_df_with_labels['bio_preds'] >= 0.5)]  # would predict bio

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
264249,Standard deviation of binned sample,standard-deviation,1,0,0,0,0.927190,0.506328,0.265348,0
108228,A Binomial coefficient sequence,"algebra-precalculus,binomial-coefficients",1,0,0,0,0.976170,0.882456,0.014246,0
1029353,Find all the units in the indicated rings.,"abstract-algebra,group-theory,ring-theory",1,0,0,0,0.671070,0.636102,0.043082,0
415140,Show that $a_{rs} = kb_{rs}$ given $a_{rs}b_{t...,"linear-algebra,tensors",1,0,0,0,0.793921,0.527388,0.120298,0
332828,Mean and Standard Deviation of samples consist...,"statistics,standard-deviation,means",1,0,0,0,0.724169,0.641491,0.217371,0
...,...,...,...,...,...,...,...,...,...,...
40924,Detecting the presence of multiple URL segments,"c#,comparative-review,error-handling,url",0,0,1,2,0.045008,0.525854,0.986162,2
77341,Storing Specific data on DB Table(s),"data-structures,database-theory",0,0,1,2,0.007926,0.582595,0.972758,2
73981,Are assembly languages untyped?,"type-checking,assembly",0,0,1,2,0.009725,0.685443,0.981169,2
8805,I can has(kell) moar cheezburgers?,"beginner,strings,haskell",0,0,1,2,0.235324,0.693915,0.827198,2


In [22]:
test_df_with_labels[(test_df_with_labels['real_class'] != 1) &
## Other models winning over wrong code model test

test_df_with_labels[(test_df_with_labels['real_class'] == test_df_with_labels['predicted_class']) & # But other
                    # models overcame te wrong one which resulted in a correct prediction
                    (test_df_with_labels['real_class'] != 2) & # but real_class is not code
                    (test_df_with_labels['code_preds'] >= 0.5)] # would predict code

test_df_with_labels[(test_df_with_labels['real_class'] != 2) &
                    (test_df_with_labels['code_preds'] >= 0.5)]
                    (test_df_with_labels['bio_preds'] >= 0.5)] # all wrong predictions

SyntaxError: invalid syntax. Perhaps you forgot a comma? (3738981596.py, line 1)

## Other models winning over right code model test

In [None]:
test_df_with_labels[(test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']) & # But other
                    # models overcame the right one which resulted in a wrong prediction
                    (test_df_with_labels['real_class'] == 2) & # real_class is code
                    (test_df_with_labels['code_preds'] >= 0.5)] # would predict code

In [None]:
test_df_with_labels[(test_df_with_labels['real_class'] == 2) &
                    (test_df_with_labels['code_preds'] >= 0.5)]

## Other models winning over right math model

In [None]:
test_df_with_labels[(test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']) & # But other
                    # models overcame the right one which resulted in a wrong prediction
                    (test_df_with_labels['real_class'] == 0) & # real_class is math
                    (test_df_with_labels['math_preds'] >= 0.5)] # would predict math

In [None]:
test_df_with_labels[(test_df_with_labels['real_class'] == 0) &
                    (test_df_with_labels['math_preds'] >= 0.5)]

## Other models winning over right bio model

In [None]:
test_df_with_labels[(test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']) & # But other
                    # models overcame the right one which resulted in a wrong prediction
                    (test_df_with_labels['real_class'] == 1) & # real_class is bio
                    (test_df_with_labels['bio_preds'] >= 0.5)] # would predict bio

In [None]:
test_df_with_labels[(test_df_with_labels['real_class'] == 1) &
                    (test_df_with_labels['bio_preds'] >= 0.5)]