In [1]:
import os
import sys

# Directories that must be importable from this notebook.
# project_root is two levels above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# NOTE(review): the trailing '..' cancels the 'app' component, so this resolves to the
# directory two levels above project_root rather than to an 'app' folder - confirm intent.
app_root = os.path.abspath(os.path.join(project_root, '../../app', '..'))

# Check each path independently so app_root is still added when project_root
# is already on sys.path, and neither path is appended twice.
for _path in (project_root, app_root):
    if _path not in sys.path:
        sys.path.append(_path)

In [2]:
from train.datasets_preprocessing.datasets_preprocessing import load_json_data, make_pipeline
import pandas as pd

# Build the 'math' preprocessing pipeline and run it over the raw JSON questions.
# The transformed result is not kept; the question table is read back from CSV below
# (presumably written by the pipeline as a side effect - TODO confirm).
math_pipeline = make_pipeline('math')

X_json_raw = load_json_data('../datasets_preprocessing/datasets/math')
math_pipeline.fit_transform(X_json_raw)

math_df = pd.read_csv(
    os.path.join('../datasets_preprocessing/csv_question_files', 'math.csv')
)
math_df.head(n=10)

Unnamed: 0,question,tags_str,math,bio,code
0,Function on the unit circle and exponential,"complex-analysis,continuity",1,0,0
1,What does $\sum_{n=0}^\infty 1/n^n$ converge to?,"sequences-and-series,number-theory,limits",1,0,0
2,Random variable measurable with respect to sto...,"measure-theory,random-variables,stopping-times...",1,0,0
3,What is the distribution of 2 consecutive Bino...,"probability,binomial-coefficients,binomial-dis...",1,0,0
4,Determine lines intersecting four skew lines i...,"projective-geometry,projective-space,cross-ratio",1,0,0
5,How do you deal with absolute values in a func...,"calculus,solid-of-revolution",1,0,0
6,Aren't $ f’(xy) $ and $ f’(x/y)$ ambiguous not...,"multivariable-calculus,functions",1,0,0
7,Why do counits go that way?,"soft-question,category-theory,education,adjoin...",1,0,0
8,Not understanding a proof about coherent sheav...,"algebraic-geometry,proof-explanation,schemes,s...",1,0,0
9,Model theory of the naturals with a multiplica...,"model-theory,first-order-logic,nonstandard-models",1,0,0


In [3]:
# Build the 'bio' preprocessing pipeline and run it over the raw JSON questions.
# The transformed result is not kept; the question table is read back from CSV below
# (presumably written by the pipeline as a side effect - TODO confirm).
bio_pipeline = make_pipeline('bio')

X_json_raw = load_json_data('../datasets_preprocessing/datasets/bio')
bio_pipeline.fit_transform(X_json_raw)

bio_df = pd.read_csv(
    os.path.join('../datasets_preprocessing/csv_question_files', 'bio.csv')
)
bio_df.head(n=10)

Unnamed: 0,question,tags_str,math,bio,code
0,How many kg of seed can one expect from 230 kg...,agriculture,0,1,0
1,Adaptive Optics in Microscopy: what are the fa...,"biophysics,microscopy,fluorescent-microscopy,o...",0,1,0
2,Which part of the reflex arc takes the longest...,"human-biology,reflexes",0,1,0
3,Is wiping with RNAse Zap enough to destroy RNA...,"molecular-biology,lab-techniques,rna,lab-reagents",0,1,0
4,When there is incomplete dominance of one alle...,"genetics,terminology",0,1,0
5,Does drinking dry water have same effect as dr...,"human-biology,food",0,1,0
6,Do non-migratory canada geese still exhibit mi...,"ornithology,migration",0,1,0
7,"If life is discovered on another planet, will ...","taxonomy,astrobiology",0,1,0
8,Why do toenails grow much slower than fingerna...,"human-biology,human-anatomy",0,1,0
9,"Why do, humans, like many birds,tend to stand ...","brain,muscles,balance",0,1,0


In [4]:
# Build the 'code' preprocessing pipeline and run it over the raw JSON questions.
# The transformed result is not kept; the question table is read back from CSV below
# (presumably written by the pipeline as a side effect - TODO confirm).
code_pipeline = make_pipeline('code')

X_json_raw = load_json_data('../datasets_preprocessing/datasets/code')
code_pipeline.fit_transform(X_json_raw)

code_df = pd.read_csv(
    os.path.join('../datasets_preprocessing/csv_question_files', 'code.csv')
)
code_df.head(n=10)

Unnamed: 0,question,tags_str,math,bio,code
0,WorkGroup Data Service with JSON / Web based API,"c#,linq,json",0,0,1
1,Design pattern for logger implementation,"object-oriented,design-patterns,salesforce-apex",0,0,1
2,Temperature calculator in Rust,"beginner,rust,unit-conversion",0,0,1
3,Read binary serial data and parse integers,"c#,serial-port",0,0,1
4,Brain-flak interpreter,"parsing,go,interpreter",0,0,1
5,Implement bash auto completion in Python,"python,python-3.x,bash,autocomplete",0,0,1
6,How well or poorly structured are my routes in...,"javascript,node.js",0,0,1
7,“Proper” Asynchronous implementation,"c#,asynchronous",0,0,1
8,Determining whether a loop iterated at least o...,"python,python-3.x,generator",0,0,1
9,Output JavaScript object into HTML table of ke...,"javascript,jquery",0,0,1


In [5]:
# Stack the three per-domain question tables row-wise with a fresh 0..n-1 index,
# then keep only the first occurrence of each question text.
full_df = (
    pd.concat([math_df, bio_df, code_df], axis=0, ignore_index=True)
    .drop_duplicates(subset=["question"], keep="first")
)
full_df

Unnamed: 0,question,tags_str,math,bio,code
0,Function on the unit circle and exponential,"complex-analysis,continuity",1,0,0
1,What does $\sum_{n=0}^\infty 1/n^n$ converge to?,"sequences-and-series,number-theory,limits",1,0,0
2,Random variable measurable with respect to sto...,"measure-theory,random-variables,stopping-times...",1,0,0
3,What is the distribution of 2 consecutive Bino...,"probability,binomial-coefficients,binomial-dis...",1,0,0
4,Determine lines intersecting four skew lines i...,"projective-geometry,projective-space,cross-ratio",1,0,0
...,...,...,...,...,...
1445782,Virtual Memory - Non-contiguous Memory Allocation,"operating-systems,memory-management,virtual-me...",0,0,1
1445783,Residence time in multi server system,"algorithm-analysis,distributed-systems,queuein...",0,0,1
1445784,How can I improve my algorithm for finding opt...,"algorithms,partitions",0,0,1
1445785,When can I use dynamic programming to reduce t...,"algorithms,dynamic-programming,efficiency,algo...",0,0,1


In [6]:
from train.reporting.model_interface import ModelInterface # IMPORTANT
from train.reporting.text_svm_wrapper import TextSVMWrapper # IMPORTANT cannot load models without
from typing import Tuple
import pickle

import pandas as pd

def import_model_and_its_test_set(path: "str | os.PathLike[str]") -> "Tuple[ModelInterface, pd.DataFrame]":
    """Load a pickled model and its accompanying test set from one directory.

    Args:
        path: Directory containing ``model.pkl`` and ``test_set.csv``. Both
            plain strings and ``os.PathLike`` objects (e.g. ``pathlib.Path``)
            are accepted.

    Returns:
        A ``(model, test_set)`` tuple: the unpickled model object and the test
        set read from ``test_set.csv`` with its first column used as the index.

    Raises:
        FileNotFoundError: If either file is missing from ``path``.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at model files produced by this project.
    with open(os.path.join(path, "model.pkl"), "rb") as f:
        model = pickle.load(f)

    test_set = pd.read_csv(os.path.join(path, "test_set.csv"), index_col=0)
    return model, test_set



# Load the three saved per-domain models. Each model directory's test set is
# also read by the helper but discarded here (bound to `_`).
math_model, _ = import_model_and_its_test_set("../saved_models/math")
bio_model, _ = import_model_and_its_test_set("../saved_models/bio")
code_model, _ = import_model_and_its_test_set("../saved_models/code")

# Select the test set number here

In [7]:
# Which evaluation split to load (file test_<n>.csv); allowed values: 0, 1, 2.
test_set_number = 0
if test_set_number not in (0, 1, 2):
    # Fail here with a clear message instead of a missing-file error when the CSV is read.
    raise ValueError(f"test_set_number must be 0, 1 or 2, got {test_set_number!r}")

In [8]:
# Load the selected evaluation split; the first CSV column is used as the index.
test_df = pd.read_csv(
    f"../datasets_preprocessing/test_all_models/test_{test_set_number}.csv", index_col=0
)

# drop_duplicates returns a new frame rather than modifying in place,
# so the result has to be assigned back for the de-duplication to take effect.
test_df = test_df.drop_duplicates(subset=["question"], keep="first")
test_df

Unnamed: 0,question,tags_str,math,bio,code
1010159,Math subject GRE test 9768 Q.26,"maxima-minima,gre-exam",1,0,0
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0
774145,Show that a power series is continuous in its ...,"real-analysis,sequences-and-series,convergence...",1,0,0
864291,Proof verification that Q is dense in R,"real-analysis,self-learning,proof-verification",1,0,0
816207,What are all the even positive integers $n$ su...,"number-theory,binomial-coefficients",1,0,0
...,...,...,...,...,...
38340,Dutch National flag solution in C++,c++,0,0,1
67798,"""Implied atomic propositions"" in propositional...",propositional-logic,0,0,1
1448,Hackerrank Sum vs XoR,"c#,programming-challenge,time-limit-exceeded,b...",0,0,1
31637,Transforming XML as it is being generated on a...,"xml,xslt",0,0,1


In [9]:
# The test CSV already contains the one-hot label columns (math / bio / code),
# so no merge with full_df is needed to obtain them.
# Work on a copy: the columns added in later cells then do not mutate test_df,
# and re-running this cell starts again from a clean frame.
test_df_with_labels = test_df.copy()

test_df_with_labels

Unnamed: 0,question,tags_str,math,bio,code
1010159,Math subject GRE test 9768 Q.26,"maxima-minima,gre-exam",1,0,0
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0
774145,Show that a power series is continuous in its ...,"real-analysis,sequences-and-series,convergence...",1,0,0
864291,Proof verification that Q is dense in R,"real-analysis,self-learning,proof-verification",1,0,0
816207,What are all the even positive integers $n$ su...,"number-theory,binomial-coefficients",1,0,0
...,...,...,...,...,...
38340,Dutch National flag solution in C++,c++,0,0,1
67798,"""Implied atomic propositions"" in propositional...",propositional-logic,0,0,1
1448,Hackerrank Sum vs XoR,"c#,programming-challenge,time-limit-exceeded,b...",0,0,1
31637,Transforming XML as it is being generated on a...,"xml,xslt",0,0,1


In [10]:
# Collapse the three label columns into one integer class id by weighting them:
# math -> 0, bio -> 1, code -> 2.
test_df_with_labels["real_class"] = (
    0 * test_df_with_labels["math"]
    + 1 * test_df_with_labels["bio"]
    + 2 * test_df_with_labels["code"]
)
test_df_with_labels

Unnamed: 0,question,tags_str,math,bio,code,real_class
1010159,Math subject GRE test 9768 Q.26,"maxima-minima,gre-exam",1,0,0,0
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0,0
774145,Show that a power series is continuous in its ...,"real-analysis,sequences-and-series,convergence...",1,0,0,0
864291,Proof verification that Q is dense in R,"real-analysis,self-learning,proof-verification",1,0,0,0
816207,What are all the even positive integers $n$ su...,"number-theory,binomial-coefficients",1,0,0,0
...,...,...,...,...,...,...
38340,Dutch National flag solution in C++,c++,0,0,1,2
67798,"""Implied atomic propositions"" in propositional...",propositional-logic,0,0,1,2
1448,Hackerrank Sum vs XoR,"c#,programming-challenge,time-limit-exceeded,b...",0,0,1,2
31637,Transforming XML as it is being generated on a...,"xml,xslt",0,0,1,2


In [11]:
# Score every question with each per-domain model and store column 1 of
# predict_proba (presumably the positive-class probability - confirm against ModelInterface).
questions = test_df_with_labels["question"]
for column, model in (
    ("math_preds", math_model),
    ("bio_preds", bio_model),
    ("code_preds", code_model),
):
    test_df_with_labels[column] = model.predict_proba(questions)[:, 1]

In [12]:
# Display the frame, which now includes the math_preds / bio_preds / code_preds columns.
test_df_with_labels

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds
1010159,Math subject GRE test 9768 Q.26,"maxima-minima,gre-exam",1,0,0,0,0.605825,0.080062,0.442586
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0,0,0.100390,0.637746,0.558531
774145,Show that a power series is continuous in its ...,"real-analysis,sequences-and-series,convergence...",1,0,0,0,1.000000,0.000126,0.000396
864291,Proof verification that Q is dense in R,"real-analysis,self-learning,proof-verification",1,0,0,0,0.988245,0.003601,0.059158
816207,What are all the even positive integers $n$ su...,"number-theory,binomial-coefficients",1,0,0,0,0.931001,0.009177,0.088320
...,...,...,...,...,...,...,...,...,...
38340,Dutch National flag solution in C++,c++,0,0,1,2,0.278477,0.027369,0.264458
67798,"""Implied atomic propositions"" in propositional...",propositional-logic,0,0,1,2,0.751757,0.001532,0.817298
1448,Hackerrank Sum vs XoR,"c#,programming-challenge,time-limit-exceeded,b...",0,0,1,2,0.030458,0.001645,0.969061
31637,Transforming XML as it is being generated on a...,"xml,xslt",0,0,1,2,0.348931,0.095666,0.892323


In [13]:
import numpy as np

# Combine the three per-model scores into a single decision: pick the model
# with the highest score, but only accept it when that score is strictly
# above 0.5; otherwise mark the row as -1 (no class).
cols = ['math_preds', 'bio_preds', 'code_preds']
class_mapping = {'math_preds': 0, 'bio_preds': 1, 'code_preds': 2}

pred_scores = test_df_with_labels[cols]
max_names = pred_scores.idxmax(axis=1)  # name of the winning score column per row
max_values = pred_scores.max(axis=1)    # the winning score itself
predicted_class = max_names.map(class_mapping)

test_df_with_labels['predicted_class'] = np.where(max_values > 0.5, predicted_class, -1)
test_df_with_labels


Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
1010159,Math subject GRE test 9768 Q.26,"maxima-minima,gre-exam",1,0,0,0,0.605825,0.080062,0.442586,0
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0,0,0.100390,0.637746,0.558531,1
774145,Show that a power series is continuous in its ...,"real-analysis,sequences-and-series,convergence...",1,0,0,0,1.000000,0.000126,0.000396,0
864291,Proof verification that Q is dense in R,"real-analysis,self-learning,proof-verification",1,0,0,0,0.988245,0.003601,0.059158,0
816207,What are all the even positive integers $n$ su...,"number-theory,binomial-coefficients",1,0,0,0,0.931001,0.009177,0.088320,0
...,...,...,...,...,...,...,...,...,...,...
38340,Dutch National flag solution in C++,c++,0,0,1,2,0.278477,0.027369,0.264458,-1
67798,"""Implied atomic propositions"" in propositional...",propositional-logic,0,0,1,2,0.751757,0.001532,0.817298,2
1448,Hackerrank Sum vs XoR,"c#,programming-challenge,time-limit-exceeded,b...",0,0,1,2,0.030458,0.001645,0.969061,2
31637,Transforming XML as it is being generated on a...,"xml,xslt",0,0,1,2,0.348931,0.095666,0.892323,2


In [14]:
from sklearn.metrics import accuracy_score


# Overall accuracy of the combined decision; rows with predicted_class == -1
# are scored like any other mismatch.
accuracy_score(
    y_true=test_df_with_labels["real_class"],
    y_pred=test_df_with_labels["predicted_class"],
)

0.8783333333333333

## Sample of misclassified questions for error analysis (wrong labels or ambiguous questions)

In [15]:
# Rows where the combined decision disagrees with the ground-truth class.
misclassified = test_df_with_labels[
    test_df_with_labels['real_class'] != test_df_with_labels['predicted_class']
]

# Fixed seed so the same rows are drawn on every run; n is capped so the cell
# still works when fewer than 10 misclassified rows exist.
wrong_sample = misclassified.sample(n=min(10, len(misclassified)), random_state=42)

wrong_sample

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
290518,Size of $x^TAx$ in comparison to $|x|$,linear-algebra,1,0,0,0,0.014159,0.062228,0.966291,2
196077,Why is Sesame Street's Count von Count's favor...,"soft-question,prime-numbers,radicals",1,0,0,0,0.128519,0.179542,0.864567,2
76533,What is the difference between cyber security ...,"terminology,knowledge-representation,ontologies",0,0,1,2,0.162206,0.134813,0.279804,-1
52931,Help understanding how to make a simple 3D min...,"algorithms,computational-geometry",0,0,1,2,0.57319,0.004308,0.43262,0
61824,Z Notation: sequence of sequences - find sum,"formal-methods,z-notation",0,0,1,2,0.999995,0.000565,0.00285,0
70106,"For which c, d is Gap2SAT[c, d] in P (such tha...","complexity-theory,2-sat",0,0,1,2,0.235324,0.693915,0.336591,1
14714,Is there any math formula that can be used to ...,"botany,zoology",0,1,0,1,0.969152,0.041635,0.154161,0
780395,"Given a Cayley table, is there an algorithm to...","abstract-algebra,group-theory,algorithms,finit...",1,0,0,0,0.436623,2.5e-05,0.92896,2
267533,Finding unknown numbers using $ LCM $ and $ HCF $,"number-theory,elementary-number-theory",1,0,0,0,0.628972,5e-05,0.95055,2
21013,Procedural generation of a general matrix repr...,"python,performance,random,matrix",0,0,1,2,0.664917,0.003274,0.344891,0


In [16]:
# Print each sampled question on its own line so the full text is visible
# (the table rendering truncates long strings).
for question in wrong_sample['question'].tolist():
    print(question)


Size of $x^TAx$ in comparison to $|x|$
Why is Sesame Street's Count von Count's favorite number $34,\!969$?
What is the difference between cyber security ontologies and scenario ontologies in this system?
Help understanding how to make a simple 3D minimum bounding sphere?
Z Notation: sequence of sequences - find sum
For which c, d is Gap2SAT[c, d] in P (such that 0<c<d<1)?
Is there any math formula that can be used to describe shape of leaves?
Given a Cayley table, is there an algorithm to determine if it is a dihedral group?
Finding unknown numbers using $ LCM $ and $ HCF $
Procedural generation of a general matrix representing a tile map


## Cases where other models outvoted a wrong code-model prediction

In [17]:
# Non-code questions that the code model alone would have claimed
# (code_preds >= 0.5) but that still ended up with the correct final class.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].eq(test_df_with_labels['predicted_class'])  # final prediction is correct
    & test_df_with_labels['real_class'].ne(2)    # true class is not code
    & test_df_with_labels['code_preds'].ge(0.5)  # code model would predict code
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
1006473,Is a given function Lipschitz?,real-analysis,1,0,0,0,0.933626,0.000764,0.529401,0
598680,Logic problem on sum of possible numbers a giv...,logic,1,0,0,0,0.942607,0.000008,0.881549,0
1273518,Maximum of a Gaussian random vector,"probability-theory,probability-distributions,n...",1,0,0,0,0.983136,0.000326,0.500000,0
25735,maximum modulus principle with a discontinuity...,complex-analysis,1,0,0,0,0.759591,0.010427,0.718486,0
461840,Clues to find the graphs of functions,"analysis,graphing-functions",1,0,0,0,0.734723,0.000468,0.629921,0
...,...,...,...,...,...,...,...,...,...,...
6233,What does characterization mean in a genomics ...,genetics,0,1,0,1,0.091341,0.928404,0.558417,1
2770,What are tail currents?,"neuroscience,electrophysiology",0,1,0,1,0.170936,0.859488,0.655186,1
20164,Telomere shortening during replication,"dna,molecular-genetics,telomere,replication",0,1,0,1,0.042501,0.961113,0.529834,1
14295,Is there a term for a procedure in which the c...,"terminology,methods,chromatography",0,1,0,1,0.102915,0.927216,0.838238,1


In [18]:
# Every non-code question on which the code model fired (code_preds >= 0.5),
# regardless of the final prediction.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].ne(2)
    & test_df_with_labels['code_preds'].ge(0.5)
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0,0,0.100390,0.637746,0.558531,1
1167102,Simplex algorithm/basic artificial variable,"linear-algebra,linear-programming,two-phase-si...",1,0,0,0,0.029946,0.001250,0.984235,2
1006473,Is a given function Lipschitz?,real-analysis,1,0,0,0,0.933626,0.000764,0.529401,0
1153278,"In a directed graph with $n \geq 2$ nodes, if ...","graph-theory,proof-writing,computer-science",1,0,0,0,0.006212,0.000042,0.986519,2
1082989,Implementing Recursive descent algorithm for P...,"recursive-algorithms,linear-approximation",1,0,0,0,0.008582,0.000001,1.000000,2
...,...,...,...,...,...,...,...,...,...,...
2770,What are tail currents?,"neuroscience,electrophysiology",0,1,0,1,0.170936,0.859488,0.655186,1
20164,Telomere shortening during replication,"dna,molecular-genetics,telomere,replication",0,1,0,1,0.042501,0.961113,0.529834,1
14295,Is there a term for a procedure in which the c...,"terminology,methods,chromatography",0,1,0,1,0.102915,0.927216,0.838238,1
2001,Where does the proton come in the reduction of...,biochemistry,0,1,0,1,0.004415,0.661277,0.879828,2


## Cases where other models outvoted a wrong math-model prediction

In [19]:
# Non-math questions that the math model alone would have claimed
# (math_preds >= 0.5) but that still ended up with the correct final class.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].eq(test_df_with_labels['predicted_class'])  # final prediction is correct
    & test_df_with_labels['real_class'].ne(0)    # true class is not math
    & test_df_with_labels['math_preds'].ge(0.5)  # math model would predict math
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
2610,"What is ""contigs"" in Picard's ReorderSAM?",bioinformatics,0,1,0,1,0.595778,0.693915,0.336591,1
19708,Which part of oranges contain fiber?,"nutrition,fruit",0,1,0,1,0.554120,0.933833,0.079310,1
10954,Cannot conjugate Biotin-labeled DNA to Strepta...,"biochemistry,molecular-biology,pcr",0,1,0,1,0.520412,0.988774,0.007007,1
13501,Confusion about the construction of the rat's ...,"neuroscience,neurophysiology,neurology,neurotr...",0,1,0,1,0.530351,0.826168,0.206548,1
14074,The properties of benign tumours,"cancer,growth,apoptosis",0,1,0,1,0.696491,0.865576,0.494331,1
...,...,...,...,...,...,...,...,...,...,...
56228,Computability of an expression of a function r...,computability,0,0,1,2,0.801009,0.000405,0.880197,2
61977,Calculating the number of unique BST generatab...,"trees,probability-theory,permutations",0,0,1,2,0.572415,0.000208,0.924626,2
11528,Compute the next occurrence of Friday the 13th,"python,python-3.x,datetime",0,0,1,2,0.599956,0.195297,0.625400,2
71565,Rules regarding Chomsky Normal Form (CNF) gram...,"context-free,formal-grammars",0,0,1,2,0.505155,0.012156,0.949551,2


In [20]:
# Every non-math question on which the math model fired (math_preds >= 0.5),
# i.e. all wrong predictions of the math model taken on its own.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].ne(0)
    & test_df_with_labels['math_preds'].ge(0.5)
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
21483,Bayes theorem for mutations,"human-biology,genetics,dna,homework,statistics",0,1,0,1,0.616876,2.475875e-02,0.464884,0
1903,What's so special about Chassaignac tubercle?,"physiology,human-anatomy,cardiology,circulator...",0,1,0,1,0.788620,1.974203e-02,0.140232,0
23046,Is dinoprost PGF2-alpha or PGE2?,pharmacology,0,1,0,1,0.866290,2.413623e-01,0.024511,0
20846,Are Lambic beers the product of quorum sensing?,"microbiology,fermentation,quorum-sensing",0,1,0,1,0.972707,1.471987e-01,0.163562,0
2610,"What is ""contigs"" in Picard's ReorderSAM?",bioinformatics,0,1,0,1,0.595778,6.939147e-01,0.336591,1
...,...,...,...,...,...,...,...,...,...,...
82808,Transformation Function: Gonzalez and Woods,"notation,digital-image-processing",0,0,1,2,0.992776,7.445501e-04,0.011999,0
38849,"Prompting for grades, with confirmation if a s...","c++,validation",0,0,1,2,0.721706,9.917562e-01,0.315271,1
83516,How to find an axis-aligned hyper box whose se...,"algorithms,computational-geometry",0,0,1,2,0.997133,6.573083e-07,0.671530,0
71565,Rules regarding Chomsky Normal Form (CNF) gram...,"context-free,formal-grammars",0,0,1,2,0.505155,1.215629e-02,0.949551,2


## Cases where other models outvoted a wrong bio-model prediction

In [21]:
# Non-bio questions that the bio model alone would have claimed
# (bio_preds >= 0.5) but that still ended up with the correct final class.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].eq(test_df_with_labels['predicted_class'])  # final prediction is correct
    & test_df_with_labels['real_class'].ne(1)   # true class is not bio
    & test_df_with_labels['bio_preds'].ge(0.5)  # bio model would predict bio
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
827652,What is the difference between subgame perfect...,"real-analysis,game-theory",1,0,0,0,0.689291,0.643970,0.316368,0
873153,Find a sufficient statistic.,"estimation,parameter-estimation,estimation-theory",1,0,0,0,0.902755,0.598992,0.451036,0
19769,What is the probability that from 23 people 2 ...,"probability,probability-theory,birthday",1,0,0,0,0.677439,0.506101,0.017567,0
724657,Vector Components - Superposition of Forces,"linear-algebra,physics",1,0,0,0,0.648213,0.647027,0.185936,0
150003,What does the entries in Agency Matrix say?,matrices,1,0,0,0,0.951556,0.556278,0.025007,0
...,...,...,...,...,...,...,...,...,...,...
47528,What is a certificate in plain english?,"complexity-theory,np",0,0,1,2,0.188737,0.543542,0.951325,2
29579,Simulate Pascal's/Delphi's Insert in C,c,0,0,1,2,0.121408,0.733354,0.880493,2
33190,Entering pupil information,java,0,0,1,2,0.682131,0.606877,0.808774,2
38933,Asynchronous version of AutoResetEvent,"c#,.net,multithreading,asynchronous,async-await",0,0,1,2,0.178002,0.642078,0.791935,2


In [22]:
# Every non-bio question on which the bio model fired (bio_preds >= 0.5),
# i.e. all wrong predictions of the bio model taken on its own.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].ne(1)
    & test_df_with_labels['bio_preds'].ge(0.5)
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
611031,What if there is no enough bit to change (synd...,coding-theory,1,0,0,0,0.100390,0.637746,0.558531,1
727353,How does the quantumstate evolve?,"ordinary-differential-equations,physics,mathem...",1,0,0,0,0.044073,0.999999,0.010573,1
19009,"Interpret a relation definition, it's meaning ...","definition,relations",1,0,0,0,0.575963,0.927714,0.110593,1
827652,What is the difference between subgame perfect...,"real-analysis,game-theory",1,0,0,0,0.689291,0.643970,0.316368,0
47176,The volume of the salt at any instant,calculus,1,0,0,0,0.457314,0.737009,0.358782,1
...,...,...,...,...,...,...,...,...,...,...
82430,Backpropagation in multiple output neural netw...,"machine-learning,neural-networks",0,0,1,2,0.050017,0.844397,0.999985,2
53688,Can preorder and postorder traversals be used ...,data-structures,0,0,1,2,0.008374,0.919245,0.762088,1
48549,Turing tests and humans,"turing-machines,machine-learning,artificial-in...",0,0,1,2,0.000531,0.924240,0.681106,1
38849,"Prompting for grades, with confirmation if a s...","c++,validation",0,0,1,2,0.721706,0.991756,0.315271,1


## Cases where other models outvoted a correct code-model prediction

In [23]:
# Code questions that the code model recognised (code_preds >= 0.5) but whose
# final class is wrong because another model's score won the comparison.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].ne(test_df_with_labels['predicted_class'])  # final prediction is wrong
    & test_df_with_labels['real_class'].eq(2)    # true class is code
    & test_df_with_labels['code_preds'].ge(0.5)  # code model would predict code
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
7164,Count the occurence of nucleobases in DNA string,"programming-challenge,ruby,regex,comparative-r...",0,0,1,2,0.000202,8.600075e-01,0.654905,1
55676,Question on Recurrence $T^2(n) = T(n/2) * T(2n...,recurrence-relation,0,0,1,2,0.802088,1.869528e-02,0.695835,0
59270,Recurrence formula for a known sequence?,"combinatorics,recurrence-relation",0,0,1,2,0.909428,1.320584e-03,0.603272,0
71836,"For a given shape, find set of points with the...",packing,0,0,1,2,0.997243,4.256658e-05,0.933270,0
82735,What are transitive successor and transitive p...,"graphs,compilers",0,0,1,2,0.819033,6.083078e-03,0.753171,0
...,...,...,...,...,...,...,...,...,...,...
65231,Proving the set of finite languages is countab...,turing-machines,0,0,1,2,0.883752,7.520135e-07,0.595416,0
53688,Can preorder and postorder traversals be used ...,data-structures,0,0,1,2,0.008374,9.192447e-01,0.762088,1
20188,Ordered-by-insertion map,"c++,hash-map",0,0,1,2,0.704622,2.427735e-02,0.528171,0
48549,Turing tests and humans,"turing-machines,machine-learning,artificial-in...",0,0,1,2,0.000531,9.242398e-01,0.681106,1


In [24]:
# Every code question on which the code model fired (code_preds >= 0.5),
# regardless of the final prediction.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].eq(2)
    & test_df_with_labels['code_preds'].ge(0.5)
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
22430,Collecting all relevant data from xlsm files,"performance,vba,excel",0,0,1,2,0.002174,0.097563,0.972519,2
697,Asynchronous task and close sql connection,"c#,entity-framework,async-await",0,0,1,2,0.011358,0.003836,0.979920,2
42050,"Fetching data from a website using ""POST"" request","vba,web-scraping",0,0,1,2,0.024823,0.000368,0.996342,2
81780,Bellman-Ford algorithm - Why can edges be upda...,"algorithms,shortest-path",0,0,1,2,0.003505,0.000127,0.992811,2
17748,Routes for a user controller in a Node.js appl...,"javascript,node.js,express.js,url-routing",0,0,1,2,0.008295,0.000042,0.999999,2
...,...,...,...,...,...,...,...,...,...,...
71565,Rules regarding Chomsky Normal Form (CNF) gram...,"context-free,formal-grammars",0,0,1,2,0.505155,0.012156,0.949551,2
67798,"""Implied atomic propositions"" in propositional...",propositional-logic,0,0,1,2,0.751757,0.001532,0.817298,2
1448,Hackerrank Sum vs XoR,"c#,programming-challenge,time-limit-exceeded,b...",0,0,1,2,0.030458,0.001645,0.969061,2
31637,Transforming XML as it is being generated on a...,"xml,xslt",0,0,1,2,0.348931,0.095666,0.892323,2


## Cases where other models outvoted a correct math-model prediction

In [25]:
# Math questions that the math model recognised (math_preds >= 0.5) but whose
# final class is wrong because another model's score won the comparison.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].ne(test_df_with_labels['predicted_class'])  # final prediction is wrong
    & test_df_with_labels['real_class'].eq(0)    # true class is math
    & test_df_with_labels['math_preds'].ge(0.5)  # math model would predict math
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
653633,need to show image of $f$ contains the unit disk.,complex-analysis,1,0,0,0,0.616074,0.086869,0.824728,2
553452,How to use the Rules of Inference to a stateme...,"logic,discrete-mathematics,quantifiers,predica...",1,0,0,0,0.623226,0.059206,0.976278,2
1047456,Get the width and height of the inner well ali...,"geometry,rectangles",1,0,0,0,0.560443,0.067014,0.608596,2
1232109,A a.e. strongly convex function,convex-analysis,1,0,0,0,0.750501,0.000650,0.801448,2
19009,"Interpret a relation definition, it's meaning ...","definition,relations",1,0,0,0,0.575963,0.927714,0.110593,1
...,...,...,...,...,...,...,...,...,...,...
771122,How to show that R(binary relation on A x A) i...,"discrete-mathematics,relations",1,0,0,0,0.868672,0.000240,0.879930,2
284253,How to decide which pair is more relavent to e...,probability,1,0,0,0,0.701105,0.389796,0.864236,2
756123,Deconvolution with respect to a particular fun...,"convolution,estimation-theory,decision-theory,...",1,0,0,0,0.513850,0.018656,0.702519,2
198600,Essential singularity,complex-analysis,1,0,0,0,0.610192,0.622510,0.321568,1


In [26]:
# Every math question on which the math model fired (math_preds >= 0.5),
# regardless of the final prediction.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].eq(0)
    & test_df_with_labels['math_preds'].ge(0.5)
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
1010159,Math subject GRE test 9768 Q.26,"maxima-minima,gre-exam",1,0,0,0,0.605825,0.080062,0.442586,0
774145,Show that a power series is continuous in its ...,"real-analysis,sequences-and-series,convergence...",1,0,0,0,1.000000,0.000126,0.000396,0
864291,Proof verification that Q is dense in R,"real-analysis,self-learning,proof-verification",1,0,0,0,0.988245,0.003601,0.059158,0
816207,What are all the even positive integers $n$ su...,"number-theory,binomial-coefficients",1,0,0,0,0.931001,0.009177,0.088320,0
1276009,Non-Trivial Solutions To The Partial Different...,ordinary-differential-equations,1,0,0,0,0.999999,0.000188,0.002006,0
...,...,...,...,...,...,...,...,...,...,...
913707,The degree of a splitting field of a polynomia...,"abstract-algebra,field-theory,galois-theory,ex...",1,0,0,0,0.999992,0.000097,0.022522,0
1212137,Number of injective field homomorphism,"abstract-algebra,field-theory",1,0,0,0,0.994280,0.015579,0.005706,0
133386,"Heat equation, heat ball, and level set","partial-differential-equations,heat-equation",1,0,0,0,0.969606,0.082561,0.019552,0
1253100,How can we evaluate the graph by CDF?,"statistics,probability-distributions,descripti...",1,0,0,0,0.688257,0.008645,0.677775,0


## Cases where other models outvoted a correct bio-model prediction

In [27]:
# Bio questions that the bio model recognised (bio_preds >= 0.5) but whose
# final class is wrong because another model's score won the comparison.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].ne(test_df_with_labels['predicted_class'])  # final prediction is wrong
    & test_df_with_labels['real_class'].eq(1)   # true class is bio
    & test_df_with_labels['bio_preds'].ge(0.5)  # bio model would predict bio
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
14275,What are the advantages of heterologous expres...,"cell-biology,protein-expression",0,1,0,1,0.082251,0.786366,0.804044,2
5161,Goodness of fit: How to decide which ratio to ...,"statistics,biostatistics",0,1,0,1,0.262181,0.809809,0.825639,2
12477,tandem repeat sequence; causes of contraction ...,"molecular-biology,dna,homework,molecular-genet...",0,1,0,1,0.921729,0.863755,0.003395,0
11939,Access and decay time of long-term memory,"brain,memory",0,1,0,1,0.002906,0.765332,0.886802,2
21873,Is there a known glucosepane cross-link breaker?,"human-biology,biochemistry,physiology,pharmaco...",0,1,0,1,0.269967,0.549541,0.583432,2
11534,Constant or variable number of chiasmata durin...,"human-biology,genetics,cell-biology,dna,molecu...",0,1,0,1,0.519164,0.519112,0.153993,0
15750,probability of hairpin vs self-dimer formation?,"dna,primer",0,1,0,1,0.74527,0.737325,0.024077,0
19957,How do booklice avoid dessication?,"entomology,physiology,death",0,1,0,1,0.016643,0.62215,0.904743,2
18678,What does the Gini index mean in a biochemical...,"biochemistry,biostatistics",0,1,0,1,0.018971,0.577862,0.77379,2
8559,Global Acidification or Warming,global-warming,0,1,0,1,0.529344,0.623577,0.731388,2


In [28]:
# Every bio question on which the bio model fired (bio_preds >= 0.5),
# regardless of the final prediction.
test_df_with_labels.loc[
    test_df_with_labels['real_class'].eq(1)
    & test_df_with_labels['bio_preds'].ge(0.5)
]

Unnamed: 0,question,tags_str,math,bio,code,real_class,math_preds,bio_preds,code_preds,predicted_class
5688,Are abrasions considered closed wounds?,pathology,0,1,0,1,0.447555,0.940973,0.027536,1
17615,Why isn't Bubonic Plague as virulent as it onc...,bacteriology,0,1,0,1,0.049391,0.809215,0.338201,1
14390,Is there any evidence for secondarily poikilot...,"evolution,physiology,mammals",0,1,0,1,0.121749,0.994466,0.355761,1
6659,What could cause no pain but sense of touch?,"neuroscience,touch",0,1,0,1,0.017772,0.999999,0.006977,1
2921,Notation for repetitive nucleic acids,"molecular-biology,terminology,nucleic-acids",0,1,0,1,0.296801,0.955513,0.070480,1
...,...,...,...,...,...,...,...,...,...,...
12953,How to dialyze large amounts of precipitated e...,"enzymes,purification",0,1,0,1,0.176892,0.979887,0.031029,1
3625,Are these cats calico?,"genetics,zoology,feline",0,1,0,1,0.061913,0.984917,0.103034,1
18879,NMDA receptor mediated plasticity figure refer...,"neuroscience,neurophysiology,neurotransmitter,...",0,1,0,1,0.079935,0.976596,0.102427,1
18543,chaperone protein names,protein-folding,0,1,0,1,0.001775,1.000000,0.037882,1
