In [1]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
import gensim
from scipy.sparse.csr import csr_matrix
import time
from functools import reduce
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from functools import reduce
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import make_pipeline
from preprocessor import normalize_money, normalize_number, stemmer, pipe
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import LinearSVC
import sys

In [2]:
# Load the gold-standard annotations. `sense` is forced to str so sense IDs are
# handled as text rather than parsed as numbers.
gold_standard = pd.read_csv(
    '../gold_standard.csv',
    dtype={'sense': str},
)
gold_standard.head()

Unnamed: 0,id,word,sense,kalimat
0,13,asing,,"Para pecinta film indonesia atau tv, pasti tak..."
1,19,asing,5301.0,Pasti telinga kita merasa asing dan aneh mende...
2,41,asing,5302.0,Warga negara asing atau warga negara Persemakm...
3,44,asing,,"Selama lima belas tahun memerintah, Sultan Mah..."
4,121,asing,5302.0,Yang kemudian diikuti dengan donat-donat waral...


In [3]:
# Distinct target words that appear in the gold standard.
ambiguous_words = set(gold_standard['word'])

# Dummy Classifier

In [4]:
# Predictions of the dummy baseline. The file is read with header=None, so the
# column names are supplied explicitly; `sense` is kept as str.
result = pd.read_csv(
    '../dummy_baseline_classification.csv',
    header=None,
    names=['id', 'word', 'sense'],
    dtype={'sense': str},
)
result.head()

Unnamed: 0,id,word,sense
0,13,asing,5302
1,19,asing,5302
2,41,asing,5302
3,44,asing,5302
4,121,asing,5302


# Supervised WSD + Supervised NER + Rule-based MWE

In [56]:
# Predictions of the supervised WSD + supervised NER + rule-based MWE system.
# This rebinds `result`, so anything evaluated after this cell scores this file
# rather than the dummy baseline loaded earlier under the same name.
result = pd.read_csv(
    '../supervised_wsd_supervised_ner_rulebased_mwe.csv',
    header=None,
    names=['id', 'word', 'sense'],
    dtype={'sense': str},
)
result.head()

Unnamed: 0,id,word,sense
0,13,asing,5301
1,19,asing,5301
2,41,asing,5302
3,44,asing,5302
4,121,asing,5302


# Labeled data only

In [30]:
# Boolean mask over gold_standard rows: True where a gold sense is present.
labeled = gold_standard['sense'].notna()

In [33]:
# Bare name: the cell output is just the function's repr (its signature).
# Inspection only -- it does not change any notebook state.
classification_report

<function sklearn.metrics.classification.classification_report(y_true, y_pred, labels=None, target_names=None, sample_weight=None, digits=2, output_dict=False)>

In [98]:
# Number of gold-standard rows that carry a sense label.
len(gold_standard['sense'].dropna())

3385

# Evaluate

In [94]:
# One empty list per output column; the evaluation loop appends one entry per word.
evaluation = {column: [] for column in ('word', 'accuracy', 'precision', 'recall', 'f1')}

In [95]:
# Per-word evaluation of `result` against `gold_standard`, restricted to rows
# that have a gold sense (`labeled`).
#
# Produces:
#   evaluation      -- dict of per-word metric lists (one entry per word)
#   sum_correct_ans -- number of correct predictions over all labeled rows
#   report          -- DataFrame built from `evaluation` (displayed at the end)

# Re-create the accumulators here so that re-running this cell starts from a
# clean slate instead of appending a second copy of every word.
evaluation = {'word': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
sum_correct_ans = 0

for w in sorted(ambiguous_words):
    evaluation['word'].append(w)

    # `result[labeled]` reuses the mask computed from `gold_standard`, so the
    # two frames are assumed to share the same row index/order -- TODO confirm.
    y_true = gold_standard[labeled].query('word == "{}"'.format(w)).sense
    y_pred = result[labeled].query('word == "{}"'.format(w)).sense

    if len(y_true) == 0:
        # No labeled rows for this word: there is nothing to score and the
        # averages below would divide by zero. Record NaN so the word still
        # appears in the table; pandas' mean() skips NaN by default.
        for metric in ('accuracy', 'precision', 'recall', 'f1'):
            evaluation[metric].append(np.nan)
        continue

    correct_ans = np.sum(np.array(y_true) == np.array(y_pred))
    sum_correct_ans += correct_ans

    # Named `word_report` so it does not share a name with the final
    # `report` DataFrame created after the loop.
    word_report = classification_report(y_true, y_pred, output_dict=True)

    # Average only over senses that occur in the gold labels (support > 0);
    # senses that appear solely in the predictions are left out of the mean.
    # Sorting fixes the summation order, making the floats reproducible.
    gold_labels = sorted(set(y_true))
    label_count = len(gold_labels)
    sum_precision = sum(word_report[label]['precision'] for label in gold_labels)
    sum_recall = sum(word_report[label]['recall'] for label in gold_labels)
    sum_f1 = sum(word_report[label]['f1-score'] for label in gold_labels)

    evaluation['accuracy'].append(correct_ans / len(y_true))
    evaluation['f1'].append(sum_f1 / label_count)
    evaluation['precision'].append(sum_precision / label_count)
    evaluation['recall'].append(sum_recall / label_count)

report = pd.DataFrame(evaluation)
# Micro accuracy pools every labeled row; the macro figures are unweighted
# means of the per-word values.
print('Micro accuracy:', sum_correct_ans/len(gold_standard[labeled]))
print('Macro accuracy:', report.accuracy.mean())
print('Macro precision:', report.precision.mean())
print('Macro recall:', report.recall.mean())
print('Macro f1:', report.f1.mean())
report

Micro accuracy: 0.43870014771048743
Macro accuracy: 0.45706757958468536
Macro precision: 0.1484041387332503
Macro recall: 0.2908289241622575
Macro f1: 0.18954348814265493


Unnamed: 0,word,accuracy,precision,recall,f1
0,asing,0.621212,0.207071,0.333333,0.255452
1,atas,0.178947,0.022368,0.125,0.037946
2,badan,0.387097,0.096774,0.25,0.139535
3,baru,0.629032,0.157258,0.25,0.193069
4,berat,0.449438,0.074906,0.166667,0.103359
5,besar,0.134021,0.019146,0.142857,0.033766
6,bidang,0.734694,0.244898,0.333333,0.282353
7,bintang,0.4,0.133333,0.333333,0.190476
8,bisa,0.612245,0.204082,0.333333,0.253165
9,buah,0.530303,0.265152,0.5,0.346535


In [96]:
# Persist the per-word metrics table.
# NOTE(review): the file name says "dummy", but `report` is computed from whatever
# `result` currently holds, and both the dummy-baseline cell and the supervised
# cell assign `result`. Confirm which predictions were scored before relying on
# this file name.
report.to_csv('test_result_dummy.csv')

In [100]:
# Scratch experiment, unrelated to the evaluation: a nested dict yields a single
# column ('lol') indexed by the inner keys, with each cell holding the whole list
# (see the output below).
pd.DataFrame({
    'lol': {
        'a': [1,2,3],
        'b': [3,2,4]
    }
})

Unnamed: 0,lol
a,"[1, 2, 3]"
b,"[3, 2, 4]"
