# SemEval Pattern Matchers on our dataset

In [1]:
import pandas as pd
from sklearn.metrics import f1_score as f1, accuracy_score as acc, precision_score as prec, recall_score as rec, matthews_corrcoef as mcc
import numpy
from nltk.tokenize import word_tokenize
import nltk
import re
from collections import Counter
from textblob import TextBlob
# from spellchecker import SpellChecker
import string
import numpy as np

sigdig = 3

## The Data

In [2]:
data = 'askparents'

train = pd.read_csv('../../annotated_data/' + data + '_train.tsv', sep='\t', header=0)
train['Sentence'] = train['Sentence'].apply(lambda x: x.lower())
train_sentences = train['Sentence'].tolist()
train_labels_DS = train['DS_Label'].values
train_labels_Maj = train['Majority_label'].values


dev = pd.read_csv('../../annotated_data/' + data + '_dev.tsv', sep='\t', header=0)
dev['Sentence'] = dev['Sentence'].apply(lambda x: x.lower())
dev_sentences = dev['Sentence'].tolist()
dev_labels_DS = dev['DS_Label'].values
dev_labels_Maj = dev['Majority_label'].values

test = pd.read_csv('../../annotated_data/' + data + '_test.tsv', sep='\t', header=0)
test['Sentence'] = test['Sentence'].apply(lambda x: x.lower())
test_sentences_ap = test['Sentence'].tolist()
test_labels_DS_ap = test['DS_Label'].values
test_labels_Maj_ap = test['Majority_label'].values

In [3]:
print("1 is advice, 0 is not.")
print("Distribution of Train set:", Counter(train_labels_DS), np.round(Counter(train_labels_DS)[1]/len(train_labels_DS),2))
print("Distribution of Dev set:", Counter(dev_labels_DS),  np.round(Counter(dev_labels_DS)[1]/len(dev_labels_DS), 2))
print("Distribution of Test set:", Counter(test_labels_DS_ap),  np.round(Counter(test_labels_DS_ap)[1]/len(test_labels_DS_ap), 2))


1 is advice, 0 is not.
Distribution of Train set: Counter({0: 6205, 1: 2496}) 0.29
Distribution of Dev set: Counter({0: 534, 1: 268}) 0.33
Distribution of Test set: Counter({0: 806, 1: 285}) 0.26


In [24]:
train['Post.ID'] = train['ID'].apply(lambda x: x.split('-')[0])
print(len(train['Post.ID'].unique()))

223

## SemEval Baseline

In [4]:
def classify(sent_list):

    keywords = ["suggest","recommend","hopefully","go for","request","it would be nice","adding",
                "should come with","should be able","could come with", "i need" , "we need","needs", 
                "would like to","would love to","allow","add"]

    # Goldberg et al.
    pattern_strings = [r'.*would\slike.*if.*', r'.*i\swish.*', r'.*i\shope.*', r'.*i\swant.*', 
                       r'.*hopefully.*', r".*if\sonly.*", r".*would\sbe\sbetter\sif.*", r".*should.*",
                       r".*would\sthat.*", r".*can't\sbelieve.*didn't.*", r".*don't\sbelieve.*didn't.*", 
                       r".*do\swant.*", r".*i\scan\shas.*"]
    compiled_patterns = []
    for patt in pattern_strings:
        compiled_patterns.append(re.compile(patt))


    label_list = []
    for sent in sent_list:
        tokenized_sent = word_tokenize(sent)
        tagged_sent = nltk.pos_tag(tokenized_sent)
        tags = [i[1] for i in tagged_sent]
        label = 0
        patt_matched = False
        for compiled_patt in compiled_patterns:
            joined_sent = " ".join(tokenized_sent)
            matches = compiled_patt.findall(joined_sent)
            if len(matches) > 0:
                patt_matched = True
        keyword_match = any(elem in keywords for elem in tokenized_sent)

        pos_match = any(elem in ['MD', 'VB'] for elem in tags)

        if patt_matched:
            label = 1
        elif keyword_match == True:
            label = 1
        elif pos_match == True:
            label = 1

        label_list.append(label)

    return label_list

In [5]:
dev_pred_labels_baseline = classify(dev_sentences)
print("F1:", np.round(f1(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("MCC: ", np.round(mcc(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("Acc: ", np.round(acc(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("Precision: ", np.round(prec(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("Recall: ", np.round(rec(dev_labels_DS, dev_pred_labels_baseline), sigdig))


F1: 0.489
MCC:  0.123
Acc:  0.527
Precision:  0.383
Recall:  0.675


In [6]:
print("All 1 F1 on dev:", np.round(f1(dev_labels_DS, [1 for i in range(len(dev_labels_DS))]), sigdig))
print("All 1 precision on dev:", np.round(prec(dev_labels_DS, [1 for i in range(len(dev_labels_DS))]), sigdig))
print("All 1 recall on dev:", np.round(rec(dev_labels_DS, [1 for i in range(len(dev_labels_DS))]), sigdig))
print("All 1 acc on dev:", np.round(acc(dev_labels_DS, [0 for i in range(len(dev_labels_DS))]), sigdig))
print("All 1 mcc on dev:", np.round(mcc(dev_labels_DS, [1 for i in range(len(dev_labels_DS))]), sigdig))

All 1 F1 on dev: 0.501
All 1 precision on dev: 0.334
All 1 recall on dev: 1.0
All 1 acc on dev: 0.666
All 1 mcc on dev: 0.0


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


# NTUA-IS stuff

## Subtask A Classifier

In [7]:
def gr_classify(sent_list, sk, P_ab=True, P_c=True, imperative=True, spelling=False):
    
    # words from above with other example words they included - P_a
    pattern_pa = ["suggest","recommend","hopefully","go for","request","it would be nice","adding",
                   "should come with","should be able","could come with", "i need" , "we need","needs", 
                   "would like to","would love to","allow","add", "helpful", "allow", "disallow", "idea",
                   "consider"]

    # Goldberg et al.
    pattern_pc = [r'.*would\slike.*if.*', r'.*i\swish.*', r'.*i\shope.*', r'.*i\swant.*', 
                  r'.*hopefully.*', r".*if\sonly.*", r".*would\sbe\sbetter\sif.*", r".*should.*",
                  r".*would\sthat.*", r".*can't\sbelieve.*didn't.*", r".*don't\sbelieve.*didn't.*", 
                  r".*do\swant.*", r".*i\scan\shas.*"]
    
    # pattern list P_c rules for subtask A
    pattern_pc += [r'.*should\s(not|be|take|include|start).*', r'.*be\sbetter.*', r'.*that\sway.*',
                   r'.*so\sthat.*', r'.*why\snot.*', r'.*suggestion\sis.*', r'.*good\ssolution.*',
                   r'.*the\sidea.*', r'.*to\sallow.*', r'.*would\smake.*', r'.*(will|would)\sbe.*',
                   r'.*(to|would|could)\senable\s(i|would|id)\s(like|prefer).*', r'.*am\sasking\sfor.*',
                   r'.*look\sinto.*', r'.*make\sit.*', r'.*at\sleast.*', r'.*we\sneed.*']
    compiled_pc = [re.compile(patt) for patt in pattern_pc]
    
    # pattern list P_b rules for subtask B (and possibly the same for subtask A)
    # pattern list P_b rules for subtask A
    pattern_pb = [r'.*do\snot.*', r'.*if\sonly.*', r'.*(so|before|can|for|if)\syou.*', 
                   r'.*you\s(will|need|can|may).*', r'.*(make|be)\ssure.*', r'.*watch\sout.*', 
                   r'.*(go|going|asking|wishing)\sfor.*', r'.*would\sadvise.*', 
                   r'.*(will|would|could)\sbe.*', r'.*be\s(prepared|careful|warned|forewarned).*',
                   r'.*(i/would/i\'d)\s(like|prefer).*', r'.*highly\srecommended.*', 
                   r'.*(look|looking)\s(into|for|up|around).*', r'.*why\snot.*', r'.*is\sthere.*',
                   r'.*we\sneed.*']
    compiled_pb = [re.compile(patt) for patt in pattern_pb]
        
    pos_pattern_strings = [r'^UH\sVBP.*', r'^MD\sRB\sPRP.*', r'^(VB|VBP).*', r'^MD.*', 
                           r'^(DT|RB|PRP|NN)\sVB.*']
    compiled_pos_patterns = [re.compile(patt) for patt in pos_pattern_strings]


    label_list = []
    for sent in sent_list:
        score = 0
        
        if len(sent.split()) < 5:
            score -=0.2
        
        clause_split = [a for a in re.split("[.,!?;]|(Please|please)", sent) if a not in 
                        [None, '', ' ', 'Please', 'please']]
        for clause in clause_split:
            clause_pos = TextBlob(clause).tags
            
            words = [i[0] for i in clause_pos]
            tags = [i[1] for i in clause_pos]
            
            # Correct misspells
            if spelling:
                words = [spell.correction(w) if w not in spell else w for w in words]
            
            if P_ab:            
                # Pattern P_a
                if any(elem in pattern_pa for elem in words):
                    score += 0.3

                # Pattern P_b
                for compiled_patt in compiled_pb:
                    joined_sent = " ".join(words)
                    matches = compiled_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += 0.1
            if P_c:
                # Pattern P_c
                for compiled_patt in compiled_pc:
                    joined_sent = " ".join(words)
                    matches = compiled_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += 0.25

            if imperative:
                # Imperative POS pattern check
                for compiled_pos_patt in compiled_pos_patterns:
                    joined_sent = " ".join(tags)
                    matches = compiled_pos_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += sk

        if score > 0:
            label_list.append(1)
        else:
            label_list.append(0)

    return label_list

In [8]:
dev_pred_labels = gr_classify(dev_sentences, sk=0)
print("F1:", f1(dev_labels_DS, dev_pred_labels))
print("Precision:", prec(dev_labels_DS, dev_pred_labels))
print("Recall:", rec(dev_labels_DS, dev_pred_labels))

F1: 0.25
Precision: 0.3787878787878788
Recall: 0.1865671641791045


## Subtask B

In [9]:
def gr_classify_b(sent_list, pos_s, P_a=True, P_b=True, imperative=True, spelling=False):
    
    # words from above with other example words they included - P_a
    pattern_pa = ['avoid', 'beware', "don't", 'expect', 'remember', 'tip', 'advise', 'advice', 'recommended',
                  'recommendation', 'suggest', 'suggestion', 'ask', 'bring', 'pick', 'consider', 'spend', 
                  'expect', 'can', 'please', 'can', 'hopefully', 'enjoying', 'want', 'wanting', 'prefer']
    

#     # Goldberg et al.
    pattern_pc = [r'.*would\slike.*if.*', r'.*i\swish.*', r'.*i\shope.*', r'.*i\swant.*', 
                  r'.*hopefully.*', r".*if\sonly.*", r".*would\sbe\sbetter\sif.*", r".*should.*",
                  r".*would\sthat.*", r".*can't\sbelieve.*didn't.*", r".*don't\sbelieve.*didn't.*", 
                  r".*do\swant.*", r".*i\scan\shas.*"]
    
    # pattern list P_c rules for subtask A
    pattern_pc += [r'.*should\s(not|be|take|include|start).*', r'.*be\sbetter.*', r'.*that\sway.*',
                   r'.*so\sthat.*', r'.*why\snot.*', r'.*suggestion\sis.*', r'.*good\ssolution.*',
                   r'.*the\sidea.*', r'.*to\sallow.*', r'.*would\smake.*', r'.*(will|would)\sbe.*',
                   r'.*(to|would|could)\senable\s(i|would|id)\s(like|prefer).*', r'.*am\sasking\sfor.*',
                   r'.*look\sinto.*', r'.*make\sit.*', r'.*at\sleast.*', r'.*we\sneed.*']
    compiled_pc = [re.compile(patt) for patt in pattern_pc]
    
    # pattern list P_b rules for subtask B (and possibly the same for subtask A)
    # pattern list P_b rules for subtask A
    pattern_pb = [r'.*do\snot.*', r'.*if\sonly.*', r'.*(so|before|can|for|if)\syou.*', 
                   r'.*you\s(will|need|can|may).*', r'.*(make|be)\ssure.*', r'.*watch\sout.*', 
                   r'.*(go|going|asking|wishing)\sfor.*', r'.*would\sadvise.*', 
                   r'.*(will|would|could)\sbe.*', r'.*be\s(prepared|careful|warned|forewarned).*',
                   r'.*(i/would/i\'d)\s(like|prefer).*', r'.*highly\srecommended.*', 
                   r'.*(look|looking)\s(into|for|up|around).*', r'.*why\snot.*', r'.*is\sthere.*',
                   r'.*we\sneed.*']
    compiled_pb = [re.compile(patt) for patt in pattern_pb]
        
    pos_pattern_strings = [r'^UH\sVBP.*', r'^MD\sRB\sPRP.*', r'^(VB|VBP).*', r'^MD.*', 
                           r'^(DT|RB|PRP|NN)\sVB.*']
    compiled_pos_patterns = [re.compile(patt) for patt in pos_pattern_strings]


    label_list = []
    for sent in sent_list:
        score = 0
        
        if len(sent.split()) < 5:
            score -=0.2
        
        clause_split = [a for a in re.split("[.,!?;]|(please)", sent) if a not in 
                        [None, '', ' ', 'please']]
        for clause in clause_split:
            clause_pos = TextBlob(clause).tags
            
            words = [i[0] for i in clause_pos]
            tags = [i[1] for i in clause_pos]
            
            # Correct misspells
            if spelling:
                words = [spell.correction(w) if w not in spell else w for w in words]
            
            if P_a:            
                # Pattern P_a
                if any(elem in pattern_pa for elem in words):
                    score += 0.25


            if P_b:
                # Pattern P_b
                for compiled_patt in compiled_pb:
                    joined_sent = " ".join(words)
                    matches = compiled_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += 0.1

            if imperative:
                # Imperative POS pattern check
                for compiled_pos_patt in compiled_pos_patterns:
                    joined_sent = " ".join(tags)
                    matches = compiled_pos_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += pos_s

        if score > 0.1:
            label_list.append(1)
        else:
            label_list.append(0)

    return label_list

In [10]:
dev_pred_labels_b = gr_classify_b(dev_sentences, pos_s=0.15)
print("F1:", f1(dev_labels_DS, dev_pred_labels_b))
print("Precision:", prec(dev_labels_DS, dev_pred_labels_b))
print("Recall:", rec(dev_labels_DS, dev_pred_labels_b))

F1: 0.43722304283604135
Precision: 0.36185819070904646
Recall: 0.5522388059701493


### Both combined

In [11]:
def gr_classify_all(sent_list, pos_s, P_a=True, P_b=True, P_c=True, imperative=True, spelling=False):
    
    # words from above with other example words they included - P_a
    pattern_pa = ["suggest","recommend","hopefully","go for","request","it would be nice","adding",
                   "should come with","should be able","could come with", "i need" , "we need","needs", 
                   "would like to","would love to","allow","add", "helpful", "allow", "disallow", "idea",
                   "consider"]
    pattern_pa += ['avoid', 'beware', "don't", 'expect', 'remember', 'tip', 'advise', 'advice', 'recommended',
                  'recommendation', 'suggest', 'suggestion', 'ask', 'bring', 'pick', 'consider', 'spend', 
                  'expect', 'can', 'please', 'can', 'hopefully', 'enjoying', 'want', 'wanting', 'prefer']
    

#     # Goldberg et al.
    pattern_pc = [r'.*would\slike.*if.*', r'.*i\swish.*', r'.*i\shope.*', r'.*i\swant.*', 
                  r'.*hopefully.*', r".*if\sonly.*", r".*would\sbe\sbetter\sif.*", r".*should.*",
                  r".*would\sthat.*", r".*can't\sbelieve.*didn't.*", r".*don't\sbelieve.*didn't.*", 
                  r".*do\swant.*", r".*i\scan\shas.*"]
    
    # pattern list P_c rules for subtask A
    pattern_pc += [r'.*should\s(not|be|take|include|start).*', r'.*be\sbetter.*', r'.*that\sway.*',
                   r'.*so\sthat.*', r'.*why\snot.*', r'.*suggestion\sis.*', r'.*good\ssolution.*',
                   r'.*the\sidea.*', r'.*to\sallow.*', r'.*would\smake.*', r'.*(will|would)\sbe.*',
                   r'.*(to|would|could)\senable\s(i|would|id)\s(like|prefer).*', r'.*am\sasking\sfor.*',
                   r'.*look\sinto.*', r'.*make\sit.*', r'.*at\sleast.*', r'.*we\sneed.*']
    compiled_pc = [re.compile(patt) for patt in pattern_pc]
    
    # pattern list P_b rules for subtask B (and possibly the same for subtask A)
    # pattern list P_b rules for subtask A
    pattern_pb = [r'.*do\snot.*', r'.*if\sonly.*', r'.*(so|before|can|for|if)\syou.*', 
                   r'.*you\s(will|need|can|may).*', r'.*(make|be)\ssure.*', r'.*watch\sout.*', 
                   r'.*(go|going|asking|wishing)\sfor.*', r'.*would\sadvise.*', 
                   r'.*(will|would|could)\sbe.*', r'.*be\s(prepared|careful|warned|forewarned).*',
                   r'.*(i/would/i\'d)\s(like|prefer).*', r'.*highly\srecommended.*', 
                   r'.*(look|looking)\s(into|for|up|around).*', r'.*why\snot.*', r'.*is\sthere.*',
                   r'.*we\sneed.*']
    compiled_pb = [re.compile(patt) for patt in pattern_pb]
        
    pos_pattern_strings = [r'^UH\sVBP.*', r'^MD\sRB\sPRP.*', r'^(VB|VBP).*', r'^MD.*', 
                           r'^(DT|RB|PRP|NN)\sVB.*']
    compiled_pos_patterns = [re.compile(patt) for patt in pos_pattern_strings]


    label_list = []
    for sent in sent_list:
        score = 0
        
        if len(sent.split()) < 5:
            score -=0.2
        
        clause_split = [a for a in re.split("[.,!?;]|(please)", sent) if a not in 
                        [None, '', ' ', 'please']]
        for clause in clause_split:
            clause_pos = TextBlob(clause).tags
            
            words = [i[0] for i in clause_pos]
            tags = [i[1] for i in clause_pos]
            
            # Correct misspells
            if spelling:
                words = [spell.correction(w) if w not in spell else w for w in words]
            
            if P_a:            
                # Pattern P_a
                if any(elem in pattern_pa for elem in words):
                    score += 0.25


            if P_b:
                # Pattern P_b
                for compiled_patt in compiled_pb:
                    joined_sent = " ".join(words)
                    matches = compiled_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += 0.1
            
            if P_c:
                # Pattern P_c
                for compiled_patt in compiled_pc:
                    joined_sent = " ".join(words)
                    matches = compiled_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += 0.25

            if imperative:
                # Imperative POS pattern check
                for compiled_pos_patt in compiled_pos_patterns:
                    joined_sent = " ".join(tags)
                    matches = compiled_pos_patt.findall(joined_sent)
                    if len(matches) > 0:
                        score += pos_s

        if score > 0.1:
            label_list.append(1)
        else:
            label_list.append(0)

    return label_list

In [12]:
dev_pred_labels_b = gr_classify_all(dev_sentences, pos_s=0.15)
print("F1:", np.round(f1(dev_labels_DS, dev_pred_labels_b), sigdig))
print("MCC: ", np.round(mcc(dev_labels_DS, dev_pred_labels_b), sigdig))
print("Acc: ", np.round(acc(dev_labels_DS, dev_pred_labels_b), sigdig))
print("Precision: ", np.round(prec(dev_labels_DS, dev_pred_labels_b), sigdig))
print("Recall: ", np.round(rec(dev_labels_DS, dev_pred_labels_b), sigdig))

F1: 0.455
MCC:  0.072
Acc:  0.516
Precision:  0.365
Recall:  0.604


# Need Advice

In [13]:
data = 'needadvice'

train = pd.read_csv('../../annotated_data/' + data + '_train.tsv', sep='\t', header=0)
train['Sentence'] = train['Sentence'].apply(lambda x: x.lower())
train_sentences = train['Sentence'].tolist()
train_labels_DS = train['DS_Label'].values
train_labels_Maj = train['Majority_label'].values


dev = pd.read_csv('../../annotated_data/' + data + '_dev.tsv', sep='\t', header=0)
dev['Sentence'] = dev['Sentence'].apply(lambda x: x.lower())
dev_sentences = dev['Sentence'].tolist()
dev_labels_DS = dev['DS_Label'].values
dev_labels_Maj = dev['Majority_label'].values

test = pd.read_csv('../../annotated_data/' + data + '_test.tsv', sep='\t', header=0)
test['Sentence'] = test['Sentence'].apply(lambda x: x.lower())
test_sentences_na = test['Sentence'].tolist()
test_labels_DS_na = test['DS_Label'].values
test_labels_Maj_na = test['Majority_label'].values

In [14]:
print("1 is advice, 0 is not.")
print("Distribution of Train set:", Counter(train_labels_DS), np.round(Counter(train_labels_DS)[1]/len(train_labels_DS),2))
print("Distribution of Dev set:", Counter(dev_labels_DS),  np.round(Counter(dev_labels_DS)[1]/len(dev_labels_DS), 2))
print("Distribution of Test set:", Counter(test_labels_DS_na),  np.round(Counter(test_labels_DS_na)[1]/len(test_labels_DS_na), 2))



1 is advice, 0 is not.
Distribution of Train set: Counter({0: 3851, 1: 2297}) 0.37
Distribution of Dev set: Counter({0: 542, 1: 274}) 0.34
Distribution of Test set: Counter({0: 568, 1: 330}) 0.37


## SEMEVAL Baseline

In [15]:
dev_pred_labels_baseline = classify(dev_sentences)
print("F1:", np.round(f1(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("MCC: ", np.round(mcc(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("Acc: ", np.round(acc(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("Precision: ", np.round(prec(dev_labels_DS, dev_pred_labels_baseline), sigdig))
print("Recall: ", np.round(rec(dev_labels_DS, dev_pred_labels_baseline), sigdig))

F1: 0.557
MCC:  0.248
Acc:  0.559
Precision:  0.42
Recall:  0.825


## NTUA-IS rules

In [16]:
dev_pred_labels_b = gr_classify_all(dev_sentences, pos_s=0.15)
print("F1:", np.round(f1(dev_labels_DS, dev_pred_labels_b), sigdig))
print("MCC: ", np.round(mcc(dev_labels_DS, dev_pred_labels_b), sigdig))
print("Acc: ", np.round(acc(dev_labels_DS, dev_pred_labels_b), sigdig))
print("Precision: ", np.round(prec(dev_labels_DS, dev_pred_labels_b), sigdig))
print("Recall: ", np.round(rec(dev_labels_DS, dev_pred_labels_b), sigdig))

F1: 0.483
MCC:  0.098
Acc:  0.506
Precision:  0.372
Recall:  0.686


# Test results

## Askparents

### SEMEVAL

In [17]:
test_pred_labels_bs_ap = classify(test_sentences_ap)
print("F1:", np.round(f1(test_labels_DS_ap, test_pred_labels_bs_ap), sigdig))
print("MCC: ", np.round(mcc(test_labels_DS_ap, test_pred_labels_bs_ap), sigdig))
print("Acc: ", np.round(acc(test_labels_DS_ap, test_pred_labels_bs_ap), sigdig))
print("Precision: ", np.round(prec(test_labels_DS_ap, test_pred_labels_bs_ap), sigdig))
print("Recall: ", np.round(rec(test_labels_DS_ap, test_pred_labels_bs_ap), sigdig))

F1: 0.446
MCC:  0.169
Acc:  0.544
Precision:  0.327
Recall:  0.702


### NTUA-IS

In [18]:
test_pred_labels_b_ap = gr_classify_all(test_sentences_ap, pos_s=0.15)
print("F1:", np.round(f1(test_labels_DS_ap, test_pred_labels_b_ap), sigdig))
print("MCC: ", np.round(mcc(test_labels_DS_ap, test_pred_labels_b_ap), sigdig))
print("Acc: ", np.round(acc(test_labels_DS_ap, test_pred_labels_b_ap), sigdig))
print("Precision: ", np.round(prec(test_labels_DS_ap, test_pred_labels_b_ap), sigdig))
print("Recall: ", np.round(rec(test_labels_DS_ap, test_pred_labels_b_ap), sigdig))

F1: 0.423
MCC:  0.129
Acc:  0.537
Precision:  0.314
Recall:  0.649


## NeedAdvice

### SEMEVAL

In [19]:
test_pred_labels_bs_na = classify(test_sentences_na)
print("F1:", np.round(f1(test_labels_DS_na, test_pred_labels_bs_na), sigdig))
print("MCC: ", np.round(mcc(test_labels_DS_na, test_pred_labels_bs_na), sigdig))
print("Acc: ", np.round(acc(test_labels_DS_na, test_pred_labels_bs_na), sigdig))
print("Precision: ", np.round(prec(test_labels_DS_na, test_pred_labels_bs_na), sigdig))
print("Recall: ", np.round(rec(test_labels_DS_na, test_pred_labels_bs_na), sigdig))

F1: 0.572
MCC:  0.225
Acc:  0.559
Precision:  0.445
Recall:  0.803


### NTUA-IS

In [20]:
test_pred_labels_b_na = gr_classify_all(test_sentences_na, pos_s=0.15)
print("F1:", np.round(f1(test_labels_DS_na, test_pred_labels_b_na), sigdig))
print("MCC: ", np.round(mcc(test_labels_DS_na, test_pred_labels_b_na), sigdig))
print("Acc: ", np.round(acc(test_labels_DS_na, test_pred_labels_b_na), sigdig))
print("Precision: ", np.round(prec(test_labels_DS_na, test_pred_labels_b_na), sigdig))
print("Recall: ", np.round(rec(test_labels_DS_na, test_pred_labels_b_na), sigdig))

F1: 0.535
MCC:  0.161
Acc:  0.548
Precision:  0.43
Recall:  0.709


In [21]:
Counter(test_pred_labels_b_na)

Counter({1: 544, 0: 354})