    Use CAL to find the most contrastive examples for annotation.

In [1]:
# Label
# 0: Physical
# 1: Mental

# Load Environment

In [3]:
'''
To use nltk's wordnet, you should download WordNet from https://www.nltk.org/nltk_data/ and then unzip wordnet.zip
under your home directory with path as /home_path/nltk_data/corpora/

'''
from nltk.corpus import wordnet as wn
import pandas as pd
import numpy as np
import tqdm
from collections import defaultdict
import joblib
import time
import json

from cal import cal

In [18]:
# Words are represented by fastText embedding vectors; `fast_model` is the handle later passed to `cal(...)`.
# As 'cc.en.300.bin' is very large, it is not included in the submission. Download URL: https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz
# More details: https://fasttext.cc/docs/en/crawl-vectors.html
# NOTE(review): the .gz download presumably has to be decompressed into `source/` before this cell runs - confirm.

import fasttext as ft
fast_model=ft.load_model('source/cc.en.300.bin')



In [4]:
# Pool of all valid adjectives; this notebook reads its `word` and `text` (definition) columns.
adj=pd.read_csv('Data/adj.csv') # load extracted adjectives from `high_entropy.ipynb`
adj.shape

(7292, 4)

# Build TestSet

In [5]:
# Held-out evaluation set; this notebook reads its `word` and `target` (0: physical, 1: mental) columns.
testset=pd.read_csv('Data/test.csv')

# CAL

    All labeling steps below are carried out by two annotators. If their labels disagree, a third person arbitrates.
    
    In iteration T, for each word in `unlabeled_words`, its `neighK` nearest neighbors in `labeled_words` are found 
    and the KL divergence between their predicted probabilities is computed. Each word in `unlabeled_words` is thus 
    associated with a scalar score. After sorting by this score in descending order, the top `acq_size` words are selected for annotation as the most contrastive samples.

In [16]:
acq_size=120 # number of most contrastive unlabeled words selected for annotation per iteration
neighK=10  # number of nearest labeled neighbors compared against each unlabeled word

In [63]:
# candidate words are from all valid adjectives except testset
# NOTE: `w in testset['word']` tests the Series *index* (row labels), not the words
# themselves, so the exclusion must be done against the column's values.
test_words=set(testset['word'])
adj_cand=[w for w in adj['word'] if w not in test_words]

## Iter1

### Setup

In [8]:
# Randomly annotated seed words for the first training round.
# The two classes are roughly balanced (25 physical vs. 29 mental words), not exactly 1:1.
# `desc`: `physical` adj, describe physical attributes (label 0)
# `opin`: `mental` adj, usually relates to mental attributes (label 1)

desc=['anemic','arranged','assorted','available','baked','bitter','black','blue','broken','cherry',
      'citric','corrugated','commercial','cooked','crispy','crushed','crusty','decorative','dietetic',
     'digestible','dried','drippy','edible','empty','fake']
opin=['amazing','awesome','awful','aware','bad','basic','beneficial','best','bold','bothersome','careful',
      'casual','certain','cheap','clean','clear','cold','comfortable','common','comparable','competitive',
     'complete','consistent','contributive','convenient','conventional','cool','costly','crazy']

In [9]:
word_type='adj'

# Seed training set as (word, definition, label) triples: all physical words first,
# then all mental words. The definition is the first matching `text` entry in `adj`.
trainset=[]
for label,words in ((0,desc),(1,opin)): # 0: physical, 1: mental
    for w in words:
        trainset.append((w,adj.loc[adj.word==w,'text'].values[0],label))


In [10]:
# Unzip the (word, definition, label) triples into three parallel columns.
words,texts,targets=zip(*trainset)
train=pd.DataFrame()
train['word']=words
train['text']=texts
train['target']=targets

In [12]:
# Persist the round-1 training set; with to_csv defaults the DataFrame index is written as the first column.
# NOTE(review): presumably consumed by `train_cal.py` - confirm the expected file name there.
train.to_csv('Data/train.v8.1.round1.csv')

### Contrastives

    We use the trained model to make predictions over all valid adjectives. The output probability is used in finding contrastive samples.

In [None]:
pred=joblib.load('Data/pred.v8.1.round1') # load prediction result
assert pred.shape[0]==adj.shape[0]

# Row idx of `pred` is the model score for the idx-th adjective (lengths asserted equal above).
all_words=adj['word'].tolist()
labeled_words=train['word'].tolist()

# word -> [P(label 0), P(label 1)], the class distribution handed to `cal`
w_prob_map={}
for idx,word in enumerate(all_words):
    p_mental=pred[idx].item()
    w_prob_map[word]=[1-p_mental,p_mental]

# Everything not yet in the training set is a candidate for annotation.
unlabeled_words=[word for word in all_words if word not in labeled_words]
cs=cal(labeled_words, unlabeled_words, acq_size, w_prob_map, neighK , fast_model)
    


In [29]:
# Show each selected word with its definition so the annotators can label it.
# Emits the word, the definition, and two blank lines per entry.
for w,kl in cs:
    definition=adj.loc[adj.word==w,'text'].values[0] # check definition
    print(w,definition,'\n',sep='\n')

apoplectic
pertaining to or characteristic of apoplexy


psychotic
characteristic of or suffering from psychosis


arthritic
of or pertaining to arthritis


crispiest
tender and brittle


impeccable
without fault or error; not capable of sin


gluttonous
given to excess in consumption of especially food or drink


greasier
containing an unusual amount of grease or oil; smeared or soiled with grease or oil


chronological
relating to or arranged according to temporal order


damned
expletives used informally as intensifiers; in danger of the eternal punishment of Hell


addictive
causing or characterized by addiction


strep
of or relating to or caused by streptococci


demented
affected with madness or insanity


instinct
(followed by `with')deeply filled or permeated


tragic
very sad; especially involving grief or death or destruction; of or relating to or characteristic of tragedy


schizophrenic
suffering from some form of schizophrenia; of or relating to or characteristic of schiz

## Iter2

### Setup

In [31]:
# Words picked from the annotation result of the most contrastive samples
# (21 physical and 18 mental words in this round).

desc2=['apoplectic','arthritic','crispiest','greasier','chronological','strep','creamiest','adulterate','separable',
       'logistical','scorching','olfactive','menopausal','hypoglycemic','capsulated','breathtaking','hormonal',
       'thyroid','impassable','anaphylactic','premenstrual']

opin2=['psychotic','impeccable','gluttonous','damned','addictive','demented','tragic','schizophrenic','lunatic',
       'irreversible','autistic','greasiest','nauseating','irrelevant','erroneous','sickening','invariable',
       'unpopular']


# If any word exists in testset, delete it and resample.
# NOTE: `w in testset['word']` tests the Series *index* (row labels), not the words,
# so the leak check is done against a set of the column's values instead.
test_words=set(testset['word'])
for w in desc2+opin2:
    if w in test_words:
        print(w)

# Training data is cumulative: seed words plus everything annotated so far.
desc_all=desc+desc2
opin_all=opin+opin2


word_type='adj'
# (word, definition, label) triples; label 0: physical, 1: mental
trainset=[]
for label,words in ((0,desc_all),(1,opin_all)):
    for w in words:
        defn=adj.loc[adj.word==w,'text'].values[0]
        trainset.append((w,defn,label))


train2=pd.DataFrame(trainset,columns=['word','text','target'])

train2.to_csv('Data/train.v8.2.round2.csv')

    Training is done by `train_cal.py`. Remember to set Config.is_predict=False before training. 

### Evaluate

In [6]:
pred=joblib.load('Data/pred.v8.2.round2') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.476
positive::recall 0.833
negative::precision 0.838
negative::recall 0.484
F1: 0.61
N F1: 0.61


## Iter3

### Setup

In [None]:
pred=joblib.load('Data/pred.v8.2.round2') # load prediction result
assert pred.shape[0]==adj.shape[0]

# Row idx of `pred` is the model score for the idx-th adjective (lengths asserted equal above).
all_words=adj['word'].tolist()
labeled_words=train2['word'].tolist()

# word -> [P(label 0), P(label 1)], the class distribution handed to `cal`
w_prob_map={}
for idx,word in enumerate(all_words):
    p_mental=pred[idx].item()
    w_prob_map[word]=[1-p_mental,p_mental]

# Everything not yet in the training set is a candidate for annotation.
unlabeled_words=[word for word in all_words if word not in labeled_words]
cs=cal(labeled_words, unlabeled_words, acq_size, w_prob_map, neighK , fast_model)

In [35]:
# Show each selected word with its definition so the annotators can label it.
# Emits the word, the definition, and two blank lines per entry.
for w,kl in cs:
    definition=adj.loc[adj.word==w,'text'].values[0]
    print(w,definition,'\n',sep='\n')

cardiovascular
of or pertaining to or involving the heart and blood vessels


cerebrovascular
of or relating to the brain and the blood vessels that supply it


gravitational
of or relating to or caused by gravitation


ergonomic
of or relating to ergonomics


gustatorial
of or relating to gustation


laryngopharyngeal
of or relating to the larynx and pharynx


inertial
of or relating to inertia


multicultural
of or relating to or including several cultures


formulary
of or relating to or of the nature of a formula


gastroesophageal
of or relating to or involving the stomach and esophagus


futuristic
of or relating to futurism


translational
of or relating to uniform movement without rotation


behavioral
of or relating to behavior


epidemiological
of or relating to epidemiology


areal
of or relating to or involving an area


rotational
of or pertaining to rotation


colloidal
of or relating to or having the properties of a colloid


gynecological
of or relating to or practicing

In [37]:
# Words picked from the annotation result of the most contrastive samples
# (21 physical but only 5 mental words in this round).

desc3=['cardiovascular','gravitational','ergonomic','gustatorial','laryngopharyngeal','inertial','formulary',
       'gastroesophageal','translational','behavioral','epidemiological','areal','rotational','colloidal',
       'gynecological','narcoleptic','endothelial','organizational','ulcerative','nutritional','dermatological']

opin3=['multicultural','futuristic','graduate','developmental','prospective']


# If any word exists in testset, delete it and resample.
# NOTE: `w in testset['word']` tests the Series *index* (row labels), not the words,
# so the leak check is done against a set of the column's values instead.
test_words=set(testset['word'])
for w in desc3+opin3:
    if w in test_words:
        print(w)

# Training data is cumulative: seed words plus everything annotated so far.
desc_all=desc+desc2+desc3
opin_all=opin+opin2+opin3


word_type='adj'
# (word, definition, label) triples; label 0: physical, 1: mental
trainset=[]
for label,words in ((0,desc_all),(1,opin_all)):
    for w in words:
        defn=adj.loc[adj.word==w,'text'].values[0]
        trainset.append((w,defn,label))


train3=pd.DataFrame(trainset,columns=['word','text','target'])

train3.to_csv('Data/train.v8.3.round3.csv')

### Evaluate

In [7]:
pred=joblib.load('Data/pred.v8.3.1.round3') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.538
positive::recall 0.778
negative::precision 0.833
negative::recall 0.625
F1: 0.64
N F1: 0.71


###  RunTwoMore

    For another two runs, only show the prediction result over testset.

In [8]:
pred=joblib.load('Data/pred.v8.3.2.round3') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.509
positive::recall 0.750
negative::precision 0.809
negative::recall 0.594
F1: 0.61
N F1: 0.68


In [9]:
pred=joblib.load('Data/pred.v8.3.3.round3') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.556
positive::recall 0.694
negative::precision 0.800
negative::recall 0.688
F1: 0.62
N F1: 0.74


## Iter4

### Setup

In [None]:
pred=joblib.load('Data/pred.v8.3.1.round3') # load prediction result
assert pred.shape[0]==adj.shape[0]

# Row idx of `pred` is the model score for the idx-th adjective (lengths asserted equal above).
all_words=adj['word'].tolist()
labeled_words=train3['word'].tolist()

# word -> [P(label 0), P(label 1)], the class distribution handed to `cal`
w_prob_map={}
for idx,word in enumerate(all_words):
    p_mental=pred[idx].item()
    w_prob_map[word]=[1-p_mental,p_mental]

# Everything not yet in the training set is a candidate for annotation.
unlabeled_words=[word for word in all_words if word not in labeled_words]
cs=cal(labeled_words, unlabeled_words, acq_size, w_prob_map, neighK , fast_model)

In [45]:
# Show each selected word with its definition so the annotators can label it.
# Emits the word, the definition, and two blank lines per entry.
for w,kl in cs:
    definition=adj.loc[adj.word==w,'text'].values[0]
    print(w,definition,'\n',sep='\n')

toxic
of or relating to or caused by a toxin or poison


creamier
of the color of cream; thick like cream


nutritious
of or providing nourishment


lethal
of an instrument of certain death


traumatic
of or relating to a physical injury or wound to the body; psychologically painful


fishy
of or relating to or resembling fish; not as expected


motivational
of or relating to motivation


meteoric
of or pertaining to atmospheric phenomena, especially weather and weather conditions; pertaining to or consisting of meteors or meteoroids; like a meteor in speed or brilliance or transience


fishier
of or relating to or resembling fish; not as expected


tutorial
of or relating to tutors or tutoring


menial
used of unskilled work (especially domestic work)


murkier
(of liquids) clouded as with sediment; dark or gloomy


squashed
that has been violently compressed


gassier
resembling gas; suffering from excessive gas in the alimentary canal


bearable
capable of being borne though unpleas

In [47]:
# Words picked from the annotation result of the most contrastive samples
# (21 physical and 20 mental words in this round).
desc4=['toxic','creamier','nutritious','lethal','traumatic','fishy','meteoric',
       'menial','murkier','squashed','gassier','thrown','woodier','darwinian',
       'buff','sundry','browner','phonetic','departmental','shrimpy','watered']

opin4=['motivational','tutorial','bearable','instructional','procurable','untold','statistical','transitional',
       'sufficient','tangential','ripened','litigious','god-awful','obtainable','social','hypnotic','harsher',
       'top-notch','short-term','evolutionary']


# If any word exists in testset, delete it and resample.
# NOTE: `w in testset['word']` tests the Series *index* (row labels), not the words,
# so the leak check is done against a set of the column's values instead.
test_words=set(testset['word'])
for w in desc4+opin4:
    if w in test_words:
        print(w)

# Training data is cumulative: seed words plus everything annotated so far.
desc_all=desc+desc2+desc3+desc4
opin_all=opin+opin2+opin3+opin4


word_type='adj'
# (word, definition, label) triples; label 0: physical, 1: mental
trainset=[]
for label,words in ((0,desc_all),(1,opin_all)):
    for w in words:
        defn=adj.loc[adj.word==w,'text'].values[0]
        trainset.append((w,defn,label))


train4=pd.DataFrame(trainset,columns=['word','text','target'])

train4.to_csv('Data/train.v8.4.round4.csv')

### Evaluate

In [10]:
pred=joblib.load('Data/pred.v8.4.1.round4') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.508
positive::recall 0.889
negative::precision 0.892
negative::recall 0.516
F1: 0.65
N F1: 0.65


### RunTwoMore

    For another two runs, only show the prediction result over testset.

In [12]:
pred=joblib.load('Data/pred.v8.4.2.round4') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.547
positive::recall 0.806
negative::precision 0.851
negative::recall 0.625
F1: 0.65
N F1: 0.72


In [13]:
pred=joblib.load('Data/pred.v8.4.3.round4') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.485
positive::recall 0.917
negative::precision 0.906
negative::recall 0.453
F1: 0.63
N F1: 0.60


## Iter5

### Setup

In [None]:
pred=joblib.load('Data/pred.v8.4.1.round4') # load prediction result
assert pred.shape[0]==adj.shape[0]

# Row idx of `pred` is the model score for the idx-th adjective (lengths asserted equal above).
all_words=adj['word'].tolist()
labeled_words=train4['word'].tolist()

# word -> [P(label 0), P(label 1)], the class distribution handed to `cal`
w_prob_map={}
for idx,word in enumerate(all_words):
    p_mental=pred[idx].item()
    w_prob_map[word]=[1-p_mental,p_mental]

# Everything not yet in the training set is a candidate for annotation.
unlabeled_words=[word for word in all_words if word not in labeled_words]
cs=cal(labeled_words, unlabeled_words, acq_size, w_prob_map, neighK , fast_model)

In [54]:
# Show each selected word with its definition so the annotators can label it.
# Emits the word, the definition, and two blank lines per entry.
for w,kl in cs:
    definition=adj.loc[adj.word==w,'text'].values[0]
    print(w,definition,'\n',sep='\n')

reptilian
of or relating to the class Reptilia


peachy
very good; of something resembling a peach in color


yellowish
of the color intermediate between green and orange in the color spectrum; of something resembling the color of an egg yolk


rounded
curving and somewhat round in shape rather than jagged


bubbly
emitting or filled with bubbles as from carbonation or fermentation; full of or showing high spirits


pet
preferred above all others and treated with partiality


bubblier
emitting or filled with bubbles as from carbonation or fermentation; full of or showing high spirits


peachier
very good; of something resembling a peach in color


squishy
easily squashed; resembling a sponge in having soft porous texture and compressibility


floaty
tending to float on a liquid or rise in air or gas


nourishing
of or providing nourishment


underdone
insufficiently cooked


gnarly
used of old persons or old trees; covered with knobs or knots


leavened
made light by aerating, as with 

In [58]:
# Words picked from the annotation result of the most contrastive samples
# (21 physical but only 13 mental words in this round).
desc5=['reptilian','rounded','squishy','floaty','nourishing','underdone','gnarly',
       'leavened','electric','warped','grungy','grayish','fledged','crowded',
       'golden-yellow','cultivated','tacky','dark-brown','wavier','darkened','creaky']

opin5=['pet','composite','lessened','rare','grave','liked','infrequent','blunted',
       'lightheaded','spoiled','disconcerting','spaced','uncouth']


# If any word exists in testset, delete it and resample.
# NOTE: `w in testset['word']` tests the Series *index* (row labels), not the words,
# so the leak check is done against a set of the column's values instead.
test_words=set(testset['word'])
for w in desc5+opin5:
    if w in test_words:
        print(w)

# Training data is cumulative: seed words plus everything annotated so far.
desc_all=desc+desc2+desc3+desc4+desc5
opin_all=opin+opin2+opin3+opin4+opin5


word_type='adj'
# (word, definition, label) triples; label 0: physical, 1: mental
trainset=[]
for label,words in ((0,desc_all),(1,opin_all)):
    for w in words:
        defn=adj.loc[adj.word==w,'text'].values[0]
        trainset.append((w,defn,label))


train5=pd.DataFrame(trainset,columns=['word','text','target'])

train5.to_csv('Data/train.v8.5.round5.csv')

### Evaluate

In [14]:
pred=joblib.load('Data/pred.v8.5.1.round5') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.538
positive::recall 0.972
negative::precision 0.971
negative::recall 0.531
F1: 0.69
N F1: 0.69


###  RunTwoMore

    For another two runs, only show the prediction result over testset.

In [15]:
pred=joblib.load('Data/pred.v8.5.2.round5') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.548
positive::recall 0.944
negative::precision 0.947
negative::recall 0.562
F1: 0.69
N F1: 0.71


In [16]:
pred=joblib.load('Data/pred.v8.5.3.round5') # load prediction result
assert pred.shape[0]==adj.shape[0]

# testset performance

# Row i of `pred` is the score for the i-th word of `adj` (lengths asserted equal above).
pred_dict={}
for i,w in enumerate(adj['word'].tolist()):
    pred_dict[w]=pred[i]

# Attach each test word's score; scores above 0.5 are labelled 1 (mental), otherwise 0 (physical).
testset['pred_score']=testset['word'].apply(lambda x: pred_dict[x])
testset['pred_label']=testset['pred_score'].apply(lambda x: 1 if x>0.5 else 0)

# Confusion counts, computed once and reused by every metric below.
tp=testset.loc[(testset.pred_label==1)&(testset.target==1)].shape[0]
tn=testset.loc[(testset.pred_label==0)&(testset.target==0)].shape[0]
n_pred_pos=testset.loc[testset.pred_label==1].shape[0]
n_pred_neg=testset.loc[testset.pred_label==0].shape[0]
n_true_pos=testset.loc[testset.target==1].shape[0]
n_true_neg=testset.loc[testset.target==0].shape[0]

pp=tp/n_pred_pos # positive precision
pr=tp/n_true_pos # positive recall
# Named `neg_p` (not `np`) so the `numpy as np` alias imported by the notebook is not overwritten.
neg_p=tn/n_pred_neg # negative precision
nr=tn/n_true_neg # negative recall

print("positive::precision %.3f"%pp)
print("positive::recall %.3f"%pr)
print("negative::precision %.3f"%neg_p)
print("negative::recall %.3f"%nr)

print("F1: %.2f"%(2*pp*pr/(pp+pr)))
print("N F1: %.2f"%(2*neg_p*nr/(neg_p+nr)))

positive::precision 0.525
positive::recall 0.889
negative::precision 0.897
negative::recall 0.547
F1: 0.66
N F1: 0.68


# Summary

    Iter2: Annotate 50 words where we pick 20-20 for training.
    Iter3: Annotate 120 words where we pick 5(pos)-20 for training. Could NOT find enough positive samples.
    Iter4: Annotate 80 words where we pick 20-20 for training.
    Iter5: Annotate 120 words where we pick 13(pos)-20 for training. Could NOT find enough positive samples.
    CAL requires annotating up to 120 words per iteration, i.e. it needs more annotated samples than HighEntropy/CORESET/RANDOM.
    
    In terms of classification performance, CAL does not beat HighEntropy or CORESET, but it is better than Random. A likely reason is that the set of labeled words is too small to guarantee that the nearest labeled neighbors of a target unlabeled word are actually similar to it in semantics. In other words, the selected words are not necessarily valid contrastive samples.  