Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
35 changed files
with
25,971 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,19 @@ | ||
adj_supersense_classifier | ||
========================= | ||
|
||
Adjective supersense classifier | ||
===================== | ||
|
||
|
||
|
||
Supersense classifier described in the paper | ||
|
||
Augmenting English Adjective Senses with Supersenses. | ||
--------- | ||
**Yulia Tsvetkov, Nathan Schneider, Dirk Hovy, Archna Bhatia, Manaal Faruqui and Chris Dyer.** | ||
In Proc. LREC'14. | ||
|
||
<i class="icon-share"></i> http://www.cs.cmu.edu/~ytsvetko/ | ||
|
||
You can run the classifier like this: | ||
|
||
./adj_supersense_tagger.sh | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#!/bin/bash
#
# adj_supersense_tagger.sh - end-to-end driver for the adjective supersense
# classifier.
#
# Usage:
#   ./adj_supersense_tagger.sh
#
# The script takes no arguments. All paths are resolved relative to the
# directory that contains this script, so it can be launched from anywhere.
#
# Layout it relies on (relative to the script directory):
#   data/annotations/                              input to extract_seed.py
#   data/VSM/eacl14-faruqui-en-svd-de-64.adj.txt   feature file (or its .gz)
#   src/*.py                                       the pipeline stages
#
# Files it writes:
#   data/seed.txt                                  output of extract_seed.py
#   work/                                          intermediate files
#   predicted_supersenses/words.predicted          final per-lemma output
#   predicted_supersenses/synsets.predicted        final per-synset output
#
# Exported for child processes: ROOT_DIR, DATA_DIR, WORK_DIR, OUT_DIR, SRC_DIR.
#
# Exit status: non-zero as soon as any stage fails (see `set -e` below).

# Abort on the first failing stage, on use of an unset variable, and on a
# failure anywhere in a pipeline. Every stage consumes files written by the
# previous one, so continuing after a failure would silently reuse stale or
# missing intermediate files.
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Assign first, export second: `export VAR="$(cmd)"` would hide a failing
# `cd`/`pwd` behind the (always successful) exit status of `export`.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export ROOT_DIR
export DATA_DIR="${ROOT_DIR}/data"
export WORK_DIR="${ROOT_DIR}/work"                   # for temporary files
export OUT_DIR="${ROOT_DIR}/predicted_supersenses"   # for classifier outputs
export SRC_DIR="${ROOT_DIR}/src"

LABELS="${DATA_DIR}/seed.txt"
FEATURES="${DATA_DIR}/VSM/eacl14-faruqui-en-svd-de-64.adj.txt"

mkdir -p -- "${WORK_DIR}" "${OUT_DIR}"

# The feature file may still be compressed; unpack it in place if so.
# `-f` lets gunzip overwrite an already-unpacked copy.
if [[ -e "${FEATURES}.gz" ]]; then
  gunzip -f -- "${FEATURES}.gz"
fi
# Several stages below receive --features, so fail early with a clear message
# if the file is absent.
[[ -f "${FEATURES}" ]] || die "feature file not found: ${FEATURES}"

echo "Extract seed annotations"
"${SRC_DIR}/extract_seed.py" \
  --seed_dir "${DATA_DIR}/annotations" \
  --out_seed_file "${LABELS}"

echo "Split Test and Train"
"${SRC_DIR}/split_train_test.py" \
  --seed_file "${LABELS}" \
  --features "${FEATURES}" \
  --out_train "${WORK_DIR}/train_seed.txt" \
  --out_test "${WORK_DIR}/test_seed.txt"

# ---- Round 1: classify the expansion of the training seed -----------------

echo "Expand by WN synonyms and antonyms"
"${SRC_DIR}/expand_labeled_data.py" \
  --labeled_data "${WORK_DIR}/train_seed.txt" \
  --out_file "${WORK_DIR}/expanded.txt" \
  --expand

# Train on the seed; the expanded set is passed as the test set.
"${SRC_DIR}/build_training_sets.py" \
  --in_file "${WORK_DIR}/train_seed.txt" \
  --out_feat "${WORK_DIR}/train_seed.feat" \
  --out_labels "${WORK_DIR}/train_seed.labels" \
  --test_set "${WORK_DIR}/expanded.txt" \
  --out_test_feat "${WORK_DIR}/expanded.feat" \
  --out_test_labels "${WORK_DIR}/expanded.labels" \
  --features "${FEATURES}"

echo "Run multi-way classifier. " #Default - Random Forest with 300 trees
"${SRC_DIR}/classify.py" \
  --train_features "${WORK_DIR}/train_seed.feat" \
  --train_labels "${WORK_DIR}/train_seed.labels" \
  --test_features "${WORK_DIR}/expanded.feat" \
  --test_predicted_labels_out "${WORK_DIR}/expanded.predicted" \
  --write_posterior_probabilities

echo "Selecting best expanded words"
"${SRC_DIR}/filter_expanded.py" \
  --predictions "${WORK_DIR}/expanded.predicted" \
  --orig_seed "${WORK_DIR}/train_seed.txt" \
  --out_file "${WORK_DIR}/expanded_seed.txt"

# ---- Round 2: retrain on the filtered, expanded seed ----------------------

# NOTE(review): this overwrites work/expanded.txt, but no later command in
# this script is given that path explicitly - confirm whether a later stage
# reads it implicitly or whether this step is redundant.
echo "Expand by WN synonyms and antonyms"
"${SRC_DIR}/expand_labeled_data.py" \
  --labeled_data "${WORK_DIR}/expanded_seed.txt" \
  --out_file "${WORK_DIR}/expanded.txt" \
  --expand

# Overwrites train_seed.feat / train_seed.labels from round 1; the test
# features are written to vocab.feat this time.
"${SRC_DIR}/build_training_sets.py" \
  --in_file "${WORK_DIR}/expanded_seed.txt" \
  --out_feat "${WORK_DIR}/train_seed.feat" \
  --out_labels "${WORK_DIR}/train_seed.labels" \
  --out_test_feat "${WORK_DIR}/vocab.feat" \
  --features "${FEATURES}" \
  --include_training

echo "Run multi-way classifier" #Default - Random Forest with 300 trees
"${SRC_DIR}/classify.py" \
  --train_features "${WORK_DIR}/train_seed.feat" \
  --train_labels "${WORK_DIR}/train_seed.labels" \
  --test_features "${WORK_DIR}/vocab.feat" \
  --test_predicted_labels_out "${WORK_DIR}/vocab.predicted" \
  --write_posterior_probabilities

# ---- Evaluation and final output ------------------------------------------

echo "Accuracy-at-k:"
"${SRC_DIR}/eval.py" \
  --predicted_results "${WORK_DIR}/vocab.predicted" \
  --held_out_seed "${WORK_DIR}/test_seed.txt"
echo "(Results may vary for different runs)."
echo ""

"${SRC_DIR}/soft_voting.py" \
  --predicted_results "${WORK_DIR}/vocab.predicted" \
  --out_lemma_file "${OUT_DIR}/words.predicted" \
  --out_synset_file "${OUT_DIR}/synsets.predicted"

echo "See classifier predictions for lemmas at ${OUT_DIR}/words.predicted"
echo "See classifier predictions for synsets at ${OUT_DIR}/synsets.predicted"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
adjectives+annotations+MACE.txt file contains SemCor adjectives | ||
annotated by 3-5 Amazon Mechanical Turk workers. | ||
|
||
File format | ||
|
||
word \t annotations \t aggregated label | ||
|
||
Responses for each lemma are aggregated with the MACE tool, which takes | ||
annotator-specific patterns into account to better determine the true label. | ||
|
||
http://www.isi.edu/publications/licensed-sw/mace/ | ||
|
||
Dirk Hovy, Taylor Berg-Kirkpatrick, Ashish Vaswani, | ||
and Eduard Hovy. 2013. Learning Whom to Trust | ||
with MACE. In Proceedings of NAACL HLT. |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Word vectors created by Faruqui & Dyer (2014) are a variation on | ||
traditional latent semantic analysis that uses multilingual information | ||
to produce representations in which synonymous words have similar vectors. | ||
|
||
|
||
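Before running the adjective classifier, decompress the vectors with the command below (adj_supersense_tagger.sh also does this automatically when the .gz file is present): | ||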
|
||
gunzip eacl14-faruqui-en-svd-de-64.adj.txt.gz | ||
|
||
Publication: | ||
|
||
Manaal Faruqui and Chris Dyer. 2014. | ||
Improving vector space word representations using multilingual correlation. | ||
In Proceedings of the 14th Conference of the European Chapter of the | ||
Association for Computational Linguistics, | ||
EACL ’14. Association for Computational Linguistics. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
active | ||
adept | ||
aggressive | ||
alienated | ||
amiable | ||
amicable | ||
amoral | ||
ardent | ||
argumentative | ||
awkward | ||
bossy | ||
brave | ||
calm | ||
calmer | ||
caring | ||
charming | ||
cheerful | ||
compulsive | ||
considerate | ||
consistent | ||
cooperative | ||
courageous | ||
cordial | ||
crazy | ||
cruel | ||
deceitful | ||
despised | ||
despicable | ||
determined | ||
disciplined | ||
domineering | ||
egotistic | ||
energetic | ||
enthusiastic | ||
evaluative | ||
evil | ||
excitable | ||
faithful | ||
friendly | ||
funny | ||
furious | ||
furiouser | ||
generous | ||
greedy | ||
harmonious | ||
helpful | ||
hostile | ||
hypoactive | ||
impulsive | ||
inconsiderate | ||
inveterate | ||
irritating | ||
jovial | ||
kind | ||
lazy | ||
loving | ||
loyal | ||
manic | ||
manipulative | ||
moody | ||
moral | ||
mute | ||
organized | ||
panicked | ||
placid | ||
pleasing | ||
plodding | ||
poised | ||
polite | ||
popular | ||
queer | ||
reconciling | ||
reluctant | ||
rough | ||
rude | ||
screaming | ||
secluded | ||
selfish | ||
sincere | ||
sociable | ||
skeptical | ||
spiteful | ||
still | ||
strict | ||
sympathetic | ||
tactful | ||
talkative | ||
thoughtful | ||
thoughtless | ||
timid | ||
used_to | ||
vicious | ||
wild | ||
yielding |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
achy | ||
ailing | ||
albino | ||
alive | ||
athletic | ||
attractive | ||
bald | ||
beautiful | ||
beefy | ||
blind | ||
blonde | ||
blue-eyed | ||
breathing | ||
brown-eyed | ||
brunette | ||
chubby | ||
crippled | ||
curvy | ||
cute | ||
dead | ||
deaf | ||
debilitated | ||
defective | ||
dehydrated | ||
dirty | ||
disabled | ||
diseased | ||
dizzy | ||
drymouth | ||
exhausted | ||
fat | ||
feeble | ||
female | ||
feminine | ||
feverish | ||
fleshy | ||
gorgeous | ||
hairy | ||
handicapped | ||
handsome | ||
healthy | ||
heavy | ||
hungry | ||
ill | ||
impaired | ||
incapacitated | ||
infected | ||
irritated | ||
itching | ||
lame | ||
large | ||
lean | ||
lifeless | ||
lined | ||
living | ||
male | ||
masculine | ||
muscular | ||
mutilated | ||
numb | ||
obese | ||
ordinary | ||
pain | ||
paralyzed | ||
pettite | ||
plump | ||
pregnant | ||
pretty | ||
quivering | ||
redhead | ||
scarred | ||
scrawny | ||
scruffy | ||
shapely | ||
short | ||
shuddering | ||
sick | ||
sickly | ||
skinny | ||
slender | ||
slobbery | ||
small | ||
sound | ||
starving | ||
stocky | ||
stout | ||
strong | ||
sturdy | ||
sweaty | ||
tall | ||
tattooed | ||
thick | ||
thin | ||
thirsty | ||
tired | ||
trembling | ||
ugly | ||
undiseased | ||
unhealthy | ||
weak |
Oops, something went wrong.