Skip to content

Commit

Permalink
Test on my embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
turian committed Jul 5, 2009
1 parent 07c0379 commit 549a30d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 34 deletions.
46 changes: 12 additions & 34 deletions test.py
@@ -1,41 +1,19 @@
#!/usr/bin/python
#!/usr/bin/env python

#import cPickle
#from common.file import myopen
#(example_metadata, examples) = cPickle.load(myopen("/home/turianjo/dev/python/nlpreprocess/wikipedia/work-legal/sparseexamples.unshuffled.pkl"))
#x = examples.todense()
#titles = [e["title"] for e in example_metadata]
#x = x[:500]

import string, numpy
#o = open("/home/turianjo/data/word_embeddings.collobert-and-weston/lm-weights.txt")
#o.readline()
#x = [float(i) for i in string.split(o.readline())]
#o = open("/home/turianjo/data/word_embeddings.collobert-and-weston.100K/lm-100K/lm-weights-100K.txt")
o = open("/home/joseph/data/word_embeddings.collobert-and-weston.100K/lm-100K/lm-weights-100K.txt")
x = [float(i) for i in string.split(o.read())]
import string, numpy, gzip
o = gzip.open("testdata/english-embeddings.turian.txt.gz", "rb")
titles, x = [], []
for l in o:
toks = string.split(l)
titles.append(toks[0])
x.append([float(f) for f in toks[1:]])
x = numpy.array(x)
print x.shape
#x.resize(30000,50)
x.resize(100000,50)
print x.shape
#titles = [string.strip(t) for t in open("/u/turian/data/word_embeddings.collobert-and-weston/words.asc")]
#titles = [string.strip(t) for t in open("/home/turianjo/data/word_embeddings.collobert-and-weston.100K/lm-100K/lm-words-100K.txt")]
titles = [string.strip(t) for t in open("/home/joseph/data/word_embeddings.collobert-and-weston.100K/lm-100K/lm-words-100K.txt")]
x = x[:500]
titles = titles[:500]

def normalize(x):
    """Scale every row of a 2-D array so that its entries sum to 1.

    Parameters:
        x: a 2-D ``numpy.ndarray`` (or ``numpy.matrix``) with one example
           per row.

    Returns:
        An array of the same shape as ``x`` in which row ``i`` is
        ``x[i] / sum(x[i])``.

    Rows whose sum is zero produce ``nan``/``inf`` entries (NumPy's usual
    division-by-zero behaviour); they are not special-cased here.
    """
    import numpy

    # numpy.sum(..., axis=1) on an ndarray yields shape (n,), which would
    # broadcast against the *columns* of x. Reshape to an (n, 1) column so
    # each row is divided by its own sum. reshape (rather than
    # keepdims=True) is used because it also works for numpy.matrix input,
    # whose sum() already returns an (n, 1) result.
    row_sums = numpy.reshape(numpy.sum(x, axis=1), (-1, 1))

    # true_divide keeps the result fractional even for integer input under
    # Python 2, where "/" on integer arrays would floor.
    return numpy.true_divide(x, row_sums)

#x = normalize(x)

#from calc_tsne import calc_tsne
#out = calc_tsne(x, NO_DIMS=2, PERPLEX=30, INITIAL_DIMS=30, LANDMARKS=1, USE_PCA=False)
from tsne import tsne
#from calc_tsne import tsne
#out = tsne(x, no_dims=2, perplexity=30, initial_dims=30, USE_PCA=False)
out = tsne(x, no_dims=2, perplexity=30, initial_dims=30)
out = tsne(x, no_dims=2, perplexity=30, initial_dims=30, use_pca=False)
#out = tsne(x, no_dims=2, perplexity=30, initial_dims=30)

import render
render.render([(title, point[0], point[1]) for title, point in zip(titles, out)], "out.png", width=3000, height=1800)
render.render([(title, point[0], point[1]) for title, point in zip(titles, out)], "test-output.png", width=3000, height=1800)
Binary file added testdata/english-embeddings.turian.txt.gz
Binary file not shown.

0 comments on commit 549a30d

Please sign in to comment.