Commit: make output to /tmp folder
libofang committed May 4, 2018
1 parent 936eec5 commit b2af7c9
Showing 7 changed files with 37 additions and 35 deletions.
@@ -1 +1 @@
{"current_datetime": "2018-05-04 02:29:28.460915", "batchsize": 64, "epoch": 5, "gpu": -1, "layer": 1, "dropout": 0, "model": "bow", "char_based": false, "out": "./tests/data/benchmarks_results/text_classification/", "unit": 4, "dataset": "./tests/data/benchmarks/text_classification/", "vocab_path": "./tests/data/benchmarks_results/text_classification/vocab.json", "model_path": "./tests/data/benchmarks_results/text_classification/best_model.npz", "n_class": 2, "datetime": "2018-05-04 02:29:28.460915"}
{"current_datetime": "2018-05-04 11:39:50.824318", "batchsize": 64, "epoch": 5, "gpu": -1, "layer": 1, "dropout": 0, "model": "bow", "char_based": false, "out": "./tests/data/benchmarks_results/text_classification/", "unit": 4, "dataset": "./tests/data/benchmarks/text_classification/", "vocab_path": "./tests/data/benchmarks_results/text_classification/vocab.json", "model_path": "./tests/data/benchmarks_results/text_classification/best_model.npz", "n_class": 2, "datetime": "2018-05-04 11:39:50.824318"}
Binary file modified tests/data/benchmarks_results/text_classification/best_model.npz
Binary file not shown.
40 changes: 20 additions & 20 deletions tests/data/benchmarks_results/text_classification/log
@@ -1,38 +1,38 @@
 [
     {
-        "main/loss": 0.6962488293647766,
-        "main/accuracy": 0.4375,
-        "validation/main/loss": 0.6995988488197327,
-        "validation/main/accuracy": 0.46666666865348816,
+        "main/loss": 0.7636308670043945,
+        "main/accuracy": 0.46875,
+        "validation/main/loss": 0.7174736261367798,
+        "validation/main/accuracy": 0.5333333611488342,
         "epoch": 1,
         "iteration": 1,
-        "elapsed_time": 0.0065841870091389865
+        "elapsed_time": 0.00541385097312741
     },
     {
-        "main/loss": 0.6889790296554565,
-        "main/accuracy": 0.53125,
-        "validation/main/loss": 0.6992061138153076,
-        "validation/main/accuracy": 0.46666666865348816,
+        "main/loss": 0.7468587160110474,
+        "main/accuracy": 0.484375,
+        "validation/main/loss": 0.716312050819397,
+        "validation/main/accuracy": 0.5333333611488342,
         "epoch": 2,
         "iteration": 2,
-        "elapsed_time": 0.01622019399655983
+        "elapsed_time": 0.012854741973569617
     },
     {
-        "main/loss": 0.6849422454833984,
-        "main/accuracy": 0.5909090638160706,
-        "validation/main/loss": 0.6990599632263184,
-        "validation/main/accuracy": 0.46666666865348816,
+        "main/loss": 0.7709426283836365,
+        "main/accuracy": 0.4545454680919647,
+        "validation/main/loss": 0.7152009010314941,
+        "validation/main/accuracy": 0.5333333611488342,
         "epoch": 3,
         "iteration": 3,
-        "elapsed_time": 0.022680485009914264
+        "elapsed_time": 0.020706564973806962
     },
     {
-        "main/loss": 0.6885284185409546,
-        "main/accuracy": 0.5581395626068115,
-        "validation/main/loss": 0.6989443302154541,
-        "validation/main/accuracy": 0.46666666865348816,
+        "main/loss": 0.7399059534072876,
+        "main/accuracy": 0.5348837375640869,
+        "validation/main/loss": 0.7141532897949219,
+        "validation/main/accuracy": 0.5333333611488342,
         "epoch": 4,
         "iteration": 4,
-        "elapsed_time": 0.02903877801145427
+        "elapsed_time": 0.026553130999673158
     }
 ]
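This log fixture matters because get_result (see the last file in this diff) reports the final record's validation accuracy as the benchmark score. A minimal sketch of that lookup, assuming only the standard library and the fixture path above:

    import json

    # The log fixture is a plain JSON array of per-epoch records.
    with open("tests/data/benchmarks_results/text_classification/log") as f:
        log = json.load(f)

    # Mirrors result['result'] = result['log'][-1]['validation/main/accuracy']
    # in vecto/benchmarks/text_classification/text_classification.py.
    print(log[-1]["validation/main/accuracy"])  # 0.5333333611488342 after this commit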
6 changes: 3 additions & 3 deletions tests/test_benchmarks.py
@@ -67,15 +67,15 @@ def test_text_classification(self):

         tc = Text_classification(model='cnn')
         result = tc.get_result(embs, path_text_classification_dataset,
-                               "./tests/data/benchmarks_results/text_classification/")
+                               "/tmp/tests/data/benchmarks_results/text_classification/")
         print(result)
         tc = Text_classification(model='rnn')
         result = tc.get_result(embs, path_text_classification_dataset,
-                               "./tests/data/benchmarks_results/text_classification/")
+                               "/tmp/tests/data/benchmarks_results/text_classification/")
         print(result)
         tc = Text_classification(model='bow')
         result = tc.get_result(embs, path_text_classification_dataset,
-                               "./tests/data/benchmarks_results/text_classification/")
+                               "/tmp/tests/data/benchmarks_results/text_classification/")
         print(result)


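The three calls above differ only in the model name, and all now write under /tmp instead of into the repository tree. As a hedged aside, a more portable variant would let the standard-library tempfile module pick the scratch directory; this sketch reuses embs and path_text_classification_dataset from the surrounding test (so on its own it assumes those are defined) and is an illustration, not part of the commit:

    import os
    import tempfile

    # Let the OS choose the scratch location instead of hard-coding /tmp,
    # which does not exist on every platform.
    out_dir = os.path.join(tempfile.mkdtemp(prefix="vecto_"), "text_classification")

    for model_name in ('cnn', 'rnn', 'bow'):
        tc = Text_classification(model=model_name)
        result = tc.get_result(embs, path_text_classification_dataset, out_dir)
        print(result)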
2 changes: 1 addition & 1 deletion vecto/benchmarks/analogy/visualize.py
@@ -51,7 +51,7 @@ def run_results(path_embeds=["tests/data/embeddings/text/plain_with_file_header"
         analogy = method()
         results = analogy.run(embs, path_analogy_dataset)
         print(results)
-        save_json(results, os.path.join("tests/data/benchmarks_results/analogy/", datetime.datetime.now().isoformat()))
+        save_json(results, os.path.join("/tmp/tests/data/benchmarks_results/analogy/", datetime.datetime.now().isoformat()))



2 changes: 1 addition & 1 deletion vecto/benchmarks/similarity/visualize.py
@@ -50,7 +50,7 @@ def run_results(path_embeds=["tests/data/embeddings/text/plain_with_file_header"
         similarity = Similarity()
         results = similarity.run(embs, path_analogy_dataset)
         print(results)
-        save_json(results, os.path.join("tests/data/benchmarks_results/similarity/", datetime.datetime.now().isoformat()))
+        save_json(results, os.path.join("/tmp/tests/data/benchmarks_results/similarity/", datetime.datetime.now().isoformat()))



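Both visualize scripts use the same save idiom: join an output directory with datetime.datetime.now().isoformat() so every run lands in a fresh, timestamped file. A minimal self-contained sketch of that idiom; this save_json is a stand-in written only so the snippet runs, not vecto's real helper:

    import datetime
    import json
    import os

    def save_json(data, path):
        # Stand-in for vecto's save_json helper (an assumption, not its code).
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w") as f:
            json.dump(data, f, indent=4)

    results = {"experiment_setup": {}, "result": 0.5333333611488342}
    # e.g. /tmp/tests/data/benchmarks_results/similarity/2018-05-04T11:39:50.824318
    out_path = os.path.join("/tmp/tests/data/benchmarks_results/similarity/",
                            datetime.datetime.now().isoformat())
    save_json(results, out_path)

One caveat worth noting: isoformat() output contains colons, which are fine under /tmp on Linux but are not legal in Windows file names.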
20 changes: 11 additions & 9 deletions vecto/benchmarks/text_classification/text_classification.py
@@ -49,19 +49,21 @@ def load_model(model_path, wv):

     return model, vocab, setup
 
+
 def predict(model, sentence):
     model, vocab, setup = model
     sentence = sentence.strip()
     text = nlp_utils.normalize_text(sentence)
     words = nlp_utils.split_text(text, char_based=setup['char_based'])
     xs = nlp_utils.transform_to_array([words], vocab, with_label=False)
-    xs = nlp_utils.convert_seq(xs, device=-1, with_label=False) # todo use GPU
+    xs = nlp_utils.convert_seq(xs, device=-1, with_label=False)  # todo use GPU
     with chainer.using_config('train', False), chainer.no_backprop_mode():
         prob = model.predict(xs, softmax=True)[0]
     answer = int(model.xp.argmax(prob))
     score = float(prob[answer])
     return answer, score
 
+
 def get_vectors(model, sentences):
     model, vocab, setup = model
     vectors = []
@@ -70,16 +72,14 @@ def get_vectors(model, sentences):
         text = nlp_utils.normalize_text(sentence)
         words = nlp_utils.split_text(text, char_based=setup['char_based'])
         xs = nlp_utils.transform_to_array([words], vocab, with_label=False)
-        xs = nlp_utils.convert_seq(xs, device=-1, with_label=False) # todo use GPU
+        xs = nlp_utils.convert_seq(xs, device=-1, with_label=False)  # todo use GPU
         with chainer.using_config('train', False), chainer.no_backprop_mode():
             vector = model.encoder(xs)
         vectors.append(vector.data[0])
     vectors = numpy.asarray(vectors)
     return vectors
 
-
-
 
 class Text_classification(Benchmark):
 
     def __init__(self, batchsize=64, epoch=5, gpu=-1, layer=1, dropout=0, model=['cnn', 'rnn', 'bow'][1],
@@ -97,21 +97,23 @@ def get_result(self, embs, path_dataset, path_output='/tmp/text_classification/'
         self.out = path_output
         self.unit = embs.matrix.shape[1]
 
+        if not os.path.isdir(path_output):
+            os.makedirs(path_output)
 
         # Load a dataset
         self.dataset = path_dataset
         if self.dataset == 'dbpedia':
             train, test, vocab = text_datasets.get_dbpedia(
-                char_based=self.char_based, vocab=embs.vocabulary.dic_words_ids,)
+                char_based=self.char_based, vocab=embs.vocabulary.dic_words_ids, )
         elif self.dataset.startswith('imdb.'):
             train, test, vocab = text_datasets.get_imdb(
                 fine_grained=self.dataset.endswith('.fine'),
-                char_based=self.char_based, vocab=embs.vocabulary.dic_words_ids,)
+                char_based=self.char_based, vocab=embs.vocabulary.dic_words_ids, )
         elif self.dataset in ['TREC', 'stsa.binary', 'stsa.fine',
                               'custrev', 'mpqa', 'rt-polarity', 'subj']:
             train, test, vocab = text_datasets.get_other_text_dataset(
-                self.dataset, char_based=self.char_based, vocab=embs.vocabulary.dic_words_ids,)
-        else:  # finallly, if file is not downloadable, load from local path
+                self.dataset, char_based=self.char_based, vocab=embs.vocabulary.dic_words_ids, )
+        else:  # finallly, if file is not downloadable, load from local path
             train, test, vocab = text_datasets.get_dataset_from_path(path_dataset, vocab=embs.vocabulary.dic_words_ids,
                                                                      char_based=self.char_based)
 
@@ -194,4 +196,4 @@ def get_result(self, embs, path_dataset, path_output='/tmp/text_classification/'
         result['experiment_setup'] = experiment_setup
         result['log'] = load_json(os.path.join(self.out, 'log'))
         result['result'] = result['log'][-1]['validation/main/accuracy']
-        return result
\ No newline at end of file
+        return result
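The directory guard added to get_result is what lets every output path in this commit move to /tmp safely: the directory is created on demand instead of being assumed to exist in the repo. Since Python 3.2 the check-then-create pair can be collapsed into one call, which also closes the small race when two runs create the directory at once. A minimal sketch of the equivalent (ensure_output_dir is a hypothetical name, not vecto's API):

    import os

    def ensure_output_dir(path_output):
        # One-call equivalent of:
        #     if not os.path.isdir(path_output):
        #         os.makedirs(path_output)
        # but atomic with respect to concurrent creators.
        os.makedirs(path_output, exist_ok=True)
        return path_output

    ensure_output_dir("/tmp/tests/data/benchmarks_results/text_classification/")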
