Skip to content

Commit

Permalink
Merge pull request #68 from vecto-ai/cli-bofang
Browse files Browse the repository at this point in the history
Cli bofang
  • Loading branch information
undertherain committed Jan 21, 2019
2 parents 743c468 + 55d3b31 commit 0d6aff3
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
7 changes: 6 additions & 1 deletion vecto/benchmarks/categorization/categorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import csv
import numpy as np
from scipy.spatial import distance
import os

OTHER_EXT = 'None'
BENCHMARK = 'benchmark'
Expand Down Expand Up @@ -106,7 +107,8 @@ def evaluate(self, embs, data):
# add experiment_setup and result entry for result
result["experiment_setup"] = {}
result["result"] = result['global_stats']['scores']
result["experiment_setup"]['default_measurement'] = {'Purity'}
result["experiment_setup"]['default_measurement'] = 'Purity'

return result

def read_datasets_from_dir(self, path_to_dir):
Expand All @@ -127,6 +129,9 @@ def run(self, embs, path_dataset):
datasets = self.read_datasets_from_dir(path_dataset)
for dataset_name, dataset_data in datasets.items():
result = self.evaluate(embs, dataset_data)
result['experiment_setup']['dataset'] = os.path.basename(os.path.normpath(path_dataset))
result['experiment_setup']['embeddings'] = embs.metadata
result['experiment_setup']['method'] = self.method
results.append(result)
return results

Expand Down
20 changes: 19 additions & 1 deletion vecto/benchmarks/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def clean_dic(data):
return data_clean


def df_from_file(path):
def df_from_file_bak(path):
logger.debug(f"processing {path}")
data = load_json(path)
data_clean = [clean_dic(x) for x in data]
Expand All @@ -43,6 +43,24 @@ def df_from_file(path):
# df["reciprocal_rank"] = 1 / (df["rank"] + 1)
return dframe

def df_from_file(path):
    """Build a flat DataFrame from the benchmark results stored at ``path``.

    The records returned by ``load_json(path)`` are flattened with
    ``json_normalize``, so nested keys become dotted column names such as
    ``experiment_setup.default_measurement`` and ``result.<measurement>``.
    A ``details`` column, when present, is dropped.

    The ``result`` column is copied from ``result.<default_measurement>``,
    where the measurement name is the first unique value found in the
    ``experiment_setup.default_measurement`` column. When that column is
    missing, ``"accuracy"`` is used and a warning is logged.

    Args:
        path: location of the results file, passed straight to ``load_json``.

    Returns:
        The normalized DataFrame with an added ``result`` column.

    Raises:
        KeyError: if the ``result.<default_measurement>`` column is absent.
    """
    data = load_json(path)
    meta = [
        ["experiment_setup", "task"],
        ["experiment_setup", "subcategory"],
        ["experiment_setup", "method"],
        ["experiment_setup", "embeddings"],
    ]
    dframe = json_normalize(data, meta=meta)
    if "details" in dframe:
        dframe = dframe.drop(columns="details")

    default_measurement = "accuracy"
    try:
        default_measurement = dframe["experiment_setup.default_measurement"].unique()[0]
    except (KeyError, IndexError):
        # Only the "measurement not recorded" case falls back to the default;
        # anything else (including KeyboardInterrupt) must propagate.
        logger.warning(f"default_measurement not specified in {path}")

    dframe["result"] = dframe["result." + default_measurement]
    return dframe


def df_from_dir(path):
dfs = []
Expand Down

0 comments on commit 0d6aff3

Please sign in to comment.