Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
4,216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ eda/result | |
log | ||
lsc_env/ | ||
config_data.py | ||
deploy/data/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Last update: 2018-11-14 12:16:34 |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{ | ||
"dataset": "emoji", | ||
"data_type": "image", | ||
"train_split": 13500, | ||
"initial_dim": 32, | ||
"initial_projection": "t-SNE", | ||
"dims": [4, 8, 16, 32, 64, 128], | ||
"schema": { | ||
"type": {}, | ||
"meta": ["i","name", "mean_color", "category", "platform", "version", "codepoints", "shortcode"] | ||
}, | ||
"rendering": { | ||
"dot_color": "mean_color", | ||
"ext": "png" | ||
}, | ||
"search": { | ||
"advanced": true, | ||
"by": ["name", "codepoints", "shortcode"], | ||
"filter": "platform" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"dataset": "glove_6b", | ||
"data_type": "text", | ||
"train_split": 10000, | ||
"initial_projection": "t-SNE", | ||
"dims": [50, 100, 200, 300], | ||
"schema": { | ||
"type": {}, | ||
"meta": ["i", "name"] | ||
}, | ||
"rendering": { | ||
"dot_color": null | ||
}, | ||
"search": { | ||
"simple": true, | ||
"advanced": true, | ||
"by": ["name"] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"dataset": "logo", | ||
"data_type": "image", | ||
"train_split": 15000, | ||
"initial_projection": "t-SNE", | ||
"dims": [32, 64, 128, 256, 512, 1024], | ||
"schema": { | ||
"meta": ["i", "name", "mean_color", "source", "industry"] | ||
}, | ||
"rendering": { | ||
"dot_color": "mean_color", | ||
"ext": "jpg" | ||
}, | ||
"search": { | ||
"simple": true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"dataset": "tybalt", | ||
"data_type": "other", | ||
"train_split": 10458, | ||
"initial_projection": "t-SNE", | ||
"dims": [100], | ||
"schema": { | ||
"type": { | ||
"organ": "categorical", | ||
"race": "categorical", | ||
"stage":"categorical", | ||
"gender": "categorical", | ||
"age_at_diagnosis": "numeric", | ||
"ovarian_cancer_subtype": "categorical" | ||
}, | ||
"meta": ["i", "name", "platform", "age_at_diagnosis", "race", "stage", "vital_status", | ||
"disease", "organ", "gender", "analysis_center", "year_of_diagnosis", | ||
"ovarian_cancer_subtype"], | ||
"header": ["i", "gene"] | ||
}, | ||
"rendering": { | ||
"dot_color": "organ" | ||
}, | ||
"search": { | ||
"advanced": true, | ||
"by": ["name"], | ||
"filter": "stage" | ||
}, | ||
"filter": { | ||
"fields": ["organ", "race", "stage", "vital_status", "gender", | ||
"ovarian_cancer_subtype"] | ||
}, | ||
"color_by": ["organ", "race", "stage", "vital_status", "gender", | ||
"analysis_center", "ovarian_cancer_subtype"], | ||
"y_axis": ["y", "organ", "race", "stage", "gender", "age_at_diagnosis", | ||
"ovarian_cancer_subtype"] | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="utf-8"> | ||
<title>Latent Space Cartography</title> | ||
<link rel="shortcut icon" href="/favicon.ico"> | ||
</head> | ||
<body> | ||
<div id="app"></div> | ||
|
||
<!-- Our built files --> | ||
<script src="/build/fontawesome.js"></script> | ||
<script src="/build/bundle.js"></script> | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# -*- coding: utf-8 -*- | ||
# pairwise cosine similarity of random pairs in the latent space | ||
|
||
import os | ||
import h5py | ||
import numpy as np | ||
from sklearn.metrics.pairwise import cosine_similarity | ||
|
||
# for absolute path | ||
def abs_path (rel_path):
    """Join ``rel_path`` onto the directory that contains this source file.

    Args:
        rel_path: path expressed relative to this module's directory.

    Returns:
        The joined path string, as produced by ``os.path.join``.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, rel_path)
|
||
class RandomCosine (object):
    """Cosine similarity between difference vectors of random point pairs.

    For a dataset ``dset`` and a list of latent dimensionalities ``dims``,
    one set of random index pairs is drawn and, for every dimensionality,
    the pairwise cosine similarity between the pair-difference vectors is
    computed. Everything is stored in ``./data/<dset>/pairs.h5`` (relative
    to this module): the pair indices under ``id`` and one ``cosine<dim>``
    dataset per dimensionality.
    """

    def __init__ (self, dset, dims):
        """Remember the dataset name, the dimensions and the output path.

        Args:
            dset: dataset name, used as the folder name under ``./data``.
            dims: sequence of latent dimensionalities to process.
        """
        self.dset = dset
        self.dims = dims
        # Format the relative path before joining (same as read_ls) so that
        # braces in the module's directory name cannot disturb the template.
        self.out = abs_path('./data/{}/pairs.h5'.format(dset))

    # read latent space
    def read_ls (self, latent_dim):
        """Load the ``latent`` dataset of ``latent<latent_dim>.h5`` as an array."""
        rawpath = abs_path('./data/{}/latent/latent{}.h5'.format(self.dset, latent_dim))
        with h5py.File(rawpath, 'r') as f:
            X = np.asarray(f['latent'])
        return X

    # remove previous result
    def clean (self):
        """Delete the output file if it exists; do nothing otherwise."""
        if os.path.exists(self.out):
            os.remove(self.out)

    # we want to re-use the same random pairs
    def random_pairs (self, latent_dim, num_pairs=2000):
        """Draw random index pairs and store them as ``id`` in the output file.

        Args:
            latent_dim: dimensionality whose latent file provides the number
                of available points.
            num_pairs: number of pairs to draw.

        Returns:
            Integer array of shape ``(num_pairs, 2)``.

        Note:
            Indices are sampled without replacement, so the latent space must
            hold at least ``2 * num_pairs`` points; numpy raises ValueError
            otherwise. The output file is (re)created by this call.
        """
        X = self.read_ls(latent_dim)
        n, _ = X.shape
        ids = np.random.choice(n, size=(num_pairs, 2), replace=False)

        with h5py.File(self.out, 'w') as f:
            f.create_dataset('id', data=ids)

        return ids

    # pairwise cosine similarity
    def cosine (self, latent_dim, ids):
        """Cosine similarity between every two pair-difference vectors.

        Args:
            latent_dim: dimensionality of the latent space to read.
            ids: integer array of shape ``(num_pairs, 2)`` indexing rows of
                the latent matrix.

        Returns:
            1-D array with the strict lower triangle of the similarity
            matrix in row-major order, i.e. one value per unordered pair of
            difference vectors.
        """
        X = self.read_ls(latent_dim)
        endpoints = X[ids]  # gather once: (num_pairs, 2, latent_dim)
        V = endpoints[:, 1, :] - endpoints[:, 0, :]

        # cosine similarity
        cs = cosine_similarity(V)

        # we want only the lower triangle (excluding the diagonal); select it
        # by position rather than by "non-zero" so that a similarity that is
        # exactly 0 is not silently dropped from the statistics
        rows, cols = np.tril_indices(cs.shape[0], k=-1)
        cs = cs[rows, cols]

        score = np.mean(cs)
        # single-argument call form prints identically on Python 2 and 3
        print('average {}, max {}, min {}, std {}'.format(round(score, 2),
            round(np.amax(cs), 2), round(np.amin(cs), 2), round(np.std(cs), 2)))

        return cs

    # precompute the index and pairwise cosine of the random pairs
    # we re-use the same random pairs across all dimensions for consistency
    def compute (self):
        """Regenerate the output file: pair indices plus cosines per dimension."""
        self.clean()
        ids = self.random_pairs(self.dims[0])

        # Append to the file random_pairs just wrote: opening it with 'w'
        # again would truncate it and lose the stored 'id' dataset. The
        # context manager closes the file even if one dimension fails.
        with h5py.File(self.out, 'a') as f:
            for dim in self.dims:
                cs = self.cosine(dim, ids)
                f.create_dataset('cosine{}'.format(dim), data=cs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
absl-py==0.1.10 | ||
appnope==0.1.0 | ||
backports-abc==0.5 | ||
backports.functools-lru-cache==1.5 | ||
backports.shutil-get-terminal-size==1.0.0 | ||
backports.weakref==1.0.post1 | ||
bleach==1.5.0 | ||
boto==2.48.0 | ||
boto3==1.7.50 | ||
botocore==1.10.50 | ||
bz2file==0.98 | ||
certifi==2018.4.16 | ||
chardet==3.0.4 | ||
click==6.7 | ||
configparser==3.5.0 | ||
cycler==0.10.0 | ||
decorator==4.3.0 | ||
docutils==0.14 | ||
entrypoints==0.2.3 | ||
enum34==1.1.6 | ||
Flask==0.12.2 | ||
Flask-MySQL==1.4.0 | ||
funcsigs==1.0.2 | ||
functools32==3.2.3.post2 | ||
futures==3.2.0 | ||
gensim==3.4.0 | ||
h5py==2.8.0 | ||
html5lib==0.9999999 | ||
idna==2.7 | ||
ipykernel==4.8.2 | ||
ipython==5.7.0 | ||
ipython-genutils==0.2.0 | ||
ipywidgets==7.2.1 | ||
itsdangerous==0.24 | ||
Jinja2==2.10 | ||
jmespath==0.9.3 | ||
jsonschema==2.6.0 | ||
jupyter==1.0.0 | ||
jupyter-client==5.2.3 | ||
jupyter-console==5.2.0 | ||
jupyter-core==4.4.0 | ||
Keras==2.1.6 | ||
llvmlite==0.25.0 | ||
Markdown==2.6.11 | ||
MarkupSafe==1.0 | ||
matplotlib==2.1.2 | ||
mistune==0.8.3 | ||
mock==2.0.0 | ||
nbconvert==5.3.1 | ||
nbformat==4.4.0 | ||
notebook==5.5.0 | ||
numba==0.40.1 | ||
numpy==1.14.3 | ||
pandocfilters==1.4.2 | ||
pathlib2==2.3.2 | ||
pbr==3.1.1 | ||
pexpect==4.6.0 | ||
pickleshare==0.7.4 | ||
Pillow==5.0.0 | ||
prompt-toolkit==1.0.15 | ||
protobuf==3.5.1 | ||
ptyprocess==0.6.0 | ||
Pygments==2.2.0 | ||
PyMySQL==0.8.0 | ||
pyparsing==2.2.0 | ||
python-dateutil==2.6.1 | ||
pytz==2018.3 | ||
PyYAML==3.12 | ||
pyzmq==17.0.0 | ||
qtconsole==4.3.1 | ||
requests==2.19.1 | ||
s3transfer==0.1.13 | ||
scandir==1.7 | ||
scikit-learn==0.19.1 | ||
scipy==1.1.0 | ||
Send2Trash==1.5.0 | ||
simplegeneric==0.8.1 | ||
singledispatch==3.4.0.3 | ||
six==1.11.0 | ||
sklearn==0.0 | ||
smart-open==1.6.0 | ||
subprocess32==3.2.7 | ||
tensorflow==1.5.0 | ||
tensorflow-tensorboard==1.5.1 | ||
terminado==0.8.1 | ||
testpath==0.3.1 | ||
tornado==5.0.2 | ||
traitlets==4.3.2 | ||
umap-learn==0.3.5 | ||
urllib3==1.23 | ||
wcwidth==0.1.7 | ||
Werkzeug==0.14.1 | ||
widgetsnbextension==3.2.1 |
Oops, something went wrong.