Skip to content

Commit

Permalink
fixed cache for filled
Browse files Browse the repository at this point in the history
  • Loading branch information
zommiommy committed Jan 14, 2020
1 parent 629d585 commit a42d7cd
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 51 deletions.
145 changes: 100 additions & 45 deletions Example.ipynb
Expand Up @@ -32,7 +32,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
"\r",
"loading /tmp/hg19/200/115c6522363239a82561d6e6dddd228ae9623415000f7c6d2f72df5ca88cee26_filled.pkl\n",
"loading /tmp/hg19/200/115c6522363239a82561d6e6dddd228ae9623415000f7c6d2f72df5ca88cee26_tasselized.pkl\n",
"loading /tmp/hg19/200/115c6522363239a82561d6e6dddd228ae9623415000f7c6d2f72df5ca88cee26_sequences.pkl\n"
]
},
{
Expand All @@ -43,7 +46,7 @@
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Rendering gaps in hg19', max=2, style=ProgressStyle(descripti"
"HBox(children=(IntProgress(value=0, description='Groupping Train windows', max=4, style=ProgressStyle(descript"
]
},
"metadata": {},
Expand All @@ -64,7 +67,7 @@
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Gropping Train windows', max=2, style=ProgressStyle(descripti…"
"HBox(children=(IntProgress(value=0, description='Groupping Test windows', max=4, style=ProgressStyle(descripti…"
]
},
"metadata": {},
Expand All @@ -74,28 +77,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Gropping Test windows', max=2, style=ProgressStyle(descriptio…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r"
"\r",
"loading /tmp/hg19/200/115c6522363239a82561d6e6dddd228ae9623415000f7c6d2f72df5ca88cee26_gap_mask.pkl\n"
]
}
],
Expand All @@ -106,6 +89,7 @@
" batch_size=3,\n",
" buffer_size=5,\n",
" train_chromosomes=[\"chrM\",\"chr18\"],\n",
" test_chromosomes=[\"chr16\",\"chr17\"],\n",
" cache_dir=\"/tmp\",\n",
" n_type=\"uniform\"\n",
")"
Expand All @@ -119,61 +103,132 @@
{
"data": {
"text/plain": [
"(array([[[0., 0., 1., 0.],\n",
" [1., 0., 0., 0.],\n",
"(array([[[1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 0., 1., 0.],\n",
" ...,\n",
" [0., 1., 0., 0.],\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 0., 1.],\n",
" [1., 0., 0., 0.]],\n",
" [0., 0., 0., 1.]],\n",
" \n",
" [[0., 0., 1., 0.],\n",
" [0., 1., 0., 0.],\n",
" [0., 1., 0., 0.],\n",
" ...,\n",
" [0., 0., 1., 0.],\n",
" [1., 0., 0., 0.],\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 1., 0.]],\n",
" \n",
" [[1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 1., 0., 0.],\n",
" ...,\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 0., 0., 1.]]]), array([[[1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 0., 1., 0.],\n",
" ...,\n",
" [0., 1., 0., 0.],\n",
" [1., 0., 0., 0.]],\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 0., 1.]],\n",
" \n",
" [[0., 0., 0., 1.],\n",
" [1., 0., 0., 0.],\n",
" [1., 0., 0., 0.],\n",
" [[0., 0., 1., 0.],\n",
" [0., 1., 0., 0.],\n",
" [0., 1., 0., 0.],\n",
" ...,\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 1., 0.]],\n",
" \n",
" [[1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 1., 0., 0.],\n",
" ...,\n",
" [0., 0., 1., 0.],\n",
" [1., 0., 0., 0.]]]), array([[[0., 0., 1., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 0., 0., 1.]]]))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(data_generator.train())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[[0., 0., 0., 1.],\n",
" [1., 0., 0., 0.],\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 0., 1.],\n",
" ...,\n",
" [1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 1., 0., 0.]],\n",
" \n",
" [[1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 0., 0., 1.],\n",
" ...,\n",
" [0., 0., 1., 0.],\n",
" [0., 0., 0., 1.],\n",
" [1., 0., 0., 0.]],\n",
" [0., 1., 0., 0.]],\n",
" \n",
" [[0., 0., 1., 0.],\n",
" [[0., 1., 0., 0.],\n",
" [0., 1., 0., 0.],\n",
" [1., 0., 0., 0.],\n",
" ...,\n",
" [0., 1., 0., 0.],\n",
" [0., 0., 1., 0.],\n",
" [0., 1., 0., 0.]]]), array([[[0., 0., 0., 1.],\n",
" [1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" ...,\n",
" [1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 1., 0., 0.]],\n",
" \n",
" [[1., 0., 0., 0.],\n",
" [0., 0., 0., 1.],\n",
" [0., 0., 0., 1.],\n",
" ...,\n",
" [0., 0., 1., 0.],\n",
" [0., 1., 0., 0.],\n",
" [1., 0., 0., 0.]],\n",
" [0., 0., 0., 1.],\n",
" [0., 1., 0., 0.]],\n",
" \n",
" [[0., 0., 0., 1.],\n",
" [1., 0., 0., 0.],\n",
" [[0., 1., 0., 0.],\n",
" [0., 1., 0., 0.],\n",
" [1., 0., 0., 0.],\n",
" ...,\n",
" [0., 1., 0., 0.],\n",
" [0., 0., 1., 0.],\n",
" [1., 0., 0., 0.]]]))"
" [0., 1., 0., 0.]]]))"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(data_generator.train())"
"next(data_generator.test())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion genome_windows_generator/__version__.py
@@ -1,2 +1,2 @@
"""Current version of package genome_windows_generator"""
__version__ = "1.0.0"
__version__ = "1.0.1"
1 change: 1 addition & 0 deletions genome_windows_generator/decorators.py
Expand Up @@ -7,6 +7,7 @@ def cache_decorator(f):
def wrapped_method(self, *args, **kwargs):
path = path_format.format(**{**vars(self), **kwargs})
if os.path.exists(path):
print(f"loading {path}")
value = load(path)
return value
result = f(self, *args, **kwargs)
Expand Down
10 changes: 7 additions & 3 deletions genome_windows_generator/genome_windows_generator.py
Expand Up @@ -111,7 +111,7 @@ def __init__(self,
self.compile()

def compile(self):
filled = self.genome.filled(chromosomes=self.chromosomes)
filled = self._filled()
windows = self._tasselize_windows(filled, self.window_size)
sequences = self._encode_sequences(windows)

Expand All @@ -130,7 +130,7 @@ def _train_test_split(self, sequences):
sequences[chrom].sequence.tolist()
for chrom in tqdm(
self.chromosomes,
desc="Gropping Train windows",
desc="Groupping Train windows",
leave=False
)
if chrom not in self.test_chromosomes
Expand All @@ -141,7 +141,7 @@ def _train_test_split(self, sequences):
sequences[chrom].sequence.tolist()
for chrom in tqdm(
self.chromosomes,
desc="Gropping Test windows",
desc="Groupping Test windows",
leave=False
)
if chrom in self.test_chromosomes
Expand All @@ -152,6 +152,10 @@ def _train_test_split(self, sequences):
def __len__(self):
    """Return the number of whole training batches.

    Computed as the length of ``self._windows_train`` floor-divided by
    ``self.batch_size``, so a trailing partial batch is not counted.
    """
    return len(self._windows_train) // self.batch_size

# Thin wrapper around ``self.genome.filled`` so that the call goes through
# the ``cache_method`` decorator, using the "..._filled.pkl" path format below.
# NOTE(review): presumably the decorator loads the pickle when the path
# already exists and otherwise computes the result — confirm in decorators.py.
@cache_method("{_cache_directory}/{instance_hash}_filled.pkl")
def _filled(self):
    """Return ``self.genome.filled`` for ``self.chromosomes``."""
    return self.genome.filled(chromosomes=self.chromosomes)

@cache_method("{_cache_directory}/{instance_hash}_gap_mask.pkl")
def _render_gaps(self):
# Compute
Expand Down
7 changes: 5 additions & 2 deletions makefile
Expand Up @@ -6,6 +6,9 @@ install:
test:
~/anaconda3/bin/pytest -s --cov genome_windows_generator --cov-report html

publish:
build:
~/anaconda3/bin/python setup.py sdist
~/anaconda3/bin/twine upload $PATH

publish:
echo "Uploading ./dist/$$(ls ./dist | grep .tar.gz | sort | tail -n 1)"
twine upload "./dist/$$(ls ./dist | grep .tar.gz | sort | tail -n 1)"

0 comments on commit a42d7cd

Please sign in to comment.