-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
465 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Copyright 2016-2020 The Van Valen Lab at the California Institute of | ||
# Technology (Caltech), with support from the Paul Allen Family Foundation, | ||
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
import pandas as pd | ||
import numpy as np | ||
|
||
|
||
def make_experiment_metadata_file(raw_metadata, image_names):
    """Build the per-image metadata table for a single experiment

    Args:
        raw_metadata: metadata from the raw ontology; its 'PROJECT_ID' and
            'EXPERIMENT_ID' entries are copied into the table
        image_names: names of images that are being processed

    Returns:
        pd.DataFrame: table with the columns PROJECT_ID, EXPERIMENT_ID,
            image_name, job_folder, job_id and status
    """
    project_id = raw_metadata['PROJECT_ID']
    experiment_id = raw_metadata['EXPERIMENT_ID']

    # Only image_name is list-like; pandas repeats the scalar entries for
    # every image. New images start without a job and await prediction.
    columns = {
        'PROJECT_ID': project_id,
        'EXPERIMENT_ID': experiment_id,
        'image_name': image_names,
        'job_folder': 'NA',
        'job_id': 'NA',
        'status': 'awaiting_prediction',
    }

    return pd.DataFrame(columns)
|
||
|
||
def update_job_metadata(metadata, update_dict):
    """Updates the metadata for a specific job

    Args:
        metadata: the metadata table to be updated; needs the columns
            'image_name' and 'status'. It is modified in place.
        update_dict: dictionary whose 'included' and 'excluded' entries hold
            the names of the images that receive that status

    Returns:
        pd.DataFrame: updated metadata file (the same object passed in)

    Raises:
        ValueError: if an image is listed as both included and excluded, or
            if a listed image does not currently have status 'in_progress'.
            Nothing is modified when an error is raised.
    """

    # TODO: check that these images belong to specific job
    # TODO: figure out workflow for remaining in progress jobs

    in_progress = metadata.loc[metadata.status == 'in_progress', 'image_name']

    # atleast_1d keeps a single name passed as a bare string working
    included = np.atleast_1d(update_dict['included'])
    excluded = np.atleast_1d(update_dict['excluded'])

    # an image can only receive one final status; without this check the
    # 'excluded' assignment below would silently win
    overlap = sorted(str(name) for name in set(included).intersection(excluded))
    if overlap:
        raise ValueError('Invalid fovs supplied: {} listed as both included '
                         'and excluded'.format(overlap))

    # make sure supplied excluded and included images are in progress for this job
    for names in (included, excluded):
        valid = np.isin(names, in_progress)
        if not np.all(valid):
            invalid = [str(name) for name in names[~valid]]
            raise ValueError('Invalid fovs supplied: {} not in progress'.format(invalid))

    metadata.loc[np.isin(metadata.image_name, included), 'status'] = 'included'
    metadata.loc[np.isin(metadata.image_name, excluded), 'status'] = 'excluded'

    return metadata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Copyright 2016-2020 The Van Valen Lab at the California Institute of | ||
# Technology (Caltech), with support from the Paul Allen Family Foundation, | ||
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
import numpy as np | ||
|
||
from caliban_toolbox import metadata | ||
import importlib | ||
importlib.reload(metadata) | ||
|
||
|
||
def _make_raw_metadata():
    """Create a stand-in raw metadata record with random integer ids"""
    # drawn in this order so a seeded RNG yields the same ids as before
    project_id = np.random.randint(1, 100)
    experiment_id = np.random.randint(1, 100)

    return {'PROJECT_ID': project_id, 'EXPERIMENT_ID': experiment_id}
|
||
|
||
def _make_fov_ids(num_fovs):
    """Generate unique, randomly numbered FOV names for use in tests

    Args:
        num_fovs: number of FOV names to generate

    Returns:
        list: num_fovs distinct strings of the form 'fov_<n>', with n drawn
            from 1 to num_fovs * 10 - 1
    """
    # Sample without replacement: duplicated names would make several rows
    # indistinguishable to name-based lookups. Shuffling the candidate pool
    # also handles num_fovs == 0, where randint raises on its empty range.
    candidates = np.random.permutation(np.arange(1, num_fovs * 10))

    return ['fov_{}'.format(i) for i in candidates[:num_fovs]]
|
||
|
||
def test_make_experiment_metadata_file():
    """The experiment table carries the raw ids and lists every image name"""
    raw = _make_raw_metadata()
    fovs = _make_fov_ids(10)
    table = metadata.make_experiment_metadata_file(raw, fovs)

    # only the first row is inspected for the project and experiment ids
    for key in ('PROJECT_ID', 'EXPERIMENT_ID'):
        assert table.loc[0, key] == raw[key]

    # every supplied image name has to show up in the table
    names_present = np.isin(fovs, table['image_name'])
    assert np.all(names_present)
|
||
|
||
def test_update_job_metadata():
    """Included and excluded images change status; the rest stay in progress"""
    raw_metadata = _make_raw_metadata()
    image_names = _make_fov_ids(10)
    experiment_metadata = metadata.make_experiment_metadata_file(raw_metadata, image_names)
    # pretend every image has already been sent out as part of a job
    experiment_metadata['status'] = 'in_progress'

    included_images = image_names[:6]
    excluded_images = image_names[6:8]
    in_process = image_names[8:]

    updated_metadata = metadata.update_job_metadata(metadata=experiment_metadata,
                                                    update_dict={'included': included_images,
                                                                 'excluded': excluded_images})

    pred_included = updated_metadata.loc[updated_metadata.status == 'included', 'image_name']
    assert np.all(np.isin(pred_included, included_images))

    pred_excluded = updated_metadata.loc[updated_metadata.status == 'excluded', 'image_name']
    assert np.all(np.isin(pred_excluded, excluded_images))

    # Untouched images keep the 'in_progress' status assigned above. Filtering
    # on 'awaiting_prediction' would select nothing, so the check could never
    # fail.
    pred_in_progress = updated_metadata.loc[updated_metadata.status == 'in_progress',
                                            'image_name']

    assert np.all(np.isin(pred_in_progress, in_process))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# Copyright 2016-2020 The Van Valen Lab at the California Institute of | ||
# Technology (Caltech), with support from the Paul Allen Family Foundation, | ||
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import xarray as xr | ||
|
||
from caliban_toolbox import metadata | ||
from caliban_toolbox.utils.pipeline_utils import get_job_folder_name | ||
|
||
|
||
def create_experiment_folder(image_names, raw_metadata, base_dir):
    """Set up the folder for a new experiment and write its metadata file

    Args:
        image_names: names of images from current experiment
        raw_metadata: metadata from raw ontology; must provide 'EXPERIMENT_ID'
        base_dir: directory where experiment folder will be created

    Returns:
        string: full path to newly created experiment folder
    """
    folder_name = 'experiment_{}'.format(raw_metadata['EXPERIMENT_ID'])
    experiment_folder = os.path.join(base_dir, folder_name)

    # os.makedirs raises if the experiment folder already exists
    os.makedirs(experiment_folder)

    # build the experiment's metadata table and save it inside the new folder
    metadata_path = os.path.join(experiment_folder, 'metadata.csv')
    metadata.make_experiment_metadata_file(raw_metadata, image_names).to_csv(metadata_path)

    return experiment_folder
|
||
|
||
def create_job_folder(experiment_dir, metadata, fov_data, fov_names, fov_num):
    """Creates a folder to hold a single caliban job

    The first fov_num images still awaiting prediction are assigned to the
    job: their data is saved in the job folder and the metadata table is
    updated in place and rewritten to the experiment directory.

    Args:
        experiment_dir: directory of relevant experiment
        metadata: experiment metadata table with the columns 'image_name',
            'status' and 'job_folder'
        fov_data: image data, indexed along its first axis by a boolean mask
            computed from fov_names
        fov_names: image names; presumably one per entry of fov_data, in the
            same order (TODO confirm with callers)
        fov_num: number of FOVs to include in job
    """

    # Create sequentially named job folder
    job_folder_path, job_name = get_job_folder_name(experiment_dir)
    os.makedirs(job_folder_path)

    # pick the first fov_num images that have not been assigned to a job yet
    awaiting = metadata['status'] == 'awaiting_prediction'
    new_fov_names = metadata.loc[awaiting, 'image_name'].iloc[:fov_num].values

    # mark the selected images as belonging to this job
    selected_rows = metadata['image_name'].isin(new_fov_names)
    metadata.loc[selected_rows, ['status', 'job_folder']] = 'in_progress', job_name

    # extract the data of the selected images and save it with the job
    new_fov_data = fov_data[np.isin(fov_names, new_fov_names)]
    raw_data_path = os.path.join(job_folder_path, 'raw_data.npz')
    np.savez(raw_data_path, X=new_fov_data)

    metadata_path = os.path.join(experiment_dir, 'metadata.csv')
    metadata.to_csv(metadata_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Copyright 2016-2020 The Van Valen Lab at the California Institute of | ||
# Technology (Caltech), with support from the Paul Allen Family Foundation, | ||
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
import os | ||
import tempfile | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import xarray as xr | ||
|
||
from caliban_toolbox import pipeline | ||
import importlib | ||
importlib.reload(pipeline) | ||
|
||
|
||
def _make_raw_metadata():
    """Create a stand-in raw metadata record with random integer ids"""
    # drawn in this order so a seeded RNG yields the same ids as before
    project_id = np.random.randint(1, 100)
    experiment_id = np.random.randint(1, 100)

    return {'PROJECT_ID': project_id, 'EXPERIMENT_ID': experiment_id}
|
||
|
||
def _make_fov_ids(num_fovs):
    """Generate unique, randomly numbered FOV names for use in tests

    Args:
        num_fovs: number of FOV names to generate

    Returns:
        list: num_fovs distinct strings of the form 'fov_<n>', with n drawn
            from 1 to num_fovs * 10 - 1
    """
    # Sample without replacement: duplicated names would make several rows
    # indistinguishable to name-based lookups. Shuffling the candidate pool
    # also handles num_fovs == 0, where randint raises on its empty range.
    candidates = np.random.permutation(np.arange(1, num_fovs * 10))

    return ['fov_{}'.format(i) for i in candidates[:num_fovs]]
|
||
|
||
def _make_exp_metadata(num_fovs):
    """Build an experiment metadata table in which every image awaits prediction

    Args:
        num_fovs: number of images (rows) to generate

    Returns:
        pd.DataFrame: table with the columns image_name, EXPERIMENT_ID,
            status and job_folder
    """
    # names are generated before the ids, keeping the RNG call order stable
    image_names = _make_fov_ids(num_fovs)
    experiment_id = _make_raw_metadata()['EXPERIMENT_ID']

    columns = {
        'image_name': image_names,
        'EXPERIMENT_ID': experiment_id,
        'status': 'awaiting_prediction',
        'job_folder': 'NA',
    }

    return pd.DataFrame(columns)
|
||
|
||
def test_create_experiment_folder():
    """The experiment folder holds a metadata.csv describing the given images"""
    fovs = _make_fov_ids(10)
    raw = _make_raw_metadata()

    with tempfile.TemporaryDirectory() as base_dir:
        folder = pipeline.create_experiment_folder(image_names=fovs,
                                                   raw_metadata=raw,
                                                   base_dir=base_dir)

        # read the file back while the temporary directory still exists
        saved = pd.read_csv(os.path.join(folder, 'metadata.csv'))

        saved_names_known = np.isin(saved['image_name'], fovs)
        assert np.all(saved_names_known)
        assert saved.loc[0, 'EXPERIMENT_ID'] == raw['EXPERIMENT_ID']
|
||
|
||
def test_create_job_folder():
    """FOVs assigned to a new job are marked 'in_progress' in the saved metadata"""
    exp_metadata = _make_exp_metadata(10)
    fov_names = exp_metadata['image_name'].values
    num_in_job = 7
    fov_data = np.zeros((len(fov_names), 20, 20, 3))

    with tempfile.TemporaryDirectory() as experiment_dir:
        pipeline.create_job_folder(experiment_dir, exp_metadata, fov_data, fov_names, num_in_job)

        # read the file back while the temporary directory still exists
        saved = pd.read_csv(os.path.join(experiment_dir, 'metadata.csv'))

        # all images start out awaiting prediction, so the first num_in_job
        # names are the ones expected to be picked for the job
        in_job = np.isin(saved.image_name, fov_names[:num_in_job])
        job_status = saved.loc[in_job, 'status']

        assert np.all(np.isin(job_status, 'in_progress'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Copyright 2016-2020 The Van Valen Lab at the California Institute of | ||
# Technology (Caltech), with support from the Paul Allen Family Foundation, | ||
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
import os | ||
import json | ||
|
||
import numpy as np | ||
|
||
from caliban_toolbox import metadata | ||
|
||
|
||
def get_job_folder_name(experiment_dir):
    """Identify the name for next sequentially named job folder

    Only directories named exactly 'caliban_job_<number>' are counted; plain
    files and other names that merely contain the prefix are ignored.

    Args:
        experiment_dir: full path to directory of current experiment

    Returns:
        string: full path of the next job folder (it is not created here)
        string: name of the job folder
    """
    prefix = 'caliban_job_'

    job_nums = []
    for entry in os.listdir(experiment_dir):
        suffix = entry[len(prefix):]
        if not (entry.startswith(prefix) and suffix.isdecimal()):
            continue
        if os.path.isdir(os.path.join(experiment_dir, entry)):
            job_nums.append(int(suffix))

    # Compare the numbers, not the names: sorted as strings, 'caliban_job_10'
    # comes before 'caliban_job_9', which would hand out an existing name.
    next_num = max(job_nums) + 1 if job_nums else 0

    new_folder = '{}{}'.format(prefix, next_num)
    new_folder_path = os.path.join(experiment_dir, new_folder)

    return new_folder_path, new_folder
Oops, something went wrong.