In [1]:
import pandas as pd
import toml
import os

In [2]:
# Project settings are read from a TOML file resolved against the current
# working directory.
config = toml.load('config.toml')

# Sample identifiers used in this notebook.
sample_id = [
    'gt-iz-p9-rep2',
    'gt-iz-p7-rep2',
]

# File extension of each data field, keyed by field name.
data_extension = dict([
    ('cell', 'h5ad'),
    ('cell-gene', 'npz'),
    ('cell-gene-name', 'txt'),
    ('cell-info', 'csv'),
    ('cell-mask', 'npz'),
    ('wsi-img', 'tiff'),
    ('spot', 'h5ad'),
    ('spot-gene', 'npz'),
    ('spot-gene-name', 'txt'),
    ('spot-info', 'csv'),
])

# File extension of each cache field, keyed by field name: the plain image
# fields are PNG, the 'gis-' prefixed ones are TIFF.
cache_extension = {
    **dict.fromkeys(
        [
            'blend-cell-gene-img',
            'blend-cell-type-img',
            'blend-spot-gene-img',
            'mask-cell-gene-img',
            'mask-cell-type-img',
            'circle-spot-gene-img',
        ],
        'png',
    ),
    **dict.fromkeys(
        [
            'gis-blend-cell-gene-img',
            'gis-blend-cell-type-img',
            'gis-blend-spot-gene-img',
        ],
        'tiff',
    ),
}

In [3]:
class AristotleDataset:
    """Build file paths for the data and cache fields of known samples.

    A path is ``os.path.join(directory, '<primary_key>-<field>.<extension>')``
    where the extension is looked up by field name in the mapping supplied at
    construction time. Paths are only assembled; nothing is opened or checked
    for existence.
    """

    def __init__(self, data_dir, data_extension, cache_dir, cache_extension, primary_key_list):
        """Store the directories, extension mappings and accepted keys.

        Args:
            data_dir: Directory prefix for data field paths.
            data_extension: Mapping of data field name to file extension.
            cache_dir: Directory prefix for cache field paths.
            cache_extension: Mapping of cache field name to file extension.
            primary_key_list: Collection of primary keys accepted by the
                ``get_*_field`` methods.
        """
        self.data_dir = data_dir
        self.cache_dir = cache_dir
        self.data_extension = data_extension
        self.cache_extension = cache_extension
        self.primary_key_list = primary_key_list

    def _require_known_key(self, primary_key):
        """Raise ValueError unless ``primary_key`` is in ``primary_key_list``."""
        if primary_key in self.primary_key_list:
            return
        raise ValueError('Bad input primary key')

    def _unparse_filename(self, primary_key, field_name, extension):
        """Return ``'<primary_key>-<field_name>.<extension>'``."""
        return '.'.join(('-'.join((primary_key, field_name)), extension))

    def get_data_field(self, primary_key, data_field):
        """Return the path of ``data_field`` for ``primary_key`` under ``data_dir``.

        Raises:
            ValueError: If ``primary_key`` is not in ``primary_key_list``.
            KeyError: If ``data_field`` is missing from ``data_extension``
                (assuming a dict-like mapping).
        """
        self._require_known_key(primary_key)
        ext = self.data_extension[data_field]
        name = self._unparse_filename(primary_key, data_field, ext)
        return os.path.join(self.data_dir, name)

    def get_cache_field(self, primary_key, cache_field):
        """Return the path of ``cache_field`` for ``primary_key`` under ``cache_dir``.

        Raises:
            ValueError: If ``primary_key`` is not in ``primary_key_list``.
            KeyError: If ``cache_field`` is missing from ``cache_extension``
                (assuming a dict-like mapping).
        """
        self._require_known_key(primary_key)
        ext = self.cache_extension[cache_field]
        name = self._unparse_filename(primary_key, cache_field, ext)
        return os.path.join(self.cache_dir, name)

In [4]:
# Build the path resolver from the configured data and cache directories and
# the extension tables defined above.
dataset = AristotleDataset(
    data_dir=config['path']['data'],
    data_extension=data_extension,
    cache_dir=config['path']['cache'],
    cache_extension=cache_extension,
    primary_key_list=sample_id,
)

# Sanity check: show the resolved path of the 'cell' data field for one sample.
print(dataset.get_data_field('gt-iz-p9-rep2', 'cell'))

/home/tom/github/niceview/data/gt-iz-p9-rep2-cell.h5ad
