In [1]:
import glob
import itertools
import os
import yaml
from copy import deepcopy

import numpy as np


In [2]:
# Model family; used as the sub-directory name under "configs" and
# "configs/generated".
MODEL_NAME = "ADDA"

# File name of the template config that the generated variants are derived from.
CONFIG_FNAME = "standard_bnfix_adam_beta1_5_spotless_sc.yml"

In [3]:
# Load the template config that the generated variants are copied from.
# The file is opened as UTF-8 explicitly so parsing does not depend on the
# platform's default text encoding (the list file written at the end of this
# notebook is also UTF-8).
config_path = os.path.join("configs", MODEL_NAME, CONFIG_FNAME)
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Echo the parsed config back as YAML for a quick visual check.
print(yaml.safe_dump(config))

data_params:
  all_genes: false
  data_dir: data
  dset: mouse_cortex
  n_markers: 20
  n_mix: 10
  n_spots: 2000
  samp_split: true
  sample_id_n: '151673'
  sc_id: GSE115746
  scaler_name: standard
  st_id: spotless_mouse_cortex
  st_split: false
lib_params:
  manual_seed: 12865
model_params:
  adda_kwargs:
    bn_momentum: 0.01
    emb_dim: 64
  model_version: standard_bnfix_adam_beta1_5_spotless_sc
train_params:
  adam_beta1: 0.5
  alpha: 2
  batch_size: 16
  dis_loop_factor: 5
  early_stop_crit: 100
  early_stop_crit_adv: 200
  enc_lr: 2.0e-05
  epochs: 200
  initial_train_epochs: 100
  initial_train_lr: 0.0002
  min_epochs: 40.0
  min_epochs_adv: 80.0
  pretraining: true



In [4]:
# Point the base config at the dlpfc dataset settings. Keys are listed in the
# same order as before so that any newly inserted key lands in the same position.
config["data_params"].update(
    {
        "all_genes": False,
        "data_dir": "data",
        "dset": "dlpfc",
        "sc_id": "GSE144136",
        "st_id": "spatialLIBD",
        "st_split": False,
        "samp_split": True,
    }
)

# Override the epoch count from the loaded template (which shows 200).
config["train_params"].update({"epochs": 500})

In [5]:
# ## CellDART

# # data_params
# n_markers_l = [5, 10, 20, 40, 80]
# n_mix_l =  [5, 8, 10, 15, 20]
# n_spots_l = [5000, 10000, 20000, 50000, 100000, 200000]
# scaler_name_l = ["minmax", "standard"]

# # model_params
# bn_momentum_l = [0.01, 0.1, 0.9, 0.99]


# # train_params
# alpha_l =  [0.1, 0.6, 1.0, 2.0]
# alpha_lr_l = [1, 2, 5, 10]
# batch_size_l = [256, 512, 1024]
# lr_l =[0.01, 0.001, 0.0001]


In [6]:
## ADDA

# Candidate values per hyper-parameter. Key order and value order both matter:
# the permutation index used when generating configs is derived from them.

# data_params
data_params_lists = {
    "n_markers": [5, 10, 20, 40, 80],
    "n_mix": [5, 8, 10, 15, 20],
    "n_spots": [5000, 10000, 20000, 50000, 100000, 200000],
    "scaler_name": ["standard"],
}

# model_params
model_params_lists = {
    "bn_momentum": [0.1, 0.01],
    "emb_dim": [32, 64, 128],
}

# train_params
train_params_lists = {
    "adam_beta1": [0.5, 0.9],
    "alpha": [1, 2, 5],
    "batch_size": [512, 1024, 2048],
    "dis_loop_factor": [2, 5, 10],
    "enc_lr": [0.000002, 0.00002, 0.0002],
    "initial_train_lr": [0.0001, 0.0002, 0.001],
    "initial_train_epochs": [50, 100, 200],
}

In [7]:
# Size of the full hyper-parameter grid: the product of the number of
# candidate values over every parameter in the three groups.
total_configs = 1
for candidates in itertools.chain(
    data_params_lists.values(),
    model_params_lists.values(),
    train_params_lists.values(),
):
    total_configs *= len(candidates)
total_configs

1312200

In [8]:
# Display the base config after the overrides applied earlier in the notebook.
config

{'data_params': {'all_genes': False,
  'data_dir': 'data',
  'dset': 'dlpfc',
  'n_markers': 20,
  'n_mix': 10,
  'n_spots': 2000,
  'sample_id_n': '151673',
  'scaler_name': 'standard',
  'sc_id': 'GSE144136',
  'st_id': 'spatialLIBD',
  'st_split': False,
  'samp_split': True},
 'model_params': {'adda_kwargs': {'bn_momentum': 0.01, 'emb_dim': 64},
  'model_version': 'standard_bnfix_adam_beta1_5_spotless_sc'},
 'lib_params': {'manual_seed': 12865},
 'train_params': {'adam_beta1': 0.5,
  'alpha': 2,
  'batch_size': 16,
  'dis_loop_factor': 5,
  'early_stop_crit': 100,
  'early_stop_crit_adv': 200,
  'enc_lr': 2e-05,
  'epochs': 500,
  'initial_train_lr': 0.0002,
  'initial_train_epochs': 100,
  'min_epochs': 40.0,
  'min_epochs_adv': 80.0,
  'pretraining': True}}

In [9]:
def _expand_grid(param_lists):
    """Return one dict per combination of the candidate values in ``param_lists``.

    Args:
        param_lists: mapping of parameter name -> list of candidate values.

    Returns:
        A list of ``{name: value}`` dicts in ``itertools.product`` order: keys
        keep the mapping's insertion order and the last key varies fastest.
    """
    names = list(param_lists)
    return [
        dict(zip(names, combo))
        for combo in itertools.product(*param_lists.values())
    ]


# Expand each parameter group into the list of its concrete combinations.
data_params_l = _expand_grid(data_params_lists)
model_params_l = _expand_grid(model_params_lists)
train_params_l = _expand_grid(train_params_lists)

# Guard against stale notebook state: the sampled indices below are only
# meaningful if `total_configs` matches the grid that is actually enumerated.
n_grid = len(data_params_l) * len(model_params_l) * len(train_params_l)
if n_grid != total_configs:
    raise ValueError(
        f"total_configs={total_configs} does not match the grid size {n_grid}; "
        "re-run the cell that computes total_configs"
    )

# Seeded generator so the sampled subset and the per-config seeds are
# reproducible from run to run.
rng = np.random.default_rng(3853)

# Flat grid indices of the 1000 configurations to write, drawn without
# replacement.
yes_samples = set(rng.choice(total_configs, size=1000, replace=False))

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
out_dir = os.path.join("configs/generated", MODEL_NAME)
os.makedirs(out_dir, exist_ok=True)

for i, (data_params, model_params, train_params) in enumerate(
    itertools.product(data_params_l, model_params_l, train_params_l)
):
    if i not in yes_samples:
        continue

    # Start from a deep copy so the nested dicts of the base config stay untouched.
    new_config = deepcopy(config)
    new_config["data_params"].update(data_params)
    new_config["model_params"]["adda_kwargs"].update(model_params)
    new_config["train_params"].update(train_params)

    # Each written config gets its own seed, drawn from the same generator;
    # int() converts the NumPy scalar so yaml.safe_dump can serialise it.
    new_config["lib_params"]["manual_seed"] = int(rng.integers(0, 2**32))

    # The flat grid index doubles as the version name and the file name.
    version = f"gen_v1_perm_{i}"
    new_config["model_params"]["model_version"] = version

    with open(os.path.join(out_dir, f"{version}.yml"), "w", encoding="utf-8") as f:
        yaml.safe_dump(new_config, f)

In [10]:
# Display the last config assigned inside the generation loop, as a spot check.
new_config


{'data_params': {'all_genes': False,
  'data_dir': 'data',
  'dset': 'dlpfc',
  'n_markers': 80,
  'n_mix': 20,
  'n_spots': 200000,
  'sample_id_n': '151673',
  'scaler_name': 'standard',
  'sc_id': 'GSE144136',
  'st_id': 'spatialLIBD',
  'st_split': False,
  'samp_split': True},
 'model_params': {'adda_kwargs': {'bn_momentum': 0.01, 'emb_dim': 128},
  'model_version': 'gen_v1_perm_1312012'},
 'lib_params': {'manual_seed': 1378592176},
 'train_params': {'adam_beta1': 0.9,
  'alpha': 5,
  'batch_size': 512,
  'dis_loop_factor': 10,
  'early_stop_crit': 100,
  'early_stop_crit_adv': 200,
  'enc_lr': 2e-06,
  'epochs': 500,
  'initial_train_lr': 0.0001,
  'initial_train_epochs': 100,
  'min_epochs': 40.0,
  'min_epochs_adv': 80.0,
  'pretraining': True}}

In [11]:
# Collect the file names of every YAML config in the output directory.
# glob returns paths in arbitrary, filesystem-dependent order, so the names
# are sorted to make the list file identical across runs and machines.
lines = sorted(
    os.path.basename(name)
    for name in glob.glob(os.path.join("configs/generated", MODEL_NAME, "*.yml"))
)

# Write one file name per line, with a trailing newline after the last entry.
with open(
    os.path.join("configs/generated", MODEL_NAME, "a_list.txt"),
    mode="wt",
    encoding="utf-8",
) as myfile:
    myfile.write("\n".join(lines))
    myfile.write("\n")