In [19]:
import glob
import itertools
import os
import yaml
from copy import deepcopy

import numpy as np


In [20]:
# Naming constants for the generated config files.
# Earlier single-file name, kept for reference only (not used below):
# CONFIG_FNAME = "standard_bnfix_adam_beta1_5_spotless_sc.yml"
# Sub-directory of CONFIG_DIR that the configs are written into.
MODEL_NAME = "ADDA"
# Root output directory; files end up in CONFIG_DIR/MODEL_NAME/.
CONFIG_DIR = "configs/generated_spotless"
# Stem of every generated file: "<prefix>.yml" for the base config and
# "<prefix>-<grid index>.yml" for each sampled config. Also used as the
# prefix of each config's "model_version".
CONFIG_FNAME_PREFIX = "gen_spotless_oracle"

In [21]:
# Base configuration shared by every generated file. Each sampled config
# starts from a deep copy of this dict and layers the swept hyperparameters
# on top. Key insertion order is kept as-is so the notebook's displayed
# repr of `config` does not change (yaml.safe_dump sorts keys on its own).
config = {
    "lib_params": {},
    "data_params": {
        "all_genes": False,
        "data_dir": "data",
        "dset": "mouse_cortex",
        "n_spots": 100000,
        "samp_split": True,
        "sc_id": "GSE115746",
        "scaler_name": "standard",
        "st_id": "spotless_mouse_cortex",
        "st_split": False,
    },
    "train_params": {
        "epochs": 20,
        "batch_size": 8,
        "initial_train_epochs": 200,
        "adam_beta1": 0.5,
        "reverse_val": False,
        "pretraining": True,
    },
    "model_params": {
        "adda_kwargs": {
            # Original note: "setting high bc of large batch size".
            # NOTE(review): batch_size above is 8 -- confirm this rationale
            # (and the 0.1 value) is still what is intended.
            "bn_momentum": 0.1,
        },
    },
}

# Create the output directory if needed. exist_ok=True replaces the earlier
# "check, then create" pair, which could fail if the directory appeared
# between the check and the makedirs call.
config_out_dir = os.path.join(CONFIG_DIR, MODEL_NAME)
os.makedirs(config_out_dir, exist_ok=True)

# Write the un-swept base config as "<prefix>.yml" (no "-<index>" suffix).
with open(os.path.join(config_out_dir, f"{CONFIG_FNAME_PREFIX}.yml"), "w") as f:
    yaml.safe_dump(config, f)

In [22]:
## ADDA
# Candidate values for each swept hyperparameter. Key order and value order
# both matter: they fix the enumeration order of the Cartesian grid and
# therefore the index embedded in every generated file name.

# Swept keys merged into config["data_params"].
data_params_lists = {
    "n_markers": [20, 40, 80],
    "n_mix": [5, 8, 10, 15],
}

# Swept keys merged into config["model_params"]["adda_kwargs"].
model_params_lists = {
    "emb_dim": [32, 64],
}

# Swept keys merged into config["train_params"].
train_params_lists = {
    "alpha": [1, 2, 5],
    "dis_loop_factor": [2, 5, 10],
    "enc_lr": [2e-6, 2e-5, 2e-4],
    "initial_train_lr": [1e-4, 2e-4, 1e-3],
}

In [23]:
# Size of the full Cartesian grid: the product of the number of candidate
# values across every swept hyperparameter in all three groups.
total_configs = 1
for candidates in itertools.chain(
    data_params_lists.values(),
    model_params_lists.values(),
    train_params_lists.values(),
):
    total_configs *= len(candidates)
total_configs

1944

In [24]:
# Display the base config dict as a sanity check before generating the sweep.
config

{'lib_params': {},
 'data_params': {'all_genes': False,
  'data_dir': 'data',
  'dset': 'mouse_cortex',
  'n_spots': 100000,
  'samp_split': True,
  'sc_id': 'GSE115746',
  'scaler_name': 'standard',
  'st_id': 'spotless_mouse_cortex',
  'st_split': False},
 'train_params': {'epochs': 20,
  'batch_size': 8,
  'initial_train_epochs': 200,
  'adam_beta1': 0.5,
  'reverse_val': False,
  'pretraining': True},
 'model_params': {'adda_kwargs': {'bn_momentum': 0.1}}}

In [25]:
# Maximum number of grid points to write out as config files.
N_SAMPLES = 1000


def _expand_grid(param_lists):
    """Return every combination of the given hyperparameter lists.

    Args:
        param_lists: Mapping from parameter name to a list of candidate values.

    Returns:
        A list of dicts, one per element of the Cartesian product, in
        ``itertools.product`` order (the last key varies fastest).
    """
    names = list(param_lists)
    return [
        dict(zip(names, combo))
        for combo in itertools.product(*(param_lists[name] for name in names))
    ]


# A single seeded generator drives both the choice of grid points and the
# per-config seeds, so the whole sweep is reproducible.
rng = np.random.default_rng(274)

data_params_l = _expand_grid(data_params_lists)
model_params_l = _expand_grid(model_params_lists)
train_params_l = _expand_grid(train_params_lists)

# Derive the grid size from the lists expanded just above instead of relying
# on a value computed in another cell, which may be stale after an edit.
n_grid = len(data_params_l) * len(model_params_l) * len(train_params_l)

# Indices of the grid points to keep, drawn without replacement. The sample
# size is capped at the grid size: asking for more than the population with
# replace=False raises ValueError.
yes_samples = {
    int(idx)
    for idx in rng.choice(n_grid, size=min(N_SAMPLES, n_grid), replace=False)
}

config_out_dir = os.path.join(CONFIG_DIR, MODEL_NAME)
os.makedirs(config_out_dir, exist_ok=True)

for i, (data_params, model_params, train_params) in enumerate(
    itertools.product(data_params_l, model_params_l, train_params_l)
):
    if i not in yes_samples:
        continue

    # Deep copy so the nested dicts of the base config are never mutated.
    new_config = deepcopy(config)

    new_config["data_params"].update(data_params)
    new_config["model_params"]["adda_kwargs"].update(model_params)
    new_config["train_params"].update(train_params)

    # Seeds are drawn only for selected configs, in increasing index order.
    # int() converts the NumPy integer to a plain int that safe_dump accepts.
    new_config["lib_params"]["manual_seed"] = int(rng.integers(0, 2**32))

    # The grid index doubles as the file-name suffix and the model version.
    version = f"{CONFIG_FNAME_PREFIX}-{i}"
    new_config["model_params"]["model_version"] = version

    with open(os.path.join(config_out_dir, f"{version}.yml"), "w") as f:
        yaml.safe_dump(new_config, f)

In [26]:
# Show the YAML text of the last config produced by the generation loop.
# print() is used so the newlines in the dumped string render as lines.
print(yaml.safe_dump(new_config))


data_params:
  all_genes: false
  data_dir: data
  dset: mouse_cortex
  n_markers: 80
  n_mix: 15
  n_spots: 100000
  samp_split: true
  sc_id: GSE115746
  scaler_name: standard
  st_id: spotless_mouse_cortex
  st_split: false
lib_params:
  manual_seed: 4008570184
model_params:
  adda_kwargs:
    bn_momentum: 0.1
    emb_dim: 64
  model_version: gen_spotless_oracle-1943
train_params:
  adam_beta1: 0.5
  alpha: 5
  batch_size: 8
  dis_loop_factor: 10
  enc_lr: 0.0002
  epochs: 20
  initial_train_epochs: 200
  initial_train_lr: 0.001
  pretraining: true
  reverse_val: false



In [27]:
# Build a manifest ("a_list.txt") listing every "<prefix>-*.yml" file that
# is currently in the output directory, one base name per line.
# Note: the glob reflects what is on disk, so files left over from earlier
# runs are listed too, and the order is lexicographic by path
# (e.g. "-10.yml" sorts before "-2.yml").
manifest_dir = os.path.join(CONFIG_DIR, MODEL_NAME)
config_pattern = os.path.join(manifest_dir, f"{CONFIG_FNAME_PREFIX}-*.yml")
lines = list(map(os.path.basename, sorted(glob.glob(config_pattern))))

manifest_path = os.path.join(manifest_dir, "a_list.txt")
with open(manifest_path, mode="wt", encoding="utf-8") as list_file:
    # One entry per line, with a trailing newline after the last entry.
    list_file.write("\n".join(lines) + "\n")