In [12]:
from pathlib import Path
import json
import pandas as pd
from copy import deepcopy

In [13]:
# Run configuration: task/model identifiers and chain labels.
# task_name (lower-cased) is used below as the output sub-directory name.
task_name = "MHC"
model_name = "af3"
# NOTE(review): the two chain labels are not referenced in the cells visible
# here -- confirm whether they are still needed.
peptide_chains = "A"
protein_chains = "B"

# Every input is resolved relative to data_dir, so the notebook can be pointed
# at another location by editing this single line.
data_dir = Path(".")

# Job table; later cells read its "job_name" and "sequence" columns.
hmc_df = pd.read_csv(data_dir / "mhc_data.csv")

# Template configs that are deep-copied and filled in per job.
# Read explicitly as UTF-8 instead of relying on the platform default encoding.
base_config_path = data_dir / "mhc_data.json"
base_config = json.loads(base_config_path.read_text(encoding="utf-8"))
info_config_path = data_dir / "info-base.json"
base_info = json.loads(info_config_path.read_text(encoding="utf-8"))

In [14]:
def get_config(seq: str, chain_id: str = "A"):
    """Build the two per-job config fragments for a single sequence.

    Args:
        seq: Sequence string embedded in both fragments.
        chain_id: Identifier written to the ``"id"`` field of both fragments.
            Defaults to ``"A"``, the value that used to be hard-coded, so
            existing single-argument calls behave exactly as before.

    Returns:
        A ``(msa_config, job_config)`` tuple:

        * ``msa_config`` -- ``{"protein": {"id": chain_id, "sequence": seq}}``.
        * ``job_config`` -- flat dict with ``id``, ``sequence``,
          ``has_ncaa`` (always ``False``), empty ``positions`` and ``types``
          lists, and ``length`` (``len(seq)``).
    """
    msa_config = {
        "protein": {
            "id": chain_id,
            "sequence": seq,
        }
    }
    job_config = {
        "id": chain_id,
        "sequence": seq,
        "has_ncaa": False,
        # Fresh list objects on every call, so callers may mutate them safely.
        "positions": [],
        "types": [],
        "length": len(seq),
    }
    return msa_config, job_config

In [11]:
# When True, existing output files are overwritten; set to False to keep any
# file that is already on disk.
force_write = True

# For every row of the job table, write <job>/af3/<job>.json and
# <job>/job_info.json under data_dir/<task_name lower-cased>/.
for _, row in hmc_df.iterrows():
    job_name = row["job_name"]
    job_dir = data_dir / task_name.lower() / job_name
    af3_dir = job_dir / "af3"
    # parents=True also creates job_dir, so one mkdir covers both levels.
    af3_dir.mkdir(parents=True, exist_ok=True)

    # Deep-copy the templates so per-job edits never leak into later rows.
    config = deepcopy(base_config)
    config_info = deepcopy(base_info)

    config["name"] = job_name

    msa_config, job_config = get_config(row["sequence"])
    # The per-job sequence entry goes first, ahead of the template's entries.
    config["sequences"].insert(0, msa_config)
    config_info["job_name"] = job_name
    config_info["A"] = job_config

    # Context managers guarantee the files are flushed and closed; UTF-8 is
    # explicit because ensure_ascii=False may emit non-ASCII characters.
    new_config_path = af3_dir / f"{job_name}.json"
    if force_write or not new_config_path.exists():
        with open(new_config_path, "w", encoding="utf-8") as config_file:
            json.dump(config, config_file, indent=2, ensure_ascii=False)

    new_config_info_path = job_dir / "job_info.json"
    if force_write or not new_config_info_path.exists():
        with open(new_config_info_path, "w", encoding="utf-8") as info_file:
            json.dump(config_info, info_file, indent=2, ensure_ascii=False)

In [32]:
# Launch run_model.py in a shell, passing --data_dir mhc, --model_name af3 and
# --gpu_id 3. Both stdout and stderr are redirected to run_model.log, so the
# cell itself shows no output -- inspect that log file for progress and errors.
# NOTE(review): "mhc" presumably refers to the directory the generation loop
# writes to (task_name lower-cased) -- confirm if task_name is changed.
!python run_model.py --data_dir mhc --model_name af3 --gpu_id 3 > run_model.log 2>&1