In [13]:
from virtual_lab.agent import Agent
from virtual_lab.constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from virtual_lab.prompts import CODING_RULES
from virtual_lab.run_meeting import run_meeting

from nanobody_constants import (
    background_prompt,
    nanobody_prompt,
    discussions_dir,
    model,
    num_rounds,
    principal_investigator,
    scientific_critic,
    immunologist,
    machine_learning_specialist,
    computational_biologist,
)

In [3]:
# Set up the topic -> model mapping.
# Each value is the model identifier used for the agent specialized on that topic
# (the "ft:" prefix suggests fine-tuned models -- confirm against the training notebook).
topic_to_model = dict(
    [
        ("AlphaFold-Multimer", "ft:gpt-4o-mini-2024-07-18:personal:alphafold-multimer:ArGNLO1a"),
        ("ESM", "ft:gpt-4o-mini-2024-07-18:personal:esm:ArFrZ4tt"),
        ("Rosetta", "ft:gpt-4o-mini-2024-07-18:personal:rosetta:ArFx2nis"),
        ("SARS-CoV-2 spike protein", "ft:gpt-4o-mini-2024-07-18:personal:sars-cov-2-spike-protein:ArG9fZwz"),
        (
            "SARS-CoV-2 variants KP.3 and JN.1",
            "ft:gpt-4o-mini-2024-07-18:personal:sars-cov-2-variants-kp-3-and-jn-1:ArGVyHfW",
        ),
        ("nanobodies", "ft:gpt-4o-mini-2024-07-18:personal:nanobodies:ArGjRZ9R"),
    ]
)

In [4]:
# Set up the topic -> base agent mapping.
# Each topic is paired with the existing team member whose profile (expertise,
# goal, role) the topic-specialized agent will reuse. Insertion order matches
# topic_to_model so both mappings iterate over topics in the same sequence.
topic_to_agent = dict(
    [
        ("AlphaFold-Multimer", computational_biologist),
        ("ESM", machine_learning_specialist),
        ("Rosetta", computational_biologist),
        ("SARS-CoV-2 spike protein", immunologist),
        ("SARS-CoV-2 variants KP.3 and JN.1", immunologist),
        ("nanobodies", immunologist),
    ]
)

In [5]:
# Build one specialized agent per topic: it copies the base agent's expertise,
# goal and role, prefixes the title with the topic, and swaps in the
# topic-specific model from topic_to_model.
specialized_agents = {
    topic: Agent(
        title=f"{topic} {base_agent.title}",
        expertise=base_agent.expertise,
        goal=base_agent.goal,
        role=base_agent.role,
        model=topic_to_model[topic],
    )
    for topic, base_agent in topic_to_agent.items()
}

In [6]:
# Output directory for this notebook: the "ablations" subdirectory of discussions_dir.
# It is passed as save_dir to every run_meeting call below.
ablations_dir = discussions_dir / "ablations"
# Create the directory (and any missing parents); a no-op if it already exists,
# so re-running this cell is safe.
ablations_dir.mkdir(parents=True, exist_ok=True)

In [7]:
# Ablation: ask the same question of the base immunologist and of the
# immunologist specialized on the KP.3 / JN.1 variants topic.
agenda = (
    "What are some of the RBD mutations of the KP.3 and JN.1 variants of the "
    "SARS-CoV-2 spike protein that do not appear in previous variants?"
)

# (agent, save_name) pairs; save_name is the name each meeting is saved under.
agents = [
    (immunologist, "base_immunologist"),
    (specialized_agents["SARS-CoV-2 variants KP.3 and JN.1"], "specialized_immunologist"),
]

# Settings shared by both runs; only the agent and the output name vary.
individual_meeting_kwargs = dict(
    meeting_type="individual",
    agenda=agenda,
    save_dir=ablations_dir,
    temperature=CONSISTENT_TEMPERATURE,
)

for agent, save_name in agents:
    run_meeting(team_member=agent, save_name=save_name, **individual_meeting_kwargs)

Team:   0%|          | 0/2 [00:32<?, ?it/s]1 [00:00<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 1/1 [00:32<00:00, 32.06s/it]


Input token count: 74
Output token count: 491
Tool token count: 0
Max token length: 565
Cost: $0.01
Time: 0:33


Team:   0%|          | 0/2 [00:11<?, ?it/s]1 [00:00<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 1/1 [00:11<00:00, 11.66s/it]


Input token count: 103
Output token count: 456
Tool token count: 0
Max token length: 559
Time: 0:13


In [8]:
# Agenda for the team-meeting ablation. The literal is wrapped with implicit
# string concatenation; the resulting runtime string is unchanged.
agenda = (
    "You need to select machine learning and/or computational tools to design "
    "nanobody binders for recent variants of SARS-CoV-2. "
    "Please list several tools (5-10) that would be relevant to this nanobody "
    "design approach and how they could be used in the context of this project. "
    "If selecting machine learning tools, please prioritize pre-trained models "
    "(e.g., pre-trained protein language models or protein structure prediction "
    "models) for simplicity."
)

# Questions passed alongside the agenda to each team meeting.
agenda_questions = (
    "What machine learning and/or computational tools could be used for this nanobody design approach (list 5-10)?",
    "For each tool, how could it be used for designing modified nanobodies?",
)

def _make_generic_agent(title):
    """Return an Agent with the given title and a deliberately generic profile.

    Expertise, goal and role are all domain-free "help people" text, and the
    agent uses the shared `model` imported from nanobody_constants.
    """
    return Agent(
        title=title,
        expertise="helping people with their problems",
        goal="help people with their problems",
        role="help people with their problems",
        model=model,
    )


# Generic counterpart of the team lead.
generic_lead = _make_generic_agent("Lead Assistant")

# Four generic team members, titled "Assistant 1" through "Assistant 4".
generic_team = [_make_generic_agent(f"Assistant {number}") for number in range(1, 5)]

# Domain-expert roster led by the principal investigator.
base_team_members = [
    immunologist,
    machine_learning_specialist,
    computational_biologist,
    scientific_critic,
]

# (team_lead, team_members, save_name) for each ablation:
#   base_team          -- the domain-expert team
#   generic_team       -- generic lead with all four generic assistants
#   generic_team_short -- generic lead with only the first generic assistant
teams = [
    (principal_investigator, base_team_members, "base_team"),
    (generic_lead, generic_team, "generic_team"),
    (generic_lead, generic_team[:1], "generic_team_short"),
]

# Settings shared by every team meeting; only the participants and the output
# name differ between runs.
team_meeting_kwargs = dict(
    meeting_type="team",
    agenda=agenda,
    agenda_questions=agenda_questions,
    save_dir=ablations_dir,
    temperature=CONSISTENT_TEMPERATURE,
    num_rounds=num_rounds,
)

for team_lead, team_members, save_name in teams:
    run_meeting(
        team_lead=team_lead,
        team_members=team_members,
        save_name=save_name,
        **team_meeting_kwargs,
    )

Team: 100%|██████████| 5/5 [01:05<00:00, 13.01s/it]<?, ?it/s]
Team: 100%|██████████| 5/5 [01:13<00:00, 14.63s/it]<03:15, 65.08s/it]
Team: 100%|██████████| 5/5 [01:04<00:00, 12.83s/it]<02:19, 69.82s/it]
Team:   0%|          | 0/5 [00:21<?, ?it/s]4 [03:22<01:07, 67.23s/it]
Rounds (+ Final Round): 100%|██████████| 4/4 [03:43<00:00, 55.90s/it]


Input token count: 64,293
Output token count: 7,437
Tool token count: 0
Max token length: 9,030
Cost: $0.24
Time: 3:46


Team: 100%|██████████| 5/5 [01:00<00:00, 12.18s/it]<?, ?it/s]
Team: 100%|██████████| 5/5 [01:04<00:00, 12.86s/it]<03:02, 60.88s/it]
Team: 100%|██████████| 5/5 [00:55<00:00, 11.19s/it]<02:05, 62.89s/it]
Team:   0%|          | 0/5 [00:27<?, ?it/s]4 [03:01<00:59, 59.72s/it]
Rounds (+ Final Round): 100%|██████████| 4/4 [03:28<00:00, 52.15s/it]


Input token count: 57,779
Output token count: 6,160
Tool token count: 0
Max token length: 7,740
Cost: $0.21
Time: 3:30


Team: 100%|██████████| 2/2 [00:29<00:00, 14.82s/it]<?, ?it/s]
Team: 100%|██████████| 2/2 [00:27<00:00, 13.76s/it]<01:28, 29.64s/it]
Team: 100%|██████████| 2/2 [00:25<00:00, 12.86s/it]<00:56, 28.39s/it]
Team:   0%|          | 0/2 [00:21<?, ?it/s]4 [01:22<00:27, 27.18s/it]
Rounds (+ Final Round): 100%|██████████| 4/4 [01:44<00:00, 26.02s/it]


Input token count: 11,349
Output token count: 2,937
Tool token count: 0
Max token length: 3,974
Cost: $0.06
Time: 1:45


In [14]:
# Coding ablation: give the first generic assistant the project background plus
# a script-writing task. The literal is wrapped with implicit string
# concatenation; only the first fragment needs to be an f-string.
agenda = (
    f"{background_prompt} {nanobody_prompt} "
    "Now you must use AlphaFold-Multimer to predict the structure of a "
    "nanobody-antigen complex and evaluate its binding. "
    "I will run AlphaFold-Multimer on several nanobody-antigen complexes and "
    "you need to process the outputs. "
    "Please write a complete Python script that takes as input a directory "
    "containing PDB files where each PDB file contains one nanobody-antigen "
    "complex predicted by AlphaFold-Multimer and outputs a CSV file containing "
    "the AlphaFold-Multimer confidence of each nanobody-antigen complex in "
    "terms of the interface pLDDT."
)

# Unlike the meetings above, this one runs at CREATIVE_TEMPERATURE and adds
# CODING_RULES as agenda rules.
run_meeting(
    meeting_type="individual",
    team_member=generic_team[0],
    agenda=agenda,
    agenda_rules=CODING_RULES,
    temperature=CREATIVE_TEMPERATURE,
    save_dir=ablations_dir,
    save_name="generic_individual",
)

Team:   0%|          | 0/2 [00:09<?, ?it/s]1 [00:00<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 1/1 [00:09<00:00,  9.12s/it]


Input token count: 360
Output token count: 808
Tool token count: 0
Max token length: 1,168
Cost: $0.01
Time: 0:10
