In [1]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC,
    create_merge_prompt,
)
from run_individual_meeting import run_individual_meeting
from run_scientific_meeting import run_scientific_meeting
from utils import load_summaries

In [2]:
# Key parameters shared by every meeting in this notebook.

# Each agenda is discussed `num_iterations` times in parallel; `num_rounds` and
# `num_critiques` are forwarded unchanged to the meeting runners.
num_iterations, num_rounds, num_critiques = 5, 3, 3

# Root directory from which each stage derives its own output directory.
save_dir = Path("antibody_design")

# Model identifier forwarded to every meeting.
model = "gpt-4o"

# Project context placed at the start of every agenda below.
background_prompt = (
    "You are working on a research project to use machine learning to develop "
    "antibodies for the newest variant of the SARS-CoV-2 spike protein that also, "
    "ideally, have activity against other circulating minor variants and past variants."
)

## Select team members

In [3]:
# Select team members - prompts
# Output directory for the team-selection meetings (a sub-directory of save_dir).
team_selection_dir = save_dir / "team_selection"

# Agenda for the team-selection meetings: the shared background prompt, the task
# description, and an example Agent(...) definition that shows the answer format
# the Principal Investigator is asked to follow. Only {background_prompt} is
# interpolated; the Agent(...) text below is literal prompt content, not code.
team_selection_agenda = f"""{background_prompt} You need to select a team of three scientists to help you with this project. Please select the team members that you would like to invite to a discussion to create the antibody design approach. Please list the team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Agent(
    title="Principal Investigator",
    expertise="applying artificial intelligence to biomedical research",
    goal="perform research in your area of expertise that maximizes the scientific impact of the work",
    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",
)
"""

In [None]:
# Select team members - discussion
# Run `num_iterations` independent team-selection meetings in parallel threads,
# each saved under its own name (discussion_1 ... discussion_N).
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_agenda,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(team_selection_futures)

# wait() only blocks until the futures finish; it never raises on their behalf.
# Calling result() re-raises any exception from a worker thread, so a failed
# meeting stops the notebook here instead of silently leaving the merge step
# with fewer discussions than expected.
for future in team_selection_futures:
    future.result()

In [None]:
# Select team members - merge
# Sort the discussion files: glob() yields paths in an arbitrary,
# filesystem-dependent order, and sorting keeps the list handed to
# load_summaries reproducible across runs and machines.
team_selection_summaries = load_summaries(
    discussion_paths=sorted(team_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(team_selection_summaries)}")

# Wrap the original agenda in a merge prompt for the merge meeting.
team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

# Single merge meeting over the loaded summaries, saved as "merged".
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=team_selection_merge_prompt,
    save_dir=team_selection_dir,
    save_name="merged",
    summaries=team_selection_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

In [4]:
# Add team members
# Each scientist is described by a title, an area of expertise, a goal and a role.
# Long descriptions are wrapped with implicit string concatenation; the resulting
# text is unchanged.

IMMUNOLOGIST = Agent(
    title="Immunologist",
    expertise="antibody development, immune response mechanisms, and viral pathogenesis",
    goal=(
        "identify key epitopes on the spike protein that are likely to elicit a strong "
        "immune response and assess the potential cross-reactivity with other variants"
    ),
    role=(
        "guide the selection of target epitopes for antibody design, ensuring that they "
        "are likely to be broadly neutralizing across multiple variants, and validate the "
        "effectiveness of antibody candidates"
    ),
)

COMPUTATIONAL_BIOLOGIST = Agent(
    title="Computational Biologist",
    expertise=(
        "protein structure prediction, molecular dynamics simulations, "
        "and structural bioinformatics"
    ),
    goal=(
        "model the 3D structure of the new SARS-CoV-2 spike protein variant and "
        "simulate its interactions with potential antibody candidates"
    ),
    role=(
        "provide structural insights into the spike protein, predict how mutations might "
        "affect antibody binding, and guide the computational design of antibody candidates"
    ),
)

# Note: the variable is named DATA_SCIENTIST, but the agent's title is
# "Machine Learning Specialist".
DATA_SCIENTIST = Agent(
    title="Machine Learning Specialist",
    expertise="deep learning, predictive modeling, and biological data analysis",
    goal=(
        "develop and implement machine learning models to predict antibody efficacy "
        "and optimize antibody sequences"
    ),
    role=(
        "create and refine algorithms to predict antibody binding affinity and "
        "cross-reactivity with various SARS-CoV-2 variants, and optimize antibody "
        "sequences for broad neutralizing activity"
    ),
)

# Participants of the team meeting: the three scientists plus the imported critic.
team_members = (IMMUNOLOGIST, COMPUTATIONAL_BIOLOGIST, DATA_SCIENTIST, SCIENTIFIC_CRITIC)

## Select project

In [5]:
# Select antibodies project - prompts
# Output directory for the project-selection meetings.
project_selection_dir = save_dir / "project_selection"

# Agenda: shared background followed by the design task (wrapped for readability;
# the resulting text is unchanged).
project_selection_agenda = (
    f"{background_prompt} Please create an antibody design approach for this target that "
    "uses ESM amino acid likelihoods to identify promising antibody sequences and "
    "RosettaAntibody to rank those proposed antibodies by binding affinity. Decide "
    "whether you will design antibodies de novo or modify existing antibodies. Explain "
    "in detail how you will apply ESM and RosettaAntibody to design antibodies."
)

# Questions passed to the meeting as agenda_questions.
project_selection_questions = (
    "Will you design antibodies de novo or will you modify existing antibodies (choose only one)?",
    "If modifying existing antibodies, which precise antibody or antibodies will you modify?",
    "If modifying existing antibodies, how exactly will you propose modifications to the antibodies?",
    "If designing antibodies de novo, how exactly will you propose antibody candidates?",
    "How will you use ESM and RosettaAntibody (step-by-step, in detail, without any additional tools) to design antibodies?",
    "What specific role will ESM play in the antibody design process?",
    "What specific role will RosettaAntibody play in the antibody design process?",
)

# Constraints passed to the meeting as agenda_rules.
project_selection_rules = (
    "You must not use any computational tools at any point in your antibody design approach except for ESM and RosettaAntibody.",
    "You must only consider the computational aspects of antibody design and ignore experimental validation.",
)

In [None]:
# Select antibodies project - discussion
# Run `num_iterations` independent team meetings in parallel threads, each saved
# under its own name (discussion_1 ... discussion_N).
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=project_selection_agenda,
            agenda_questions=project_selection_questions,
            agenda_rules=project_selection_rules,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(project_selection_futures)

# wait() never raises on behalf of the futures; result() re-raises any exception
# from a worker thread so a failed meeting is reported here rather than silently
# leaving the merge step with fewer discussions than expected.
for future in project_selection_futures:
    future.result()

In [None]:
# Select antibodies project - merge
# Sort the discussion files: glob() yields paths in an arbitrary,
# filesystem-dependent order, and sorting keeps the list handed to
# load_summaries reproducible across runs and machines.
project_selection_summaries = load_summaries(
    discussion_paths=sorted(project_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Build the merge prompt from the same agenda, questions and rules that the
# discussions used.
project_selection_merge_prompt = create_merge_prompt(
    agenda=project_selection_agenda,
    agenda_questions=project_selection_questions,
    agenda_rules=project_selection_rules,
)

# Single merge meeting over the loaded summaries, saved as "merged".
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=project_selection_merge_prompt,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    num_critiques=num_critiques,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Get antibodies

In [6]:
# Get antibodies - prompts
# Output directory for the antibody-retrieval meetings.
antibodies_dir = save_dir / "antibodies"

# Agenda: shared background followed by the task (wrapped for readability; the
# resulting text is unchanged).
antibodies_agenda = (
    f"{background_prompt} Your team previously decided on an antibody design approach "
    "using ESM and RosettaAntibody to modify existing antibodies. First, write out the "
    "list of existing antibodies that were selected in the previous meeting. Then, write "
    "step-by-step instructions for obtaining the sequences of each of these antibodies."
)

# The merged project-selection result is supplied to the meetings as prior context.
antibodies_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir / "merged.json"],
)
print(f"Number of prior summaries: {len(antibodies_prior_summaries)}")

Number of prior summaries: 1


In [None]:
# Get antibodies - discussion
# Run `num_iterations` independent meetings with the Immunologist in parallel
# threads, each saved under its own name (discussion_1 ... discussion_N).
with concurrent.futures.ThreadPoolExecutor() as executor:
    antibodies_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=IMMUNOLOGIST,
            agenda=antibodies_agenda,
            save_dir=antibodies_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            summaries=antibodies_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(antibodies_futures)

# wait() never raises on behalf of the futures; result() re-raises any exception
# from a worker thread so a failed meeting is reported here rather than silently
# leaving the merge step with fewer discussions than expected.
for future in antibodies_futures:
    future.result()

In [None]:
# Get antibodies - merge
# Sort the discussion files: glob() yields paths in an arbitrary,
# filesystem-dependent order, and sorting keeps the list handed to
# load_summaries reproducible across runs and machines.
antibodies_summaries = load_summaries(
    discussion_paths=sorted(antibodies_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(antibodies_summaries)}")

# Wrap the original agenda in a merge prompt for the merge meeting.
antibodies_merge_prompt = create_merge_prompt(agenda=antibodies_agenda)

# Single merge meeting over the loaded summaries, saved as "merged".
run_individual_meeting(
    team_member=IMMUNOLOGIST,
    agenda=antibodies_merge_prompt,
    save_dir=antibodies_dir,
    save_name="merged",
    summaries=antibodies_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Coding rules

In [7]:
# Rules passed as agenda_rules to the code-writing meetings in this notebook.
# The longer entries are wrapped with implicit string concatenation; the tuple
# still holds six plain strings with unchanged text.
coding_rules = (
    "Your code must be self-contained (with appropriate imports) and complete.",
    "Your code may not include any undefined or unimplemented variables or functions.",
    "Your code may not include any pseudocode; it must be fully functioning code.",
    "Your code may not include any hard-coded examples.",
    (
        "If your code needs user-provided values, write code to parse those values "
        "from the command line."
    ),
    (
        "Your code must be high quality, well-engineered, efficient, and well-documented "
        "(including docstrings, comments, and Python type hints if using Python)."
    ),
)

## Run ESM

In [14]:
# Run ESM - prompts
# Output directory for the ESM implementation meetings.
esm_dir = save_dir / "esm"

# Agenda: shared background followed by the coding task (wrapped for readability;
# the resulting text is unchanged).
esm_agenda = (
    f"{background_prompt} Your team previously decided on an antibody design approach "
    "using ESM and RosettaAntibody to modify existing antibodies. Now, you must "
    "implement the ESM portion of the approach. Please write a complete Python script "
    "that takes an antibody sequence as input and uses ESM amino acid likelihoods to "
    "identify the most promising mutant sequences."
)

# The merged project-selection result is supplied to the meetings as prior context.
esm_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir / "merged.json"],
)
print(f"Number of prior summaries: {len(esm_prior_summaries)}")

Number of prior summaries: 1


In [15]:
# Run ESM - discussion
# Run `num_iterations` independent meetings with the DATA_SCIENTIST agent in
# parallel threads, each saved under its own name (discussion_1 ... discussion_N).
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=esm_agenda,
            agenda_rules=coding_rules,
            save_dir=esm_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            summaries=esm_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(esm_futures)

# wait() never raises on behalf of the futures; result() re-raises any exception
# from a worker thread so a failed meeting is reported here rather than silently
# leaving the merge step with fewer discussions than expected.
for future in esm_futures:
    future.result()

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]
Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:16<00:16, 16.13s/it][A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:16<00:16, 16.75s/it][A[A[A[A[A[A








Agents:  50%|█████     | 1/2 [00:16<00:16, 16.84s/it][A[A[A[A[A[A[A[A[A







Agents:  50%|█████  

Input token count: 34,220
Output token count: 6,937
Max token length: 9,049
Cost: $0.28
Time: 1:48











Agents: 100%|██████████| 2/2 [00:38<00:00, 18.68s/it][A[A[A[A[A[A[A[A[A








                                                     [A[A[A[A[A[A[A[A[A



Critiques (+ Final Round):  75%|███████▌  | 3/4 [01:50<00:37, 37.99s/it][A[A[A[A

Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A






Agents:  50%|█████     | 1/2 [00:21<00:21, 21.87s/it][A[A[A[A[A[A[A





Agents: 100%|██████████| 1/1 [00:23<00:00, 23.93s/it][A[A[A[A[A[A





                                                     [A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [02:08<00:00, 30.89s/it][A
                                                                        [A

Input token count: 37,402
Output token count: 8,153
Max token length: 10,265
Cost: $0.31
Time: 2:08







Agents: 100%|██████████| 1/1 [00:24<00:00, 24.06s/it][A[A[A[A[A




                                                                        

Input token count: 36,345
Output token count: 8,356
Max token length: 10,468
Cost: $0.31
Time: 2:09









Agents: 100%|██████████| 2/2 [00:41<00:00, 20.35s/it][A[A[A[A[A[A[A






                                                     [A[A[A[A[A[A[A


Agents:   0%|          | 0/1 [00:00<?, ?it/s]/4 [02:13<00:43, 43.82s/it][A[A[A

Agents: 100%|██████████| 1/1 [00:27<00:00, 27.41s/it][A[A

                                                     [A[A



Critiques (+ Final Round): 100%|██████████| 4/4 [02:18<00:00, 33.81s/it][A[A[A[A



                                                                        [A[A[A[A

Input token count: 34,371
Output token count: 7,843
Max token length: 9,955
Cost: $0.29
Time: 2:18


                                                     


Critiques (+ Final Round): 100%|██████████| 4/4 [02:44<00:00, 38.65s/it][A[A[A


                                                                        [A[A[A

Input token count: 39,153
Output token count: 9,562
Max token length: 11,674
Cost: $0.34
Time: 2:44


In [16]:
# Run ESM - merge
# Sort the discussion files: glob() yields paths in an arbitrary,
# filesystem-dependent order, and sorting keeps the list handed to
# load_summaries reproducible across runs and machines.
esm_summaries = load_summaries(
    discussion_paths=sorted(esm_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(esm_summaries)}")

# Wrap the original agenda in a merge prompt for the merge meeting.
esm_merge_prompt = create_merge_prompt(agenda=esm_agenda)

# Single merge meeting over the loaded summaries, saved as "merged".
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=esm_merge_prompt,
    save_dir=esm_dir,
    save_name="merged",
    summaries=esm_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:28<00:00, 28.55s/it][A
                                                                        

Input token count: 8,183
Output token count: 1,827
Max token length: 10,010
Cost: $0.07
Time: 0:28




## Run RosettaAntibody

In [11]:
# Run RosettaAntibody - prompts
# Output directory for the RosettaAntibody implementation meetings.
rosetta_dir = save_dir / "rosetta"

# Agenda: shared background followed by the coding task (wrapped for readability;
# the resulting text is unchanged).
rosetta_agenda = (
    f"{background_prompt} Your team previously decided on an antibody design approach "
    "using ESM and RosettaAntibody to modify existing antibodies. Now, you must "
    "implement the RosettaAntibody portion of the approach. Please write a complete "
    "Python script that takes a list of antibody sequences and an antigen structure as "
    "input and uses RosettaAntibody via PyRosetta to rank the antibody sequences based "
    "on their binding affinity and structural stability."
)

# The merged project-selection result is supplied to the meetings as prior context.
rosetta_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir / "merged.json"],
)
print(f"Number of prior summaries: {len(rosetta_prior_summaries)}")

Number of prior summaries: 1


In [12]:
# Run RosettaAntibody - discussion
# Run `num_iterations` independent meetings with the Computational Biologist in
# parallel threads, each saved under its own name (discussion_1 ... discussion_N).
with concurrent.futures.ThreadPoolExecutor() as executor:
    rosetta_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=COMPUTATIONAL_BIOLOGIST,
            agenda=rosetta_agenda,
            agenda_rules=coding_rules,
            save_dir=rosetta_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            summaries=rosetta_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(rosetta_futures)

# wait() never raises on behalf of the futures; result() re-raises any exception
# from a worker thread so a failed meeting is reported here rather than silently
# leaving the merge step with fewer discussions than expected.
for future in rosetta_futures:
    future.result()

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A
Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:14<00:14, 14.80s/it][A[A[A[A[A[A




Agents:  50%|█████     | 1/2 [00:15<00:15, 15.70s/it][A[A[A[A[A







Agents:  50%|█████     | 1/2 [00:17<00:17, 17.14s/it][A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:

Input token count: 35,586
Output token count: 7,970
Max token length: 10,104
Cost: $0.30
Time: 2:08









Agents: 100%|██████████| 2/2 [00:53<00:00, 25.38s/it][A[A[A[A[A[A[A






                                                     [A[A[A[A[A[A[A

Critiques (+ Final Round):  75%|███████▌  | 3/4 [02:12<00:46, 46.61s/it][A[A



Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A





Agents:  50%|█████     | 1/2 [00:30<00:30, 30.99s/it][A[A[A[A[A[A




Agents:  50%|█████     | 1/2 [00:28<00:28, 28.90s/it][A[A[A[A[A





Agents: 100%|██████████| 2/2 [00:56<00:00, 27.55s/it][A[A[A[A[A[A





                                                     [A[A[A[A[A[A


Critiques (+ Final Round):  75%|███████▌  | 3/4 [02:39<00:54, 54.93s/it][A[A[A





Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A







Agents: 100%|██████████| 1/1 [00:38<00:00, 38.99s/it][A[A[A[A[A[A[A[A







                                                     [A[A[A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [02:39<00:00, 41.90s/

Input token count: 36,512
Output token count: 8,379
Max token length: 10,513
Cost: $0.31
Time: 2:39






Agents: 100%|██████████| 1/1 [00:32<00:00, 32.22s/it][A[A[A[A



                                                     [A[A[A[A

Critiques (+ Final Round): 100%|██████████| 4/4 [02:44<00:00, 40.93s/it][A[A

                                                                        [A[A

Input token count: 36,157
Output token count: 8,091
Max token length: 10,225
Cost: $0.30
Time: 2:44







Agents: 100%|██████████| 2/2 [01:01<00:00, 30.89s/it][A[A[A[A[A




Critiques (+ Final Round):  75%|███████▌  | 3/4 [03:03<01:02, 62.53s/it]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A





Agents: 100%|██████████| 1/1 [00:42<00:00, 42.74s/it][A[A[A[A[A[A





                                                     [A[A[A[A[A[A


Critiques (+ Final Round): 100%|██████████| 4/4 [03:22<00:00, 50.12s/it][A[A[A


                                                                        [A[A[A

Input token count: 35,362
Output token count: 8,800
Max token length: 10,934
Cost: $0.31
Time: 3:22



Agents: 100%|██████████| 1/1 [00:22<00:00, 22.43s/it][A
                                                                        

Input token count: 40,207
Output token count: 10,032
Max token length: 12,166
Cost: $0.35
Time: 3:26




In [13]:
# Run RosettaAntibody - merge
# Sort the discussion files: glob() yields paths in an arbitrary,
# filesystem-dependent order, and sorting keeps the list handed to
# load_summaries reproducible across runs and machines.
rosetta_summaries = load_summaries(
    discussion_paths=sorted(rosetta_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(rosetta_summaries)}")

# Wrap the original agenda in a merge prompt for the merge meeting.
rosetta_merge_prompt = create_merge_prompt(agenda=rosetta_agenda)

# Single merge meeting over the loaded summaries, saved as "merged".
run_individual_meeting(
    team_member=COMPUTATIONAL_BIOLOGIST,
    agenda=rosetta_merge_prompt,
    save_dir=rosetta_dir,
    save_name="merged",
    summaries=rosetta_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:24<00:00, 24.10s/it][A
                                                                        

Input token count: 8,948
Output token count: 1,633
Max token length: 10,581
Cost: $0.07
Time: 0:24


