In [1]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    MERGE_PROMPT,
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC,
    create_merge_prompt,
)
from run_individual_meeting import run_individual_meeting
from run_scientific_meeting import run_scientific_meeting
from utils import load_summaries

In [2]:
# Key parameters shared by every stage of this notebook.
# num_iterations: number of parallel meetings launched per discussion stage.
# num_rounds / num_critiques: forwarded to the meeting runners under the same names.
num_iterations, num_rounds, num_critiques = 5, 3, 3

# Root directory; each stage derives its own sub-directory from it.
save_dir = Path("antibody_design")

# Model name forwarded to every meeting.
model = "gpt-4o"

# Project background that prefixes every agenda prompt below.
background_prompt = (
    "You are working on a project to develop antibodies for the newest variant "
    "of the SARS-CoV-2 spike protein that also, ideally, have activity against "
    "other circulating minor variants and past variants."
)

## Select team members

In [3]:
# Team selection: output directory and the agenda for the selection meetings.
team_selection_dir = save_dir.joinpath("team_selection")

# The agenda ends with an example Agent(...) definition that shows the answer
# format the model is asked to follow.
team_selection_prompt = (
    f"{background_prompt} You need to select a team of scientists to help you with this project. "
    "Please select a small set of team members that you would like to invite to a discussion "
    "to design the antibody discovery approach. "
    "Please list team members in the following format, using the team member below as an example. "
    "You should not include yourself (Principal Investigator) in the list.\n"
    "\n"
    "Agent(\n"
    '    title="Principal Investigator",\n'
    '    expertise="applying artificial intelligence to biomedical research",\n'
    '    goal="perform research in your area of expertise that maximizes the scientific impact of the work",\n'
    '    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, '
    "make key decisions about the project direction based on team member input, "
    'and manage the project timeline and resources",\n'
    ")\n"
)

In [4]:
# Select team members - discussion
# Launch num_iterations independent meetings in parallel threads; each call gets
# its own save_name (discussion_1, discussion_2, ...) inside team_selection_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_prompt,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(team_selection_futures)

# wait() only blocks until the futures finish; it never re-raises an exception
# raised inside a worker. Calling result() surfaces a failed meeting here instead
# of letting the merge step silently run on missing or stale discussion files.
for meeting_future in team_selection_futures:
    meeting_future.result()


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A[A



Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A




Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A






Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A







Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A








Agents: 100%|██████████| 1/1 [00:07<00:00,  7.97s/it][A[A[A[A[A[A[A[A[A


Critiques (+ Final Round): 100%|██████████| 1/1 [00:08<00:00,  8.03s/it][A[A


Input token count: 230
Output token count: 458
Max token length: 688
Cost: $0.01
Time: 0:08








Agents: 100%|██████████| 1/1 [00:08<00:00,  8.23s/it][A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:08<00:00,  8.25s/it]





Agents: 100%|██████████| 1/1 [00:08<00:00,  8.27s/it][A[A[A[A[A

Critiques (+ Final Round): 100%|██████████| 1/1 [00:08<00:00,  8.28s/it][A


Input token count: 230
Output token count: 431
Max token length: 661
Cost: $0.01
Time: 0:08
Input token count: 230
Output token count: 497
Max token length: 727
Cost: $0.01
Time: 0:08










Agents: 100%|██████████| 1/1 [00:08<00:00,  8.98s/it][A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 1/1 [00:09<00:00,  9.03s/it][A[A[A[A


Input token count: 230
Output token count: 440
Max token length: 670
Cost: $0.01
Time: 0:09









Agents: 100%|██████████| 1/1 [00:10<00:00, 10.71s/it][A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 1/1 [00:10<00:00, 10.76s/it][A[A[A

Input token count: 230
Output token count: 473
Max token length: 703
Cost: $0.01
Time: 0:10





In [5]:
# Select team members - merge
# glob() yields paths in filesystem order, which is not guaranteed to be stable
# across machines or runs. Sorting makes the input to the merge meeting
# reproducible (presumably load_summaries keeps the order of the given paths).
team_selection_summaries = load_summaries(
    discussion_paths=sorted(team_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(team_selection_summaries)}")

# One meeting in which the Principal Investigator merges the parallel discussions;
# the result is saved as "merged" next to them.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=team_selection_dir,
    save_name="merged",
    summaries=team_selection_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:13<00:00, 13.27s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:13<00:00, 13.27s/it]

Input token count: 2,446
Output token count: 802
Max token length: 3,248
Cost: $0.02
Time: 0:13





In [3]:
# Team members for the project, each described by title, expertise, goal and role.
IMMUNOLOGIST = Agent(
    title="Immunologist",
    expertise="immune response mechanisms and antibody engineering",
    goal=(
        "provide insights on immune system interactions with SARS-CoV-2 variants "
        "and guide the antibody development process"
    ),
    role=(
        "offer expertise in the biological mechanisms of antibody responses "
        "and help design effective antibodies against multiple virus variants"
    ),
)

VIROLOGIST = Agent(
    title="Virologist",
    expertise="SARS-CoV-2 virology and viral pathogenesis",
    goal="provide detailed knowledge of SARS-CoV-2 biology and its variants",
    role=(
        "offer insights into viral behavior and assist in designing experiments "
        "to test the effectiveness of developed antibodies in neutralizing the virus"
    ),
)

# NOTE: the variable is called DATA_SCIENTIST, but the agent's title is
# "Machine Learning Scientist".
DATA_SCIENTIST = Agent(
    title="Machine Learning Scientist",
    expertise="machine learning algorithms and predictive modeling",
    goal="develop AI models to predict the efficacy of antibodies against various SARS-CoV-2 variants",
    role=(
        "apply machine learning techniques to identify the most promising antibody candidates "
        "and predict their neutralizing potential"
    ),
)

# Participants of the team meetings; the team lead is passed to the meetings separately.
team_members = (IMMUNOLOGIST, VIROLOGIST, DATA_SCIENTIST, SCIENTIFIC_CRITIC)

## Select project

In [4]:
# Project selection: output directory, agenda, agenda questions and rules.
project_selection_dir = save_dir.joinpath("project_selection")

# Agenda: the shared background followed by the task description.
project_selection_prompt = (
    f"{background_prompt} Please create an antibody design approach for this target "
    "that uses ESM likelihoods to identify promising antibody sequences "
    "and RosettaAntibody to rank those proposed antibodies by binding affinity. "
    "Decide whether you will design antibodies de novo or modify existing antibodies. "
    "Explain in detail how you will apply ESM and RosettaAntibody to design antibodies."
)

# Questions passed to the meeting as agenda_questions.
# Keep one string per line with a trailing comma: adjacent literals without a
# comma are silently concatenated into a single question.
project_selection_questions = (
    # De novo design versus modification of existing antibodies.
    "Will you design antibodies de novo or will you modify existing antibodies (choose only one)?",
    "If modifying existing antibodies, which precise antibody or antibodies will you modify?",
    "If modifying existing antibodies, how exactly will you propose modifications to the antibodies?",
    "If designing antibodies de novo, how exactly will you propose antibody candidates?",
    # Use of the two permitted tools.
    "How will you use ESM and RosettaAntibody (step-by-step, in detail, without any additional tools) to design antibodies?",
    "What specific role will ESM play in the antibody design process?",
    "What specific role will RosettaAntibody play in the antibody design process?",
)

# Constraints passed to the meeting as agenda_rules.
project_selection_rules = (
    "You must not use any computational tools at any point in your antibody design approach except for ESM and RosettaAntibody.",
    "You must only consider the computational aspects of antibody design and ignore experimental validation.",
)

In [5]:
# Select antibodies project - discussion
# Launch num_iterations independent team meetings in parallel threads; each call
# gets its own save_name (discussion_1, discussion_2, ...) inside project_selection_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=project_selection_prompt,
            agenda_questions=project_selection_questions,
            agenda_rules=project_selection_rules,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(project_selection_futures)

# wait() only blocks until the futures finish; it never re-raises an exception
# raised inside a worker. Calling result() surfaces a failed meeting here instead
# of letting later steps silently run on missing or stale discussion files.
for meeting_future in project_selection_futures:
    meeting_future.result()

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s]

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Team:  20%|██        | 1/5 [00:20<01:23, 20.95s/it][A[A[A[A[A[A[A[A[A






Team:  20%|██        | 1/5 [00:21<01:27, 21.78s/it][A[A[A[A[A[A[A




Team:  20%|██        | 1/5 [00:22<01:30, 22.58s/it][A[A[A[A[A







Team:  20%|██        | 1/5 [00:23<01:35, 23.7

Input token count: 76,268
Output token count: 9,556
Max token length: 11,478
Cost: $0.52
Time: 4:28
Input token count: 78,456
Output token count: 9,435
Max token length: 11,357
Cost: $0.53
Time: 4:28








Team:  80%|████████  | 4/5 [01:27<00:19, 19.15s/it][A[A[A[A[A[A







Team:  80%|████████  | 4/5 [01:54<00:29, 29.45s/it][A[A[A[A[A[A[A[A







Team: 100%|██████████| 5/5 [02:08<00:00, 25.67s/it][A[A[A[A[A[A[A[A




Rounds (+ Summary Round):  75%|███████▌  | 3/4 [04:58<01:43, 103.82s/it][A[A[A[A
Team:   0%|          | 0/5 [00:00<?, ?it/s][A





Team: 100%|██████████| 5/5 [01:55<00:00, 23.13s/it][A[A[A[A[A[A


Rounds (+ Summary Round):  75%|███████▌  | 3/4 [05:01<01:41, 101.95s/it][A[A


Team:   0%|          | 0/5 [00:43<?, ?it/s][A[A[A
Rounds (+ Summary Round): 100%|██████████| 4/4 [05:06<00:00, 76.52s/it]


Input token count: 85,966
Output token count: 10,580
Max token length: 12,502
Cost: $0.59
Time: 5:06


Team:   0%|          | 0/5 [00:27<?, ?it/s]




Rounds (+ Summary Round): 100%|██████████| 4/4 [05:25<00:00, 81.45s/it] [A[A[A[A


Input token count: 85,241
Output token count: 10,107
Max token length: 12,029
Cost: $0.58
Time: 5:25


Team:   0%|          | 0/5 [00:34<?, ?it/s]


Rounds (+ Summary Round): 100%|██████████| 4/4 [05:35<00:00, 83.99s/it] [A[A

Input token count: 100,564
Output token count: 12,138
Max token length: 14,060
Cost: $0.68
Time: 5:35





In [6]:
# Select antibodies project - merge
# glob() yields paths in filesystem order, which is not guaranteed to be stable
# across machines or runs. Sorting makes the input to the merge meeting
# reproducible (presumably load_summaries keeps the order of the given paths).
project_selection_summaries = load_summaries(
    discussion_paths=sorted(project_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Build the merge agenda from the same agenda, questions and rules that the
# individual discussions were given.
project_selection_merge_prompt = create_merge_prompt(
    agenda=project_selection_prompt,
    agenda_questions=project_selection_questions,
    agenda_rules=project_selection_rules,
)

# One meeting (with num_critiques critique rounds) in which the Principal
# Investigator merges the parallel discussions; the result is saved as "merged".
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=project_selection_merge_prompt,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    num_critiques=num_critiques,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]
Agents:   0%|          | 0/2 [00:00<?, ?it/s][A
Agents:  50%|█████     | 1/2 [00:43<00:43, 43.29s/it][A
Agents: 100%|██████████| 2/2 [01:12<00:00, 36.06s/it][A
Critiques (+ Final Round):  25%|██▌       | 1/4 [01:12<03:36, 72.12s/it]
Agents:   0%|          | 0/2 [00:00<?, ?it/s][A
Agents:  50%|█████     | 1/2 [00:35<00:35, 35.07s/it][A
Agents: 100%|██████████| 2/2 [01:07<00:00, 33.62s/it][A
Critiques (+ Final Round):  50%|█████     | 2/4 [02:19<02:18, 69.25s/it]
Agents:   0%|          | 0/2 [00:00<?, ?it/s][A
Agents:  50%|█████     | 1/2 [00:52<00:52, 52.62s/it][A
Agents: 100%|██████████| 2/2 [01:23<00:00, 41.52s/it][A
Critiques (+ Final Round):  75%|███████▌  | 3/4 [03:42<01:15, 75.54s/it]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:53<00:00, 53.15s/it][A
Critiques (+ Final Round): 100%|██████████| 4/4 [04:35<00:00, 68.89s/it]

Input token count: 79,363
Output token count: 9,612
Max token length: 17,192
Cost: $0.54
Time: 4:35





## Run DeepAb

In [8]:
# DeepAb stage: output directory, agenda and prior context.
deepab_dir = save_dir.joinpath("deepab")

# Agenda: the shared background followed by the scripting task.
deepab_prompt = (
    f"{background_prompt} Your team previously decided to use DeepAb to predict beneficial antibody mutations. "
    "Please write a complete script that uses DeepAb to predict beneficial mutations given an antibody sequence. "
    "Your code must be self-contained (with appropriate imports) and complete."
)

# Prior context comes from the first project-selection discussion only
# (discussion_1.json), not from the merged summary.
deepab_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir.joinpath("discussion_1.json")]
)
print("Number of prior summaries:", len(deepab_prior_summaries))

Number of prior summaries: 1


In [9]:
# Run DeepAb - discussion
# Launch num_iterations independent meetings in parallel threads; each call gets
# its own save_name (discussion_1, discussion_2, ...) inside deepab_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    deepab_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=deepab_prompt,
            save_dir=deepab_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            summaries=deepab_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(deepab_futures)

# wait() only blocks until the futures finish; it never re-raises an exception
# raised inside a worker. Calling result() surfaces a failed meeting here instead
# of letting the merge step silently run on missing or stale discussion files.
for meeting_future in deepab_futures:
    meeting_future.result()


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A








Agents:  50%|█████     | 1/2 [00:17<00:17, 17.38s/it][A[A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:19<00:19, 19.02s/it][A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:19<00:19, 19.35s/it][A[A[A[A[A[A







Agents:  50%|█████     | 1/2 [00:22<00:22, 22.94s/it][A[A[A[A[A[A[A[A




Ag

Input token count: 26,931
Output token count: 5,511
Max token length: 7,333
Cost: $0.22
Time: 2:32











Agents: 100%|██████████| 1/1 [00:37<00:00, 37.18s/it][A[A[A[A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [02:43<00:00, 40.81s/it]


Input token count: 27,670
Output token count: 6,137
Max token length: 7,959
Cost: $0.23
Time: 2:43







Agents: 100%|██████████| 2/2 [01:11<00:00, 35.76s/it][A[A[A[A[A


Agents:   0%|          | 0/1 [00:00<?, ?it/s]/4 [02:44<00:58, 58.29s/it][A[A





Agents: 100%|██████████| 1/1 [00:37<00:00, 37.84s/it][A[A[A[A[A[A

Critiques (+ Final Round): 100%|██████████| 4/4 [02:45<00:00, 41.28s/it][A


Input token count: 27,900
Output token count: 5,928
Max token length: 7,750
Cost: $0.23
Time: 2:45









Agents: 100%|██████████| 1/1 [00:46<00:00, 46.23s/it][A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 4/4 [03:03<00:00, 45.91s/it][A[A[A


Input token count: 28,855
Output token count: 7,276
Max token length: 9,098
Cost: $0.25
Time: 3:03


Agents: 100%|██████████| 1/1 [00:26<00:00, 26.61s/it]


Critiques (+ Final Round): 100%|██████████| 4/4 [03:11<00:00, 47.80s/it][A[A

Input token count: 28,117
Output token count: 6,146
Max token length: 7,968
Cost: $0.23
Time: 3:11





In [10]:
# Run DeepAb - merge
# glob() yields paths in filesystem order, which is not guaranteed to be stable
# across machines or runs. Sorting makes the input to the merge meeting
# reproducible (presumably load_summaries keeps the order of the given paths).
deepab_summaries = load_summaries(
    discussion_paths=sorted(deepab_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(deepab_summaries)}")

# One meeting in which the same agent merges the parallel discussions; the result
# is saved as "merged" next to them.
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=deepab_dir,
    save_name="merged",
    summaries=deepab_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:32<00:00, 32.13s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:32<00:00, 32.13s/it]

Input token count: 6,965
Output token count: 1,407
Max token length: 8,372
Cost: $0.06
Time: 0:32





## Run ABodyBuilder

In [11]:
# Run ABodyBuilder - prompts
# Output directory for this stage.
abodybuilder_dir = save_dir / "abodybuilder"

# Agenda: the shared background followed by the scripting task.
abodybuilder_prompt = f"{background_prompt} Your team previously decided to use ABodyBuilder to refine the modified antibody structures and ensure structural integrity. Please write a complete script that uses ABodyBuilder to refine the modified antibody structures and ensure structural integrity. Your code must be self-contained (with appropriate imports) and complete."

# Prior context comes from the first project-selection discussion only
# (discussion_1.json), not from the merged summary.
abodybuilder_prior_summaries = load_summaries(discussion_paths=[project_selection_dir / "discussion_1.json"])
# Report this stage's own summaries. The original printed len(deepab_prior_summaries),
# which showed the wrong list and made this cell depend on the DeepAb cell having run.
print(f"Number of prior summaries: {len(abodybuilder_prior_summaries)}")

Number of prior summaries: 1


In [12]:
# Run ABodyBuilder - discussion
# Launch num_iterations independent meetings in parallel threads; each call gets
# its own save_name (discussion_1, discussion_2, ...) inside abodybuilder_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    abodybuilder_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=abodybuilder_prompt,
            save_dir=abodybuilder_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            summaries=abodybuilder_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(abodybuilder_futures)

# wait() only blocks until the futures finish; it never re-raises an exception
# raised inside a worker. Calling result() surfaces a failed meeting here instead
# of letting the merge step silently run on missing or stale discussion files.
for meeting_future in abodybuilder_futures:
    meeting_future.result()


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Agents:  50%|█████     | 1/2 [00:25<00:25, 25.36s/it][A[A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:25<00:25, 25.95s/it][A[A[A[A[A[A




Agents:  50%|█████     | 1/2 [00:27<00:27, 27.41s/it][A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:30<00:30, 30.56s/it][A[A[A[A[A[A[A







Agents: 100%|██

Input token count: 29,968
Output token count: 7,054
Max token length: 8,885
Cost: $0.26
Time: 3:28









Agents: 100%|██████████| 1/1 [00:40<00:00, 40.71s/it][A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 4/4 [03:36<00:00, 54.09s/it][A[A[A[A


Input token count: 33,015
Output token count: 8,196
Max token length: 10,027
Cost: $0.29
Time: 3:36








Agents: 100%|██████████| 1/1 [00:36<00:00, 36.41s/it][A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [03:43<00:00, 55.94s/it]


Input token count: 30,125
Output token count: 6,807
Max token length: 8,638
Cost: $0.25
Time: 3:43







Agents: 100%|██████████| 1/1 [01:16<00:00, 76.99s/it][A[A[A[A[A

Critiques (+ Final Round): 100%|██████████| 4/4 [03:48<00:00, 57.04s/it][A


Input token count: 30,715
Output token count: 7,372
Max token length: 9,203
Cost: $0.26
Time: 3:48











Agents: 100%|██████████| 1/1 [00:54<00:00, 54.05s/it][A[A[A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 4/4 [03:48<00:00, 57.17s/it][A[A[A

Input token count: 37,463
Output token count: 9,175
Max token length: 11,006
Cost: $0.32
Time: 3:48





In [13]:
# Run ABodyBuilder - merge
# glob() yields paths in filesystem order, which is not guaranteed to be stable
# across machines or runs. Sorting makes the input to the merge meeting
# reproducible (presumably load_summaries keeps the order of the given paths).
abodybuilder_summaries = load_summaries(
    discussion_paths=sorted(abodybuilder_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(abodybuilder_summaries)}")

# One meeting in which the same agent merges the parallel discussions; the result
# is saved as "merged" next to them.
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=abodybuilder_dir,
    save_name="merged",
    summaries=abodybuilder_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:40<00:00, 40.65s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:40<00:00, 40.66s/it]

Input token count: 8,077
Output token count: 1,959
Max token length: 10,036
Cost: $0.07
Time: 0:40





## Run RosettaAntibody

In [14]:
# RosettaAntibody stage: output directory, agenda and prior context.
rosetta_antibody_dir = save_dir.joinpath("rosetta_antibody")

# Agenda: the shared background followed by the scripting task.
rosetta_antibody_prompt = (
    f"{background_prompt} Your team previously decided to use RosettaAntibody to predict binding affinities and validate modifications of a given antibody. "
    "Please write a complete script that uses RosettaAntibody to predict binding affinities and validate modifications for a given antibody. "
    "Your code must be self-contained (with appropriate imports) and complete."
)

# Prior context comes from the first project-selection discussion only
# (discussion_1.json), not from the merged summary.
rosetta_antibody_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir.joinpath("discussion_1.json")]
)
print("Number of prior summaries:", len(rosetta_antibody_prior_summaries))

Number of prior summaries: 1


In [15]:
# Run RosettaAntibody - discussion
# Launch num_iterations independent meetings in parallel threads; each call gets
# its own save_name (discussion_1, discussion_2, ...) inside rosetta_antibody_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    rosetta_antibody_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=rosetta_antibody_prompt,
            save_dir=rosetta_antibody_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            summaries=rosetta_antibody_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(rosetta_antibody_futures)

# wait() only blocks until the futures finish; it never re-raises an exception
# raised inside a worker. Calling result() surfaces a failed meeting here instead
# of letting the merge step silently run on missing or stale discussion files.
for meeting_future in rosetta_antibody_futures:
    meeting_future.result()



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A
Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:17<00:17, 17.25s/it][A[A[A[A[A[A[A







Agents:  50%|█████     | 1/2 [00:23<00:23, 23.16s/it][A[A[A[A[A[A[A[A








Agents:  50%|█████     | 1/2 [00:25<00:25, 25.76s/it][A[A[A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:26<00:26, 26.06s/it][A[A[A[A[A[A




Ag

Input token count: 33,627
Output token count: 8,552
Max token length: 10,393
Cost: $0.30
Time: 3:25








Agents: 100%|██████████| 1/1 [00:57<00:00, 57.13s/it][A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 4/4 [03:35<00:00, 53.77s/it][A[A[A


Input token count: 34,693
Output token count: 8,738
Max token length: 10,579
Cost: $0.30
Time: 3:35







Agents: 100%|██████████| 1/1 [01:00<00:00, 60.41s/it][A[A[A[A[A


Critiques (+ Final Round): 100%|██████████| 4/4 [03:41<00:00, 55.26s/it][A[A


Input token count: 34,700
Output token count: 8,735
Max token length: 10,576
Cost: $0.30
Time: 3:41









Agents: 100%|██████████| 1/1 [00:48<00:00, 48.09s/it][A[A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [03:52<00:00, 58.20s/it]


Input token count: 37,178
Output token count: 10,410
Max token length: 12,251
Cost: $0.34
Time: 3:52










Agents: 100%|██████████| 1/1 [00:49<00:00, 49.10s/it][A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 4/4 [03:54<00:00, 58.60s/it][A[A[A[A

Input token count: 37,371
Output token count: 10,310
Max token length: 12,151
Cost: $0.34
Time: 3:54





In [16]:
# Run RosettaAntibody - merge
# glob() yields paths in filesystem order, which is not guaranteed to be stable
# across machines or runs. Sorting makes the input to the merge meeting
# reproducible (presumably load_summaries keeps the order of the given paths).
rosetta_antibody_summaries = load_summaries(
    discussion_paths=sorted(rosetta_antibody_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(rosetta_antibody_summaries)}")

# One meeting in which the same agent merges the parallel discussions; the result
# is saved as "merged" next to them.
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=rosetta_antibody_dir,
    save_name="merged",
    summaries=rosetta_antibody_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:55<00:00, 55.83s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:55<00:00, 55.83s/it]

Input token count: 10,842
Output token count: 2,578
Max token length: 13,420
Cost: $0.09
Time: 0:55





## Design ESM approach

In [10]:
# ESM design - prompts
# Output directory for this stage.
esm_design_dir = save_dir / "esm_design"

# Agenda: the shared background followed by the design task.
esm_design_prompt = f"{background_prompt} You will use the ESM family of models for antibody design. Please design a method for applying ESM to this antibody design problem. Specify the exact model you will use and how you will use it to design antibodies."

# Questions passed to the meeting as agenda_questions.
# The comma after the first question was missing in the original, so Python's
# implicit string-literal concatenation fused the first two questions into one
# ("Which ESM model will you use?Will you design ...").
esm_design_questions = (
    "Which ESM model will you use?",
    "Will you design the antibodies de novo or will you modify existing antibodies?",
    "If modifying existing antibodies, which precise antibody or antibodies will you modify?",
    "How exactly will you use your model to design antibodies?",
    "What is the precise process for designing antibodies and selecting candidates for experimental validation?",
    "How will you computationally evaluate the quality of the design antibodies?",
    "What objectives will you optimize for in the design process?",
)

# Prior context for this stage is the merged project-selection summary.
esm_design_prior_summaries = load_summaries(discussion_paths=[project_selection_dir / "merged.json"])
print(f"Number of prior summaries: {len(esm_design_prior_summaries)}")

Number of prior summaries: 1


In [11]:
# ESM design - discussion
# Launch num_iterations independent team meetings in parallel threads; each call
# gets its own save_name (discussion_1, discussion_2, ...) inside esm_design_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_design_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=esm_design_prompt,
            agenda_questions=esm_design_questions,
            save_dir=esm_design_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=esm_design_prior_summaries,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(esm_design_futures)

# wait() only blocks until the futures finish; it never re-raises an exception
# raised inside a worker. Calling result() surfaces a failed meeting here instead
# of letting the merge step silently run on missing or stale discussion files.
for meeting_future in esm_design_futures:
    meeting_future.result()




Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A







Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A





Team:  20%|██        | 1/5 [00:06<00:27,  6.76s/it][A[A[A[A[A[A







Team:  20%|██        | 1/5 [00:09<00:37,  9.33s/it][A[A[A[A[A[A[A[A






Team:  20%|██        | 1/5 [00:12<00:51, 12.81s/it][A[A[A[A[A[A[A





Team:  40%|████      | 2/5 [00:13<00:20,  6.82s/it][A[A[A[A[A[A








Team:  20%|██        | 1/5 [00:1

Input token count: 107,048
Output token count: 9,792
Max token length: 12,959
Cost: $0.68
Time: 3:00









Team:  80%|████████  | 4/5 [00:45<00:11, 11.81s/it][A[A[A[A[A[A[A








Team:  60%|██████    | 3/5 [00:42<00:27, 13.66s/it][A[A[A[A[A[A[A[A[A




Team: 100%|██████████| 5/5 [01:04<00:00, 12.93s/it][A[A[A[A[A

Team:   0%|          | 0/5 [00:00<?, ?it/s]3/4 [03:04<01:01, 61.99s/it][A







Team:  40%|████      | 2/5 [00:40<00:57, 19.03s/it][A[A[A[A[A[A[A[A






Team: 100%|██████████| 5/5 [00:57<00:00, 11.51s/it][A[A[A[A[A[A[A



Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:13<01:02, 62.44s/it][A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A








Team:  80%|████████  | 4/5 [00:56<00:13, 13.68s/it][A[A[A[A[A[A[A[A[A







Team:  60%|██████    | 3/5 [00:53<00:32, 16.17s/it][A[A[A[A[A[A[A[A








Team: 100%|██████████| 5/5 [01:09<00:00, 13.86s/it][A[A[A[A[A[A[A[A[A




Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:29<01:08, 68.91s/it][A[A[A[A





Team:   0%|          | 0/5 

Input token count: 105,113
Output token count: 9,548
Max token length: 12,715
Cost: $0.67
Time: 3:31










Team:   0%|          | 0/5 [00:27<?, ?it/s].39s/it][A[A[A[A[A[A[A[A



Rounds (+ Summary Round): 100%|██████████| 4/4 [03:40<00:00, 55.16s/it][A[A[A


Input token count: 130,073
Output token count: 12,061
Max token length: 15,228
Cost: $0.83
Time: 3:40










Team: 100%|██████████| 5/5 [01:23<00:00, 16.71s/it][A[A[A[A[A[A[A[A


Team:   0%|          | 0/5 [00:29<?, ?it/s]3/4 [03:51<01:18, 78.79s/it][A[A




Rounds (+ Summary Round): 100%|██████████| 4/4 [03:58<00:00, 59.64s/it][A[A[A[A


Input token count: 120,508
Output token count: 11,199
Max token length: 14,366
Cost: $0.77
Time: 3:58


Team:   0%|          | 0/5 [00:35<?, ?it/s]


Rounds (+ Summary Round): 100%|██████████| 4/4 [04:27<00:00, 66.95s/it][A[A

Input token count: 118,939
Output token count: 11,458
Max token length: 14,625
Cost: $0.77
Time: 4:27





In [12]:
# ESM design - merge
# glob() yields paths in filesystem order, which is not guaranteed to be stable
# across machines or runs. Sorting makes the input to the merge meeting
# reproducible (presumably load_summaries keeps the order of the given paths).
esm_design_summaries = load_summaries(
    discussion_paths=sorted(esm_design_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(esm_design_summaries)}")

# One meeting in which the Principal Investigator merges the parallel discussions;
# the result is saved as "merged". The call stays the last expression of the cell
# so that its return value is still displayed as the cell output.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=esm_design_dir,
    save_name="merged",
    summaries=esm_design_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:48<00:00, 48.18s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:48<00:00, 48.19s/it]

Input token count: 6,791
Output token count: 1,817
Max token length: 8,608
Cost: $0.06
Time: 0:48





"### Agenda\n\nOur goal is to develop antibodies targeting the SARS-CoV-2 spike protein, focusing on the newest variants and aiming for broad-spectrum activity across multiple variants. We will use the ESM family of models to design these antibodies. Key decisions include whether we will design antibodies de novo or modify existing ones, which specific antibodies we will modify if we choose that route, the exact methodology for applying the ESM model to antibody design, the process for selecting candidates for experimental validation, the computational evaluation metrics, and the primary objectives to optimize during the design process.\n\n### Team Member Input\n\n**Immunologist:**\n- Advocates for modifying existing antibodies like sotrovimab, S309, REGN10933, and REGN10987 due to their known cross-reactivity and structural data.\n- Suggested prioritizing conserved regions within the S2 subunit, specifically the fusion peptide (FP), HR1, HR2, stem helix, and MPER.\n- Emphasizes the ne

## Implement ESM approach

In [21]:
# ESM implement - prompts
# Sub-directory of save_dir used for the ESM implementation step.
esm_implement_dir = save_dir.joinpath("esm_implement")

# Agenda text: the shared background prompt followed by the coding task.
# Adjacent literals are concatenated at compile time, so the resulting string is
# identical to a single-line f-string.
esm_implement_prompt = (
    f"{background_prompt} "
    "You will start with an existing SARS-CoV-2 antibody, mutate the antibody sequence, "
    "and then evaluate the mutated sequences for potential binding by using ESM to calculate "
    "the log-likelihood ratio between the mutated and wildtype sequences. "
    "Please write a complete script to implement this method. "
    "Your code must be self-contained (with appropriate imports) and complete."
)

In [22]:
# ESM implement - discussion
# Run num_iterations independent meetings in parallel threads, each saved under its own
# "discussion_<n>" name (1-based).
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_implement_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=esm_implement_prompt,
            save_dir=esm_implement_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=num_critiques,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every submitted meeting has finished (successfully or not).
    concurrent.futures.wait(esm_implement_futures)

# wait() does not re-raise exceptions from worker threads; Future.result() does.
# Without this loop a failed meeting would go unnoticed and the merge step would
# silently run on fewer discussions.
for esm_implement_future in esm_implement_futures:
    esm_implement_future.result()



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A
Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:12<00:12, 12.68s/it][A[A[A[A[A[A




Agents:  50%|█████     | 1/2 [00:15<00:15, 15.45s/it][A[A[A[A[A







Agents:  50%|█████     | 1/2 [00:17<00:17, 17.58s/it][A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:20<00:20, 20.55s/it][A[A[A[A[A[A[A





Agents: 100%|████

Input token count: 22,236
Output token count: 7,606
Max token length: 7,976
Cost: $0.23
Time: 2:25







Agents: 100%|██████████| 1/1 [00:31<00:00, 31.86s/it][A[A[A[A[A


Critiques (+ Final Round): 100%|██████████| 4/4 [02:35<00:00, 38.99s/it][A[A


Input token count: 22,874
Output token count: 8,194
Max token length: 8,564
Cost: $0.24
Time: 2:35









Agents: 100%|██████████| 1/1 [00:20<00:00, 20.48s/it][A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 4/4 [02:36<00:00, 39.19s/it][A[A[A


Input token count: 21,673
Output token count: 7,311
Max token length: 7,681
Cost: $0.22
Time: 2:36











Agents: 100%|██████████| 1/1 [00:26<00:00, 26.19s/it][A[A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 4/4 [02:43<00:00, 40.79s/it][A[A[A[A


Input token count: 21,878
Output token count: 7,629
Max token length: 7,999
Cost: $0.22
Time: 2:43










Agents: 100%|██████████| 1/1 [00:41<00:00, 41.16s/it][A[A[A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [02:58<00:00, 44.71s/it]

Input token count: 21,535
Output token count: 8,145
Max token length: 8,515
Cost: $0.23
Time: 2:58





In [23]:
# ESM implement - merge
# Path.glob gives no ordering guarantee, so sort the discussion files before loading:
# load_summaries then receives the same path order on every run and on every machine.
esm_implement_discussion_paths = sorted(esm_implement_dir.glob("discussion_*.json"))
esm_implement_summaries = load_summaries(discussion_paths=esm_implement_discussion_paths)
print(f"Number of summaries: {len(esm_implement_summaries)}")

# Single meeting with the Data Scientist using the merge agenda and the loaded
# summaries. Kept as the last expression so its return value is shown as the cell output.
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=esm_implement_dir,
    save_name="merged",
    summaries=esm_implement_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:42<00:00, 42.07s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:42<00:00, 42.08s/it]

Input token count: 8,770
Output token count: 1,985
Max token length: 10,755
Cost: $0.07
Time: 0:42





'### Merged and Enhanced Script\n\n```python\nimport torch\nfrom esm import pretrained, Alphabet\nimport random\nimport numpy as np\nfrom Bio.SubsMat import MatrixInfo as matlist\n\n# Load the pretrained ESM model\ndef load_esm_model():\n    """\n    Load the pretrained ESM model and set it to evaluation mode.\n    """\n    model, alphabet = pretrained.esm1b_t33_650M_UR50S()\n    model.eval()  # Disable dropout for evaluation\n    return model, alphabet\n\n# Function to set random seed for reproducibility\ndef set_seed(seed=42):\n    random.seed(seed)\n    torch.manual_seed(seed)\n\nset_seed()\n\n# Function to validate that the sequence only contains valid amino acid characters\ndef validate_sequence(seq):\n    valid_amino_acids = "ACDEFGHIKLMNPQRSTVWY"\n    return all(char in valid_amino_acids for char in seq)\n\n# Function to mutate a sequence using a substitution matrix\ndef mutate_sequence(sequence, mutation_rate=0.01, sub_matrix=matlist.blosum62):\n    """\n    Mutate the given se

## Fix ESM implementation

In [32]:
# ESM fix - prompts
# Sub-directory of save_dir used for the ESM fix step.
esm_fix_dir = save_dir.joinpath("esm_fix")

# Agenda listing the four problems to correct in the previously generated script.
# NOTE(review): "design antibodies SARS-CoV-2 spike protein" looks like it is missing
# "for the"; the text is left verbatim because it is the prompt sent to the model.
esm_fix_prompt = """You previously implemented a method for using ESM to design antibodies SARS-CoV-2 spike protein (see summary). However, the code has several issues listed below.

1. The substitution matrix assumes an old version of biopython which is no longer available. Please remove this component and allow any valid amino acid substitution.
2. The wt_log_likelihood and mut_log_likelihood calculations fail since the sequence length does not account for two additional tokens that are added. Please adjust the sequence length to account for these tokens.
3. Please replace the wildtype sequence with this sequence: QVQLVQSGAEVKKPGASVKVSCKASGYPFTSYGISWVRQAPGQGLEWMGWISTYNGNTNYAQKFQGRVTMTTDTSTTTGYMELRRLRSDDTAVYYCARDYTRGAWFGESLIGGFDNWGQGTLVTVSS
4. Please modify the script to create many mutants of the wildtype sequence (e.g., 1000) and calculate the log-likelihood ratio for each mutant. Then have the script select the top mutants (e.g., 10) by log-likelihood ratio and print out both their mutations (e.g., P28T) and their log-likelihood ratios.

Please rewrite the code to fix these issues."""

# Context for the fix: only the merged result of the implementation step.
esm_fix_prior_summaries = load_summaries(
    discussion_paths=[esm_implement_dir.joinpath("merged.json")],
)
print(f"Number of prior summaries: {len(esm_fix_prior_summaries)}")

Number of prior summaries: 1


In [33]:
# ESM fix
# One meeting with the Data Scientist on the fix agenda. The call is the cell's last
# expression, so its return value is rendered as the cell output.
run_individual_meeting(
    # Who is asked, what they are asked, and the prior summaries given as context.
    team_member=DATA_SCIENTIST,
    agenda=esm_fix_prompt,
    summaries=esm_fix_prior_summaries,
    # Model settings.
    model=model,
    temperature=CONSISTENT_TEMPERATURE,
    # Output location.
    save_dir=esm_fix_dir,
    save_name="answer",
)

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:24<00:00, 24.76s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:24<00:00, 24.77s/it]

Input token count: 2,310
Output token count: 1,476
Max token length: 3,786
Cost: $0.03
Time: 0:24





'Certainly! Let\'s address each of the issues listed in the agenda and modify the script accordingly.\n\n### 1. Remove the Substitution Matrix Dependency\nWe\'ll remove the dependency on the substitution matrix and allow any valid amino acid substitution.\n\n### 2. Adjust Sequence Length for Log-Likelihood Calculation\nWe\'ll adjust the sequence length to account for the two additional tokens added by the ESM model.\n\n### 3. Replace the Wildtype Sequence\nWe\'ll replace the wildtype sequence with the provided sequence.\n\n### 4. Generate Multiple Mutants and Select Top Mutants\nWe\'ll modify the script to create multiple mutants (e.g., 1000), calculate the log-likelihood ratio for each, and then select and print the top mutants by log-likelihood ratio.\n\nHere\'s the updated script:\n\n```python\nimport torch\nfrom esm import pretrained, Alphabet\nimport random\nimport numpy as np\n\n# Load the pretrained ESM model\ndef load_esm_model():\n    """\n    Load the pretrained ESM model and

## Improve ESM implementation efficiency

In [36]:
# ESM efficient - prompts
# Sub-directory of save_dir used for the efficiency-improvement step.
esm_efficient_dir = save_dir.joinpath("esm_efficient")

# Agenda asking for a faster rewrite that keeps the same logic.
# NOTE(review): "design antibodies SARS-CoV-2 spike protein" looks like it is missing
# "for the"; the text is left verbatim because it is the prompt sent to the model.
esm_efficient_prompt = """You previously implemented a method for using ESM to design antibodies SARS-CoV-2 spike protein (see summary). However, the code is not efficient. Please rewrite the code to maintain the same logic but improve the efficiency of the implementation (e.g., via vectorization and batching)."""

# Context for the rewrite: only the answer produced by the ESM fix step.
esm_efficient_prior_summaries = load_summaries(
    discussion_paths=[esm_fix_dir.joinpath("answer.json")],
)
print(f"Number of prior summaries: {len(esm_efficient_prior_summaries)}")

Number of prior summaries: 1


In [37]:
# ESM efficient
# One meeting with the Data Scientist on the efficiency agenda. The call is the cell's
# last expression, so its return value is rendered as the cell output.
run_individual_meeting(
    # Who is asked, what they are asked, and the prior summaries given as context.
    team_member=DATA_SCIENTIST,
    agenda=esm_efficient_prompt,
    summaries=esm_efficient_prior_summaries,
    # Model settings.
    model=model,
    temperature=CONSISTENT_TEMPERATURE,
    # Output location.
    save_dir=esm_efficient_dir,
    save_name="answer",
)

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:24<00:00, 24.71s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:24<00:00, 24.71s/it]

Input token count: 1,586
Output token count: 1,453
Max token length: 3,039
Cost: $0.03
Time: 0:24





'Thank you for the summary and the agenda for today\'s meeting. I understand that the goal is to improve the efficiency of the current implementation for using ESM to design antibodies against the SARS-CoV-2 spike protein. Specifically, we aim to maintain the same logic but enhance the performance through vectorization and batching.\n\nHere\'s an optimized version of the script that leverages vectorization and batching to improve efficiency:\n\n```python\nimport torch\nfrom esm import pretrained, Alphabet\nimport random\nimport numpy as np\n\n# Load the pretrained ESM model\ndef load_esm_model():\n    """\n    Load the pretrained ESM model and set it to evaluation mode.\n    """\n    model, alphabet = pretrained.esm1b_t33_650M_UR50S()\n    model.eval()  # Disable dropout for evaluation\n    return model, alphabet\n\n# Function to set random seed for reproducibility\ndef set_seed(seed=42):\n    random.seed(seed)\n    torch.manual_seed(seed)\n\nset_seed()\n\n# Function to validate that t