In [1]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    ANTIBODIES_CONTEXTS,
    MERGE_PROMPT,
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC
)
from run_individual_meeting import run_individual_meeting
from run_scientific_meeting import run_scientific_meeting
from utils import load_summaries

In [2]:
# Key parameters shared by the meeting cells below
model = "gpt-4o"  # passed as `model=` to every meeting call
save_dir = Path("antibody_design")  # root directory; each stage saves into a subdirectory of it
num_rounds = 3  # passed as `num_rounds=` to the run_scientific_meeting calls
num_iterations = 5  # number of parallel discussions launched per stage

In [3]:
# Select team members - prompts
# Directory that holds the team-selection discussions and their merged result
team_selection_dir = save_dir.joinpath("team_selection")

# Agenda given to the Principal Investigator: the task description followed by an
# example Agent(...) definition showing the format in which team members should be listed.
team_selection_prompt = (
    "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, "
    "ideally for the newest variant of the virus and with broad spectrum activity across variants. "
    "You need to select a team of scientists to help you with this project. "
    "Please select a small set of team members that you would like to invite to a discussion "
    "to design the antibody discovery approach. "
    "Please list team members in the following format, using the team member below as an example. "
    "You should not include yourself (Principal Investigator) in the list."
    """

Agent(
    title="Principal Investigator",
    expertise="applying artificial intelligence to biomedical research",
    goal="perform research in your area of expertise that maximizes the scientific impact of the work",
    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",
)
"""
)

In [4]:
# Select team members - discussion
# Launch num_iterations independent meetings in parallel; each gets its own save_name.
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_prompt,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(team_selection_futures)

# wait() only blocks until the futures finish; an exception raised inside a worker
# stays stored on its Future. Calling result() re-raises it here, so a failed meeting
# (e.g. an API error) is reported by this cell instead of passing silently.
for team_selection_future in team_selection_futures:
    team_selection_future.result()

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A



Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A[A




Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A






Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A







Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Agents: 100%|██████████| 1/1 [00:06<00:00,  6.07s/it][A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 1/1 [00:06<00:00,  6.11s/it][A[A[A[A






Agents: 100%|██████████| 1/1 [00:06<00:00,  6.29s/it][A[A[A[A[A[A


Critiques (+ Final R

Input token count: 232
Output token count: 306
Max token length: 538
Cost: $0.01
Time: 0:06


Critiques (+ Final Round): 100%|██████████| 1/1 [00:06<00:00,  6.31s/it]


Input token count: 232
Output token count: 343
Max token length: 575
Cost: $0.01
Time: 0:06







Agents: 100%|██████████| 1/1 [00:07<00:00,  7.10s/it][A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:07<00:00,  7.11s/it]


Input token count: 232
Output token count: 341
Max token length: 573
Cost: $0.01
Time: 0:07











Agents: 100%|██████████| 1/1 [00:07<00:00,  7.34s/it][A[A[A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 1/1 [00:07<00:00,  7.38s/it][A[A[A


Input token count: 232
Output token count: 361
Max token length: 593
Cost: $0.01
Time: 0:07









Agents: 100%|██████████| 1/1 [00:10<00:00, 10.68s/it][A[A[A[A[A[A[A

Critiques (+ Final Round): 100%|██████████| 1/1 [00:10<00:00, 10.72s/it][A

Input token count: 232
Output token count: 392
Max token length: 624
Cost: $0.01
Time: 0:10





In [5]:
# Select team members - merge
# Path.glob yields entries in a filesystem-dependent order; sorting makes the order in
# which the summaries are handed to the merge meeting reproducible across runs/machines.
# (Lexicographic order: "discussion_10" sorts before "discussion_2".)
team_selection_discussion_paths = sorted(team_selection_dir.glob("discussion_*.json"))
team_selection_summaries = load_summaries(discussion_paths=team_selection_discussion_paths)
print(f"Number of summaries: {len(team_selection_summaries)}")

# Kept as the cell's final expression so its return value is rendered as the cell output.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=team_selection_dir,
    save_name="merged",
    summaries=team_selection_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:14<00:00, 14.44s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:14<00:00, 14.45s/it]

Input token count: 1,890
Output token count: 1,027
Max token length: 2,917
Cost: $0.02
Time: 0:14





'Certainly! Based on the summaries of the previous meetings, here is a consolidated list of key team members for our antibody discovery approach, incorporating the best components from each individual answer:\n\nAgent(\n    title="Immunologist",\n    expertise="immune response characterization and antibody development",\n    goal="identify and characterize potential antibody candidates that can effectively neutralize SARS-CoV-2 variants",\n    role="provide insights on immunogenic targets, design experiments for antibody discovery, and interpret immunological data"\n)\n\nAgent(\n    title="Structural Biologist",\n    expertise="protein structure analysis and molecular modeling",\n    goal="analyze and model the structure of the SARS-CoV-2 spike protein and its variants",\n    role="assist in identifying key structural features for antibody binding and stability, and guide the design of broad-spectrum antibodies using structural insights"\n)\n\nAgent(\n    title="Bioinformatician",\n   

In [3]:
# Add team members
# Each Agent is described by a title, an area of expertise, a goal, and a role.

IMMUNOLOGIST = Agent(
    title="Immunologist",
    expertise="immune response characterization and antibody development",
    goal=(
        "identify and characterize potential antibody candidates "
        "that can effectively neutralize SARS-CoV-2 variants"
    ),
    role=(
        "provide insights on immunogenic targets, design experiments for antibody discovery, "
        "and interpret immunological data"
    ),
)

# NOTE: the variable is named DATA_SCIENTIST while the agent's title is
# "Machine Learning Scientist"; later cells refer to it by this variable name.
DATA_SCIENTIST = Agent(
    title="Machine Learning Scientist",
    expertise="developing and applying machine learning algorithms for biological data",
    goal="design and implement AI models to predict effective antibody candidates",
    role=(
        "integrate data from bioinformatics and immunology to train machine learning models "
        "for antibody discovery, and predict antibody binding affinity and specificity"
    ),
)

VIROLOGIST = Agent(
    title="Virologist",
    expertise="SARS-CoV-2 biology and virology assays",
    goal=(
        "assess the neutralization efficacy of antibody candidates "
        "against various SARS-CoV-2 variants"
    ),
    role=(
        "design and conduct virology assays to test antibody effectiveness in vitro and in vivo, "
        "and provide insights on viral escape mechanisms"
    ),
)

# Team passed as `team_members=` to the scientific meetings; the imported
# SCIENTIFIC_CRITIC is included as the last member.
team_members = (IMMUNOLOGIST, DATA_SCIENTIST, VIROLOGIST, SCIENTIFIC_CRITIC)

In [16]:
# Select antibodies project - prompts
# Directory that holds the project-selection discussions and their merged result
project_selection_dir = save_dir.joinpath("project_selection")

# Agenda for the project-selection team meetings
project_selection_prompt = (
    "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, "
    "ideally for the newest variant of the virus and with broad spectrum activity across variants. "
    "Please design a specific antibody discovery approach for this target that uses exactly one existing, "
    "pre-trained machine learning model to design antibody candidates. "
    "Decide what specific machine learning model to use and precisely how it will be applied "
    "to this antibody discovery task."
)

# Questions passed as `agenda_questions=` to the project-selection meetings.
# Multi-sentence questions are wrapped in their own parentheses so that every
# tuple element is visibly delimited by a comma.
project_selection_questions = (
    (
        "What specific machine learning model will you use to design antibodies? "
        "You must choose only one model."
    ),
    (
        "Will you design the antibodies de novo or will you modify existing antibodies? "
        "You must choose only one option."
    ),
    "If modifying existing antibodies, which precise antibody or antibodies will you modify?",
    "If modifying existing antibodies, how will you modify the antibodies?",
    "If designing antibodies de novo, how will you propose antibody candidates?",
    (
        "How exactly will you use your machine learning model to design antibodies in silico? "
        "Please provide a step-by-step description."
    ),
)

In [17]:
# Select antibodies project - discussion
# Launch num_iterations independent team meetings in parallel; each gets its own save_name.
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=project_selection_prompt,
            agenda_questions=project_selection_questions,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            contexts=ANTIBODIES_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(project_selection_futures)

# wait() only blocks until the futures finish; an exception raised inside a worker
# stays stored on its Future. Calling result() re-raises it here, so a failed meeting
# (e.g. an API error) is reported by this cell instead of passing silently.
for project_selection_future in project_selection_futures:
    project_selection_future.result()


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A







Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:02<?, ?it/s][A[A[A[A[A[A[A[A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:03<?, ?it/s]






Team:  20%|██        | 1/5 [00:05<00:21,  5.30s/it][A[A[A[A[A[A




Team:  20%|██        | 1/5 [00:05<00:22,  5.52s/it][A[A[A[A[A






Team:  20%|██        | 1/5 [00:05<00:22,  5.61s/it][A[A[A[A[A[A[A








Team:  20%|██        | 1/5 [00:08<00:33,  8.38s/it][A

In [18]:
# Select antibodies project - merge
# Path.glob yields entries in a filesystem-dependent order; sorting makes the order in
# which the summaries are handed to the merge meeting reproducible across runs/machines.
# (Lexicographic order: "discussion_10" sorts before "discussion_2".)
project_selection_discussion_paths = sorted(project_selection_dir.glob("discussion_*.json"))
project_selection_summaries = load_summaries(discussion_paths=project_selection_discussion_paths)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Kept as the cell's final expression so its return value is rendered as the cell output.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    contexts=ANTIBODIES_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:02<?, ?it/s][A
Critiques (+ Final Round):   0%|          | 0/1 [00:02<?, ?it/s]


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [10]:
# ESM design - prompts
# Directory that holds the ESM design discussions and their merged result
esm_design_dir = save_dir.joinpath("esm_design")

# Agenda for the ESM design team meetings
esm_design_prompt = (
    "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, "
    "ideally for the newest variant of the virus and with broad spectrum activity across variants. "
    "You will use the ESM family of models for antibody design. "
    "Please design a method for applying ESM to this antibody design problem. "
    "Specify the exact model you will use and how you will use it to design antibodies."
)

# Questions passed as `agenda_questions=` to the ESM design meetings.
# Every element must end with a comma: adjacent string literals without one are
# silently concatenated by Python, which previously fused the first two questions
# into a single item ("Which ESM model will you use?Will you design ...").
esm_design_questions = (
    "Which ESM model will you use?",
    "Will you design the antibodies de novo or will you modify existing antibodies?",
    "If modifying existing antibodies, which precise antibody or antibodies will you modify?",
    "How exactly will you use your model to design antibodies?",
    "What is the precise process for designing antibodies and selecting candidates for experimental validation?",
    "How will you computationally evaluate the quality of the design antibodies?",
    "What objectives will you optimize for in the design process?",
)

# Load the merged project-selection summary; it is passed as `summaries=` to the
# ESM design meetings.
project_selection_merged_path = project_selection_dir.joinpath("merged.json")
esm_design_prior_summaries = load_summaries(discussion_paths=[project_selection_merged_path])
num_esm_design_prior_summaries = len(esm_design_prior_summaries)
print(f"Number of prior summaries: {num_esm_design_prior_summaries}")

Number of prior summaries: 1


In [11]:
# ESM design - discussion
# Launch num_iterations independent team meetings in parallel; each gets its own save_name.
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_design_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=esm_design_prompt,
            agenda_questions=esm_design_questions,
            save_dir=esm_design_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=esm_design_prior_summaries,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(esm_design_futures)

# wait() only blocks until the futures finish; an exception raised inside a worker
# stays stored on its Future. Calling result() re-raises it here, so a failed meeting
# (e.g. an API error) is reported by this cell instead of passing silently.
for esm_design_future in esm_design_futures:
    esm_design_future.result()




Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A







Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A





Team:  20%|██        | 1/5 [00:06<00:27,  6.76s/it][A[A[A[A[A[A







Team:  20%|██        | 1/5 [00:09<00:37,  9.33s/it][A[A[A[A[A[A[A[A






Team:  20%|██        | 1/5 [00:12<00:51, 12.81s/it][A[A[A[A[A[A[A





Team:  40%|████      | 2/5 [00:13<00:20,  6.82s/it][A[A[A[A[A[A








Team:  20%|██        | 1/5 [00:1

Input token count: 107,048
Output token count: 9,792
Max token length: 12,959
Cost: $0.68
Time: 3:00









Team:  80%|████████  | 4/5 [00:45<00:11, 11.81s/it][A[A[A[A[A[A[A








Team:  60%|██████    | 3/5 [00:42<00:27, 13.66s/it][A[A[A[A[A[A[A[A[A




Team: 100%|██████████| 5/5 [01:04<00:00, 12.93s/it][A[A[A[A[A

Team:   0%|          | 0/5 [00:00<?, ?it/s]3/4 [03:04<01:01, 61.99s/it][A







Team:  40%|████      | 2/5 [00:40<00:57, 19.03s/it][A[A[A[A[A[A[A[A






Team: 100%|██████████| 5/5 [00:57<00:00, 11.51s/it][A[A[A[A[A[A[A



Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:13<01:02, 62.44s/it][A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A








Team:  80%|████████  | 4/5 [00:56<00:13, 13.68s/it][A[A[A[A[A[A[A[A[A







Team:  60%|██████    | 3/5 [00:53<00:32, 16.17s/it][A[A[A[A[A[A[A[A








Team: 100%|██████████| 5/5 [01:09<00:00, 13.86s/it][A[A[A[A[A[A[A[A[A




Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:29<01:08, 68.91s/it][A[A[A[A





Team:   0%|          | 0/5 

Input token count: 105,113
Output token count: 9,548
Max token length: 12,715
Cost: $0.67
Time: 3:31










Team:   0%|          | 0/5 [00:27<?, ?it/s].39s/it][A[A[A[A[A[A[A[A



Rounds (+ Summary Round): 100%|██████████| 4/4 [03:40<00:00, 55.16s/it][A[A[A


Input token count: 130,073
Output token count: 12,061
Max token length: 15,228
Cost: $0.83
Time: 3:40










Team: 100%|██████████| 5/5 [01:23<00:00, 16.71s/it][A[A[A[A[A[A[A[A


Team:   0%|          | 0/5 [00:29<?, ?it/s]3/4 [03:51<01:18, 78.79s/it][A[A




Rounds (+ Summary Round): 100%|██████████| 4/4 [03:58<00:00, 59.64s/it][A[A[A[A


Input token count: 120,508
Output token count: 11,199
Max token length: 14,366
Cost: $0.77
Time: 3:58


Team:   0%|          | 0/5 [00:35<?, ?it/s]


Rounds (+ Summary Round): 100%|██████████| 4/4 [04:27<00:00, 66.95s/it][A[A

Input token count: 118,939
Output token count: 11,458
Max token length: 14,625
Cost: $0.77
Time: 4:27





In [12]:
# ESM design - merge
# Path.glob yields entries in a filesystem-dependent order; sorting makes the order in
# which the summaries are handed to the merge meeting reproducible across runs/machines.
# (Lexicographic order: "discussion_10" sorts before "discussion_2".)
esm_design_discussion_paths = sorted(esm_design_dir.glob("discussion_*.json"))
esm_design_summaries = load_summaries(discussion_paths=esm_design_discussion_paths)
print(f"Number of summaries: {len(esm_design_summaries)}")

# Kept as the cell's final expression so its return value is rendered as the cell output.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=esm_design_dir,
    save_name="merged",
    summaries=esm_design_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:48<00:00, 48.18s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:48<00:00, 48.19s/it]

Input token count: 6,791
Output token count: 1,817
Max token length: 8,608
Cost: $0.06
Time: 0:48





"### Agenda\n\nOur goal is to develop antibodies targeting the SARS-CoV-2 spike protein, focusing on the newest variants and aiming for broad-spectrum activity across multiple variants. We will use the ESM family of models to design these antibodies. Key decisions include whether we will design antibodies de novo or modify existing ones, which specific antibodies we will modify if we choose that route, the exact methodology for applying the ESM model to antibody design, the process for selecting candidates for experimental validation, the computational evaluation metrics, and the primary objectives to optimize during the design process.\n\n### Team Member Input\n\n**Immunologist:**\n- Advocates for modifying existing antibodies like sotrovimab, S309, REGN10933, and REGN10987 due to their known cross-reactivity and structural data.\n- Suggested prioritizing conserved regions within the S2 subunit, specifically the fusion peptide (FP), HR1, HR2, stem helix, and MPER.\n- Emphasizes the ne

In [21]:
# ESM implement - prompts
# Directory that holds the ESM implementation discussions and their merged result
esm_implement_dir = save_dir.joinpath("esm_implement")

# Agenda asking for a complete, self-contained script
esm_implement_prompt = (
    "Your goal is to identify antibodies that bind to the SARS-CoV-2 spike protein. "
    "You will start with an existing SARS-CoV-2 antibody, mutate the antibody sequence, "
    "and then evaluate the mutated sequences for potential binding by using ESM to calculate "
    "the log-likelihood ratio between the mutated and wildtype sequences. "
    "Please write a complete script to implement this method. "
    "Your code must be self-contained (with appropriate imports) and complete."
)

In [22]:
# ESM implement - discussion
# Launch num_iterations independent meetings in parallel; each gets its own save_name.
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_implement_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=esm_implement_prompt,
            save_dir=esm_implement_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=3,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(esm_implement_futures)

# wait() only blocks until the futures finish; an exception raised inside a worker
# stays stored on its Future. Calling result() re-raises it here, so a failed meeting
# (e.g. an API error) is reported by this cell instead of passing silently.
for esm_implement_future in esm_implement_futures:
    esm_implement_future.result()



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A
Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:12<00:12, 12.68s/it][A[A[A[A[A[A




Agents:  50%|█████     | 1/2 [00:15<00:15, 15.45s/it][A[A[A[A[A







Agents:  50%|█████     | 1/2 [00:17<00:17, 17.58s/it][A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:20<00:20, 20.55s/it][A[A[A[A[A[A[A





Agents: 100%|████

Input token count: 22,236
Output token count: 7,606
Max token length: 7,976
Cost: $0.23
Time: 2:25







Agents: 100%|██████████| 1/1 [00:31<00:00, 31.86s/it][A[A[A[A[A


Critiques (+ Final Round): 100%|██████████| 4/4 [02:35<00:00, 38.99s/it][A[A


Input token count: 22,874
Output token count: 8,194
Max token length: 8,564
Cost: $0.24
Time: 2:35









Agents: 100%|██████████| 1/1 [00:20<00:00, 20.48s/it][A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 4/4 [02:36<00:00, 39.19s/it][A[A[A


Input token count: 21,673
Output token count: 7,311
Max token length: 7,681
Cost: $0.22
Time: 2:36











Agents: 100%|██████████| 1/1 [00:26<00:00, 26.19s/it][A[A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 4/4 [02:43<00:00, 40.79s/it][A[A[A[A


Input token count: 21,878
Output token count: 7,629
Max token length: 7,999
Cost: $0.22
Time: 2:43










Agents: 100%|██████████| 1/1 [00:41<00:00, 41.16s/it][A[A[A[A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [02:58<00:00, 44.71s/it]

Input token count: 21,535
Output token count: 8,145
Max token length: 8,515
Cost: $0.23
Time: 2:58





In [23]:
# ESM implement - merge
# Path.glob yields entries in a filesystem-dependent order; sorting makes the order in
# which the summaries are handed to the merge meeting reproducible across runs/machines.
# (Lexicographic order: "discussion_10" sorts before "discussion_2".)
esm_implement_discussion_paths = sorted(esm_implement_dir.glob("discussion_*.json"))
esm_implement_summaries = load_summaries(discussion_paths=esm_implement_discussion_paths)
print(f"Number of summaries: {len(esm_implement_summaries)}")

# Kept as the cell's final expression so its return value is rendered as the cell output.
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=esm_implement_dir,
    save_name="merged",
    summaries=esm_implement_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:42<00:00, 42.07s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:42<00:00, 42.08s/it]

Input token count: 8,770
Output token count: 1,985
Max token length: 10,755
Cost: $0.07
Time: 0:42





'### Merged and Enhanced Script\n\n```python\nimport torch\nfrom esm import pretrained, Alphabet\nimport random\nimport numpy as np\nfrom Bio.SubsMat import MatrixInfo as matlist\n\n# Load the pretrained ESM model\ndef load_esm_model():\n    """\n    Load the pretrained ESM model and set it to evaluation mode.\n    """\n    model, alphabet = pretrained.esm1b_t33_650M_UR50S()\n    model.eval()  # Disable dropout for evaluation\n    return model, alphabet\n\n# Function to set random seed for reproducibility\ndef set_seed(seed=42):\n    random.seed(seed)\n    torch.manual_seed(seed)\n\nset_seed()\n\n# Function to validate that the sequence only contains valid amino acid characters\ndef validate_sequence(seq):\n    valid_amino_acids = "ACDEFGHIKLMNPQRSTVWY"\n    return all(char in valid_amino_acids for char in seq)\n\n# Function to mutate a sequence using a substitution matrix\ndef mutate_sequence(sequence, mutation_rate=0.01, sub_matrix=matlist.blosum62):\n    """\n    Mutate the given se

In [32]:
# ESM fix - prompts
# Directory where the ESM fix meeting saves its answer
esm_fix_dir = save_dir.joinpath("esm_fix")

# Agenda listing the four issues found in the merged implementation script
esm_fix_prompt = """You previously implemented a method for using ESM to design antibodies SARS-CoV-2 spike protein (see summary). However, the code has several issues listed below.

1. The substitution matrix assumes an old version of biopython which is no longer available. Please remove this component and allow any valid amino acid substitution.
2. The wt_log_likelihood and mut_log_likelihood calculations fail since the sequence length does not account for two additional tokens that are added. Please adjust the sequence length to account for these tokens.
3. Please replace the wildtype sequence with this sequence: QVQLVQSGAEVKKPGASVKVSCKASGYPFTSYGISWVRQAPGQGLEWMGWISTYNGNTNYAQKFQGRVTMTTDTSTTTGYMELRRLRSDDTAVYYCARDYTRGAWFGESLIGGFDNWGQGTLVTVSS
4. Please modify the script to create many mutants of the wildtype sequence (e.g., 1000) and calculate the log-likelihood ratio for each mutant. Then have the script select the top mutants (e.g., 10) by log-likelihood ratio and print out both their mutations (e.g., P28T) and their log-likelihood ratios.

Please rewrite the code to fix these issues."""

# The merged ESM implementation is the prior summary the fix meeting starts from
esm_implement_merged_path = esm_implement_dir.joinpath("merged.json")
esm_fix_prior_summaries = load_summaries(discussion_paths=[esm_implement_merged_path])
num_esm_fix_prior_summaries = len(esm_fix_prior_summaries)
print(f"Number of prior summaries: {num_esm_fix_prior_summaries}")

Number of prior summaries: 1


In [33]:
# ESM fix
# Single meeting with the data scientist agent, seeded with the merged implementation
# summary; kept as the cell's final expression so its return value is displayed.
run_individual_meeting(
    model=model,
    temperature=CONSISTENT_TEMPERATURE,
    team_member=DATA_SCIENTIST,
    agenda=esm_fix_prompt,
    summaries=esm_fix_prior_summaries,
    save_dir=esm_fix_dir,
    save_name="answer",
)

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:24<00:00, 24.76s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:24<00:00, 24.77s/it]

Input token count: 2,310
Output token count: 1,476
Max token length: 3,786
Cost: $0.03
Time: 0:24





'Certainly! Let\'s address each of the issues listed in the agenda and modify the script accordingly.\n\n### 1. Remove the Substitution Matrix Dependency\nWe\'ll remove the dependency on the substitution matrix and allow any valid amino acid substitution.\n\n### 2. Adjust Sequence Length for Log-Likelihood Calculation\nWe\'ll adjust the sequence length to account for the two additional tokens added by the ESM model.\n\n### 3. Replace the Wildtype Sequence\nWe\'ll replace the wildtype sequence with the provided sequence.\n\n### 4. Generate Multiple Mutants and Select Top Mutants\nWe\'ll modify the script to create multiple mutants (e.g., 1000), calculate the log-likelihood ratio for each, and then select and print the top mutants by log-likelihood ratio.\n\nHere\'s the updated script:\n\n```python\nimport torch\nfrom esm import pretrained, Alphabet\nimport random\nimport numpy as np\n\n# Load the pretrained ESM model\ndef load_esm_model():\n    """\n    Load the pretrained ESM model and

In [36]:
# ESM efficient - prompts
# Directory where the ESM efficiency meeting saves its answer
esm_efficient_dir = save_dir.joinpath("esm_efficient")

# Agenda asking for a faster rewrite that keeps the same logic
esm_efficient_prompt = (
    "You previously implemented a method for using ESM to design antibodies SARS-CoV-2 spike protein (see summary). "
    "However, the code is not efficient. "
    "Please rewrite the code to maintain the same logic but improve the efficiency of the implementation "
    "(e.g., via vectorization and batching)."
)

# The answer of the ESM fix meeting is the prior summary the efficiency meeting starts from
esm_fix_answer_path = esm_fix_dir.joinpath("answer.json")
esm_efficient_prior_summaries = load_summaries(discussion_paths=[esm_fix_answer_path])
num_esm_efficient_prior_summaries = len(esm_efficient_prior_summaries)
print(f"Number of prior summaries: {num_esm_efficient_prior_summaries}")

Number of prior summaries: 1


In [37]:
# ESM efficient
# Single meeting with the data scientist agent, seeded with the ESM fix answer;
# kept as the cell's final expression so its return value is displayed.
run_individual_meeting(
    model=model,
    temperature=CONSISTENT_TEMPERATURE,
    team_member=DATA_SCIENTIST,
    agenda=esm_efficient_prompt,
    summaries=esm_efficient_prior_summaries,
    save_dir=esm_efficient_dir,
    save_name="answer",
)

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:24<00:00, 24.71s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:24<00:00, 24.71s/it]

Input token count: 1,586
Output token count: 1,453
Max token length: 3,039
Cost: $0.03
Time: 0:24





'Thank you for the summary and the agenda for today\'s meeting. I understand that the goal is to improve the efficiency of the current implementation for using ESM to design antibodies against the SARS-CoV-2 spike protein. Specifically, we aim to maintain the same logic but enhance the performance through vectorization and batching.\n\nHere\'s an optimized version of the script that leverages vectorization and batching to improve efficiency:\n\n```python\nimport torch\nfrom esm import pretrained, Alphabet\nimport random\nimport numpy as np\n\n# Load the pretrained ESM model\ndef load_esm_model():\n    """\n    Load the pretrained ESM model and set it to evaluation mode.\n    """\n    model, alphabet = pretrained.esm1b_t33_650M_UR50S()\n    model.eval()  # Disable dropout for evaluation\n    return model, alphabet\n\n# Function to set random seed for reproducibility\ndef set_seed(seed=42):\n    random.seed(seed)\n    torch.manual_seed(seed)\n\nset_seed()\n\n# Function to validate that t