In [1]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    ANTIBODIES_CONTEXTS,
    MERGE_PROMPT,
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC
)
from run_individual_meeting import run_individual_meeting
from run_scientific_meeting import run_scientific_meeting
from utils import load_summaries

In [2]:
# Key parameters shared by the meetings in this notebook
model = "gpt-4o"  # model name passed to every meeting call
save_dir = Path("antibody_design")  # root directory; each stage saves into its own subdirectory
num_iterations, num_rounds = 5, 3  # parallel discussions per stage, rounds per scientific meeting

In [3]:
# Select team members - prompts
# Directory that receives the team selection discussions and their merged answer.
team_selection_dir = save_dir / "team_selection"

# Agenda for the Principal Investigator: pick 3-4 team members and list each one as an
# Agent(...) definition, following the example Agent embedded at the end of the prompt.
team_selection_prompt = """You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, ideally for the newest variant of the virus and with broad spectrum activity across variants. You need to select a team of scientists to help you with this project. Please select a small set of team members (3-4) that you would like to invite to a discussion to design the antibody discovery approach. Please list team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Agent(
    title="Principal Investigator",
    expertise="applying artificial intelligence to biomedical research",
    goal="perform research in your area of expertise that maximizes the scientific impact of the work",
    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",
)
"""

In [4]:
# Select team members - discussion
# Run num_iterations independent team selection meetings in parallel threads; each one is
# saved under team_selection_dir with the name discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_prompt,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        ) for iteration_num in range(num_iterations)
    ]

    # wait() only blocks until the futures finish; an exception raised inside a worker stays
    # stored on its future. Calling result() re-raises it so a failed meeting is not silent.
    concurrent.futures.wait(team_selection_futures)
    for future in team_selection_futures:
        future.result()

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A



Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s][A[A[A




Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A






Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A







Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Agents:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Agents: 100%|██████████| 1/1 [00:06<00:00,  6.07s/it][A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 1/1 [00:06<00:00,  6.11s/it][A[A[A[A






Agents: 100%|██████████| 1/1 [00:06<00:00,  6.29s/it][A[A[A[A[A[A


Critiques (+ Final R

Input token count: 232
Output token count: 306
Max token length: 538
Cost: $0.01
Time: 0:06


Critiques (+ Final Round): 100%|██████████| 1/1 [00:06<00:00,  6.31s/it]


Input token count: 232
Output token count: 343
Max token length: 575
Cost: $0.01
Time: 0:06







Agents: 100%|██████████| 1/1 [00:07<00:00,  7.10s/it][A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:07<00:00,  7.11s/it]


Input token count: 232
Output token count: 341
Max token length: 573
Cost: $0.01
Time: 0:07











Agents: 100%|██████████| 1/1 [00:07<00:00,  7.34s/it][A[A[A[A[A[A[A[A[A



Critiques (+ Final Round): 100%|██████████| 1/1 [00:07<00:00,  7.38s/it][A[A[A


Input token count: 232
Output token count: 361
Max token length: 593
Cost: $0.01
Time: 0:07









Agents: 100%|██████████| 1/1 [00:10<00:00, 10.68s/it][A[A[A[A[A[A[A

Critiques (+ Final Round): 100%|██████████| 1/1 [00:10<00:00, 10.72s/it][A

Input token count: 232
Output token count: 392
Max token length: 624
Cost: $0.01
Time: 0:10





In [5]:
# Select team members - merge
# Sort the discussion paths: glob order depends on the file system, and sorting keeps the
# order of the summaries handed to the merge meeting the same from run to run.
team_selection_summaries = load_summaries(
    discussion_paths=sorted(team_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(team_selection_summaries)}")

# Have the Principal Investigator merge the individual discussions; saved as "merged".
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=team_selection_dir,
    save_name="merged",
    summaries=team_selection_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:14<00:00, 14.44s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:14<00:00, 14.45s/it]

Input token count: 1,890
Output token count: 1,027
Max token length: 2,917
Cost: $0.02
Time: 0:14





'Certainly! Based on the summaries of the previous meetings, here is a consolidated list of key team members for our antibody discovery approach, incorporating the best components from each individual answer:\n\nAgent(\n    title="Immunologist",\n    expertise="immune response characterization and antibody development",\n    goal="identify and characterize potential antibody candidates that can effectively neutralize SARS-CoV-2 variants",\n    role="provide insights on immunogenic targets, design experiments for antibody discovery, and interpret immunological data"\n)\n\nAgent(\n    title="Structural Biologist",\n    expertise="protein structure analysis and molecular modeling",\n    goal="analyze and model the structure of the SARS-CoV-2 spike protein and its variants",\n    role="assist in identifying key structural features for antibody binding and stability, and guide the design of broad-spectrum antibodies using structural insights"\n)\n\nAgent(\n    title="Bioinformatician",\n   

In [6]:
# Add team members
# Agent definitions for the scientists who join the team meetings below.
IMMUNOLOGIST = Agent(
    title="Immunologist",
    expertise="immune response characterization and antibody development",
    goal="identify and characterize potential antibody candidates that can effectively neutralize SARS-CoV-2 variants",
    role="provide insights on immunogenic targets, design experiments for antibody discovery, and interpret immunological data"
)

# NOTE(review): the variable is named DATA_SCIENTIST but the agent's title is
# "Machine Learning Scientist"; later cells refer to it by this variable name.
DATA_SCIENTIST = Agent(
    title="Machine Learning Scientist",
    expertise="developing and applying machine learning algorithms for biological data",
    goal="design and implement AI models to predict effective antibody candidates",
    role="integrate data from bioinformatics and immunology to train machine learning models for antibody discovery, and predict antibody binding affinity and specificity"
)

VIROLOGIST = Agent(
    title="Virologist",
    expertise="SARS-CoV-2 biology and virology assays",
    goal="assess the neutralization efficacy of antibody candidates against various SARS-CoV-2 variants",
    role="design and conduct virology assays to test antibody effectiveness in vitro and in vivo, and provide insights on viral escape mechanisms"
)

# Team passed as team_members to run_scientific_meeting; the Principal Investigator is not
# listed here because it is passed separately as team_lead.
team_members = (
    IMMUNOLOGIST,
    DATA_SCIENTIST,
    VIROLOGIST,
    SCIENTIFIC_CRITIC,
)

In [7]:
# Select antibodies project - prompts
# Directory that receives the project selection discussions and their merged answer.
project_selection_dir = save_dir / "project_selection"

# Agenda for the project selection team meetings.
project_selection_prompt = "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, ideally for the newest variant of the virus and with broad spectrum activity across variants. Please design a specific antibody discovery approach for this target that uses machine learning to design antibody candidates. Decide what specific machine learning model to use and precisely how it will be used. If the model needs to be trained, please decide on a specific dataset for training. If the model is pre-trained, please explain exactly how it will be used for identifying or designing antibody candidates."

# Questions passed as agenda_questions alongside the agenda; one question per tuple element.
project_selection_questions = (
    "Will you design the antibodies de novo or will you modify existing antibodies?",
    "Will you train a model from scratch or use a pre-trained model?",
    "What specific model architecture will you use?",
    "If training a model, what dataset will you use for training?",
    "How exactly will you use your model to design antibodies?"
)

In [8]:
# Select antibodies project - discussion
# Run num_iterations independent project selection team meetings in parallel threads; each
# one is saved under project_selection_dir with the name discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=project_selection_prompt,
            agenda_questions=project_selection_questions,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            contexts=ANTIBODIES_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        ) for iteration_num in range(num_iterations)
    ]

    # wait() only blocks until the futures finish; an exception raised inside a worker stays
    # stored on its future. Calling result() re-raises it so a failed meeting is not silent.
    concurrent.futures.wait(project_selection_futures)
    for future in project_selection_futures:
        future.result()


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A







Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A




Team:  20%|██        | 1/5 [00:08<00:32,  8.10s/it][A[A[A[A[A





Team:  20%|██        | 1/5 [00:09<00:37,  9.37s/it][A[A[A[A[A[A








Team:  20%|██        | 1/5 [00:12<00:49, 12.28s/it][A[A[A[A[A[A[A[A[A







Team:  20%|██        | 1/5 [00:13<00:52, 13.15s/it][A[A[A[A[A[A[A[A






Team:  20%|██        | 1/5 [00

Input token count: 98,746
Output token count: 10,360
Max token length: 12,863
Cost: $0.65
Time: 3:08







Team:  80%|████████  | 4/5 [01:05<00:14, 14.43s/it][A[A[A[A[A








Team: 100%|██████████| 5/5 [01:05<00:00, 13.09s/it][A[A[A[A[A[A[A[A[A



Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:16<01:06, 66.18s/it][A[A[A

Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A





Team: 100%|██████████| 5/5 [01:12<00:00, 14.54s/it][A[A[A[A[A[A




Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:17<01:08, 68.36s/it][A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A




Team: 100%|██████████| 5/5 [01:20<00:00, 16.06s/it][A[A[A[A[A

Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:26<01:11, 71.19s/it][A




Team:   0%|          | 0/5 [00:24<?, ?it/s][A[A[A[A[A
Rounds (+ Summary Round): 100%|██████████| 4/4 [03:32<00:00, 53.18s/it]


Input token count: 90,657
Output token count: 9,379
Max token length: 11,882
Cost: $0.59
Time: 3:32


Team:   0%|          | 0/5 [00:19<?, ?it/s]



Rounds (+ Summary Round): 100%|██████████| 4/4 [03:36<00:00, 54.01s/it][A[A[A


Input token count: 106,189
Output token count: 11,467
Max token length: 13,970
Cost: $0.70
Time: 3:36


Team:   0%|          | 0/5 [00:20<?, ?it/s]




Rounds (+ Summary Round): 100%|██████████| 4/4 [03:38<00:00, 54.59s/it][A[A[A[A


Input token count: 99,734
Output token count: 10,771
Max token length: 13,274
Cost: $0.66
Time: 3:38


Team:   0%|          | 0/5 [00:25<?, ?it/s]

Rounds (+ Summary Round): 100%|██████████| 4/4 [03:52<00:00, 58.01s/it][A

Input token count: 107,884
Output token count: 11,400
Max token length: 13,903
Cost: $0.71
Time: 3:52





In [9]:
# Select antibodies project - merge
# Sort the discussion paths: glob order depends on the file system, and sorting keeps the
# order of the summaries handed to the merge meeting the same from run to run.
project_selection_summaries = load_summaries(
    discussion_paths=sorted(project_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Have the Principal Investigator merge the team discussions; saved as "merged".
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    contexts=ANTIBODIES_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:35<00:00, 35.59s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:35<00:00, 35.59s/it]

Input token count: 6,655
Output token count: 1,535
Max token length: 8,190
Cost: $0.06
Time: 0:35





'### Agenda\n\nOur goal is to develop antibodies targeting the SARS-CoV-2 spike protein, specifically focusing on the newest variants and achieving broad-spectrum activity across multiple variants. We aim to utilize machine learning to design antibody candidates. Key decisions include whether to design antibodies de novo or modify existing ones, whether to train a model from scratch or use a pre-trained model, the specific model architecture to use, the dataset for training (if a model is trained), and the detailed methodology for using the model to design antibodies.\n\n### Team Member Input\n\n**Immunologist:**\n- Advocates for modifying existing antibodies like sotrovimab and S309 due to their known cross-reactivity and structural data.\n- Suggested targeting conserved regions within the S2 subunit, such as the fusion peptide, HR1, HR2, stem helix, and MPER.\n\n**Machine Learning Scientist:**\n- Recommends using pre-trained models like AlphaFold, Rosetta, and ESM, and fine-tuning th

In [10]:
# ESM design - prompts
# Directory that receives the ESM design discussions and their merged answer.
esm_design_dir = save_dir / "esm_design"

# Agenda for the ESM design team meetings.
esm_design_prompt = "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, ideally for the newest variant of the virus and with broad spectrum activity across variants. You will use the ESM family of models for antibody design. Please design a method for applying ESM to this antibody design problem. Specify the exact model you will use and how you will use it to design antibodies."

# Agenda questions for the ESM design meetings, one question per tuple element.
# Every element needs its trailing comma: adjacent string literals without a comma between
# them are concatenated by Python into a single element, which would fuse two questions.
esm_design_questions = (
    "Which ESM model will you use?",
    "Will you design the antibodies de novo or will you modify existing antibodies?",
    "If modifying existing antibodies, which precise antibody or antibodies will you modify?",
    "How exactly will you use your model to design antibodies?",
    "What is the precise process for designing antibodies and selecting candidates for experimental validation?",
    "How will you computationally evaluate the quality of the design antibodies?",
    "What objectives will you optimize for in the design process?",
)

# Load the merged project selection answer as prior context for the ESM design meetings.
merged_project_selection_path = project_selection_dir / "merged.json"
esm_design_prior_summaries = load_summaries(discussion_paths=[merged_project_selection_path])
print(f"Number of prior summaries: {len(esm_design_prior_summaries)}")

Number of prior summaries: 1


In [11]:
# ESM design - discussion
# Run num_iterations independent ESM design team meetings in parallel threads; each one is
# saved under esm_design_dir with the name discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_design_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=esm_design_prompt,
            agenda_questions=esm_design_questions,
            save_dir=esm_design_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=esm_design_prior_summaries,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        ) for iteration_num in range(num_iterations)
    ]

    # wait() only blocks until the futures finish; an exception raised inside a worker stays
    # stored on its future. Calling result() re-raises it so a failed meeting is not silent.
    concurrent.futures.wait(esm_design_futures)
    for future in esm_design_futures:
        future.result()




Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A







Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A





Team:  20%|██        | 1/5 [00:06<00:27,  6.76s/it][A[A[A[A[A[A







Team:  20%|██        | 1/5 [00:09<00:37,  9.33s/it][A[A[A[A[A[A[A[A






Team:  20%|██        | 1/5 [00:12<00:51, 12.81s/it][A[A[A[A[A[A[A





Team:  40%|████      | 2/5 [00:13<00:20,  6.82s/it][A[A[A[A[A[A








Team:  20%|██        | 1/5 [00:1

Input token count: 107,048
Output token count: 9,792
Max token length: 12,959
Cost: $0.68
Time: 3:00









Team:  80%|████████  | 4/5 [00:45<00:11, 11.81s/it][A[A[A[A[A[A[A








Team:  60%|██████    | 3/5 [00:42<00:27, 13.66s/it][A[A[A[A[A[A[A[A[A




Team: 100%|██████████| 5/5 [01:04<00:00, 12.93s/it][A[A[A[A[A

Team:   0%|          | 0/5 [00:00<?, ?it/s]3/4 [03:04<01:01, 61.99s/it][A







Team:  40%|████      | 2/5 [00:40<00:57, 19.03s/it][A[A[A[A[A[A[A[A






Team: 100%|██████████| 5/5 [00:57<00:00, 11.51s/it][A[A[A[A[A[A[A



Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:13<01:02, 62.44s/it][A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A








Team:  80%|████████  | 4/5 [00:56<00:13, 13.68s/it][A[A[A[A[A[A[A[A[A







Team:  60%|██████    | 3/5 [00:53<00:32, 16.17s/it][A[A[A[A[A[A[A[A








Team: 100%|██████████| 5/5 [01:09<00:00, 13.86s/it][A[A[A[A[A[A[A[A[A




Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:29<01:08, 68.91s/it][A[A[A[A





Team:   0%|          | 0/5 

Input token count: 105,113
Output token count: 9,548
Max token length: 12,715
Cost: $0.67
Time: 3:31










Team:   0%|          | 0/5 [00:27<?, ?it/s].39s/it][A[A[A[A[A[A[A[A



Rounds (+ Summary Round): 100%|██████████| 4/4 [03:40<00:00, 55.16s/it][A[A[A


Input token count: 130,073
Output token count: 12,061
Max token length: 15,228
Cost: $0.83
Time: 3:40










Team: 100%|██████████| 5/5 [01:23<00:00, 16.71s/it][A[A[A[A[A[A[A[A


Team:   0%|          | 0/5 [00:29<?, ?it/s]3/4 [03:51<01:18, 78.79s/it][A[A




Rounds (+ Summary Round): 100%|██████████| 4/4 [03:58<00:00, 59.64s/it][A[A[A[A


Input token count: 120,508
Output token count: 11,199
Max token length: 14,366
Cost: $0.77
Time: 3:58


Team:   0%|          | 0/5 [00:35<?, ?it/s]


Rounds (+ Summary Round): 100%|██████████| 4/4 [04:27<00:00, 66.95s/it][A[A

Input token count: 118,939
Output token count: 11,458
Max token length: 14,625
Cost: $0.77
Time: 4:27





In [12]:
# ESM design - merge
# Sort the discussion paths: glob order depends on the file system, and sorting keeps the
# order of the summaries handed to the merge meeting the same from run to run.
esm_design_summaries = load_summaries(
    discussion_paths=sorted(esm_design_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(esm_design_summaries)}")

# Have the Principal Investigator merge the team discussions; saved as "merged".
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=esm_design_dir,
    save_name="merged",
    summaries=esm_design_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:48<00:00, 48.18s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:48<00:00, 48.19s/it]

Input token count: 6,791
Output token count: 1,817
Max token length: 8,608
Cost: $0.06
Time: 0:48





"### Agenda\n\nOur goal is to develop antibodies targeting the SARS-CoV-2 spike protein, focusing on the newest variants and aiming for broad-spectrum activity across multiple variants. We will use the ESM family of models to design these antibodies. Key decisions include whether we will design antibodies de novo or modify existing ones, which specific antibodies we will modify if we choose that route, the exact methodology for applying the ESM model to antibody design, the process for selecting candidates for experimental validation, the computational evaluation metrics, and the primary objectives to optimize during the design process.\n\n### Team Member Input\n\n**Immunologist:**\n- Advocates for modifying existing antibodies like sotrovimab, S309, REGN10933, and REGN10987 due to their known cross-reactivity and structural data.\n- Suggested prioritizing conserved regions within the S2 subunit, specifically the fusion peptide (FP), HR1, HR2, stem helix, and MPER.\n- Emphasizes the ne

In [13]:
# ESM implement - prompts
# Directory that receives the ESM implementation discussions and their merged answer.
esm_implement_dir = save_dir / "esm_implement"

# Agenda asking for a complete, self-contained script that implements the ESM method.
esm_implement_prompt = "Your goal is to identify antibodies that bind to the SARS-CoV-2 spike protein. You will start with an existing SARS-CoV-2 antibody and explore mutation space around that antibody to find other potential binders using ESM. Please write a complete script to implement this method. Your code must be self-contained (with appropriate imports) and complete."

# Load the merged ESM design answer as prior context for the implementation meetings.
esm_implement_prior_summaries = load_summaries(discussion_paths=[esm_design_dir / "merged.json"])
# Report the list loaded on the line above, not the ESM design prior summaries from an
# earlier cell.
print(f"Number of prior summaries: {len(esm_implement_prior_summaries)}")

Number of prior summaries: 1


In [14]:
# ESM implement - discussion
# Run num_iterations independent implementation meetings (each with 3 critique rounds) in
# parallel threads; each one is saved under esm_implement_dir with the name discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_implement_futures = [
        executor.submit(
            run_individual_meeting,
            team_member=DATA_SCIENTIST,
            agenda=esm_implement_prompt,
            save_dir=esm_implement_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_critiques=3,
            summaries=esm_implement_prior_summaries,
            model=model,
        ) for iteration_num in range(num_iterations)
    ]

    # wait() only blocks until the futures finish; an exception raised inside a worker stays
    # stored on its future. Calling result() re-raises it so a failed meeting is not silent.
    concurrent.futures.wait(esm_implement_futures)
    for future in esm_implement_futures:
        future.result()

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]


Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A
Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A



Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A

Critiques (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A




Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A





Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A







Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A






Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A








Agents:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A






Agents:  50%|█████     | 1/2 [00:17<00:17, 17.20s/it][A[A[A[A[A[A[A








Agents:  50%|█████     | 1/2 [00:19<00:19, 19.04s/it][A[A[A[A[A[A[A[A[A





Agents:  50%|█████     | 1/2 [00:22<00:22, 22.85s/it][A[A[A[A[A[A






Agents: 100%|████████

Input token count: 38,963
Output token count: 9,610
Max token length: 11,795
Cost: $0.34
Time: 3:24







Agents: 100%|██████████| 1/1 [00:36<00:00, 36.11s/it][A[A[A[A[A
Critiques (+ Final Round): 100%|██████████| 4/4 [03:25<00:00, 51.29s/it]


Input token count: 40,114
Output token count: 9,868
Max token length: 12,053
Cost: $0.35
Time: 3:25










Agents: 100%|██████████| 1/1 [00:50<00:00, 50.65s/it][A[A[A[A[A[A[A[A




Critiques (+ Final Round): 100%|██████████| 4/4 [03:47<00:00, 56.91s/it][A[A[A[A


Input token count: 38,885
Output token count: 9,086
Max token length: 11,271
Cost: $0.33
Time: 3:47









Agents: 100%|██████████| 1/1 [00:58<00:00, 58.59s/it][A[A[A[A[A[A[A

Critiques (+ Final Round): 100%|██████████| 4/4 [03:59<00:00, 59.97s/it][A


Input token count: 43,029
Output token count: 11,345
Max token length: 13,530
Cost: $0.39
Time: 3:59











Agents: 100%|██████████| 1/1 [00:54<00:00, 54.53s/it][A[A[A[A[A[A[A[A[A


Critiques (+ Final Round): 100%|██████████| 4/4 [04:05<00:00, 61.38s/it][A[A

Input token count: 39,265
Output token count: 9,846
Max token length: 12,031
Cost: $0.34
Time: 4:05





In [15]:
# ESM implement - merge
# Sort the discussion paths: glob order depends on the file system, and sorting keeps the
# order of the summaries handed to the merge meeting the same from run to run.
esm_implement_summaries = load_summaries(
    discussion_paths=sorted(esm_implement_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(esm_implement_summaries)}")

# Have the DATA_SCIENTIST agent merge the individual implementations; saved as "merged".
run_individual_meeting(
    team_member=DATA_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=esm_implement_dir,
    save_name="merged",
    summaries=esm_implement_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:45<00:00, 45.92s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:45<00:00, 45.92s/it]

Input token count: 11,818
Output token count: 2,456
Max token length: 14,274
Cost: $0.10
Time: 0:45





'Thank you for the detailed summaries and the opportunity to refine the approach. Below, I have merged the best components from each individual answer to create a comprehensive and scientifically rigorous plan for identifying effective antibody candidates against SARS-CoV-2.\n\n### Merged Script and Detailed Explanations\n\nThe merged script incorporates the best practices and detailed explanations from each individual answer, ensuring a robust and comprehensive approach.\n\n```python\n# Import required libraries\nimport torch\nfrom transformers import ESMForSequenceClassification, ESMTokenizer\nimport numpy as np\nfrom Bio import SeqIO\nfrom Bio.SeqUtils import seq1\nfrom scipy.spatial.distance import cdist\nfrom captum.attr import Saliency\nimport random\n\n# Define constants\nMODEL_NAME = "facebook/esm2_t6_8M_UR50D"  # ESM-2 model from HuggingFace\nEXISTING_ANTIBODY_SEQ = "QVQLVQSGAEVKKPGASVKVSCKASGYTFTNYGMNWVRQAPGQGLEWMGAIYPGNGNTNYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAR"\nSPIKE_PROT

In [16]:
# ESM fix - prompts
# Directory that receives the answer of the ESM fix meeting.
esm_fix_dir = save_dir / "esm_fix"

# Agenda listing the errors found in the merged ESM script and asking for a rewrite.
# NOTE(review): the first sentence reads "design antibodies SARS-CoV-2 spike protein"
# (a word such as "for" appears to be missing); left as is because editing it would change
# the prompt text sent to the model.
esm_fix_prompt = """You previously implemented a method for using ESM to design antibodies SARS-CoV-2 spike protein (see summary). However, the code has several errors listed below.

1. ESMForSequenceClassification should be EsmForSequenceClassification and ESMTokenizer should be EsmTokenizer.
2. The calculation of the number of mutations results in 0 mutations due to the short antibody sequence length. Please ensure at least one mutation is made.
3. The EsmForSequenceClassification model raises the warning "Some weights of EsmForSequenceClassification were not initialized from the model checkpoint at facebook/esm2_t6_8M_UR50D and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']". Please modify your method to avoid using these uninitialized weights.
4. Related to point 3, the binding affinity prediction results in two logit values per sequence. This then causes errors in the topk selection, which expects one value per sequence. Please modify your method to calculate a single score value per sequence.
5. Please adjust any other aspects of the script as needed based on these issues.

Please rewrite the code to fix these errors."""

# Load the merged ESM implementation answer as prior context for the fix meeting.
esm_fix_prior_summaries = load_summaries(discussion_paths=[esm_implement_dir / "merged.json"])
print(f"Number of prior summaries: {len(esm_fix_prior_summaries)}")

Number of prior summaries: 1


In [17]:
# ESM fix
# Single meeting in which the DATA_SCIENTIST agent works through the fix agenda with the
# merged implementation as prior summary; the result is saved as "answer" in esm_fix_dir.
esm_fix_meeting_kwargs = dict(
    team_member=DATA_SCIENTIST,
    agenda=esm_fix_prompt,
    save_dir=esm_fix_dir,
    save_name="answer",
    summaries=esm_fix_prior_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)
run_individual_meeting(**esm_fix_meeting_kwargs)

Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:23<00:00, 23.57s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:23<00:00, 23.57s/it]

Input token count: 2,755
Output token count: 1,352
Max token length: 4,107
Cost: $0.03
Time: 0:23





'Thank you for the detailed agenda and the summary of the previous meetings. I appreciate the feedback and the specific issues identified in the current implementation. Let\'s address each of the points raised and adjust the script accordingly.\n\n### Addressing the Issues\n\n1. **Correcting Class Names**:\n   - Change `ESMForSequenceClassification` to `EsmForSequenceClassification`.\n   - Change `ESMTokenizer` to `EsmTokenizer`.\n\n2. **Ensuring at Least One Mutation**:\n   - Modify the mutation logic to ensure at least one mutation is made, even for short sequences.\n\n3. **Avoiding Uninitialized Weights**:\n   - Use the `EsmForSequenceClassification` model correctly to avoid using uninitialized weights. This may involve using a different model or modifying the current approach.\n\n4. **Single Score Value per Sequence**:\n   - Modify the binding affinity prediction to produce a single score per sequence, ensuring compatibility with the `topk` selection.\n\n### Revised Script\n\nHere\