In [1]:
from pathlib import Path

from tqdm import trange

from agent import Agent
from prompts import ANTIBODIES_CONTEXT_PROMPT, PRINCIPAL_INVESTIGATOR
from run_individual_meeting import run_individual_meeting
from run_scientific_meeting import run_scientific_meeting
from utils import load_summaries

In [2]:
# Set up key parameters
# Value passed as `team_member` / `team_lead` to every meeting call below.
team_lead = PRINCIPAL_INVESTIGATOR
# Number of times each meeting loop below is repeated; each iteration is
# saved under its own `discussion_<n>` name.
num_iterations = 3
# Forwarded as `num_rounds` to run_scientific_meeting.
num_rounds = 3
# Root output directory; each stage passes a subdirectory of it as `save_dir`.
save_dir = Path("antibody_design")
# Model identifier forwarded as `model` to every meeting call.
model = "gpt-4o"
# Paths of the preferred discussions; filled in by a later cell and then
# passed to load_summaries.
discussion_paths = []
# Single-element tuple forwarded as `contexts` to every meeting call.
contexts = (ANTIBODIES_CONTEXT_PROMPT,)

In [3]:
# Agenda for the team-selection meeting: the PI is asked to propose the
# scientists to invite, describing each one by title, expertise, goal and role.
# The fragments below are joined into one single-line prompt.
ANTIBODIES_TEAM_PROMPT = (
    "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, "
    "ideally for the newest variant of the virus and with broad spectrum activity across variants. "
    "You need to select a team of scientists to help you with this project. "
    "Please select the team members you would like to invite to a discussion "
    "to design the antibody discovery approach. "
    "For each team member, please specify the following: "
    "1. Title, 2. Expertise, 3. Goal, and 4. Role. "
    "Please use your own description as an example."
)

In [4]:
# Select team members: hold the same individual meeting with the team lead
# `num_iterations` times, saving each run under its own 1-based name.
team_selection_kwargs = dict(
    team_member=team_lead,
    agenda=ANTIBODIES_TEAM_PROMPT,
    save_dir=save_dir / "team_selection",
    contexts=contexts,
    model=model,
)
for iteration_num in trange(num_iterations, desc="Project Iterations"):
    # Only the save name changes between iterations.
    run_individual_meeting(
        save_name=f"discussion_{iteration_num + 1}",
        **team_selection_kwargs,
    )

Project Iterations:  33%|███▎      | 1/3 [00:16<00:32, 16.43s/it]

Input token count: 194
Output token count: 704
Max token length: 898
Cost: $0.01
Time: 0:16


Project Iterations:  67%|██████▋   | 2/3 [00:29<00:14, 14.23s/it]

Input token count: 194
Output token count: 623
Max token length: 817
Cost: $0.01
Time: 0:12


Project Iterations: 100%|██████████| 3/3 [00:41<00:00, 13.73s/it]

Input token count: 194
Output token count: 622
Max token length: 816
Cost: $0.01
Time: 0:12





In [4]:
# Team roster, chosen based on discussion 2.
# Each spec carries the four keyword fields handed to Agent: title, expertise,
# goal and role. The Agent objects are built from the specs at the bottom.
_team_member_specs = (
    {
        "title": "Computational Biologist",
        "expertise": "bioinformatics, protein structure prediction, and molecular dynamics simulations",
        "goal": "to identify potential antibody candidates using computational methods and predict their binding affinity to the SARS-CoV-2 spike protein",
        "role": "to run in silico screenings of antibody libraries, modeling antibody-spike protein interactions, and providing a shortlist of promising candidates for experimental validation",
    },
    {
        "title": "Immunologist",
        "expertise": "immune response mechanisms, antibody generation, and characterization",
        "goal": "to guide the selection of antibody candidates based on immunological principles and ensure the candidates have the potential for broad-spectrum activity",
        "role": "to provide insights into the immune response to SARS-CoV-2, help design the antibody generation strategy, and interpret the results from binding and neutralization assays",
    },
    {
        "title": "Structural Biologist",
        "expertise": "X-ray crystallography, cryo-electron microscopy (cryo-EM), and protein structure determination",
        "goal": "to determine the high-resolution structures of antibody-spike protein complexes to understand the binding mechanisms and improve antibody design",
        "role": "to solve the structures of selected antibody-spike protein complexes and providing structural insights to refine antibody candidates",
    },
    {
        "title": "Virologist",
        "expertise": "SARS-CoV-2 biology, viral entry mechanisms, and neutralization assays",
        "goal": "to validate the neutralizing activity of antibody candidates against SARS-CoV-2 and its variants",
        "role": "to oversee the design and execution of neutralization assays, interpret the results, and ensure that the selected antibodies are effective against multiple variants of the virus",
    },
    {
        "title": "Data Scientist",
        "expertise": "machine learning, data analysis, and predictive modeling",
        "goal": "to develop and apply machine learning models to predict the efficacy and broad-spectrum activity of antibody candidates",
        "role": "to analyze experimental data, develop predictive models, and integrate data from various sources to guide the selection and optimization of antibody candidates",
    },
    {
        "title": "Experimental Biologist",
        "expertise": "antibody engineering, protein expression, and purification",
        "goal": "to produce and characterize the antibody candidates identified through computational and immunological methods",
        "role": "to express, purify, and initially characterize antibody candidates, as well as to prepare samples for binding and neutralization assays",
    },
)

# Same order as the specs above; kept as a tuple like the original definition.
team_members = tuple(Agent(**spec) for spec in _team_member_specs)

In [5]:
# Agenda for the project-design meetings: pick a machine-learning-based
# antibody discovery approach. The fragments are joined into one
# single-line prompt.
ANTIBODIES_PROMPT = (
    "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, "
    "ideally for the newest variant of the virus and with broad spectrum activity across variants. "
    "Please design a specific antibody discovery approach for this target "
    "that uses machine learning to design antibody candidates. "
    "Decide what specific machine learning model to use and precisely how it will be used. "
    "If the model needs to be trained, please decide on a specific dataset for training. "
    "If the model is pre-trained, please explain exactly how it will be used "
    "for identifying or designing antibody candidates."
)

In [6]:
# Questions passed as `agenda_questions` to the project-design meetings.
# One question per line, each ending in its own comma so that adjacent string
# literals can never be merged by implicit concatenation.
ANTIBODIES_QUESTIONS = (
    "Will you design the antibodies de novo or will you modify existing antibodies?",
    "Will you train a model from scratch or use a pre-trained model?",
    "What specific model architecture will you use?",
    "If training a model, what dataset will you use for training?",
    "How exactly will you use your model to design antibodies?",
)

In [7]:
# Run antibodies project design: repeat the full team meeting
# `num_iterations` times, saving each run under its own 1-based name.
project_design_kwargs = dict(
    team_lead=team_lead,
    team_members=team_members,
    agenda=ANTIBODIES_PROMPT,
    agenda_questions=ANTIBODIES_QUESTIONS,
    contexts=contexts,
    save_dir=save_dir / "project_design",
    num_rounds=num_rounds,
    model=model,
)
for iteration_num in trange(num_iterations, desc="Project Iterations"):
    # Only the save name changes between iterations.
    run_scientific_meeting(
        save_name=f"discussion_{iteration_num + 1}",
        **project_design_kwargs,
    )

Project Iterations:   0%|          | 0/3 [00:00<?, ?it/s]
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team:   0%|          | 0/7 [00:14<?, ?it/s][A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:14<?, ?it/s]
Project Iterations:   0%|          | 0/3 [00:14<?, ?it/s]


KeyboardInterrupt: 

In [6]:
# Agenda for the ESM design meetings: the team must work out how to apply the
# ESM family of models to the antibody design problem. The fragments are
# joined into one single-line prompt.
ESM_PROMPT = (
    "You are working on a project to develop antibodies for the SARS-CoV-2 spike protein, "
    "ideally for the newest variant of the virus and with broad spectrum activity across variants. "
    "You will use the ESM family of models for antibody design. "
    "Please design a method for applying ESM to this antibody design problem. "
    "Specify the exact model you will use and how you will use it to design antibodies."
)

In [7]:
# Questions passed as `agenda_questions` to the ESM design meetings.
# Every entry must end with its own comma: Python silently concatenates
# adjacent string literals, so a missing comma fuses two questions into one
# (the first two questions were previously merged this way).
ESM_QUESTIONS = (
    "Which ESM model will you use?",
    "Will you design the antibodies de novo or will you modify existing antibodies?",
    "How exactly will you use your model to design antibodies?",
    "What is the precise process for designing antibodies and selecting candidates for experimental validation?",
    "How will you computationally evaluate the quality of the design antibodies?",
    "What objectives will you optimize for in the design process?",
)

In [8]:
# Run ESM project design: repeat the full team meeting `num_iterations`
# times, saving each run under its own 1-based name.
esm_design_kwargs = dict(
    team_lead=team_lead,
    team_members=team_members,
    agenda=ESM_PROMPT,
    agenda_questions=ESM_QUESTIONS,
    contexts=contexts,
    save_dir=save_dir / "esm",
    num_rounds=num_rounds,
    model=model,
)
for iteration_num in trange(num_iterations, desc="Project Iterations"):
    # Only the save name changes between iterations.
    run_scientific_meeting(
        save_name=f"discussion_{iteration_num + 1}",
        **esm_design_kwargs,
    )

Project Iterations:   0%|          | 0/3 [00:00<?, ?it/s]
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:07<00:37,  7.43s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:15<00:31,  7.90s/it][A[A

Team Members:  50%|█████     | 3/6 [00:26<00:27,  9.06s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:39<00:21, 10.58s/it][A[A

Team Members:  83%|████████▎ | 5/6 [00:51<00:11, 11.20s/it][A[A

Team Members: 100%|██████████| 6/6 [01:02<00:00, 10.48s/it][A[A

Rounds (+ Summary Round):  25%|██▌       | 1/4 [01:02<03:08, 62.90s/it][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:09<00:46,  9.39s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:28<01:00, 15.12s/it][A[A

Team Members:  50%|█████     | 3/6 [00:41<00:41, 13.93s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:51<00:25, 12.63s/it][A[A

Team Membe

Input token count: 116,197
Output token count: 12,655
Max token length: 13,796
Cost: $0.77



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:15<01:19, 15.86s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:26<00:51, 12.79s/it][A[A

Team Members:  50%|█████     | 3/6 [00:36<00:34, 11.58s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:47<00:22, 11.46s/it][A[A

Team Members:  83%|████████▎ | 5/6 [01:13<00:16, 16.37s/it][A[A

Team Members: 100%|██████████| 6/6 [01:23<00:00, 13.98s/it][A[A

Rounds (+ Summary Round):  25%|██▌       | 1/4 [01:23<04:11, 83.85s/it][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:23<01:58, 23.66s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:31<00:56, 14.13s/it][A[A

Team Members:  50%|█████     | 3/6 [00:40<00:35, 11.99s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:52<00:23, 11.90s/it][A[A

Team Members:  83%|████████▎ | 5/6 [01:04<00:12, 12.12s/it][A[A



Input token count: 117,639
Output token count: 12,087
Max token length: 13,228
Cost: $0.77



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:08<00:43,  8.60s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:20<00:41, 10.32s/it][A[A

Team Members:  50%|█████     | 3/6 [00:30<00:30, 10.33s/it][A[A

Team Members:  67%|██████▋   | 4/6 [01:00<00:36, 18.26s/it][A[A

Team Members:  83%|████████▎ | 5/6 [01:12<00:15, 15.87s/it][A[A

Team Members: 100%|██████████| 6/6 [01:26<00:00, 14.36s/it][A[A

Rounds (+ Summary Round):  25%|██▌       | 1/4 [01:26<04:18, 86.19s/it][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:14<01:11, 14.33s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:22<00:42, 10.66s/it][A[A

Team Members:  50%|█████     | 3/6 [00:33<00:32, 10.94s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:41<00:19,  9.84s/it][A[A

Team Members:  83%|████████▎ | 5/6 [00:52<00:10, 10.15s/it][A[A



Input token count: 113,878
Output token count: 12,845
Max token length: 13,986
Cost: $0.76





In [11]:
# Select preferred summary
# `discussion_paths` is a list shared across cells, so a bare append would add
# the same path again every time this cell is re-run and load_summaries would
# then receive duplicates. The membership check keeps the cell idempotent.
preferred_discussion_path = save_dir / "esm" / "discussion_3.json"
if preferred_discussion_path not in discussion_paths:
    discussion_paths.append(preferred_discussion_path)

In [12]:
# Load summaries
# Reads the discussions selected in `discussion_paths`; the result is passed
# as `summaries` to the ESM implementation meetings below.
# NOTE(review): the exact return shape of load_summaries is not visible here.
summaries = load_summaries(discussion_paths=discussion_paths)

In [13]:
# Agenda for the ESM implementation meetings: the team is asked to write code
# for the previously designed method, covering four numbered components.
# The fragments are joined into one single-line prompt.
ESM_IMPLEMENT_PROMPT = (
    "You now need to implement an ESM-based approach for antibody design. "
    "Please write code to implement the method you designed in the previous discussion. "
    "You should include the following components: "
    "1. Loading the ESM model, "
    "2. Designing antibodies using the ESM model, "
    "3. Evaluating the designed antibodies computationally, "
    "4. Selecting the best candidates for experimental validation. "
    "Please provide a detailed description of each component and the code to implement it."
)

In [14]:
# Questions passed as `agenda_questions` to the ESM implementation meetings;
# they mirror the four components listed in ESM_IMPLEMENT_PROMPT.
# Each entry ends in its own comma so adjacent literals can never be merged.
ESM_IMPLEMENT_QUESTIONS = (
    "How will you load the ESM model?",
    "How will you design antibodies using the ESM model?",
    "How will you evaluate the designed antibodies computationally?",
    "How will you select the best candidates for experimental validation?",
)

In [15]:
# Run ESM implement: repeat the full team meeting `num_iterations` times,
# passing the loaded summaries of the preferred earlier discussion and saving
# each run under its own 1-based name.
esm_implement_kwargs = dict(
    team_lead=team_lead,
    team_members=team_members,
    agenda=ESM_IMPLEMENT_PROMPT,
    agenda_questions=ESM_IMPLEMENT_QUESTIONS,
    summaries=summaries,
    contexts=contexts,
    save_dir=save_dir / "esm_implement",
    num_rounds=num_rounds,
    model=model,
)
for iteration_num in trange(num_iterations, desc="Project Iterations"):
    # Only the save name changes between iterations.
    run_scientific_meeting(
        save_name=f"discussion_{iteration_num + 1}",
        **esm_implement_kwargs,
    )

Project Iterations:   0%|          | 0/3 [00:00<?, ?it/s]
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:29<02:27, 29.51s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:41<01:15, 18.99s/it][A[A

Team Members:  50%|█████     | 3/6 [00:50<00:44, 14.81s/it][A[A

Team Members:  67%|██████▋   | 4/6 [01:04<00:28, 14.25s/it][A[A

Team Members:  83%|████████▎ | 5/6 [01:14<00:12, 12.94s/it][A[A

Team Members: 100%|██████████| 6/6 [01:27<00:00, 14.66s/it][A[A

Rounds (+ Summary Round):  25%|██▌       | 1/4 [01:27<04:23, 87.94s/it][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [01:00<05:02, 60.50s/it][A[A

Team Members:  33%|███▎      | 2/6 [01:08<01:59, 29.79s/it][A[A

Team Members:  50%|█████     | 3/6 [01:18<01:02, 20.69s/it][A[A

Team Members:  67%|██████▋   | 4/6 [01:30<00:34, 17.05s/it][A[A

Team Membe

Input token count: 203,258
Output token count: 17,910
Max token length: 20,067
Cost: $1.28



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:04<00:23,  4.68s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:12<00:26,  6.63s/it][A[A

Team Members:  50%|█████     | 3/6 [00:25<00:27,  9.30s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:33<00:18,  9.11s/it][A[A

Team Members:  83%|████████▎ | 5/6 [00:42<00:08,  8.87s/it][A[A

Team Members: 100%|██████████| 6/6 [00:51<00:00,  8.63s/it][A[A

Rounds (+ Summary Round):  25%|██▌       | 1/4 [00:51<02:35, 51.78s/it][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:12<01:00, 12.05s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:21<00:42, 10.53s/it][A[A

Team Members:  50%|█████     | 3/6 [00:32<00:31, 10.63s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:49<00:26, 13.27s/it][A[A

Team Members:  83%|████████▎ | 5/6 [01:11<00:16, 16.28s/it][A[A



Input token count: 144,603
Output token count: 15,113
Max token length: 17,270
Cost: $0.95



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:39<03:18, 39.68s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:50<01:31, 22.96s/it][A[A

Team Members:  50%|█████     | 3/6 [01:00<00:49, 16.66s/it][A[A

Team Members:  67%|██████▋   | 4/6 [01:07<00:26, 13.02s/it][A[A

Team Members:  83%|████████▎ | 5/6 [01:17<00:11, 11.85s/it][A[A

Team Members: 100%|██████████| 6/6 [01:41<00:00, 16.84s/it][A[A

Rounds (+ Summary Round):  25%|██▌       | 1/4 [01:41<05:03, 101.02s/it][A

Team Members:   0%|          | 0/6 [00:00<?, ?it/s][A[A

Team Members:  17%|█▋        | 1/6 [00:14<01:11, 14.28s/it][A[A

Team Members:  33%|███▎      | 2/6 [00:20<00:37,  9.46s/it][A[A

Team Members:  50%|█████     | 3/6 [00:30<00:29,  9.86s/it][A[A

Team Members:  67%|██████▋   | 4/6 [00:43<00:22, 11.14s/it][A[A

Team Members:  83%|████████▎ | 5/6 [00:56<00:11, 11.63s/it][A[A


Input token count: 136,860
Output token count: 13,587
Max token length: 15,744
Cost: $0.89



