In [None]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC,
    create_merge_prompt,
)
from run_meeting import run_meeting
from utils import load_summaries

In [None]:
# Key parameters shared by every stage of this notebook
num_iterations, num_rounds = 5, 3  # parallel discussions per stage; num_rounds is forwarded to run_meeting
model = "gpt-4o-2024-08-06"
save_dir = Path("antibody_design")  # root directory; each stage uses its own subdirectory

# Project context that is prepended to every agenda below
background_prompt = (
    "You are working on a research project to use machine learning to develop "
    "antibodies or nanobodies for the newest variant of the SARS-CoV-2 spike protein "
    "that also, ideally, have activity against other circulating minor variants and "
    "past variants."
)

## Select team members

In [None]:
# Select team members - prompts
team_selection_dir = save_dir / "team_selection"

# The agenda is the project background, the selection instructions, and a literal
# example Agent(...) definition that shows the format the reply should follow.
team_selection_agenda = (
    f"{background_prompt} You need to select a team of three scientists to help you with this project. "
    "Please select the team members that you would like to invite to a discussion to create the antibody/nanobody design approach. "
    "Please list the team members in the following format, using the team member below as an example. "
    "You should not include yourself (Principal Investigator) in the list.\n"
    "\n"
    "Agent(\n"
    '    title="Principal Investigator",\n'
    '    expertise="applying artificial intelligence to biomedical research",\n'
    '    goal="perform research in your area of expertise that maximizes the scientific impact of the work",\n'
    '    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",\n'
    ")\n"
)

In [None]:
# Select team members - discussion
# Launch num_iterations independent individual meetings with the Principal Investigator
# in parallel threads; each one is saved under its own name (discussion_1, discussion_2, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_agenda,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

# Leaving the `with` block already waits for every meeting to finish.
# concurrent.futures.wait() never re-raises worker exceptions, so a failed meeting would
# go unnoticed and the merge step would run on fewer discussions than requested.
# result() re-raises the first failure here instead.
for meeting_future in team_selection_futures:
    meeting_future.result()

In [None]:
# Select team members - merge
# Path.glob does not guarantee an ordering, so sort the paths (lexicographically) to feed
# the summaries to the merge meeting in the same order on every run.
team_selection_summaries = load_summaries(
    discussion_paths=sorted(team_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(team_selection_summaries)}")

# Wrap the original agenda in a merge prompt for the final, consolidating meeting
team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

# Single individual meeting in which the Principal Investigator merges the parallel discussions
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=team_selection_merge_prompt,
    save_dir=team_selection_dir,
    save_name="merged",
    summaries=team_selection_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

In [None]:
# Add team members
# Agent definitions for the three scientists used in the team meetings and coding stages below.
IMMUNOLOGIST = Agent(
    title="Immunologist",
    expertise="antibody engineering and immune response characterization",
    goal=(
        "guide the development of antibodies/nanobodies that elicit a strong and broad "
        "immune response"
    ),
    role=(
        "advise on immunogenicity, cross-reactivity with other variants, and potential for "
        "therapeutic application, ensuring the designs are viable for experimental validation "
        "and downstream applications"
    ),
)

MACHINE_LEARNING_SPECIALIST = Agent(
    title="Machine Learning Specialist",
    expertise="developing algorithms for protein-ligand interactions and optimization",
    goal=(
        "create and apply machine learning models to predict antibody efficacy and optimize "
        "binding affinity across SARS-CoV-2 variants"
    ),
    role=(
        "lead the development of AI tools for predicting interactions and refining antibody "
        "designs based on computational results"
    ),
)

COMPUTATIONAL_BIOLOGIST = Agent(
    title="Computational Biologist",
    expertise="protein structure prediction and molecular dynamics simulations",
    goal=(
        "develop predictive models to identify potential antibody/nanobody candidates and "
        "simulate interactions with the SARS-CoV-2 spike protein"
    ),
    role=(
        "provide insights into structural dynamics, guide virtual screening efforts, and "
        "validate computational predictions with simulations"
    ),
)

# Participants of the team meeting: the three scientists plus the imported Scientific Critic.
# The Principal Investigator is passed separately as the team lead.
team_members = (
    IMMUNOLOGIST,
    MACHINE_LEARNING_SPECIALIST,
    COMPUTATIONAL_BIOLOGIST,
    SCIENTIFIC_CRITIC,
)

## Select project

In [None]:
# Select antibodies project - prompts
project_selection_dir = save_dir / "project_selection"

# Agenda: project background followed by the design task, one sentence per source line
project_selection_agenda = (
    f"{background_prompt} "
    "Please create an antibody/nanobody design approach for this target that uses ESM, AlphaFold-Multimer, and Rosetta. "
    "Use ESM to identify promising antibody/nanobody sequences via log-likelihoods, use AlphaFold-Multimer to predict the 3D structure of the antibody/nanobody-antigen complex and predict its binding via AlphaFold-Multimer confidence, and use Rosetta to refine the 3D structure and compute the binding affinity. "
    "Decide whether you will use standard antibodies or nanobodies. "
    "Decide whether you will design antibodies/nanobodies de novo or modify existing antibodies/nanobodies. "
    "Explain in detail how you will apply ESM, AlphaFold-Multimer, and Rosetta to design antibodies/nanobodies."
)

# Questions passed to the meeting as agenda_questions
project_selection_questions = (
    # Antibody format and design strategy
    "Will you design standard antibodies or nanobodies?",
    "Will you design antibodies/nanobodies de novo or will you modify existing antibodies/nanobodies (choose only one)?",
    # Follow-ups for the "modify existing" branch
    "If modifying existing antibodies/nanobodies, which precise antibodies/nanobodies will you modify?",
    "If modifying existing antibodies/nanobodies, how exactly will you propose modifications to the antibodies/nanobodies?",
    # Follow-up for the "de novo" branch
    "If designing antibodies/nanobodies de novo, how exactly will you propose antibody/nanobody candidates?",
    # Tool usage, overall and per tool
    "How will you use ESM, AlphaFold-Multimer, and Rosetta (step-by-step, in detail, without any additional tools) to design antibodies/nanobodies?",
    "What specific role will ESM play in the antibody/nanobody design process?",
    "What specific role will AlphaFold-Multimer play in the antibody/nanobody design process?",
    "What specific role will Rosetta play in the antibody/nanobody design process?",
)

# Constraints passed to the meeting as agenda_rules
project_selection_rules = (
    "You must not use any computational tools at any point in your antibody/nanobody design approach except for ESM, AlphaFold-Multimer, and Rosetta.",
    "You must only consider the computational aspects of antibody/nanobody design and ignore experimental validation.",
)

In [None]:
# Select antibodies project - discussion
# Launch num_iterations independent team meetings in parallel threads, led by the
# Principal Investigator; each one is saved under its own name (discussion_1, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=project_selection_agenda,
            agenda_questions=project_selection_questions,
            agenda_rules=project_selection_rules,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

# Leaving the `with` block already waits for every meeting to finish.
# concurrent.futures.wait() never re-raises worker exceptions, so a failed meeting would
# go unnoticed and the merge step would run on fewer discussions than requested.
# result() re-raises the first failure here instead.
for meeting_future in project_selection_futures:
    meeting_future.result()

In [None]:
# Select antibodies project - merge
# Path.glob does not guarantee an ordering, so sort the paths (lexicographically) to feed
# the summaries to the merge meeting in the same order on every run.
project_selection_summaries = load_summaries(
    discussion_paths=sorted(project_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Wrap the original agenda, questions and rules in a merge prompt
project_selection_merge_prompt = create_merge_prompt(
    agenda=project_selection_agenda,
    agenda_questions=project_selection_questions,
    agenda_rules=project_selection_rules,
)

# Single individual meeting in which the Principal Investigator merges the parallel discussions.
# NOTE(review): this is the only merge cell that passes num_rounds - confirm that is intended.
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=project_selection_merge_prompt,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    num_rounds=num_rounds,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Coding rules

In [None]:
# Rules passed as agenda_rules to every code-writing meeting (ESM, AlphaFold-Multimer, Rosetta)
coding_rules = (
    # Completeness: no missing pieces, placeholders, or pseudocode
    "Your code must be self-contained (with appropriate imports) and complete.",
    "Your code may not include any undefined or unimplemented variables or functions.",
    "Your code may not include any pseudocode; it must be fully functioning code.",
    # Inputs: nothing baked in, everything user-provided comes from the command line
    "Your code may not include any hard-coded examples.",
    "If your code needs user-provided values, write code to parse those values from the command line.",
    # Quality bar
    (
        "Your code must be high quality, well-engineered, efficient, and well-documented "
        "(including docstrings, comments, and Python type hints if using Python)."
    ),
)

## ESM

In [None]:
# ESM - prompts
esm_dir = save_dir / "esm"

# Agenda: project background, a reminder of the agreed approach, then the ESM coding task
esm_agenda = (
    f"{background_prompt} "
    "Your team previously decided on a nanobody design approach using ESM, AlphaFold-Multimer, and Rosetta to modify existing nanobodies. "
    "Now, you must implement the ESM portion of the approach. "
    "Please write a complete Python script that takes a nanobody sequence as input and uses ESM amino acid log-likelihoods to identify the most promising mutant sequences."
)

# The merged project-selection summary is passed to each ESM meeting as prior context
esm_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir / "merged.json"],
)
print(f"Number of prior summaries: {len(esm_prior_summaries)}")

In [None]:
# ESM - discussion
# Launch num_iterations independent individual meetings with the Machine Learning Specialist
# in parallel threads; each one is saved under its own name (discussion_1, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=MACHINE_LEARNING_SPECIALIST,
            agenda=esm_agenda,
            agenda_rules=coding_rules,
            save_dir=esm_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
            summaries=esm_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

# Leaving the `with` block already waits for every meeting to finish.
# concurrent.futures.wait() never re-raises worker exceptions, so a failed meeting would
# go unnoticed and the merge step would run on fewer discussions than requested.
# result() re-raises the first failure here instead.
for meeting_future in esm_futures:
    meeting_future.result()

In [None]:
# ESM - merge
# Path.glob does not guarantee an ordering, so sort the paths (lexicographically) to feed
# the summaries to the merge meeting in the same order on every run.
esm_summaries = load_summaries(
    discussion_paths=sorted(esm_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(esm_summaries)}")

# Wrap the original agenda and coding rules in a merge prompt
esm_merge_prompt = create_merge_prompt(
    agenda=esm_agenda,
    agenda_rules=coding_rules,
)

# Single individual meeting in which the Machine Learning Specialist merges the parallel discussions
run_meeting(
    meeting_type="individual",
    team_member=MACHINE_LEARNING_SPECIALIST,
    agenda=esm_merge_prompt,
    save_dir=esm_dir,
    save_name="merged",
    summaries=esm_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## AlphaFold-Multimer

In [None]:
# AlphaFold-Multimer - prompts
alphafold_dir = save_dir / "alphafold"

# Agenda: project background, a reminder of the agreed approach, then the AlphaFold-Multimer coding task
alphafold_agenda = f"{background_prompt} Your team previously decided on a nanobody design approach using ESM, AlphaFold-Multimer, and Rosetta to modify existing nanobodies. Now, you must implement the AlphaFold-Multimer portion of the approach. I will run AlphaFold-Multimer on each nanobody in complex with the antigen, so you only need to process the outputs. Please write a complete Python script that takes as input a directory containing AlphaFold structures of each nanobody-antigen complex in PDB format, and output the AlphaFold-Multimer confidence of each nanobody-antigen interaction in terms of the interface pLDDT."

# The merged project-selection summary is passed to each AlphaFold-Multimer meeting as prior context
alphafold_prior_summaries = load_summaries(discussion_paths=[project_selection_dir / "merged.json"])
# Report the count, as the ESM and Rosetta prompt cells do, so a missing merged summary is visible
print(f"Number of prior summaries: {len(alphafold_prior_summaries)}")
In [None]:
# AlphaFold-Multimer - discussion
# Launch num_iterations independent individual meetings with the Computational Biologist
# in parallel threads; each one is saved under its own name (discussion_1, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    alphafold_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTATIONAL_BIOLOGIST,
            agenda=alphafold_agenda,
            agenda_rules=coding_rules,
            save_dir=alphafold_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
            summaries=alphafold_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

# Leaving the `with` block already waits for every meeting to finish.
# concurrent.futures.wait() never re-raises worker exceptions, so a failed meeting would
# go unnoticed and the merge step would run on fewer discussions than requested.
# result() re-raises the first failure here instead.
for meeting_future in alphafold_futures:
    meeting_future.result()

In [None]:
# AlphaFold-Multimer - merge
# Path.glob does not guarantee an ordering, so sort the paths (lexicographically) to feed
# the summaries to the merge meeting in the same order on every run.
alphafold_summaries = load_summaries(
    discussion_paths=sorted(alphafold_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(alphafold_summaries)}")

# Wrap the original agenda and coding rules in a merge prompt
alphafold_merge_prompt = create_merge_prompt(
    agenda=alphafold_agenda,
    agenda_rules=coding_rules,
)

# Single individual meeting in which the Computational Biologist merges the parallel discussions
run_meeting(
    meeting_type="individual",
    team_member=COMPUTATIONAL_BIOLOGIST,
    agenda=alphafold_merge_prompt,
    save_dir=alphafold_dir,
    save_name="merged",
    summaries=alphafold_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Rosetta

In [None]:
# Rosetta - prompts
rosetta_dir = save_dir / "rosetta"

# Agenda: project background, a reminder of the agreed approach, then the Rosetta scripting task
rosetta_agenda = (
    f"{background_prompt} "
    "Your team previously decided on a nanobody design approach using ESM, AlphaFold-Multimer, and Rosetta to modify existing nanobodies. "
    "Now, you must implement the Rosetta portion of the approach. "
    "Please write a complete bash script that takes as input a directory of PDB files containing nanobody-antigen complexes from AlphaFold-Multimer and uses Rosetta commands to compute the binding energy of each nanobody-antigen complex (after appropriate preprocessing using Rosetta)."
)

# The merged project-selection summary is passed to each Rosetta meeting as prior context
rosetta_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir / "merged.json"],
)
print(f"Number of prior summaries: {len(rosetta_prior_summaries)}")

In [None]:
# Rosetta - discussion
# Launch num_iterations independent individual meetings with the Computational Biologist
# in parallel threads; each one is saved under its own name (discussion_1, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    rosetta_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTATIONAL_BIOLOGIST,
            agenda=rosetta_agenda,
            agenda_rules=coding_rules,
            save_dir=rosetta_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
            summaries=rosetta_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

# Leaving the `with` block already waits for every meeting to finish.
# concurrent.futures.wait() never re-raises worker exceptions, so a failed meeting would
# go unnoticed and the merge step would run on fewer discussions than requested.
# result() re-raises the first failure here instead.
for meeting_future in rosetta_futures:
    meeting_future.result()

In [None]:
# Rosetta - merge
# Path.glob does not guarantee an ordering, so sort the paths (lexicographically) to feed
# the summaries to the merge meeting in the same order on every run.
rosetta_summaries = load_summaries(
    discussion_paths=sorted(rosetta_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(rosetta_summaries)}")

# Wrap the original agenda and coding rules in a merge prompt
rosetta_merge_prompt = create_merge_prompt(
    agenda=rosetta_agenda,
    agenda_rules=coding_rules,
)

# Single individual meeting in which the Computational Biologist merges the parallel discussions
run_meeting(
    meeting_type="individual",
    team_member=COMPUTATIONAL_BIOLOGIST,
    agenda=rosetta_merge_prompt,
    save_dir=rosetta_dir,
    save_name="merged",
    summaries=rosetta_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)