In [None]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    CODING_RULES,
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC,
    create_merge_prompt,
)
from run_meeting import run_meeting
from utils import load_summaries

In [None]:
# Key parameters shared by the cells below.

# Model identifier passed to every run_meeting call in this notebook.
model = "gpt-4o-2024-08-06"

# Root output directory; each stage uses its own subdirectory of it.
save_dir = Path("antibody_design")

# num_iterations: how many parallel discussions are launched per stage.
# num_rounds: forwarded to run_meeting as its num_rounds argument.
num_iterations, num_rounds = 5, 3

# Project framing that opens every agenda built in this notebook.
background_prompt = (
    "You are working on a research project to use machine learning to develop "
    "antibodies or nanobodies for the newest variant of the SARS-CoV-2 spike protein "
    "that also, ideally, have activity against other circulating minor variants and past variants."
)

## Select team members

In [None]:
# Select team members - prompts
# Outputs of the team-selection stage go in their own subdirectory.
team_selection_dir = save_dir.joinpath("team_selection")

# Agenda = project background + instructions + a literal Agent(...) example
# showing the format the answer should follow. The text ends with a newline.
team_selection_agenda = (
    f"{background_prompt} You need to select a team of three scientists to help you with this project. "
    "Please select the team members that you would like to invite to a discussion to create the antibody/nanobody design approach. "
    "Please list the team members in the following format, using the team member below as an example. "
    "You should not include yourself (Principal Investigator) in the list.\n"
    "\n"
    "Agent(\n"
    '    title="Principal Investigator",\n'
    '    expertise="applying artificial intelligence to biomedical research",\n'
    '    goal="perform research in your area of expertise that maximizes the scientific impact of the work",\n'
    '    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",\n'
    ")\n"
)

In [None]:
# Select team members - discussion
# Launch num_iterations independent individual meetings with the Principal
# Investigator in parallel threads; iteration k uses save_name "discussion_<k>"
# (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_agenda,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(team_selection_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in team_selection_futures:
        future.result()

In [None]:
# Select team members - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
team_selection_summaries = load_summaries(
    discussion_paths=sorted(team_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(team_selection_summaries)}")

team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

# One individual meeting with the Principal Investigator over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    summaries=team_selection_summaries,
    agenda=team_selection_merge_prompt,
    save_dir=team_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

In [None]:
# Add team members
# NOTE(review): these definitions presumably transcribe the merged output of the
# team-selection stage - confirm against team_selection/merged.

# Immunology perspective.
IMMUNOLOGIST = Agent(
    title="Immunologist",
    expertise="antibody engineering and immune response characterization",
    goal=(
        "guide the development of antibodies/nanobodies "
        "that elicit a strong and broad immune response"
    ),
    role=(
        "advise on immunogenicity, cross-reactivity with other variants, and potential for therapeutic application, "
        "ensuring the designs are viable for experimental validation and downstream applications"
    ),
)

# Machine learning perspective.
MACHINE_LEARNING_SPECIALIST = Agent(
    title="Machine Learning Specialist",
    expertise="developing algorithms for protein-ligand interactions and optimization",
    goal=(
        "create and apply machine learning models to predict antibody efficacy "
        "and optimize binding affinity across SARS-CoV-2 variants"
    ),
    role=(
        "lead the development of AI tools for predicting interactions "
        "and refining antibody designs based on computational results"
    ),
)

# Structural / simulation perspective.
COMPUTATIONAL_BIOLOGIST = Agent(
    title="Computational Biologist",
    expertise="protein structure prediction and molecular dynamics simulations",
    goal=(
        "develop predictive models to identify potential antibody/nanobody candidates "
        "and simulate interactions with the SARS-CoV-2 spike protein"
    ),
    role=(
        "provide insights into structural dynamics, guide virtual screening efforts, "
        "and validate computational predictions with simulations"
    ),
)

# Members passed to team meetings: the three specialists plus the imported
# SCIENTIFIC_CRITIC. The Principal Investigator is passed separately as team_lead.
team_members = (IMMUNOLOGIST, MACHINE_LEARNING_SPECIALIST, COMPUTATIONAL_BIOLOGIST, SCIENTIFIC_CRITIC)

## Select project

In [None]:
# Select antibodies project - prompts
# Outputs of the project-selection stage go in their own subdirectory.
project_selection_dir = save_dir.joinpath("project_selection")

# Questions passed alongside the agenda as agenda_questions.
project_selection_questions = (
    "Will you design standard antibodies or nanobodies?",
    "Will you design antibodies/nanobodies de novo or will you modify existing antibodies/nanobodies (choose only one)?",
    "If modifying existing antibodies/nanobodies, which precise antibodies/nanobodies will you modify (please list 3-4)?",
    "If designing antibodies/nanobodies de novo, how exactly will you propose antibody/nanobody candidates?",
)

# Agenda = project background + the design-approach decisions to be made.
project_selection_agenda = (
    f"{background_prompt} Please create an antibody/nanobody design approach to solve this problem. "
    "Decide whether you will design antibodies or nanobodies. "
    "For your choice, decide whether you will design the antibodies/nanobodies de novo or whether you will modify existing antibodies/nanobodies. "
    "If modifying existing antibodies/nanobodies, please specify which antibodies/nanobodies to start with as good candidates for targeting the newest variant of the SARS-CoV-2 spike protein. "
    "If designing antibodies/nanobodies de novo, please describe how you will propose antibody/nanobody candidates."
)

In [None]:
# Select antibodies project - discussion
# Launch num_iterations independent team meetings (Principal Investigator as
# team_lead, team_members as participants) in parallel threads; iteration k
# uses save_name "discussion_<k>" (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=project_selection_agenda,
            agenda_questions=project_selection_questions,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(project_selection_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in project_selection_futures:
        future.result()

In [None]:
# Select antibodies project - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
project_selection_summaries = load_summaries(
    discussion_paths=sorted(project_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(project_selection_summaries)}")

project_selection_merge_prompt = create_merge_prompt(
    agenda=project_selection_agenda,
    agenda_questions=project_selection_questions,
)

# One individual meeting with the Principal Investigator over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    summaries=project_selection_summaries,
    agenda=project_selection_merge_prompt,
    save_dir=project_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)

In [None]:
# Fixed statement of the chosen design approach, inserted after background_prompt
# in the later agendas.
# NOTE(review): "previous decided" reads like a typo for "previously decided".
# It is kept verbatim because this text is sent to the model; changing it would
# alter the prompts used for any already-saved discussions.
nanobody_prompt = (
    "Your team previous decided to modify existing nanobodies "
    "to improve their binding to the newest variant of the SARS-CoV-2 spike protein."
)

## Select tools

In [None]:
# Select tools - prompts
# Outputs of the tools-selection stage go in their own subdirectory.
tools_selection_dir = save_dir.joinpath("tools_selection")

# Questions passed alongside the agenda as agenda_questions.
tools_selection_questions = (
    "What machine learning and/or computational tools could be used for this nanobody design approach (list 5-10)?",
    "For each tool, how could it be used for designing modified nanobodies?",
)

# Agenda = project background + chosen approach + the tool-selection task.
tools_selection_agenda = (
    f"{background_prompt} {nanobody_prompt} Now you need to select machine learning and/or computational tools to implement this nanobody design approach. "
    "Please list several tools (5-10) that would be relevant to this nanobody design approach and how they could be used in the context of this project. "
    "If selecting machine learning tools, please prioritize pre-trained models (e.g., pre-trained protein language models or protein structure prediction models) for simplicity."
)

# Summaries loaded from the project-selection stage's merged.json; the
# discussion cell passes them to run_meeting as `summaries`.
tools_selection_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir.joinpath("merged.json")],
)
print(f"Number of prior summaries: {len(tools_selection_prior_summaries)}")

In [None]:
# Select tools - discussion
# Launch num_iterations independent team meetings (Principal Investigator as
# team_lead, team_members as participants) in parallel threads, each given the
# prior project-selection summaries; iteration k uses save_name
# "discussion_<k>" (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    tools_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            summaries=tools_selection_prior_summaries,
            agenda=tools_selection_agenda,
            agenda_questions=tools_selection_questions,
            save_dir=tools_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(tools_selection_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in tools_selection_futures:
        future.result()

In [None]:
# Select tools - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
tools_selection_summaries = load_summaries(
    discussion_paths=sorted(tools_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(tools_selection_summaries)}")

tools_selection_merge_prompt = create_merge_prompt(
    agenda=tools_selection_agenda,
    agenda_questions=tools_selection_questions,
)

# One individual meeting with the Principal Investigator over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    summaries=tools_selection_summaries,
    agenda=tools_selection_merge_prompt,
    save_dir=tools_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)

## ESM

In [None]:
# ESM - prompts
# Outputs of the ESM stage go in their own subdirectory.
esm_dir = save_dir.joinpath("esm")

# Agenda = project background + chosen approach + the ESM scripting task.
# NOTE(review): the first sentence says "existing antibody" while the rest of
# the prompt says nanobody; kept verbatim because it is runtime prompt text.
esm_agenda = (
    f"{background_prompt} {nanobody_prompt} Now you must use ESM to suggest modifications to an existing antibody. "
    "Please write a complete Python script that takes a nanobody sequence as input and uses ESM amino acid log-likelihoods to identify the most promising point mutations by log-likelihood ratio."
)

In [None]:
# ESM - discussion
# Launch num_iterations independent individual meetings with the Machine
# Learning Specialist (with CODING_RULES as agenda_rules) in parallel threads;
# iteration k uses save_name "discussion_<k>" (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    esm_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=MACHINE_LEARNING_SPECIALIST,
            agenda=esm_agenda,
            agenda_rules=CODING_RULES,
            save_dir=esm_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(esm_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in esm_futures:
        future.result()

In [None]:
# ESM - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
esm_summaries = load_summaries(
    discussion_paths=sorted(esm_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(esm_summaries)}")

esm_merge_prompt = create_merge_prompt(
    agenda=esm_agenda,
    agenda_rules=CODING_RULES,
)

# One individual meeting with the Machine Learning Specialist over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=MACHINE_LEARNING_SPECIALIST,
    summaries=esm_summaries,
    agenda=esm_merge_prompt,
    save_dir=esm_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## AlphaFold-Multimer

In [None]:
# AlphaFold-Multimer - prompts
# Outputs of the AlphaFold-Multimer stage go in their own subdirectory.
alphafold_dir = save_dir.joinpath("alphafold")

# Agenda = project background + chosen approach + the output-processing task.
alphafold_agenda = (
    f"{background_prompt} {nanobody_prompt} Now you must use AlphaFold-Multimer to predict the structure of a nanobody-antigen complex and evaluate its binding. "
    "I will run AlphaFold-Multimer on several nanobody-antigen complexes and you need to process the outputs. "
    "Please write a complete Python script that takes as input a directory containing PDB files where each PDB file contains one nanobody-antigen complex predicted by AlphaFold-Multimer and outputs a CSV file containing the AlphaFold-Multimer confidence of each nanobody-antigen complex in terms of the interface pLDDT."
)

In [None]:
# AlphaFold-Multimer - discussion
# Launch num_iterations independent individual meetings with the Computational
# Biologist (with CODING_RULES as agenda_rules) in parallel threads;
# iteration k uses save_name "discussion_<k>" (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    alphafold_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTATIONAL_BIOLOGIST,
            agenda=alphafold_agenda,
            agenda_rules=CODING_RULES,
            save_dir=alphafold_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(alphafold_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in alphafold_futures:
        future.result()

In [None]:
# AlphaFold-Multimer - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
alphafold_summaries = load_summaries(
    discussion_paths=sorted(alphafold_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(alphafold_summaries)}")

alphafold_merge_prompt = create_merge_prompt(
    agenda=alphafold_agenda,
    agenda_rules=CODING_RULES,
)

# One individual meeting with the Computational Biologist over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=COMPUTATIONAL_BIOLOGIST,
    summaries=alphafold_summaries,
    agenda=alphafold_merge_prompt,
    save_dir=alphafold_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Rosetta

In [None]:
# Rosetta - prompts
# Outputs of the Rosetta stage go in their own subdirectory.
rosetta_dir = save_dir.joinpath("rosetta")

# Agenda = project background + chosen approach + the RosettaScripts XML and
# Python driver-script tasks.
rosetta_agenda = (
    f"{background_prompt} {nanobody_prompt} Now you must use Rosetta to calculate the binding energy of nanobody-antigen complexes. "
    "Please write a complete RosettaScripts XML file that calculates the binding energy of a nanobody-antigen complex predicted by AlphaFold-Multimer as provided in PDB format. "
    "Please be sure to include any necessary Rosetta preprocessing steps on the structure. "
    "Additionally, please write a complete Python script that runs the RosettaScripts XML file on every PDB file in a directory and saves a CSV file with the binding energy of every nanobody-antigen complex."
)

In [None]:
# Rosetta - discussion
# Launch num_iterations independent individual meetings with the Computational
# Biologist (with CODING_RULES as agenda_rules) in parallel threads;
# iteration k uses save_name "discussion_<k>" (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    rosetta_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTATIONAL_BIOLOGIST,
            agenda=rosetta_agenda,
            agenda_rules=CODING_RULES,
            save_dir=rosetta_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(rosetta_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in rosetta_futures:
        future.result()

In [None]:
# Rosetta - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
rosetta_summaries = load_summaries(
    discussion_paths=sorted(rosetta_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(rosetta_summaries)}")

rosetta_merge_prompt = create_merge_prompt(
    agenda=rosetta_agenda,
    agenda_rules=CODING_RULES,
)

# One individual meeting with the Computational Biologist over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=COMPUTATIONAL_BIOLOGIST,
    summaries=rosetta_summaries,
    agenda=rosetta_merge_prompt,
    save_dir=rosetta_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Workflow

In [None]:
# Workflow - prompts
# Outputs of the workflow stage go in their own subdirectory.
workflow_dir = save_dir.joinpath("workflow")

# Questions passed alongside the agenda as agenda_questions.
workflow_questions = (
    "For the first round of design, how many (give a precise number) different single point mutations will you select for each of the four existing nanobodies using ESM?",
    "After evaluating those modified nanobodies using AlphaFold-Multimer and Rosetta, how will you combine the AlphaFold-Multimer interface pLDDT score and the Rosetta binding energy score for each modified nanobody into a single score (e.g., using a weighted average) that can be used to rank the modified nanobodies?",
    "After ranking the modified nanobodies using the combined AlphaFold-Multimer and Rosetta scores, how many (give a precise number) of these modified nanobodies will you select for the next round of mutation?",
    "In all subsequent rounds of mutation, how many (give a precise number) different single point mutations will you select for each of the modified nanobodies from the previous round using ESM?",
    "In all subsequent rounds of mutation, how many (give a precise number) of the top-ranked modified nanobodies from AlphaFold-Multimer and Rosetta will you select for the next round of mutation?",
    "How many rounds of mutation will you perform in total (i.e., how many point mutations will each of the final modified nanobodies have)?",
    "How does your procedure above ensure that the final round of selection results in exactly 96 modified nanobodies in total (show the math)?",
)

# Agenda = project background + chosen approach + a description of the
# ESM -> AlphaFold-Multimer/Rosetta pipeline and the 4 -> 96 design goal.
workflow_agenda = (
    f"{background_prompt} {nanobody_prompt} Your team has built a nanobody design pipeline. "
    "It first selects promising point mutations to an existing nanobody using ESM log-likelihoods and then validates those nanobodies using AlphaFold-Multimer and Rosetta to evaluate their binding to the newest variant of the SARS-CoV-2 spike protein. "
    "Now you need to determine how this pipeline will be run, with the goal of starting with four existing nanobodies and designing 96 modified nanobodies in total. "
    "Please determine how to apply this pipeline to design these nanobodies."
)

In [None]:
# Workflow - discussion
# Launch num_iterations independent team meetings (Principal Investigator as
# team_lead, team_members as participants) in parallel threads; iteration k
# uses save_name "discussion_<k>" (numbered from 1).
with concurrent.futures.ThreadPoolExecutor() as executor:
    workflow_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            agenda=workflow_agenda,
            agenda_questions=workflow_questions,
            save_dir=workflow_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished, whether it succeeded or failed.
    concurrent.futures.wait(workflow_futures)
    # wait() never re-raises exceptions from the workers, so a failed meeting
    # would otherwise pass unnoticed. result() re-raises the first failure.
    for future in workflow_futures:
        future.result()

In [None]:
# Workflow - merge
# Path.glob() yields matches in arbitrary, filesystem-dependent order. Sorting
# the paths means load_summaries receives them in the same order on every run.
# (Order is lexicographic by path, so "discussion_10" sorts before "discussion_2".)
workflow_summaries = load_summaries(
    discussion_paths=sorted(workflow_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(workflow_summaries)}")

workflow_merge_prompt = create_merge_prompt(
    agenda=workflow_agenda,
    agenda_questions=workflow_questions,
)

# One individual meeting with the Principal Investigator over the loaded
# summaries, run at CONSISTENT_TEMPERATURE with save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    summaries=workflow_summaries,
    agenda=workflow_merge_prompt,
    save_dir=workflow_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)