In [None]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    CODING_RULES,
    DRUG_DISCOVERY_CONTEXTS,
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC,
    create_merge_prompt,
)
from run_meeting import run_meeting
from utils import load_summaries

In [None]:
# Key parameters shared by every meeting in this notebook
num_iterations, num_rounds = 5, 3  # parallel discussions per stage, rounds passed to run_meeting
model = "gpt-4o-2024-08-06"
save_dir = Path("drug_discovery")

# Project framing placed at the start of every agenda defined below
background_prompt = " ".join((
    "You are working on a research project to use machine learning for drug discovery.",
    "Your goals are the following:",
    "(1) the project must have high clinical value, meaning the research contributes to helping patients,",
    "(2) the project must include a scientifically impactful application of machine learning to drug discovery,",
    "and (3) the project must use Emerald Cloud Labs (ECL) for all experimental validation.",
))

## Select team members

In [None]:
# Select team members - prompts
team_selection_dir = save_dir.joinpath("team_selection")

# The agenda ends with an example Agent definition showing the expected answer format.
team_selection_agenda = (
    f"{background_prompt} "
    "You need to select a team of three scientists to help you with this project. "
    "Please select the team members that you would like to invite to work on the project. "
    "Please list the team members in the following format, using the team member below as an example. "
    "You should not include yourself (Principal Investigator) in the list.\n"
    "\n"
    "Agent(\n"
    '    title="Principal Investigator",\n'
    '    expertise="applying artificial intelligence to biomedical research",\n'
    '    goal="perform research in your area of expertise that maximizes the scientific impact of the work",\n'
    '    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",\n'
    ")\n"
)

In [None]:
# Select team members - discussion
# Run the independent discussions in parallel, one submitted task per iteration.
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_agenda,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(team_selection_futures)

# wait() only blocks until the futures finish; it never re-raises what a worker raised.
# result() surfaces the first failed meeting here instead of letting it pass unnoticed.
for future in team_selection_futures:
    future.result()

In [None]:
# Select team members - merge
# glob() does not guarantee any ordering; sorting makes the list of paths handed to
# load_summaries reproducible across runs and machines.
team_selection_summaries = load_summaries(
    discussion_paths=sorted(team_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(team_selection_summaries)}")
# Merging nothing would still call the model, so stop early if no discussion was loaded.
assert len(team_selection_summaries) > 0, f"No discussion summaries found in {team_selection_dir}"

team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    summaries=team_selection_summaries,
    agenda=team_selection_merge_prompt,
    save_dir=team_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

In [None]:
# Add team members
# Each Agent is built from the same four fields (title, expertise, goal, role) that the
# team selection agenda uses in its example.
# NOTE(review): these definitions look hand-transcribed from the merged team selection
# meeting output — confirm they match the saved summary.
COMPUTATIONAL_CHEMIST = Agent(
    title="Computational Chemist",
    expertise="molecular modeling, cheminformatics, and machine learning applications in drug design",
    goal="develop and optimize computational models to predict molecular interactions and optimize drug candidate structures",
    role="design and implement molecular simulations and predictive algorithms to identify potential drug candidates, ensuring integration with machine learning models"
)

MACHINE_LEARNING_SCIENTIST = Agent(
    title="Machine Learning Scientist",
    expertise="deep learning, neural networks, and data-driven model development",
    goal="create robust machine learning models that can efficiently predict drug efficacy and safety profiles",
    role="develop novel machine learning models tailored for drug discovery, collaborate on integrating biological and chemical data, and optimize models for predictive accuracy and performance"
)

EXPERIMENTAL_PHARMACOLOGIST = Agent(
    title="Experimental Pharmacologist",
    expertise="in vitro and in vivo pharmacology, assay development, and high-throughput screening",
    goal="design and oversee the experimental validation of machine learning predictions using the Emerald Cloud Labs platform",
    role="coordinate and conduct experimental assays that validate the predictions of machine learning models, ensuring robust and reproducible results that align with clinical objectives"
)

# Members passed as team_members to the team meetings below. The Principal Investigator
# is not listed because it is passed separately as team_lead; SCIENTIFIC_CRITIC is
# imported from prompts rather than defined in this notebook.
team_members = (
    COMPUTATIONAL_CHEMIST,
    MACHINE_LEARNING_SCIENTIST,
    EXPERIMENTAL_PHARMACOLOGIST,
    SCIENTIFIC_CRITIC,
)

## Drug modality

In [25]:
# Drug modality - prompts
drug_modality_dir = save_dir.joinpath("drug_modality")

# One sentence per source line; the pieces concatenate into a single agenda string.
drug_modality_agenda = (
    f"{background_prompt} "
    "In this meeting, you need to select a specific drug modality to pursue. "
    "Without deciding on a specific disease or target yet, please determine what drug modality would be most appropriate given the goals and constraints of the project as well as overall feasibility. "
    "Please be as specific as possible in terms of the type of drug modality and the rationale for choosing it."
)

# Passed to the meetings as agenda_questions
drug_modality_questions = (
    "What is the specific drug modality that you are proposing?",
    "For that drug modality, will you design new drugs de novo or modify and improve existing drugs (choose only one)?",
)

In [26]:
# Drug modality - discussion
# Run the independent team discussions in parallel, one submitted task per iteration.
with concurrent.futures.ThreadPoolExecutor() as executor:
    drug_modality_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            agenda=drug_modality_agenda,
            agenda_questions=drug_modality_questions,
            save_dir=drug_modality_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(drug_modality_futures)

# wait() only blocks until the futures finish; it never re-raises what a worker raised.
# result() surfaces the first failed meeting here instead of letting it pass unnoticed.
for future in drug_modality_futures:
    future.result()

Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]
Team:   0%|          | 0/5 [00:00<?, ?it/s][A

Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A


Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A



Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A





Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A







Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A






Team:  20%|██        | 1/5 [00:04<00:17,  4.27s/it][A[A[A[A[A[A[A




Team:  20%|██        | 1/5 [00:05<00:21,  5.42s/it][A[A[A[A[A


Team:  20%|██        | 1/5 [00:05<00:22,  5.58s/it][A[A[A
Team:  20%|██        | 1/5 [00:07<00:30,  7.71s/it][A






Team:  40%|████      | 2/

Input token count: 216,901
Output token count: 11,363
Tool token count: 0
Max token length: 11,363
Cost: $0.66
Time: 1:48











Team: 100%|██████████| 5/5 [00:34<00:00,  7.00s/it][A[A[A[A[A[A[A[A[A








Rounds (+ Final Round):  75%|███████▌  | 3/4 [01:44<00:34, 34.92s/it][A[A[A[A[A[A[A[A





Team:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A


Team:   0%|          | 0/5 [00:10<?, ?it/s].43s/it][A[A[A




Rounds (+ Final Round): 100%|██████████| 4/4 [01:51<00:00, 27.86s/it][A[A[A[A


Input token count: 223,517
Output token count: 11,506
Tool token count: 0
Max token length: 11,506
Cost: $0.67
Time: 1:53



Team:  80%|████████  | 4/5 [00:36<00:09,  9.81s/it][A


Team: 100%|██████████| 5/5 [00:47<00:00,  9.54s/it][A[A[A


Rounds (+ Final Round):  75%|███████▌  | 3/4 [01:56<00:40, 40.82s/it][A[A


Team:   0%|          | 0/5 [00:10<?, ?it/s][A[A[A








Rounds (+ Final Round): 100%|██████████| 4/4 [01:54<00:00, 28.74s/it][A[A[A[A[A[A[A[A


Input token count: 211,407
Output token count: 10,736
Tool token count: 0
Max token length: 10,736
Cost: $0.64
Time: 1:59



Team: 100%|██████████| 5/5 [00:44<00:00,  8.88s/it][A
Rounds (+ Final Round):  75%|███████▌  | 3/4 [01:59<00:41, 41.18s/it]
Team:   0%|          | 0/5 [00:12<?, ?it/s][A
Rounds (+ Final Round): 100%|██████████| 4/4 [02:12<00:00, 33.03s/it]


Input token count: 210,911
Output token count: 10,779
Tool token count: 0
Max token length: 10,779
Cost: $0.64
Time: 2:13


Team:   0%|          | 0/5 [00:16<?, ?it/s]


Rounds (+ Final Round): 100%|██████████| 4/4 [02:13<00:00, 33.26s/it][A[A


Input token count: 209,611
Output token count: 10,808
Tool token count: 0
Max token length: 10,808
Cost: $0.63
Time: 2:14


In [27]:
# Drug modality - merge
# glob() does not guarantee any ordering; sorting makes the list of paths handed to
# load_summaries reproducible across runs and machines.
drug_modality_summaries = load_summaries(
    discussion_paths=sorted(drug_modality_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(drug_modality_summaries)}")
# Merging nothing would still call the model, so stop early if no discussion was loaded.
assert len(drug_modality_summaries) > 0, f"No discussion summaries found in {drug_modality_dir}"

drug_modality_merge_prompt = create_merge_prompt(
    agenda=drug_modality_agenda,
    agenda_questions=drug_modality_questions,
)

run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    summaries=drug_modality_summaries,
    agenda=drug_modality_merge_prompt,
    save_dir=drug_modality_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)

Number of summaries: 5


Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]
Team:   0%|          | 0/2 [00:00<?, ?it/s][A
Team:  50%|█████     | 1/2 [00:08<00:08,  8.93s/it][A
Team: 100%|██████████| 2/2 [00:15<00:00,  7.74s/it][A
Rounds (+ Final Round):  25%|██▌       | 1/4 [00:15<00:46, 15.49s/it]
Team:   0%|          | 0/2 [00:00<?, ?it/s][A
Team:  50%|█████     | 1/2 [00:09<00:09,  9.09s/it][A
Team: 100%|██████████| 2/2 [00:14<00:00,  7.32s/it][A
Rounds (+ Final Round):  50%|█████     | 2/4 [00:30<00:29, 15.00s/it]
Team:   0%|          | 0/2 [00:00<?, ?it/s][A
Team:  50%|█████     | 1/2 [00:09<00:09,  9.08s/it][A
Team: 100%|██████████| 2/2 [00:15<00:00,  7.84s/it][A
Rounds (+ Final Round):  75%|███████▌  | 3/4 [00:45<00:15, 15.31s/it]
Team:   0%|          | 0/2 [00:08<?, ?it/s][A
Rounds (+ Final Round): 100%|██████████| 4/4 [00:53<00:00, 13.48s/it]


Input token count: 118,299
Output token count: 11,504
Tool token count: 0
Max token length: 11,504
Cost: $0.41
Time: 0:54


In [None]:
# Outcome of the drug modality stage, hard-coded so the later agendas can include it.
# NOTE(review): presumably transcribed by hand from the merged drug modality meeting —
# confirm it matches the saved summary.
drug_modality_prompt = "Your team previously decided to develop peptide-based therapeutics."

## Target selection

In [None]:
# Target selection - prompts
target_selection_dir = save_dir.joinpath("target_selection")

# One sentence per source line; the pieces concatenate into a single agenda string.
target_selection_agenda = (
    f"{background_prompt} {drug_modality_prompt} "
    "Now you need to select a specific disease and a specific drug target for that disease. "
    "Please suggest five potential disease/target pairs would be most appropriate for peptide-based therapeutics."
)

# Passed to the meetings as agenda_questions
target_selection_questions = (
    "What specific disease/target pairs are you proposing (list five)?",
    "Why are these diseases and targets appropriate for peptide-based therapeutics?",
    "For each disease/target pair, is there an existing peptide drug that can be improved or will you design a new peptide de novo?",
)

In [None]:
# Target selection - discussion
# Every discussion receives the merged drug modality summary as prior context.
target_selection_prior_summaries = load_summaries(discussion_paths=[drug_modality_dir / "merged.json"])
print(f"Number of summaries: {len(target_selection_prior_summaries)}")

# Run the independent team discussions in parallel, one submitted task per iteration.
with concurrent.futures.ThreadPoolExecutor() as executor:
    target_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            summaries=target_selection_prior_summaries,
            agenda=target_selection_agenda,
            agenda_questions=target_selection_questions,
            save_dir=target_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(target_selection_futures)

# wait() only blocks until the futures finish; it never re-raises what a worker raised.
# result() surfaces the first failed meeting here instead of letting it pass unnoticed.
for future in target_selection_futures:
    future.result()

In [None]:
# Target selection - merge
# glob() does not guarantee any ordering; sorting makes the list of paths handed to
# load_summaries reproducible across runs and machines.
target_selection_summaries = load_summaries(
    discussion_paths=sorted(target_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(target_selection_summaries)}")
# Merging nothing would still call the model, so stop early if no discussion was loaded.
assert len(target_selection_summaries) > 0, f"No discussion summaries found in {target_selection_dir}"

target_selection_merge_prompt = create_merge_prompt(
    agenda=target_selection_agenda,
    agenda_questions=target_selection_questions,
)

run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    summaries=target_selection_summaries,
    agenda=target_selection_merge_prompt,
    save_dir=target_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)

In [None]:
# Outcome of the target selection stage, hard-coded so the later agendas can include it.
# The sentence must end with a period: the agendas that embed it follow it directly with
# " Now you ...", so a missing period fuses two sentences in the prompt.
# NOTE(review): presumably transcribed by hand from the merged target selection meeting — confirm.
disease_target_prompt = "Your team previously decided to pursue Type 2 Diabetes by improving existing GLP-1 analogs with dual-agonist properties to enhance efficacy and stability."

## Data curation

In [None]:
# Data curation - prompts
data_curation_dir = save_dir.joinpath("data_curation")

# One sentence per source line; the pieces concatenate into a single agenda string.
data_curation_agenda = (
    f"{background_prompt} {drug_modality_prompt} {disease_target_prompt} "
    "Now you need to collect existing data for GLP-1. "
    "Please explain where to find existing GLP-1 analog data that include peptide sequences with experimentally determined efficacy and stability. "
    "Specify 2-4 databases and how to identify and extract the relevant data from each database."
)

In [None]:
# Data curation - discussion
# Run the independent discussions in parallel, one submitted task per iteration.
with concurrent.futures.ThreadPoolExecutor() as executor:
    data_curation_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=EXPERIMENTAL_PHARMACOLOGIST,
            agenda=data_curation_agenda,
            save_dir=data_curation_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(data_curation_futures)

# wait() only blocks until the futures finish; it never re-raises what a worker raised.
# result() surfaces the first failed meeting here instead of letting it pass unnoticed.
for future in data_curation_futures:
    future.result()

In [None]:
# Data curation - merge
# glob() does not guarantee any ordering; sorting makes the list of paths handed to
# load_summaries reproducible across runs and machines.
data_curation_summaries = load_summaries(
    discussion_paths=sorted(data_curation_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(data_curation_summaries)}")
# Merging nothing would still call the model, so stop early if no discussion was loaded.
assert len(data_curation_summaries) > 0, f"No discussion summaries found in {data_curation_dir}"

data_curation_merge_prompt = create_merge_prompt(agenda=data_curation_agenda)

run_meeting(
    meeting_type="individual",
    team_member=EXPERIMENTAL_PHARMACOLOGIST,
    summaries=data_curation_summaries,
    agenda=data_curation_merge_prompt,
    save_dir=data_curation_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Model design

In [None]:
# Model design - prompts
model_design_dir = save_dir.joinpath("model_design")

# One sentence per source line; the pieces concatenate into a single agenda string.
model_design_agenda = (
    f"{background_prompt} {drug_modality_prompt} {disease_target_prompt} "
    "Now you must design a machine learning architecture to predict peptide efficacy and stability. "
    "Please write a complete Python script that implements a machine learning model to predict peptide efficacy and stability from peptide sequence. "
    "Be sure to include all necessary data processing, training, and evaluation code."
)

In [None]:
# Model design - discussion
# Run the independent discussions in parallel, one submitted task per iteration.
with concurrent.futures.ThreadPoolExecutor() as executor:
    model_design_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=MACHINE_LEARNING_SCIENTIST,
            agenda=model_design_agenda,
            agenda_rules=CODING_RULES,
            save_dir=model_design_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(model_design_futures)

# wait() only blocks until the futures finish; it never re-raises what a worker raised.
# result() surfaces the first failed meeting here instead of letting it pass unnoticed.
for future in model_design_futures:
    future.result()

In [None]:
# Model design - merge
# glob() does not guarantee any ordering; sorting makes the list of paths handed to
# load_summaries reproducible across runs and machines.
model_design_summaries = load_summaries(
    discussion_paths=sorted(model_design_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(model_design_summaries)}")
# Merging nothing would still call the model, so stop early if no discussion was loaded.
assert len(model_design_summaries) > 0, f"No discussion summaries found in {model_design_dir}"

model_design_merge_prompt = create_merge_prompt(
    agenda=model_design_agenda,
    agenda_rules=CODING_RULES,
)

run_meeting(
    meeting_type="individual",
    team_member=MACHINE_LEARNING_SCIENTIST,
    summaries=model_design_summaries,
    agenda=model_design_merge_prompt,
    save_dir=model_design_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)