In [None]:
import concurrent.futures
from pathlib import Path

from agent import Agent
from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    CODING_RULES,
    DRUG_DISCOVERY_CONTEXTS,
    PRINCIPAL_INVESTIGATOR,
    SCIENTIFIC_CRITIC,
    create_merge_prompt,
)
from run_meeting import run_meeting
from utils import load_summaries

In [None]:
# Key parameters shared by every stage below
save_dir = Path("drug_discovery")  # parent of each stage's save directory
model = "gpt-4o-2024-08-06"  # forwarded to run_meeting as model=
num_iterations = 5  # number of parallel discussions launched per stage
num_rounds = 3  # forwarded to run_meeting as num_rounds=

# Project framing that is prepended to every agenda in this notebook.
# Adjacent literals are concatenated at compile time into a single string.
background_prompt = (
    "You are working on a research project to use machine learning for drug discovery. "
    "Your goals are the following: "
    "(1) the project must have high clinical value, meaning the research contributes to helping patients, "
    "(2) the project must include a scientifically impactful application of machine learning to drug discovery, "
    "and (3) the project must use Emerald Cloud Labs (ECL) for all experimental validation with a 3-month limit on experiments."
)

## Select team members

In [None]:
# Select team members - prompts
# Directory handed to run_meeting as save_dir for the team-selection stage.
team_selection_dir = save_dir / "team_selection"

# Agenda for the Principal Investigator's individual meetings: the shared
# background, then a request to pick three scientists, then one Agent(...)
# definition embedded verbatim as the answer-format example.
# The text inside the triple quotes is prompt content, not executed code.
team_selection_agenda = f"""{background_prompt} You need to select a team of three scientists to help you with this project. Please select the team members that you would like to invite to work on the project. Please list the team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Agent(
    title="Principal Investigator",
    expertise="applying artificial intelligence to biomedical research",
    goal="perform research in your area of expertise that maximizes the scientific impact of the work",
    role="lead a team of experts to solve an important problem in artificial intelligence for biomedicine, make key decisions about the project direction based on team member input, and manage the project timeline and resources",
)
"""

In [None]:
# Select team members - discussion
# Launch num_iterations independent Principal Investigator meetings in parallel
# threads; each gets save_dir=team_selection_dir and a distinct save_name
# of the form discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=PRINCIPAL_INVESTIGATOR,
            agenda=team_selection_agenda,
            save_dir=team_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        ) for iteration_num in range(num_iterations)
    ]
    # Let every meeting finish before inspecting outcomes.
    concurrent.futures.wait(team_selection_futures)

# wait() only blocks; it never re-raises an exception raised inside a worker.
# result() does, so a failed meeting surfaces here instead of the cell
# finishing silently with fewer discussions than requested.
for future in team_selection_futures:
    future.result()

In [None]:
# Select team members - merge
# Collect every discussion_*.json in the stage directory. The glob result is
# sorted because directory listing order depends on the filesystem; sorting
# gives load_summaries the same path order on every run (plain string order,
# so discussion_10 would sort before discussion_2).
team_selection_summaries = load_summaries(discussion_paths=sorted(team_selection_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(team_selection_summaries)}")

# Merge prompt built from the same agenda used for the discussions.
team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

# Single Principal Investigator meeting over the loaded summaries,
# saved under save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    summaries=team_selection_summaries,
    agenda=team_selection_merge_prompt,
    save_dir=team_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

In [None]:
# Add team members
# The three scientist agents are hard-coded here.
# NOTE(review): presumably transcribed by hand from the merged team-selection
# meeting output — confirm; nothing in this notebook parses that output.
COMPUTATIONAL_BIOLOGIST = Agent(
    title="Computational Biologist",
    expertise="machine learning algorithms for drug target identification and validation",
    goal="develop and implement machine learning models for identifying potential drug targets with high clinical relevance",
    role="design and optimize machine learning models, collaborate with domain experts to ensure clinical applicability, and interpret the results from a biological perspective",
)

MEDICINAL_CHEMIST = Agent(
    title="Medicinal Chemist",
    expertise="drug design and synthesis with experience in virtual screening and molecular docking",
    goal="translate computational predictions into testable compounds and design experiments to validate these compounds using ECL",
    role="provide insights into chemical feasibility of predicted compounds, help prioritize compounds for synthesis, and coordinate experimental validation through ECL",
)

MACHINE_LEARNING_SPECIALIST = Agent(
    title="Machine Learning Specialist",
    expertise="advanced machine learning algorithms and their application in biomedical data",
    goal="design and implement innovative machine learning approaches to enhance drug discovery pipelines",
    role="lead the development of machine learning models, ensure their scientific rigor, and optimize them for predicting clinically relevant drug candidates",
)

# Members passed as team_members= to the team meetings below: the three agents
# defined above plus SCIENTIFIC_CRITIC imported from prompts. The Principal
# Investigator is not listed here; it is passed separately as team_lead.
team_members = (
    COMPUTATIONAL_BIOLOGIST,
    MEDICINAL_CHEMIST,
    MACHINE_LEARNING_SPECIALIST,
    SCIENTIFIC_CRITIC,
)

## Drug modality

In [None]:
# Drug modality - prompts
# Directory handed to run_meeting as save_dir for the drug-modality stage.
drug_modality_dir = save_dir / "drug_modality"

# Agenda: the shared background followed by the modality-selection task.
drug_modality_agenda = f"{background_prompt} In this meeting, you need to select a specific drug modality to pursue. Without deciding on a specific disease or target yet, please determine what drug modality would be most appropriate given the goals of the project and how it will be developed."

# Questions passed as agenda_questions= to both the team discussions and
# create_merge_prompt for this stage.
drug_modality_questions = (
    "What is the specific drug modality that you are proposing?",
    "For that drug modality, will you design new drugs de novo or modify and improve existing drugs (choose only one)?"
)

In [None]:
# Drug modality - discussion
# Launch num_iterations independent team meetings in parallel threads, led by
# the Principal Investigator; each gets save_dir=drug_modality_dir and a
# distinct save_name of the form discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    drug_modality_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            agenda=drug_modality_agenda,
            agenda_questions=drug_modality_questions,
            save_dir=drug_modality_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Let every meeting finish before inspecting outcomes.
    concurrent.futures.wait(drug_modality_futures)

# wait() only blocks; it never re-raises an exception raised inside a worker.
# result() does, so a failed meeting surfaces here instead of the cell
# finishing silently with fewer discussions than requested.
for future in drug_modality_futures:
    future.result()

In [None]:
# Drug modality - merge
# Collect every discussion_*.json in the stage directory. The glob result is
# sorted because directory listing order depends on the filesystem; sorting
# gives load_summaries the same path order on every run (plain string order,
# so discussion_10 would sort before discussion_2).
drug_modality_summaries = load_summaries(discussion_paths=sorted(drug_modality_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(drug_modality_summaries)}")

# Merge prompt built from the same agenda and questions used for the discussions.
drug_modality_merge_prompt = create_merge_prompt(
    agenda=drug_modality_agenda,
    agenda_questions=drug_modality_questions,
)

# Single Principal Investigator meeting over the loaded summaries,
# saved under save_name "merged".
# NOTE(review): this merge passes num_rounds, while the team-selection and
# data-curation merges do not — confirm which is intended.
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    summaries=drug_modality_summaries,
    agenda=drug_modality_merge_prompt,
    save_dir=drug_modality_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)

In [None]:
# Hard-coded statement of the drug-modality decision; it is interpolated into
# the agendas of all later stages.
drug_modality_prompt = (
    "Your team previously decided to develop peptide-based therapeutics "
    "by modifying and improving existing peptides."
)

## Target selection

In [None]:
# Target selection - prompts
# Directory handed to run_meeting as save_dir for the target-selection stage.
target_selection_dir = save_dir / "target_selection"

# Agenda: shared background, the recorded drug-modality decision, then the
# request for five disease/target pairs. The wording names peptide-based
# therapeutics explicitly, so it must stay in step with drug_modality_prompt.
target_selection_agenda = f"{background_prompt} {drug_modality_prompt} Now you need to select a specific disease and a specific drug target for that disease. Please suggest five potential disease/target pairs that would be most appropriate for peptide-based therapeutics."

# Questions passed as agenda_questions= to both the team discussions and
# create_merge_prompt for this stage.
target_selection_questions = (
    "What specific disease/target pairs are you proposing (list five)?",
    "Why are these diseases and targets appropriate for peptide-based therapeutics?",
    "For each disease/target pair, which specific peptide or peptides will be modified?",
    "For each disease/target pair, in addition to efficacy (binding), which other peptide property or properties should be improved?",
)

In [None]:
# Target selection - discussion
# The merged drug-modality meeting is loaded and passed to every discussion
# as prior summaries.
target_selection_prior_summaries = load_summaries(discussion_paths=[drug_modality_dir / "merged.json"])
print(f"Number of summaries: {len(target_selection_prior_summaries)}")

# Launch num_iterations independent team meetings in parallel threads; each
# gets save_dir=target_selection_dir and a distinct save_name discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    target_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=team_members,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            summaries=target_selection_prior_summaries,
            agenda=target_selection_agenda,
            agenda_questions=target_selection_questions,
            save_dir=target_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Let every meeting finish before inspecting outcomes.
    concurrent.futures.wait(target_selection_futures)

# wait() only blocks; it never re-raises an exception raised inside a worker.
# result() does, so a failed meeting surfaces here instead of the cell
# finishing silently with fewer discussions than requested.
for future in target_selection_futures:
    future.result()

In [None]:
# Target selection - merge
# Collect every discussion_*.json in the stage directory. The glob result is
# sorted because directory listing order depends on the filesystem; sorting
# gives load_summaries the same path order on every run (plain string order,
# so discussion_10 would sort before discussion_2).
target_selection_summaries = load_summaries(discussion_paths=sorted(target_selection_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(target_selection_summaries)}")

# Merge prompt built from the same agenda and questions used for the discussions.
target_selection_merge_prompt = create_merge_prompt(
    agenda=target_selection_agenda,
    agenda_questions=target_selection_questions,
)

# Single Principal Investigator meeting over the loaded summaries,
# saved under save_name "merged".
# NOTE(review): this merge passes num_rounds, while the team-selection and
# data-curation merges do not — confirm which is intended.
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    summaries=target_selection_summaries,
    agenda=target_selection_merge_prompt,
    save_dir=target_selection_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
    num_rounds=num_rounds,
)

In [None]:
# Hard-coded statement of the disease/target decision; it is interpolated into
# the agendas of the data-curation, model-design and experiment-design stages.
disease_target_prompt = (
    "Your team previously decided to pursue Type 2 Diabetes by improving existing GLP-1 analogs "
    "with the goal of improving target binding and stability."
)

## Data curation

In [None]:
# Data curation - prompts
# Directory handed to run_meeting as save_dir for the data-curation stage.
data_curation_dir = save_dir / "data_curation"

# Agenda: shared background plus the two recorded decisions (modality and
# disease/target), followed by the request to locate existing GLP-1 analog data.
data_curation_agenda = f"{background_prompt} {drug_modality_prompt} {disease_target_prompt} Now you need to collect existing data for GLP-1. Please explain where to find existing GLP-1 analog data that include peptide sequences with experimentally determined binding and stability. Specify a few databases and how to identify and extract the relevant data from each database."

In [None]:
# Data curation - discussion
# Launch num_iterations independent Computational Biologist meetings in
# parallel threads; each gets save_dir=data_curation_dir and a distinct
# save_name of the form discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    data_curation_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTATIONAL_BIOLOGIST,
            agenda=data_curation_agenda,
            save_dir=data_curation_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Let every meeting finish before inspecting outcomes.
    concurrent.futures.wait(data_curation_futures)

# wait() only blocks; it never re-raises an exception raised inside a worker.
# result() does, so a failed meeting surfaces here instead of the cell
# finishing silently with fewer discussions than requested.
for future in data_curation_futures:
    future.result()

In [None]:
# Data curation - merge
# Collect every discussion_*.json in the stage directory. The glob result is
# sorted because directory listing order depends on the filesystem; sorting
# gives load_summaries the same path order on every run (plain string order,
# so discussion_10 would sort before discussion_2).
data_curation_summaries = load_summaries(discussion_paths=sorted(data_curation_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(data_curation_summaries)}")

# Merge prompt built from the same agenda used for the discussions.
data_curation_merge_prompt = create_merge_prompt(agenda=data_curation_agenda)

# Single Computational Biologist meeting over the loaded summaries,
# saved under save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=COMPUTATIONAL_BIOLOGIST,
    summaries=data_curation_summaries,
    agenda=data_curation_merge_prompt,
    save_dir=data_curation_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Model design

In [None]:
# Model design - prompts
# Directory handed to run_meeting as save_dir for the model-design stage.
model_design_dir = save_dir / "model_design"

# Agenda: shared background plus the two recorded decisions (modality and
# disease/target), followed by the request for a complete Python script that
# predicts peptide binding and stability from sequence.
model_design_agenda = f"{background_prompt} {drug_modality_prompt} {disease_target_prompt} Now you must design a machine learning model to predict peptide binding and stability. Please write a complete Python script that implements a machine learning model to predict peptide binding and stability from peptide sequence. Be sure to include all necessary data processing, training, and evaluation code."

In [None]:
# Model design - discussion
# Launch num_iterations independent Machine Learning Specialist meetings in
# parallel threads, with CODING_RULES as agenda_rules; each gets
# save_dir=model_design_dir and a distinct save_name discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    model_design_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=MACHINE_LEARNING_SPECIALIST,
            agenda=model_design_agenda,
            agenda_rules=CODING_RULES,
            save_dir=model_design_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Let every meeting finish before inspecting outcomes.
    concurrent.futures.wait(model_design_futures)

# wait() only blocks; it never re-raises an exception raised inside a worker.
# result() does, so a failed meeting surfaces here instead of the cell
# finishing silently with fewer discussions than requested.
for future in model_design_futures:
    future.result()

In [None]:
# Model design - merge
# Collect every discussion_*.json in the stage directory. The glob result is
# sorted because directory listing order depends on the filesystem; sorting
# gives load_summaries the same path order on every run (plain string order,
# so discussion_10 would sort before discussion_2).
model_design_summaries = load_summaries(discussion_paths=sorted(model_design_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(model_design_summaries)}")

# Merge prompt built from the same agenda and rules used for the discussions.
model_design_merge_prompt = create_merge_prompt(
    agenda=model_design_agenda,
    agenda_rules=CODING_RULES,
)

# Single Machine Learning Specialist meeting over the loaded summaries,
# saved under save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=MACHINE_LEARNING_SPECIALIST,
    summaries=model_design_summaries,
    agenda=model_design_merge_prompt,
    save_dir=model_design_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Experiment design

In [None]:
# Experiment design - prompts
# Directory handed to run_meeting as save_dir for the experiment-design stage.
experiment_design_dir = save_dir / "experiment_design"

# Agenda: shared background plus the two recorded decisions (modality and
# disease/target), followed by the request for a detailed ECL-only protocol
# for synthesizing and testing modified GLP-1 analogs.
experiment_design_agenda = f"{background_prompt} {drug_modality_prompt} {disease_target_prompt} Now you need to design an experimental protocol for evaluating modified GLP-1 analogs. Please write out a precise protocol for synthesizing and testing the modified GLP-1 analogs for binding and stability. You must exclusively use ECL for all experimental validation. In your protocol, name the specific ECL experiments that will be performed and the expected results for each experiment. Additionally, list all reagents that will be needed for each ECL experiment along with the required quantities. Make sure the protocol is as detailed as possible. Include controls and replicates where necessary."

In [None]:
# Experiment design - discussion
# Launch num_iterations independent Medicinal Chemist meetings in parallel
# threads; each gets save_dir=experiment_design_dir and a distinct save_name
# of the form discussion_<n>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    experiment_design_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=MEDICINAL_CHEMIST,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            agenda=experiment_design_agenda,
            save_dir=experiment_design_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            model=model,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Let every meeting finish before inspecting outcomes.
    concurrent.futures.wait(experiment_design_futures)

# wait() only blocks; it never re-raises an exception raised inside a worker.
# result() does, so a failed meeting surfaces here instead of the cell
# finishing silently with fewer discussions than requested.
for future in experiment_design_futures:
    future.result()

In [None]:
# Experiment design - merge
# Collect every discussion_*.json in the stage directory. The glob result is
# sorted because directory listing order depends on the filesystem; sorting
# gives load_summaries the same path order on every run (plain string order,
# so discussion_10 would sort before discussion_2).
experiment_design_summaries = load_summaries(discussion_paths=sorted(experiment_design_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(experiment_design_summaries)}")

# Merge prompt built from the same agenda used for the discussions.
experiment_design_merge_prompt = create_merge_prompt(agenda=experiment_design_agenda)

# Single Medicinal Chemist meeting over the loaded summaries,
# saved under save_name "merged".
run_meeting(
    meeting_type="individual",
    team_member=MEDICINAL_CHEMIST,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    summaries=experiment_design_summaries,
    agenda=experiment_design_merge_prompt,
    save_dir=experiment_design_dir,
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)