In [None]:
import concurrent.futures
from pathlib import Path

from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import (
    BIOLOGIST,
    COMPUTER_SCIENTIST,
    DRUG_DISCOVERY_CONTEXTS,
    DRUG_DISCOVERY_TEAM,
    MERGE_PROMPT,
    PRINCIPAL_INVESTIGATOR
)
from run_meeting import run_meeting
from utils import load_summaries

In [None]:
# Key parameters shared by every stage of the notebook
model = "gpt-4o-2024-08-06"  # model identifier forwarded to every run_meeting call
save_dir = Path("drug_discovery")  # root output directory; each stage uses its own subdirectory
num_iterations, num_rounds = 5, 3  # parallel discussions per stage / num_rounds given to each discussion

## Project Selection

In [None]:
# Project selection - prompts
# Everything produced by this stage is saved under this subdirectory of save_dir.
project_selection_dir = save_dir.joinpath("project_selection")

# Agenda for the project selection meetings. The text is split into adjacent string
# literals (roughly one clause per line); Python concatenates them into one string.
project_selection_prompt = (
    "You are starting on a research project that is aiming to apply artificial intelligence to drug discovery. "
    "In this meeting, you need to select a specific research direction for this project. "
    "The primary considerations are: "
    "(1) the project must have high clinical value, meaning the research contributes to helping patients, "
    "(2) the project must involve the development of an artificial intelligence model, and "
    "(3) the project must use Emerald Cloud Labs (ECL) to validate the artificial intelligence model’s output, "
    "which means that any required wet lab experiments must be within the capabilities of ECL’s scientific instrumentation. "
    "Please determine a research project that meets these criteria. "
    "Please be as specific as possible in terms of the precise goal of the project and the experiments that will be run."
)

# Questions handed to the meetings as agenda_questions.
project_selection_questions = (
    "What is the specific research direction that you are proposing?",
    "What is the clinical value of this research direction?",
    "How will you develop an artificial intelligence model for this project?",
    "How will you use Emerald Cloud Labs (ECL) to validate the artificial intelligence model’s output?",
)

In [None]:
# Project selection - discussion
# Run num_iterations independent team meetings in parallel threads. Each meeting gets
# its own save_name (discussion_1 ... discussion_<num_iterations>) in project_selection_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=DRUG_DISCOVERY_TEAM,
            agenda=project_selection_prompt,
            agenda_questions=project_selection_questions,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(project_selection_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never re-raises.
# Calling result() on each future surfaces an exception raised inside run_meeting
# instead of letting a failed meeting pass silently.
for future in project_selection_futures:
    future.result()

In [None]:
# Project selection - merge
# Collect the summaries of every saved discussion. The paths are sorted because
# directory listing order is filesystem-dependent, and the merge meeting should
# receive the summaries in the same order on every run.
project_selection_summaries = load_summaries(
    discussion_paths=sorted(project_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Individual meeting in which the principal investigator works through MERGE_PROMPT
# with the discussion summaries; later cells load "merged.json" from this directory.
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Target Selection

In [None]:
# Target selection - prompts
# Everything produced by this stage is saved under this subdirectory of save_dir.
target_selection_dir = save_dir.joinpath("target_selection")

# Agenda for the target selection meetings, one sentence per adjacent string literal;
# Python concatenates them into one string.
target_selection_prompt = (
    "In the previous meeting, you settled on a general project direction (see summary). "
    "Now, you need to make that project more precisely defined. "
    "Please select one specific disease target and one specific drug modality for this target related to your prior discussion. "
    "Remember that you are constrained by the capabilities of Emerald Cloud Labs (ECL)."
)

# Questions handed to the meetings as agenda_questions.
target_selection_questions = (
    "What is the specific disease target that you are proposing?",
    "What is the specific drug modality that you are proposing?",
)

In [None]:
# Target selection - discussion
# Prior context for this stage: the merged summary of the project selection stage.
target_selection_prior_summaries = load_summaries(discussion_paths=[project_selection_dir / "merged.json"])
print(f"Number of summaries: {len(target_selection_prior_summaries)}")

# Run num_iterations independent team meetings in parallel threads. Each meeting gets
# its own save_name (discussion_1 ... discussion_<num_iterations>) in target_selection_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    target_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=DRUG_DISCOVERY_TEAM,
            agenda=target_selection_prompt,
            agenda_questions=target_selection_questions,
            save_dir=target_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=target_selection_prior_summaries,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(target_selection_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never re-raises.
# Calling result() on each future surfaces an exception raised inside run_meeting
# instead of letting a failed meeting pass silently.
for future in target_selection_futures:
    future.result()

In [None]:
# Target selection - merge
# Collect the summaries of every saved discussion. The paths are sorted because
# directory listing order is filesystem-dependent, and the merge meeting should
# receive the summaries in the same order on every run.
target_selection_summaries = load_summaries(
    discussion_paths=sorted(target_selection_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(target_selection_summaries)}")

# Individual meeting in which the principal investigator works through MERGE_PROMPT
# with the discussion summaries; later cells load "merged.json" from this directory.
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=target_selection_dir,
    save_name="merged",
    summaries=target_selection_summaries,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Drug Discovery Approach

In [None]:
# Drug discovery approach - prompts
# Everything produced by this stage is saved under this subdirectory of save_dir.
drug_discovery_approach_dir = save_dir.joinpath("drug_discovery_approach")

# Agenda for the drug discovery approach meetings, one sentence per adjacent string
# literal; Python concatenates them into one string.
drug_discovery_approach_prompt = (
    "In the previous meeting, you chose a specific disease target and drug modality (see summary). "
    "Now, you need to be more specific about the drug discovery process. "
    "Please design a specific drug discovery approach for this target and drug modality. "
    "Specify whether to design a new drug de novo or whether to modify and improve an existing but imperfect drug candidate. "
    "In either case, decide which exact properties you will optimize for in the drug that you design. "
    "Furthermore, please specify exactly what type of machine learning model you will use to accomplish this task. "
    "Decide on a specific dataset to train the model, and describe in detail how that model will be used to design new or improved drugs. "
    "If improving an existing drug, please specify which drug. "
    "Please note: an important constraint is that you only have three months and relatively limited experimental throughput. "
    "Remember that you are also constrained by the capabilities of Emerald Cloud Labs (ECL)."
)

# Questions handed to the meetings as agenda_questions.
drug_discovery_approach_questions = (
    "What is the specific drug discovery approach that you are proposing?",
    "What properties will you optimize for in the drug that you design?",
    "What type of machine learning model will you use for this task?",
    "What dataset will you use to train the model?",
    "How will the model be used to design new or improved drugs?",
    "If improving an existing drug, which drug will you improve?",
)

In [None]:
# Drug discovery approach - discussion
# Prior context for this stage: the merged summaries of the two earlier stages.
drug_discovery_approach_prior_summaries = load_summaries(
    discussion_paths=[project_selection_dir / "merged.json", target_selection_dir / "merged.json"])
print(f"Number of summaries: {len(drug_discovery_approach_prior_summaries)}")

# Run num_iterations independent team meetings in parallel threads. Each meeting gets
# its own save_name (discussion_1 ... discussion_<num_iterations>) in drug_discovery_approach_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    drug_discovery_approach_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=DRUG_DISCOVERY_TEAM,
            agenda=drug_discovery_approach_prompt,
            agenda_questions=drug_discovery_approach_questions,
            save_dir=drug_discovery_approach_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=drug_discovery_approach_prior_summaries,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(drug_discovery_approach_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never re-raises.
# Calling result() on each future surfaces an exception raised inside run_meeting
# instead of letting a failed meeting pass silently.
for future in drug_discovery_approach_futures:
    future.result()

In [None]:
# Drug discovery approach - merge
# Collect the summaries of every saved discussion. The paths are sorted because
# directory listing order is filesystem-dependent, and the merge meeting should
# receive the summaries in the same order on every run.
drug_discovery_approach_summaries = load_summaries(
    discussion_paths=sorted(drug_discovery_approach_dir.glob("discussion_*.json")))
print(f"Number of summaries: {len(drug_discovery_approach_summaries)}")

# Individual meeting in which the principal investigator works through MERGE_PROMPT
# with the discussion summaries; later cells load "merged.json" from this directory.
run_meeting(
    meeting_type="individual",
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=drug_discovery_approach_dir,
    save_name="merged",
    summaries=drug_discovery_approach_summaries,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Model Architecture

In [None]:
# Model architecture - prompts
# Everything produced by this stage is saved under this subdirectory of save_dir.
model_architecture_dir = save_dir / "model_architecture"

# Agenda for the model architecture meetings (the duplicated word in "implements the the
# general architecture" has been removed). The text is split into adjacent string
# literals that Python concatenates; only the piece that interpolates BIOLOGIST.title
# needs the f prefix.
model_architecture_prompt = (
    "In the previous meeting, you and your team chose a specific drug discovery approach (see summary). "
    "Now, you need to design a specific machine learning model architecture that implements the general architecture described in the previous meetings. "
    "Please specify the exact architecture of the machine learning model that you will use, "
    "including the required training data, the exact input/output data format, the neural architecture of the model, the loss function, and any other relevant details. "
    f"Please be as specific as possible so that the {BIOLOGIST.title} can curate an appropriate dataset for this model "
    "and so that you can later implement the model precisely in code (but do not implement it now)."
)

In [None]:
# Model architecture - discussion
# Prior context for this stage: the merged summaries of the three earlier stages.
model_architecture_prior_summaries = load_summaries(discussion_paths=[
    project_selection_dir / "merged.json",
    target_selection_dir / "merged.json",
    drug_discovery_approach_dir / "merged.json",
])
print(f"Number of summaries: {len(model_architecture_prior_summaries)}")

# Run num_iterations independent individual meetings with the computer scientist in
# parallel threads. Each meeting gets its own save_name (discussion_1 ...
# discussion_<num_iterations>) in model_architecture_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    model_architecture_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTER_SCIENTIST,
            agenda=model_architecture_prompt,
            save_dir=model_architecture_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
            summaries=model_architecture_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(model_architecture_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never re-raises.
# Calling result() on each future surfaces an exception raised inside run_meeting
# instead of letting a failed meeting pass silently.
for future in model_architecture_futures:
    future.result()

In [None]:
# Model architecture - merge
# Collect the summaries of every saved discussion. The paths are sorted because
# directory listing order is filesystem-dependent, and the merge meeting should
# receive the summaries in the same order on every run.
model_architecture_summaries = load_summaries(
    discussion_paths=sorted(model_architecture_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(model_architecture_summaries)}")

# Individual meeting in which the computer scientist works through MERGE_PROMPT
# with the discussion summaries; later cells load "merged.json" from this directory.
run_meeting(
    meeting_type="individual",
    team_member=COMPUTER_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=model_architecture_dir,
    save_name="merged",
    summaries=model_architecture_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Data Curation

In [None]:
# Data curation - prompts
# Everything produced by this stage is saved under this subdirectory of save_dir.
data_curation_dir = save_dir.joinpath("data_curation")

# Agenda for the data curation meetings, one sentence per adjacent string literal;
# Python concatenates them into one string. Only the piece that interpolates
# COMPUTER_SCIENTIST.title needs the f prefix.
data_curation_prompt = (
    "In the previous meetings, you and your team chose a specific drug discovery approach and designed a machine learning model architecture (see summary). "
    f"Now, you need to curate the dataset that the {COMPUTER_SCIENTIST.title} will use to train their machine learning model. "
    "The dataset you curate must be specifically designed for this machine learning model, with relevant data in the appropriate form for the model and without any extraneous data. "
    "Please specify the exact data source or sources that you will use, the precise steps needed to curate this dataset, and what the dataset will look like once it is curated. "
    "Please explain how this dataset is appropriate for training the machine learning model that your team has chosen."
)

In [None]:
# Data curation - discussion
# Prior context for this stage: the merged summaries of the four earlier stages.
data_curation_prior_summaries = load_summaries(discussion_paths=[
    project_selection_dir / "merged.json",
    target_selection_dir / "merged.json",
    drug_discovery_approach_dir / "merged.json",
    model_architecture_dir / "merged.json",
])
print(f"Number of summaries: {len(data_curation_prior_summaries)}")

# Run num_iterations independent individual meetings with the biologist in parallel
# threads. Each meeting gets its own save_name (discussion_1 ...
# discussion_<num_iterations>) in data_curation_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    data_curation_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=BIOLOGIST,
            agenda=data_curation_prompt,
            save_dir=data_curation_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
            summaries=data_curation_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(data_curation_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never re-raises.
# Calling result() on each future surfaces an exception raised inside run_meeting
# instead of letting a failed meeting pass silently.
for future in data_curation_futures:
    future.result()

In [None]:
# Data curation - merge
# Collect the summaries of every saved discussion. The paths are sorted because
# directory listing order is filesystem-dependent, and the merge meeting should
# receive the summaries in the same order on every run.
data_curation_summaries = load_summaries(
    discussion_paths=sorted(data_curation_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(data_curation_summaries)}")

# Individual meeting in which the biologist works through MERGE_PROMPT with the
# discussion summaries; later cells load "merged.json" from this directory.
run_meeting(
    meeting_type="individual",
    team_member=BIOLOGIST,
    agenda=MERGE_PROMPT,
    save_dir=data_curation_dir,
    save_name="merged",
    summaries=data_curation_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

## Model Implementation

In [None]:
# Model implementation - prompts
# Everything produced by this stage is saved under this subdirectory of save_dir.
model_implementation_dir = save_dir.joinpath("model_implementation")

# Agenda for the model implementation meetings, one sentence per adjacent string
# literal; Python concatenates them into one string. Only the piece that interpolates
# BIOLOGIST.title needs the f prefix.
model_implementation_prompt = (
    f"In the previous meeting, you designed a machine learning model architecture and the {BIOLOGIST.title} curated a dataset for training the model. "
    "Now, please implement the model precisely in code. "
    "Your implementation must be complete and self-sufficient (besides relevant imports). "
    "You must implement the model architecture as well as any code required to train, evaluate, and run the model."
)

In [None]:
# Model implementation - discussion
# Prior context for this stage: the merged summaries of the five earlier stages.
model_implementation_prior_summaries = load_summaries(discussion_paths=[
    project_selection_dir / "merged.json",
    target_selection_dir / "merged.json",
    drug_discovery_approach_dir / "merged.json",
    model_architecture_dir / "merged.json",
    data_curation_dir / "merged.json",
])
print(f"Number of summaries: {len(model_implementation_prior_summaries)}")

# Run num_iterations independent individual meetings with the computer scientist in
# parallel threads. Each meeting gets its own save_name (discussion_1 ...
# discussion_<num_iterations>) in model_implementation_dir.
with concurrent.futures.ThreadPoolExecutor() as executor:
    model_implementation_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=COMPUTER_SCIENTIST,
            agenda=model_implementation_prompt,
            save_dir=model_implementation_dir,
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
            summaries=model_implementation_prior_summaries,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(model_implementation_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never re-raises.
# Calling result() on each future surfaces an exception raised inside run_meeting
# instead of letting a failed meeting pass silently.
for future in model_implementation_futures:
    future.result()

In [None]:
# Model implementation - merge
# Collect the summaries of every saved discussion. The paths are sorted because
# directory listing order is filesystem-dependent, and the merge meeting should
# receive the summaries in the same order on every run.
model_implementation_summaries = load_summaries(
    discussion_paths=sorted(model_implementation_dir.glob("discussion_*.json"))
)
print(f"Number of summaries: {len(model_implementation_summaries)}")

# Individual meeting in which the computer scientist works through MERGE_PROMPT
# with the discussion summaries; the result is saved under the name "merged".
run_meeting(
    meeting_type="individual",
    team_member=COMPUTER_SCIENTIST,
    agenda=MERGE_PROMPT,
    save_dir=model_implementation_dir,
    save_name="merged",
    summaries=model_implementation_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)