In [1]:
import concurrent.futures
from pathlib import Path

from constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from prompts import DRUG_DISCOVERY_CONTEXTS, DRUG_DISCOVERY_TEAM, MERGE_PROMPT, PRINCIPAL_INVESTIGATOR
from run_individual_meeting import run_individual_meeting
from run_scientific_meeting import run_scientific_meeting
from utils import load_summaries

In [2]:
# Key parameters shared by every stage of the notebook.

# Model name forwarded as `model=` to each meeting call.
model: str = "gpt-4o"

# Root output directory; each stage below creates its own sub-directory under it.
save_dir: Path = Path("drug_discovery")

# Number of independent discussions launched in parallel per stage.
num_iterations: int = 5

# Forwarded as `num_rounds=` to every run_scientific_meeting call.
num_rounds: int = 3

In [3]:
# Project selection - prompts
# Output location for this stage's discussions and merged summary.
project_selection_dir = save_dir.joinpath("project_selection")

# Agenda text for the project-selection meetings. Written as adjacent string
# literals (one per sentence/clause) that concatenate into a single prompt.
project_selection_prompt = (
    "You are starting on a research project that is aiming to apply artificial intelligence to drug discovery. "
    "In this meeting, you need to select a specific research direction for this project. "
    "The primary considerations are: "
    "(1) the project must have high clinical value, meaning the research contributes to helping patients, "
    "(2) the project must involve the development of an artificial intelligence model, and "
    "(3) the project must use Emerald Cloud Labs (ECL) to validate the artificial intelligence model’s output, "
    "which means that any required wet lab experiments must be within the capabilities of ECL’s scientific instrumentation. "
    "Please determine a research project that meets these criteria. "
    "Please be as specific as possible in terms of the precise goal of the project and the experiments that will be run."
)

# Questions passed as `agenda_questions=` alongside the agenda above.
project_selection_questions = (
    "What is the specific research direction that you are proposing?",
    "What is the clinical value of this research direction?",
    "How will you develop an artificial intelligence model for this project?",
    "How will you use Emerald Cloud Labs (ECL) to validate the artificial intelligence model’s output?",
)

In [4]:
# Project selection - discussion
# Launch `num_iterations` independent meetings in parallel threads. Each call
# receives the same agenda and a distinct save_name (discussion_1, discussion_2, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_selection_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=DRUG_DISCOVERY_TEAM,
            agenda=project_selection_prompt,
            agenda_questions=project_selection_questions,
            save_dir=project_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

    # Let every meeting finish first so one failure does not hide the others' progress.
    concurrent.futures.wait(project_selection_futures)

    # wait() only reports completion; an exception raised inside a worker stays
    # stored on its Future. Calling result() re-raises it here so a failed meeting
    # stops the notebook instead of silently feeding fewer discussions to the merge.
    for project_selection_future in project_selection_futures:
        project_selection_future.result()




Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A




Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A







Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A




Team:  17%|█▋        | 1/6 [00:07<00:35,  7.05s/it][A[A[A[A[A








Team:  17%|█▋        | 1/6 [00:07<00:37,  7.41s/it][A[A[A[A[A[A[A[A[A







Team:  17%|█▋        | 1/6 [00:07<00:37,  7.55s/it][A[A[A[A[A[A[A[A






Team:  17%|█▋        | 1/6 [00:08<00:43,  8.65s/it][A[A[A[A[A[A[A





Team:  17%|█▋        | 1/6 

Input token count: 177,494
Output token count: 11,955
Max token length: 16,920
Cost: $1.07
Time: 2:32











Team: 100%|██████████| 6/6 [00:56<00:00,  9.49s/it][A[A[A[A[A[A[A[A[A


Rounds (+ Summary Round):  75%|███████▌  | 3/4 [02:33<00:52, 52.37s/it][A[A
Team:   0%|          | 0/6 [00:00<?, ?it/s][A




Team:  83%|████████▎ | 5/6 [00:45<00:09,  9.18s/it][A[A[A[A[A





Team:  83%|████████▎ | 5/6 [00:54<00:11, 11.17s/it][A[A[A[A[A[A






Team:  83%|████████▎ | 5/6 [00:53<00:09,  9.93s/it][A[A[A[A[A[A[A




Team: 100%|██████████| 6/6 [00:55<00:00,  9.22s/it][A[A[A[A[A



Rounds (+ Summary Round):  75%|███████▌  | 3/4 [02:43<00:55, 55.05s/it][A[A[A




Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A





Team: 100%|██████████| 6/6 [01:04<00:00, 10.70s/it][A[A[A[A[A[A
Rounds (+ Summary Round):  75%|███████▌  | 3/4 [02:45<00:56, 56.90s/it]





Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A






Team: 100%|██████████| 6/6 [01:04<00:00, 10.83s/it][A[A[A[A[A[A[A




Rounds (+ Summary Round):  75%|███████▌  | 3/

Input token count: 187,280
Output token count: 13,293
Max token length: 18,258
Cost: $1.14
Time: 2:49


Team:   0%|          | 0/6 [00:14<?, ?it/s]



Rounds (+ Summary Round): 100%|██████████| 4/4 [02:57<00:00, 44.50s/it][A[A[A


Input token count: 196,500
Output token count: 14,053
Max token length: 19,018
Cost: $1.19
Time: 2:58


Team:   0%|          | 0/6 [00:15<?, ?it/s]
Rounds (+ Summary Round): 100%|██████████| 4/4 [03:00<00:00, 45.14s/it]


Input token count: 190,408
Output token count: 13,831
Max token length: 18,796
Cost: $1.16
Time: 3:00


Team:   0%|          | 0/6 [00:15<?, ?it/s]




Rounds (+ Summary Round): 100%|██████████| 4/4 [03:03<00:00, 45.89s/it][A[A[A[A

Input token count: 192,240
Output token count: 13,352
Max token length: 18,317
Cost: $1.16
Time: 3:03





In [5]:
# Project selection - merge
# Path.glob yields matches in arbitrary (filesystem-dependent) order. Sort the
# paths so the summaries are handed to the merge meeting in a reproducible order;
# the merged answer refers to them by position (e.g. "the third summary").
# NOTE(review): assumes load_summaries preserves the order of discussion_paths - confirm.
# The sort is lexicographic, so "discussion_10" would sort before "discussion_2".
project_selection_discussion_paths = sorted(project_selection_dir.glob("discussion_*.json"))
project_selection_summaries = load_summaries(discussion_paths=project_selection_discussion_paths)
print(f"Number of summaries: {len(project_selection_summaries)}")

# Single-agent meeting that merges the parallel discussions; saved as "merged".
# Left as the cell's last expression so the notebook displays its return value.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=project_selection_dir,
    save_name="merged",
    summaries=project_selection_summaries,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:15<00:00, 15.76s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:15<00:00, 15.76s/it]

Input token count: 9,135
Output token count: 1,065
Max token length: 10,200
Cost: $0.06
Time: 0:15





'### Answers\n\n**1. What is the specific research direction that you are proposing?**\n\n**Answer:** Developing a predictive AI model for drug-target interactions (DTIs) focusing on high-priority targets in neurodegenerative diseases (e.g., amyloid-beta, tau, alpha-synuclein), cancers (e.g., EGFR, BRAF, immune checkpoints), and infectious diseases (e.g., antibiotic-resistant bacteria, viral proteins). This approach will balance novel drug discovery with drug repurposing.\n\n**Justification:** This direction leverages existing data and targets that have high clinical relevance and significant unmet needs. It ensures the feasibility of the project while addressing important medical challenges. The inclusion of multiple disease areas allows for a broader impact and the potential for cross-disease insights.\n\n**Components:** This answer integrates the focus on neurodegenerative diseases from the third summary, the inclusion of cancers and infectious diseases from the fourth summary, and 

In [6]:
# Target selection - prompts
# Output location for this stage's discussions and merged summary.
target_selection_dir = save_dir.joinpath("target_selection")

# Agenda text for the target-selection meetings. Written as adjacent string
# literals (one per sentence) that concatenate into a single prompt.
target_selection_prompt = (
    "In the previous meeting, you settled on a general project direction (see summary). "
    "Now, you need to make that project more precisely defined. "
    "Please select one specific disease target and one specific drug modality for this target related to your prior discussion. "
    "Remember that you are constrained by the capabilities of Emerald Cloud Labs (ECL)."
)

# Questions passed as `agenda_questions=` alongside the agenda above.
target_selection_questions = (
    "What is the specific disease target that you are proposing?",
    "What is the specific drug modality that you are proposing?",
)

In [7]:
# Target selection - discussion
# Context for this stage: the merged summary from project selection.
summaries = load_summaries(discussion_paths=[project_selection_dir / "merged.json"])
print(f"Number of summaries: {len(summaries)}")

# Launch `num_iterations` independent meetings in parallel threads. Each call
# receives the same agenda and a distinct save_name (discussion_1, discussion_2, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    target_selection_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=DRUG_DISCOVERY_TEAM,
            agenda=target_selection_prompt,
            agenda_questions=target_selection_questions,
            save_dir=target_selection_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=summaries,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

    # Let every meeting finish first so one failure does not hide the others' progress.
    concurrent.futures.wait(target_selection_futures)

    # wait() only reports completion; an exception raised inside a worker stays
    # stored on its Future. Calling result() re-raises it here so a failed meeting
    # stops the notebook instead of silently feeding fewer discussions to the merge.
    for target_selection_future in target_selection_futures:
        target_selection_future.result()

Number of summaries: 1


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s]

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A
Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A







Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A





Team:  17%|█▋        | 1/6 [00:05<00:26,  5.34s/it][A[A[A[A[A[A







Team:  17%|█▋        | 1/6 [00:06<00:32,  6.53s/it][A[A[A[A[A[A[A[A






Team:  17%|█▋        | 1/6 [00:06<00:33,  6.77s/it][A[A[A[A[A[A[A




Team:  17%|█▋        | 1/6 [00:07<00:35,  7.06s/

Input token count: 179,741
Output token count: 10,182
Max token length: 15,989
Cost: $1.05
Time: 2:19







Team:  67%|██████▋   | 4/6 [00:34<00:16,  8.24s/it][A[A[A[A[A






Team: 100%|██████████| 6/6 [00:50<00:00,  8.36s/it][A[A[A[A[A[A[A

Rounds (+ Summary Round):  75%|███████▌  | 3/4 [02:25<00:48, 48.79s/it][A


Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A





Team:  67%|██████▋   | 4/6 [00:46<00:23, 11.67s/it][A[A[A[A[A[A




Team:   0%|          | 0/6 [00:13<?, ?it/s].07s/it][A[A[A[A[A




Rounds (+ Summary Round): 100%|██████████| 4/4 [02:29<00:00, 37.43s/it][A[A[A[A


Input token count: 186,849
Output token count: 11,301
Max token length: 17,108
Cost: $1.10
Time: 2:29








Team:   0%|          | 0/6 [00:12<?, ?it/s].59s/it][A[A[A[A[A[A

Rounds (+ Summary Round): 100%|██████████| 4/4 [02:38<00:00, 39.61s/it][A


Input token count: 200,756
Output token count: 12,379
Max token length: 18,186
Cost: $1.19
Time: 2:38







Team: 100%|██████████| 6/6 [00:53<00:00,  8.86s/it][A[A[A[A[A
Rounds (+ Summary Round):  75%|███████▌  | 3/4 [02:40<00:53, 53.57s/it]
Team:   0%|          | 0/6 [00:00<?, ?it/s][A





Team: 100%|██████████| 6/6 [01:05<00:00, 10.92s/it][A[A[A[A[A[A


Rounds (+ Summary Round):  75%|███████▌  | 3/4 [02:45<00:57, 57.13s/it][A[A


Team:   0%|          | 0/6 [00:12<?, ?it/s][A[A[A
Rounds (+ Summary Round): 100%|██████████| 4/4 [02:52<00:00, 43.13s/it]


Input token count: 208,216
Output token count: 12,978
Max token length: 18,785
Cost: $1.24
Time: 2:52


Team:   0%|          | 0/6 [00:13<?, ?it/s]


Rounds (+ Summary Round): 100%|██████████| 4/4 [02:58<00:00, 44.63s/it][A[A

Input token count: 209,911
Output token count: 13,942
Max token length: 19,749
Cost: $1.26
Time: 2:58





In [8]:
# Target selection - merge
# Path.glob yields matches in arbitrary (filesystem-dependent) order. Sort the
# paths so the summaries are handed to the merge meeting in a reproducible order.
# NOTE(review): assumes load_summaries preserves the order of discussion_paths - confirm.
# The sort is lexicographic, so "discussion_10" would sort before "discussion_2".
target_selection_discussion_paths = sorted(target_selection_dir.glob("discussion_*.json"))
target_selection_summaries = load_summaries(discussion_paths=target_selection_discussion_paths)
print(f"Number of summaries: {len(target_selection_summaries)}")

# Single-agent meeting that merges the parallel discussions; saved as "merged".
# Left as the cell's last expression so the notebook displays its return value.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=target_selection_dir,
    save_name="merged",
    summaries=target_selection_summaries,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:11<00:00, 11.67s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:11<00:00, 11.68s/it]

Input token count: 8,500
Output token count: 1,009
Max token length: 9,509
Cost: $0.06
Time: 0:11





"### Agenda\n\nOur objective today is to refine our research focus by selecting a specific disease target and a corresponding drug modality, aligned with the capabilities of Emerald Cloud Labs (ECL). This will help us develop a precise and actionable research plan for our AI-driven drug discovery project.\n\n### Answers\n\n**1. What is the specific disease target that you are proposing?**\n\n**Answer:** Alzheimer's Disease, with dual targets of amyloid-beta and tau proteins.\n\n**Justification:** Alzheimer's Disease has a significant unmet clinical need, and targeting both amyloid-beta and tau proteins addresses multiple pathological processes, offering a comprehensive approach. The availability of robust data and assays for both targets supports their feasibility. This dual-target approach leverages the extensive datasets available and aligns with the clinical urgency highlighted by the team.\n\n**2. What is the specific drug modality that you are proposing?**\n\n**Answer:** Peptide-b

In [9]:
# Drug discovery approach - prompts
# Output location for this stage's discussions and merged summary.
drug_discovery_approach_dir = save_dir.joinpath("drug_discovery_approach")

# Agenda text for the drug-discovery-approach meetings. Written as adjacent
# string literals (one per sentence) that concatenate into a single prompt.
drug_discovery_approach_prompt = (
    "In the previous meeting, you chose a specific disease target and drug modality (see summary). "
    "Now, you need to be more specific about the drug discovery process. "
    "Please design a specific drug discovery approach for this target and drug modality. "
    "Specify whether to design a new drug de novo or whether to modify and improve an existing but imperfect drug candidate. "
    "In either case, decide which exact properties you will optimize for in the drug that you design. "
    "Furthermore, please specify exactly what type of machine learning model you will use to accomplish this task. "
    "Decide on a specific dataset to train the model, and describe in detail how that model will be used to design new or improved drugs. "
    "If improving an existing drug, please specify which drug. "
    "Please note: an important constraint is that you only have three months and relatively limited experimental throughput. "
    "Remember that you are also constrained by the capabilities of Emerald Cloud Labs (ECL)."
)

# Questions passed as `agenda_questions=` alongside the agenda above.
drug_discovery_approach_questions = (
    "What is the specific drug discovery approach that you are proposing?",
    "What properties will you optimize for in the drug that you design?",
    "What type of machine learning model will you use for this task?",
    "What dataset will you use to train the model?",
    "How will the model be used to design new or improved drugs?",
    "If improving an existing drug, which drug will you improve?",
)

In [10]:
# Drug discovery approach - discussion
# Context for this stage: the merged summaries from project selection and target selection.
summaries = load_summaries(discussion_paths=[project_selection_dir / "merged.json", target_selection_dir / "merged.json"])
# Same sanity check as the target-selection discussion cell: confirm both prior summaries loaded.
print(f"Number of summaries: {len(summaries)}")

# Launch `num_iterations` independent meetings in parallel threads. Each call
# receives the same agenda and a distinct save_name (discussion_1, discussion_2, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    drug_discovery_approach_futures = [
        executor.submit(
            run_scientific_meeting,
            team_lead=PRINCIPAL_INVESTIGATOR,
            team_members=DRUG_DISCOVERY_TEAM,
            agenda=drug_discovery_approach_prompt,
            agenda_questions=drug_discovery_approach_questions,
            save_dir=drug_discovery_approach_dir,
            save_name=f"discussion_{iteration_num + 1}",
            num_rounds=num_rounds,
            summaries=summaries,
            contexts=DRUG_DISCOVERY_CONTEXTS,
            temperature=CREATIVE_TEMPERATURE,
            model=model,
        )
        for iteration_num in range(num_iterations)
    ]

    # Let every meeting finish first so one failure does not hide the others' progress.
    concurrent.futures.wait(drug_discovery_approach_futures)

    # wait() only reports completion; an exception raised inside a worker stays
    # stored on its Future. Calling result() re-raises it here so a failed meeting
    # stops the notebook instead of silently feeding fewer discussions to the merge.
    for drug_discovery_approach_future in drug_discovery_approach_futures:
        drug_discovery_approach_future.result()


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A


Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A

Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A



Rounds (+ Summary Round):   0%|          | 0/4 [00:00<?, ?it/s][A[A[A[A




Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A





Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A






Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A







Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








Team:   0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A








Team:  17%|█▋        | 1/6 [00:05<00:29,  5.87s/it][A[A[A[A[A[A[A[A[A





Team:  17%|█▋        | 1/6 [00:06<00:33,  6.61s/it][A[A[A[A[A[A







Team:  17%|█▋        | 1/6 [00:07<00:35,  7.09s/it][A[A[A[A[A[A[A[A




Team:  17%|█▋        | 1/6 [00:07<00:35,  7.13s/it][A[A[A[A[A








Team:  33%|███▎      | 2/6 [

Input token count: 216,634
Output token count: 11,935
Max token length: 19,108
Cost: $1.26
Time: 2:29









Team:   0%|          | 0/6 [00:18<?, ?it/s].47s/it][A[A[A[A[A[A[A
Rounds (+ Summary Round): 100%|██████████| 4/4 [02:37<00:00, 39.45s/it]


Input token count: 208,366
Output token count: 11,208
Max token length: 18,381
Cost: $1.21
Time: 2:37










Team:   0%|          | 0/6 [00:18<?, ?it/s].40s/it][A[A[A[A[A[A[A[A

Rounds (+ Summary Round): 100%|██████████| 4/4 [02:47<00:00, 41.88s/it][A







Team:  67%|██████▋   | 4/6 [00:44<00:22, 11.12s/it][A[A[A[A[A[A[A

Input token count: 228,345
Output token count: 13,298
Max token length: 20,471
Cost: $1.34
Time: 2:47










Team: 100%|██████████| 6/6 [01:07<00:00, 11.33s/it][A[A[A[A[A[A[A[A


Team:   0%|          | 0/6 [00:00<?, ?it/s]3/4 [02:55<01:01, 61.21s/it][A[A






Team:  83%|████████▎ | 5/6 [00:56<00:11, 11.57s/it][A[A[A[A[A[A[A






Team: 100%|██████████| 6/6 [01:09<00:00, 11.66s/it][A[A[A[A[A[A[A



Rounds (+ Summary Round):  75%|███████▌  | 3/4 [03:13<01:06, 66.06s/it][A[A[A
Team:   0%|          | 0/6 [00:19<?, ?it/s][A


Rounds (+ Summary Round): 100%|██████████| 4/4 [03:14<00:00, 48.65s/it][A[A


Input token count: 237,684
Output token count: 15,142
Max token length: 22,315
Cost: $1.42
Time: 3:14


Team:   0%|          | 0/6 [00:18<?, ?it/s]



Rounds (+ Summary Round): 100%|██████████| 4/4 [03:31<00:00, 52.91s/it][A[A[A

Input token count: 259,554
Output token count: 16,902
Max token length: 24,075
Cost: $1.55
Time: 3:31





In [11]:
# Drug discovery approach - merge
# Path.glob yields matches in arbitrary (filesystem-dependent) order. Sort the
# paths so the summaries are handed to the merge meeting in a reproducible order.
# NOTE(review): assumes load_summaries preserves the order of discussion_paths - confirm.
# The sort is lexicographic, so "discussion_10" would sort before "discussion_2".
drug_discovery_approach_discussion_paths = sorted(drug_discovery_approach_dir.glob("discussion_*.json"))
drug_discovery_approach_summaries = load_summaries(discussion_paths=drug_discovery_approach_discussion_paths)
print(f"Number of summaries: {len(drug_discovery_approach_summaries)}")

# Single-agent meeting that merges the parallel discussions; saved as "merged".
# Left as the cell's last expression so the notebook displays its return value.
run_individual_meeting(
    team_member=PRINCIPAL_INVESTIGATOR,
    agenda=MERGE_PROMPT,
    save_dir=drug_discovery_approach_dir,
    save_name="merged",
    summaries=drug_discovery_approach_summaries,
    contexts=DRUG_DISCOVERY_CONTEXTS,
    temperature=CONSISTENT_TEMPERATURE,
    model=model,
)

Number of summaries: 5


Critiques (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
Agents:   0%|          | 0/1 [00:00<?, ?it/s][A
Agents: 100%|██████████| 1/1 [00:15<00:00, 15.71s/it][A
Critiques (+ Final Round): 100%|██████████| 1/1 [00:15<00:00, 15.72s/it]

Input token count: 9,792
Output token count: 1,598
Max token length: 11,390
Cost: $0.07
Time: 0:15





'### Agenda\n\nOur primary goal is to design a specific drug discovery approach for targeting Alzheimer\'s Disease using peptide-based therapeutics. We need to decide whether to design a new drug de novo or modify an existing but imperfect drug candidate. We will specify the exact properties to optimize for, the type of machine learning model to use, the dataset for training the model, and how the model will be employed to design or improve the drug. We must also consider our constraints, including a three-month timeline and limited experimental throughput, leveraging the capabilities of Emerald Cloud Labs (ECL).\n\n### Team Member Input\n\n**Clinician:**\n- **Approach:** Modify an existing peptide to save time and leverage existing data.\n- **Properties:** Prioritize specificity, binding affinity, stability, BBB penetration, reduced immunogenicity, and focus on clinical endpoints like cognitive function improvement, amyloid plaques and tau tangles reduction, quality of life, and safet