In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import os

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')




In [2]:
# File to Load (Remember to Change These)
clinical_df = pd.read_csv("raw_data/clinicaltrial_data.csv")
mouse_df = pd.read_csv("raw_data/mouse_drug_data.csv")


In [3]:
# Read the Clinical Trial Data
clinical_df.head(5)

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.0,0
1,f932,0,45.0,0
2,g107,0,45.0,0
3,a457,0,45.0,0
4,c819,0,45.0,0


In [4]:
# Read the Mouse Data
mouse_df.head(5)

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn


In [12]:
# Combine the data into a single dataset
# Merge two dataframes using a left join

drugtrial_df = pd.merge(clinical_df, mouse_df, on="Mouse ID", how="left")

# Filter ONLY four treatments (Capomulin, Infubinol, Ketapril, and Placebo) 
drugtrial_df = drugtrial_df[(drugtrial_df["Drug"] == 'Capomulin') |
                      (drugtrial_df["Drug"] == 'Infubinol') |
                      (drugtrial_df["Drug"] == 'Ketapril') |
                      (drugtrial_df["Drug"] == 'Placebo')
                     ]

drugtrial_df.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,f932,0,45.0,0,Ketapril
2,g107,0,45.0,0,Ketapril
3,a457,0,45.0,0,Ketapril
4,c819,0,45.0,0,Ketapril


In [25]:
# ****** Tumor Response to Treatment ******

# Select required columns for analysis
tumor_response_df = drugtrial_df[['Drug', 'Timepoint', 'Tumor Volume (mm3)']]

tumor_response_df.head(100)

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Ketapril,0,45.0
2,Ketapril,0,45.0
3,Ketapril,0,45.0
4,Ketapril,0,45.0
5,Ketapril,0,45.0
6,Ketapril,0,45.0
7,Ketapril,0,45.0
8,Ketapril,0,45.0
12,Ketapril,0,45.0


In [28]:
# Calculate mean of Tumor Volume per drug
tumor_response_df = pd.DataFrame(tumor_response_df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)'].mean())

# Calculate standard error for Tumor Volume
tumor_response_df["Volume SEM"] = tumor_response_df['Tumor Volume (mm3)'].sem()
tumor_response_df.reset_index(inplace = True)
tumor_response_df.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3),Volume SEM
0,Capomulin,0,45.0,1.502984
1,Capomulin,5,44.266086,1.502984
2,Capomulin,10,43.084291,1.502984
3,Capomulin,15,42.064317,1.502984
4,Capomulin,20,40.716325,1.502984
