In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations (edit these if the data lives somewhere else)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load the mouse/drug table and the clinical trial table
mouse = pd.read_csv(mouse_drug_data_to_load)
clinical = pd.read_csv(clinical_trial_data_to_load)

# Left-join the mouse/drug columns onto every clinical row through the
# shared "Mouse ID" key, so each clinical row is kept
combined_df = clinical.merge(mouse, how="left", on="Mouse ID")

# Show the first five merged rows as a quick sanity check
combined_df.head(5)

In [None]:
# Group the merged data by treatment and measurement time; the grouping
# object is kept in `groupby` because later cells read from it again
groupby = combined_df.groupby(["Drug", "Timepoint"])

# Mean tumor volume per (Drug, Timepoint), flattened into a DataFrame with
# the columns Drug, Timepoint and Tumor Volume (mm3)
Avg_Tumor_Volume = (
    groupby["Tumor Volume (mm3)"]
    .mean()
    .to_frame()
    .reset_index()
)

# Peek at the first rows
Avg_Tumor_Volume.head()

In [None]:
# Standard error of the mean tumor volume per (Drug, Timepoint), taken from
# the existing `groupby` grouping and flattened so that Drug and Timepoint
# become ordinary columns
SE = (
    groupby["Tumor Volume (mm3)"]
    .sem()
    .to_frame()
    .reset_index()
)

# Peek at the first rows
SE.head()

In [None]:
# Reshape the long table of means into wide form: one row per Timepoint,
# one column per Drug, each cell holding the mean tumor volume
Mungled_Data = (
    Avg_Tumor_Volume
    .set_index(["Timepoint", "Drug"])["Tumor Volume (mm3)"]
    .unstack("Drug")
)
# Display the reshaped table to confirm the layout
Mungled_Data

In [None]:
# Generate the Plot (with Error Bars)
# Reshape the standard errors the same way as the means (rows = Timepoint,
# columns = Drug) so each error bar is matched to its point by label rather
# than by row position.
SE_by_drug = SE.pivot(index="Timepoint", columns="Drug", values="Tumor Volume (mm3)")

# Read the x values from the data instead of assuming a fixed 0-45 schedule.
Time = list(Mungled_Data.index)

# (drug, marker, colour) for every series drawn; the styling is the same as
# in the previous one-call-per-drug version.
tumor_plot_styles = [
    ("Capomulin", "o", "red"),
    ("Infubinol", "^", "blue"),
    ("Ketapril", "D", "black"),
    ("Placebo", "s", "green"),
]
for drug, marker, color in tumor_plot_styles:
    plt.errorbar(
        Time,
        Mungled_Data[drug],
        # reindex guarantees the errors follow the order of the plotted means
        yerr=SE_by_drug[drug].reindex(Mungled_Data.index).values,
        label=drug,
        marker=marker,
        color=color,
        linestyle='--',
    )

plt.legend()
plt.title("Tumor Response to Treatment ")
plt.xlabel("Time (Days)")
plt.ylabel("Tumor Volume (mm3)")
plt.grid()
# Save the Figure (must happen before show(), which releases the figure)
plt.savefig("Tumor_Response.png")
# Show the Figure
plt.show()

In [None]:
# Build a (Drug, Timepoint) grouping for the metastatic-site statistics;
# it is stored in `groupby1` because the standard-error cell reads it too
groupby1 = combined_df.groupby(["Drug", "Timepoint"])

# Mean number of metastatic sites per group, wrapped as a one-column
# DataFrame that keeps (Drug, Timepoint) as its index
Avg_Met_Set = groupby1["Metastatic Sites"].mean().to_frame()

# Peek at the first rows
Avg_Met_Set.head()

In [None]:
# Standard error of the metastatic-site counts per (Drug, Timepoint), taken
# from the `groupby1` grouping and wrapped as a one-column DataFrame that
# keeps (Drug, Timepoint) as its index
SE_Met_Set = groupby1["Metastatic Sites"].sem().to_frame()

# Peek at the first rows
SE_Met_Set.head()

In [None]:
# Turn the (Drug, Timepoint) index of both metastatic tables back into
# ordinary columns so they can be filtered and pivoted
Avg_Met_Set_R, SE_Met_Set_R = (
    frame.reset_index() for frame in (Avg_Met_Set, SE_Met_Set)
)

# Wide layout of the means: one row per Timepoint, one column per Drug
Mungled_Data_Metsite = Avg_Met_Set_R.pivot(
    index='Timepoint',
    columns='Drug',
    values='Metastatic Sites',
)
# Check the first rows of the reshaped table
Mungled_Data_Metsite.head()

In [None]:
# Generate the Plot (with Error Bars)
# Reshape the standard errors the same way as the means (rows = Timepoint,
# columns = Drug) so each error bar is matched to its point by label rather
# than by row position.
SE_Met_by_drug = SE_Met_Set_R.pivot(index="Timepoint", columns="Drug", values="Metastatic Sites")

# Read the x values from the data instead of assuming a fixed 0-45 schedule.
Time = list(Mungled_Data_Metsite.index)

# (drug, marker, colour) for every series drawn; the styling is the same as
# in the previous one-call-per-drug version.
metsite_plot_styles = [
    ("Capomulin", "o", "red"),
    ("Infubinol", "^", "blue"),
    ("Ketapril", "s", "green"),
    ("Placebo", "d", "black"),
]
for drug, marker, color in metsite_plot_styles:
    plt.errorbar(
        Time,
        Mungled_Data_Metsite[drug],
        # reindex guarantees the errors follow the order of the plotted means
        yerr=SE_Met_by_drug[drug].reindex(Mungled_Data_Metsite.index).values,
        label=drug,
        marker=marker,
        color=color,
        linestyle=':',
    )

plt.legend()
plt.title("Metastatic spread During Treatment ")
plt.xlabel("Treatment Duration (Days)")
plt.ylabel("Met.Sites")
plt.grid()
# Save the Figure (must happen before show(), which releases the figure)
plt.savefig("Metsite_Response.png")
# Show the Figure
plt.show()

In [None]:
# Count the "Mouse ID" entries in every (Drug, Timepoint) group of the
# existing `groupby` grouping, and flatten the result into a DataFrame whose
# count column is named "Mouse Count"
Mouse_Count = groupby["Mouse ID"].count().reset_index(name="Mouse Count")

# Peek at the first rows
Mouse_Count.head()

In [None]:
# Wide layout of the counts: one row per Timepoint, one column per Drug
Mungled_Mouse_data = Mouse_Count.pivot(
    index='Timepoint',
    columns='Drug',
    values='Mouse Count',
)
# Check the first rows of the reshaped table
Mungled_Mouse_data.head()

In [None]:
# Generate the Plot (Accounting for percentages)
# Read the x values from the data instead of assuming a fixed 0-45 schedule.
Time = list(Mungled_Mouse_data.index)

# Each drug's own count at the first timepoint is the 100% reference, so the
# percentage stays correct even when a group does not start with 25 records.
starting_counts = Mungled_Mouse_data.iloc[0]
Survival_Rate = Mungled_Mouse_data / starting_counts * 100

# (drug, marker, colour) for every series drawn; the styling is the same as
# in the previous one-call-per-drug version.
survival_plot_styles = [
    ("Capomulin", "o", "red"),
    ("Infubinol", "^", "blue"),
    ("Ketapril", "s", "green"),
    ("Placebo", "d", "black"),
]
for drug, marker, color in survival_plot_styles:
    plt.plot(Time, Survival_Rate[drug], label=drug, marker=marker, color=color, linestyle=':')

plt.legend()
plt.title("Survival During Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Survival Rate (%) ")
plt.grid()
# Save the Figure
plt.savefig("Survival_Rate.png")
# Show the Figure
plt.show()

In [None]:
# Calculate the percent changes for each drug
# Baseline tumor volume used as the denominator.
# NOTE(review): hard-coded; presumably every group starts at 45 mm3 -- confirm
# against the first row of Mungled_Data.
Intial_tumor_volume = 45

# Use the last recorded timepoint rather than assuming the study ends at 45.
final_timepoint = Mungled_Data.index.max()

# Relative change of each drug's mean tumor volume between baseline and the
# final timepoint, in percent (negative = tumor shrank).
Percent_Changes = ((Mungled_Data.loc[final_timepoint, :] - Intial_tumor_volume) / Intial_tumor_volume) * 100
# Display the data to confirm
Percent_Changes

In [None]:
# Store all Relevant Percent Changes into a Tuple of (drug, percent change) pairs
Tuple_Percent_Changes = tuple(zip(Percent_Changes.index, Percent_Changes))
Tuple_Percent_Changes_list = list(Tuple_Percent_Changes)

# Splice the data between passing and failing drugs.
# A drug passes when the tumor volume went DOWN (negative percent change);
# zero or positive change counts as failing. This matches the bar chart,
# which draws the shrinking case in green and the growing cases in red.
passing_drugs = [(drug, change) for drug, change in Tuple_Percent_Changes_list if change < 0]
failing_drugs = [(drug, change) for drug, change in Tuple_Percent_Changes_list if not change < 0]

# Parallel name/value lists for each group
index_passing = [drug for drug, _ in passing_drugs]
passing = [change for _, change in passing_drugs]
index_failing = [drug for drug, _ in failing_drugs]
failing = [change for _, change in failing_drugs]

# Orient widths. Add labels, tick marks, etc.
fig, ax = plt.subplots()

# Drugs shown, in left-to-right order; also used for the tick labels
x_labels = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]
x_axis1 = [0]
x_axis2 = [1, 2, 3]
y_value1 = [Percent_Changes[drug] for drug in x_labels[1:]]

# Colour follows the data: green when the tumor shrank, red otherwise
color1 = 'g' if Percent_Changes["Capomulin"] < 0 else 'r'
colors2 = ['g' if change < 0 else 'r' for change in y_value1]

# align="edge" with a negative width draws each bar to the LEFT of its x
# position, so every tick ends up on the right edge of its bar
bars1 = ax.bar(x_axis1, Percent_Changes["Capomulin"], color=color1, alpha=0.8, align="edge", width=-1)
bars2 = ax.bar(x_axis2, y_value1, color=colors2, alpha=0.8, align="edge", width=-1)

ax.set_xticks([0, 1, 2, 3])
ax.set_xticklabels(x_labels)
ax.set_yticks([-20, 0, 20, 40, 60])

ax.set_ylabel('% Tumor Volume Change')
ax.set_title('Tumor Change Over 45 Day Treatment')
ax.grid()

# Use functions to label the percentages of changes
def autolabel(rects):
    """Write each bar's height as a whole-number percentage on the bar.

    The text is drawn on the axes that owns the bar (``rect.axes``), so the
    function does not depend on a global ``ax`` existing.

    Parameters
    ----------
    rects : iterable
        Bar patches, e.g. the container returned by ``ax.bar``. Each item
        must provide ``get_x()``, ``get_width()``, ``get_height()`` and an
        ``axes`` attribute.
    """
    for rect in rects:
        height = rect.get_height()
        # Horizontal centre of the bar (also correct for negative widths)
        x_center = rect.get_x() + rect.get_width() / 2
        # Anchor the top of the text at 10% of the bar height; int()
        # truncates toward zero, so -19.5 is shown as "-19%"
        rect.axes.text(x_center, .1 * height, "%d%%" % int(height),
                       ha='center', va='top', color="white")
# Label both bar groups with their percentage values
autolabel(bars1)
autolabel(bars2)
fig.tight_layout()
# Save the Figure (before show(), which releases the figure)
fig.savefig("Percentage_Tumor_Volume_Change.png")
# Show the Figure. plt.show() is used because Figure.show() needs a GUI
# backend and only emits a warning under the notebook's inline backend.
plt.show()