In [24]:
 # Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Locations of the two input CSV files (adjust these paths for your environment)
mouse_drug_data_to_load = "../../Homework/Instructions/Pymaceuticals/data/mouse_drug_data.csv"
clinical_trial_data_to_load = "../../Homework/Instructions/Pymaceuticals/data/clinicaltrial_data.csv"

# Read each CSV into its own dataframe, in the same order as the paths above
mouse_drug_data_df, clinical_trial_data_df = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_drug_data_to_load, clinical_trial_data_to_load)
)

In [53]:
# Combine the mouse/drug dataframe with the clinical trial dataframe on the shared "Mouse ID" column
mouse_clinic_df = mouse_drug_data_df.merge(clinical_trial_data_df, on="Mouse ID")
# Preview the first five rows of the merged dataframe
mouse_clinic_df.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


In [56]:
# Tumor Volume Changes Over Time for Each Treatment

# Distinct values of the 'Drug' column in the merged dataframe
all_drugs = pd.unique(mouse_clinic_df['Drug'])

# Colors used in sequence by the plotting functions: a five-color palette repeated twice
colors = ["Red", "Blue", "Green", "Black", "gray"] * 2

# Create a function to calculate and generate scatter plots for the tumor volume change over time for each treatment
def treatments(list_drugs):
    """Plot mean tumor volume over time, with standard-error bars, for each drug.

    One figure is created per entry in ``list_drugs``. The data is read from
    the module-level ``mouse_clinic_df`` dataframe and the marker colors from
    the module-level ``colors`` list. The color index wraps around, so passing
    more drugs than there are colors reuses the palette instead of raising
    ``IndexError``.

    Parameters
    ----------
    list_drugs : iterable
        Drug names to match against the 'Drug' column of ``mouse_clinic_df``.

    Returns
    -------
    None
        The function is called for its side effect of creating the figures.
    """
    for index, drug in enumerate(list_drugs):
        # Rows of the merged dataframe that belong to this drug
        drug_df = mouse_clinic_df.loc[mouse_clinic_df['Drug'] == drug]

        # Distinct timepoints recorded for this drug; also used as the x axis
        times = drug_df['Timepoint'].unique()

        # Tumor volumes of all mice at each timepoint (filtered once, reused below)
        sample = [
            drug_df.loc[drug_df['Timepoint'] == time, 'Tumor Volume (mm3)']
            for time in times
        ]

        # Mean and standard error of the tumor volume at each timepoint
        means = [volumes.mean() for volumes in sample]
        standard_errors = [sem(volumes) for volumes in sample]

        # Create a plot based on the information for this drug
        fig, ax = plt.subplots()
        ax.errorbar(
            times,
            means,
            yerr=standard_errors,
            fmt="o",
            # Wrap around the palette so extra drugs reuse colors instead of failing
            color=colors[index % len(colors)],
        )
        ax.grid(color='gray', linestyle='-', linewidth=0.2)
        ax.set_xlabel("Timepoint")
        ax.set_ylabel("Tumor Volume (mm3)")
        ax.set_title(f"Effects of {drug} on Test Subjects")
        

In [65]:
# Display the various plots for the tumor volume over time for each treatment
treatments(all_drugs)

In [66]:
# Metastatic Site Changes Over Time for Each Treatment

# Create a function to calculate and generate scatter plots for the number of met. sites over time for each treatment
def metastatic_sites(list_drugs):
    """Plot the mean number of metastatic sites over time, with standard-error bars, for each drug.

    One figure is created per entry in ``list_drugs``. The data is read from
    the module-level ``mouse_clinic_df`` dataframe and the marker colors from
    the module-level ``colors`` list. The color index wraps around, so passing
    more drugs than there are colors reuses the palette instead of raising
    ``IndexError``.

    Parameters
    ----------
    list_drugs : iterable
        Drug names to match against the 'Drug' column of ``mouse_clinic_df``.

    Returns
    -------
    None
        The function is called for its side effect of creating the figures.
    """
    for index, drug in enumerate(list_drugs):
        # Rows of the merged dataframe that belong to this drug
        drug_df = mouse_clinic_df.loc[mouse_clinic_df['Drug'] == drug]

        # Distinct timepoints recorded for this drug; also used as the x axis
        times = drug_df['Timepoint'].unique()

        # Metastatic site counts of all mice at each timepoint (filtered once, reused below)
        sample = [
            drug_df.loc[drug_df['Timepoint'] == time, 'Metastatic Sites']
            for time in times
        ]

        # Mean and standard error of the metastatic site count at each timepoint
        means = [sites.mean() for sites in sample]
        standard_errors = [sem(sites) for sites in sample]

        # Create a plot based on the information for this drug
        fig, ax = plt.subplots()
        ax.errorbar(
            times,
            means,
            yerr=standard_errors,
            fmt="o",
            # Wrap around the palette so extra drugs reuse colors instead of failing
            color=colors[index % len(colors)],
        )
        ax.grid(color='gray', linestyle='-', linewidth=0.2)
        ax.set_xlabel("Timepoint")
        # The plotted quantity is the metastatic site count, not the tumor volume
        ax.set_ylabel("Metastatic Sites")
        ax.set_title(f"Number of Metastatic Sites in Test Subjects While on {drug}")
        

In [68]:
# Display the various plots for the met. site changes over time for each treatment
metastatic_sites(all_drugs)

In [5]:
# You are tasked with:
    # Creating a scatter plot that shows how the tumor volume changes over time for each treatment.
    # Creating a scatter plot that shows how the number of metastatic (cancer spreading) sites changes over time for each treatment.
    # Creating a scatter plot that shows the number of mice still alive through the course of treatment (Survival Rate)
    # Creating a bar graph that compares the total % tumor volume change for each drug across the full 45 days.
    # Include 3 observations about the results of the study. Use the visualizations you generated from the study data as the basis for your observations.

# As final considerations:
    # You must use the Pandas Library and the Jupyter Notebook.
    # You must use the Matplotlib library.
    # You must include a written description of three observable trends based on the data.
    # You must use proper labeling of your plots, including aspects like: Plot Titles, Axes Labels, Legend Labels, X and Y Axis Limits, etc.
    # Your scatter plots must include error bars. This will allow the company to account for variability between mice.
        # You may want to look into pandas.DataFrame.sem for ideas on how to calculate this.
    
    # Remember when making your plots to consider aesthetics!
        # Your legends should not be overlaid on top of any data.
        # Your bar graph should indicate tumor growth as red and tumor reduction as green.
        # It should also include a label with the percentage change for each bar.
            # You may want to consult this tutorial for relevant code snippets.