## Dependencies and starter code

In [1]:
%matplotlib notebook

# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import sem

# Study data files
mouse_metadata = "data/Mouse_metadata.csv"
study_results = "data/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata)
study_results = pd.read_csv(study_results)

# Combine the data into a single dataset
mouse_study_data = pd.merge(mouse_metadata, study_results, on="Mouse ID")

## Summary statistics

In [9]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Initialize data vars
mean      = []
median    = []
variance  = []
stddev    = []
pym_sem   = []
count     = []

# Get a list of unique drugs
drug_regimes = mouse_study_data['Drug Regimen'].unique()

# loop through drugs and for each one grab their data and calc stats
i = 0
for drug in drug_regimes:
    # get drug data
    drug_data_dfi = mouse_study_data.loc[mouse_study_data["Drug Regimen"] == drug]
    
    # calc and store stats
    mean.append(drug_data_dfi["Tumor Volume (mm3)"].mean())
    median.append(drug_data_dfi["Tumor Volume (mm3)"].median())
    variance.append(np.var(drug_data_dfi["Tumor Volume (mm3)"]))
    stddev.append(np.std(drug_data_dfi["Tumor Volume (mm3)"]))
    pym_sem.append(sem(drug_data_dfi["Tumor Volume (mm3)"]))
    count.append(drug_data_dfi["Tumor Volume (mm3)"].count())
     
    # increment index
    i = i+1
    
# make dictionary for data frame
data = {
    "drug regimes": drug_regimes,
    "mean": mean,
    "median": median,
    "variance": variance,
    "stddev": stddev,
    "SEM": pym_sem,
    "count": count
}

# create dataframe
summary_stats = pd.DataFrame(data=data, index=drug_regimes)

# add some formatting
for col in summary_stats.columns[[1,2,3,4,5]]:
    summary_stats[col] = summary_stats[col].map("{:,.2f}".format)
    
summary_stats.head()

Unnamed: 0,drug regimes,mean,median,variance,stddev,SEM,count
Ramicane,Ramicane,40.22,40.67,23.38,4.84,0.32,228
Capomulin,Capomulin,40.68,41.56,24.84,4.98,0.33,230
Infubinol,Infubinol,52.88,51.82,42.89,6.55,0.49,178
Placebo,Placebo,54.03,52.29,60.83,7.8,0.58,181
Ceftamin,Ceftamin,52.59,51.78,39.07,6.25,0.47,178


## Plots

In [10]:
# set x axis and tick locations
x = drug_regimes
tick_locs = [xi for xi in x]
y = summary_stats["count"]

# matplotlib plot
plt.figure(figsize=(6,6))
plt.bar(x, y, color='r', alpha=0.5, align="center")
plt.xticks(tick_locs, x, rotation="vertical")

# Set a title and labels
plt.title("Number of Data Points for Each Treatment Regime")
plt.xlabel("Drug")
plt.ylabel("Count of Data Points")

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Count of Data Points')

In [15]:
# pandas plot
summary_stats.plot(kind="bar", figsize=(6,6))

# Set a title for the chart
plt.title("Number of Data Points for Each Treatment Regime")
plt.xlabel("Drug")
plt.ylabel("Count of Data Points")

plt.show()

<IPython.core.display.Javascript object>

## Pie Chart

In [19]:
# matplotlib pie chart

# Labels for the sections of our pie chart
labels = mouse_study_data["Sex"].unique()

# The values of each section of the pie chart
sizes = mouse_study_data["Sex"].value_counts()

# The colors of each section of the pie chart
colors = ["green", "orange"]

# Tells matplotlib to seperate the "Python" section from the others
explode = (0.1, 0)

In [21]:

# Create pie chart
plt.figure(figsize=(6,6))
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", startangle=140)

plt.axis("equal")

<IPython.core.display.Javascript object>

(-1.1756979042636164,
 1.1282937402085897,
 -1.1918349098573184,
 1.1259794268614327)

In [40]:
# pandas pie chart
try: 
    mouse_study_data.plot.pie(y='Sex',figsize=(5,5))
except TypeError:
    print("why is there a TypeError...check later")

why is there a TypeError...check later


In [72]:
# drug options
drugs = ["Capomulin", "Ramicane", "Infubinol", "Ceftamin"]

# initialize variables to store data
final_vol = {
                "Mouse ID": [],
                "Drug":[],
                "Tumor Vol_0": [],
                "Tumor Vol_f": [],
                "Change in Tumor Vol":[],
                "dt": []
            }

# loop through drugs to get the last time index
for drug in drugs:
    # get data
    drugi_index = mouse_study_data[mouse_study_data['Drug Regimen'] == drug].index.tolist()
    drugi_data = mouse_study_data.iloc[drugi_index,:]
    
    # get a list of unique mice IDs for this drug
    mice_IDs =  drugi_data["Mouse ID"].unique()
    
    # loop through the mice IDs and extract the interesting data
    for Mouse_ID in mice_IDs:
        mousei_index = drugi_data[drugi_data['Mouse ID'] == Mouse_ID].index.tolist()
        mousei_index = mouse_study_data.iloc[mousei_index,:]
        
        # get initial and final time, calc dt, tf - t0
        maxt = mousei_index['Timepoint'].max()
        mint = mousei_index['Timepoint'].min()
        dt = maxt-mint
        
        # get the row of initial and final time
        max_t_row = mousei_index[mousei_index['Timepoint'] == maxt]
        min_t_row = mousei_index[mousei_index['Timepoint'] == mint]
        
        # get the initial and final tumor volume, calc dvol, volf - vol0
        volf = max_t_row['Tumor Volume (mm3)']
        vol0 = min_t_row['Tumor Volume (mm3)']
        dvol = volf - vol0
        
        # store values
        final_vol["Mouse ID"].append(Mouse_ID)
        final_vol["Drug"].append(Drug)
        final_vol["Tumor Vol_0"].append(vol0)
        final_vol["Tumor Vol_f"].append(volf)
        final_vol["Change in Tumor Vol"].append(dvol)
        final_vol["dt"].append(dt)
        
        
        print(drug)
        print(Mouse_ID)
        print(dvol)
        
    
    # record data
    

Capomulin
s185
10   NaN
19   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
x401
20   NaN
29   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
m601
30   NaN
39   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
f966
60   NaN
64   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
u364
95    NaN
104   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
y793
115   NaN
124   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
r554
125   NaN
134   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
m957
135   NaN
144   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
t565
155   NaN
164   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
i738
195   NaN
204   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
w914
215   NaN
224   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
g288
235   NaN
244   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
l897
273   NaN
282   NaN
Name: Tumor Volume (mm3), dtype: float64
Capomulin
b742
290  

In [68]:
max_vol_row = mousei_index[mousei_index['Timepoint'] == mousei_index['Timepoint'].max()]
min_vol_row = mousei_index[mousei_index['Timepoint'] == mousei_index['Timepoint'].min()]


Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
1855,y865,Ceftamin,Male,23,26,45,64.729837,3


In [37]:
unique_time = mouse_study_data['Timepoint'].unique()
unique_time

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45])

In [26]:
mouse_study_data.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1


In [55]:
mouse_study_data.set_index("Mouse ID")






Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
10,s185,Capomulin,Female,3,17,0,45.000000,0
11,s185,Capomulin,Female,3,17,5,43.878496,0
12,s185,Capomulin,Female,3,17,10,37.614948,0
13,s185,Capomulin,Female,3,17,15,38.177232,0
14,s185,Capomulin,Female,3,17,20,36.866876,0
15,s185,Capomulin,Female,3,17,25,33.949940,0
16,s185,Capomulin,Female,3,17,30,32.959671,1
17,s185,Capomulin,Female,3,17,35,28.328531,1
18,s185,Capomulin,Female,3,17,40,25.472143,1
19,s185,Capomulin,Female,3,17,45,23.343598,1


In [50]:
row_i

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]