## Observations and Insights

## Dependencies and starter code

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Load the per-mouse metadata and the per-measurement study results
mouse_metadata = pd.read_csv("data/Mouse_metadata.csv")
study_results = pd.read_csv("data/Study_results.csv")

# Join both tables on the shared 'Mouse ID' column (default inner join)
single_results = mouse_metadata.merge(study_results, on='Mouse ID')
single_results

## Summary statistics

In [None]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Group the tumor volumes once so every statistic is computed over the same groups
tumor_by_regimen = single_results.groupby('Drug Regimen')['Tumor Volume (mm3)']

# describe() supplies count, mean and std (plus min/max/quartiles) but no
# median, variance or SEM, so those are computed separately and joined on the
# shared 'Drug Regimen' index. The describe() columns are kept because other
# cells read summary_stats['count'].
summary_stats = tumor_by_regimen.describe().join(
    tumor_by_regimen.agg(['median', 'var', 'sem'])
)
summary_stats

## Bar plots

In [None]:
# Bar chart (pandas plotting API) of the number of data points per treatment regimen

summary_stats['count'].plot(kind='bar')

In [None]:
# Generate a bar plot showing number of data points for each treatment regimen using pyplot
regimen_counts = summary_stats['count']

# One bar per regimen at positions 0..n-1; a scalar x would stack every bar
# at the same location. tick_label names each bar after its regimen.
plt.bar(range(len(regimen_counts)), regimen_counts, tick_label=regimen_counts.index)
plt.xticks(rotation=90)  # regimen names are long; rotate so they do not overlap
plt.xlabel('Drug Regimen')
plt.ylabel('Number of data points')
plt.show()

## Pie plots

In [None]:
# Pie chart (pandas plotting API) of the female versus male distribution

# Number of rows of single_results for each value of 'Sex'
sexes = single_results['Sex'].value_counts()
sexes.plot(kind='pie')

In [None]:
# Generate a pie plot showing the distribution of female versus male mice using pyplot

# Number of rows of single_results for each value of 'Sex'.
# NOTE(review): this counts rows, not unique mice; if a mouse appears in
# several rows it is weighted by its number of measurements - confirm that
# per-row counts are what is wanted here.
sexes = single_results['Sex'].value_counts()
fig1, ax1 = plt.subplots()
# Label each wedge with its category and percentage; without labels the two
# wedges cannot be told apart.
ax1.pie(sexes, labels=sexes.index, autopct='%1.1f%%')

## Quartiles, outliers and boxplots

In [None]:
# Split the combined data into one frame per regimen of interest; the cells
# below use these frames to compute final tumor volumes, the IQR and outlier bounds.
regimen_column = single_results["Drug Regimen"]
capomulin_df = single_results[regimen_column == "Capomulin"]
ramicane_df = single_results[regimen_column == "Ramicane"]
infubinol_df = single_results[regimen_column == "Infubinol"]
ceftamin_df = single_results[regimen_column == "Ceftamin"]

In [None]:
# Capomulin: final tumor volume per mouse, IQR, and potential outliers

# Greatest timepoint recorded for each mouse (max taken on 'Timepoint' only)
capomulin_last = capomulin_df.groupby('Mouse ID')['Timepoint'].max()
capomulin_vol = pd.DataFrame(capomulin_last)
# Pull the full study row that matches each mouse's greatest timepoint
capomulin_merge = pd.merge(capomulin_vol, single_results, on=["Mouse ID", "Timepoint"], how="left")

capomulin_tumors = capomulin_merge["Tumor Volume (mm3)"]

quartiles = capomulin_tumors.quantile([.25, .5, .75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq - lowerq
# 1.5 * IQR rule: values outside these bounds are flagged as potential outliers
lower_bound = lowerq - (1.5 * iqr)
upper_bound = upperq + (1.5 * iqr)

capomulin_outliers = capomulin_tumors[
    (capomulin_tumors < lower_bound) | (capomulin_tumors > upper_bound)
]

print("Lower quartile:", lowerq)
print("Upper quartile:", upperq)
print("IQR:", iqr)
print("Lower bound:", lower_bound)
print("Upper bound:", upper_bound)
print("Potential outliers:", capomulin_outliers.tolist())

In [None]:
# Ramicane: final tumor volume per mouse, IQR, and potential outliers

# Greatest timepoint recorded for each mouse (max taken on 'Timepoint' only)
ramicane_last = ramicane_df.groupby('Mouse ID')['Timepoint'].max()
ramicane_vol = pd.DataFrame(ramicane_last)
# Pull the full study row that matches each mouse's greatest timepoint
# (the merge previously referenced the undefined name `ramicanen_vol`)
ramicane_merge = pd.merge(ramicane_vol, single_results, on=["Mouse ID", "Timepoint"], how="left")

ramicane_tumors = ramicane_merge["Tumor Volume (mm3)"]

quartiles = ramicane_tumors.quantile([.25, .5, .75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq - lowerq
# 1.5 * IQR rule: values outside these bounds are flagged as potential outliers
lower_bound = lowerq - (1.5 * iqr)
upper_bound = upperq + (1.5 * iqr)

ramicane_outliers = ramicane_tumors[
    (ramicane_tumors < lower_bound) | (ramicane_tumors > upper_bound)
]

print("Lower quartile:", lowerq)
print("Upper quartile:", upperq)
print("IQR:", iqr)
print("Lower bound:", lower_bound)
print("Upper bound:", upper_bound)
print("Potential outliers:", ramicane_outliers.tolist())

In [None]:
# Infubinol: final tumor volume per mouse, IQR, and potential outliers

# Greatest timepoint recorded for each mouse (max taken on 'Timepoint' only)
infubinol_last = infubinol_df.groupby('Mouse ID')['Timepoint'].max()
infubinol_vol = pd.DataFrame(infubinol_last)
# Pull the full study row that matches each mouse's greatest timepoint
# (the merge previously referenced the undefined name `infubinoln_vol`)
infubinol_merge = pd.merge(infubinol_vol, single_results, on=["Mouse ID", "Timepoint"], how="left")

infubinol_tumors = infubinol_merge["Tumor Volume (mm3)"]

quartiles = infubinol_tumors.quantile([.25, .5, .75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq - lowerq
# 1.5 * IQR rule: values outside these bounds are flagged as potential outliers
lower_bound = lowerq - (1.5 * iqr)
upper_bound = upperq + (1.5 * iqr)

infubinol_outliers = infubinol_tumors[
    (infubinol_tumors < lower_bound) | (infubinol_tumors > upper_bound)
]

print("Lower quartile:", lowerq)
print("Upper quartile:", upperq)
print("IQR:", iqr)
print("Lower bound:", lower_bound)
print("Upper bound:", upper_bound)
print("Potential outliers:", infubinol_outliers.tolist())

In [None]:
# Ceftamin: final tumor volume per mouse, IQR, and potential outliers

# Greatest timepoint recorded for each mouse (max taken on 'Timepoint' only)
ceftamin_last = ceftamin_df.groupby('Mouse ID')['Timepoint'].max()
ceftamin_vol = pd.DataFrame(ceftamin_last)
# Pull the full study row that matches each mouse's greatest timepoint
# (the merge previously referenced the undefined name `ceftaminn_vol`)
ceftamin_merge = pd.merge(ceftamin_vol, single_results, on=["Mouse ID", "Timepoint"], how="left")

ceftamin_tumors = ceftamin_merge["Tumor Volume (mm3)"]

quartiles = ceftamin_tumors.quantile([.25, .5, .75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq - lowerq
# 1.5 * IQR rule: values outside these bounds are flagged as potential outliers
lower_bound = lowerq - (1.5 * iqr)
upper_bound = upperq + (1.5 * iqr)

ceftamin_outliers = ceftamin_tumors[
    (ceftamin_tumors < lower_bound) | (ceftamin_tumors > upper_bound)
]

print("Lower quartile:", lowerq)
print("Upper quartile:", upperq)
print("IQR:", iqr)
print("Lower bound:", lower_bound)
print("Upper bound:", upper_bound)
print("Potential outliers:", ceftamin_outliers.tolist())