In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Locations of the two raw CSV inputs, relative to the notebook's working directory.
mouse_metadata_path = "data/Mouse_metadata.csv"
study_results_path = "data/Study_results.csv"

# Read each CSV into its own DataFrame.
mouse_metadata, study_results = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_metadata_path, study_results_path)
)

# Outer join on 'Mouse ID' so that IDs present in only one of the files are kept.
mouse_df = mouse_metadata.merge(study_results, how='outer', on='Mouse ID')

# Preview the first rows of the combined frame.
mouse_df.head()

In [None]:
# Number of rows recorded per mouse. This is not the cell's final expression,
# so the notebook does not render its result.
mouse_df.loc[:, 'Mouse ID'].value_counts()

# Display every row belonging to mouse 'g989'.
mouse_df[mouse_df['Mouse ID'].eq('g989')]

In [None]:
# Keep every row except those of mouse 'g989'; mouse_df itself is left unmodified.
mouse_cleaned = mouse_df[mouse_df['Mouse ID'].ne('g989')]

In [None]:
# Summary statistics of tumor volume, one row per drug regimen.
drug_group = mouse_cleaned.groupby('Drug Regimen')
tumor_volume_by_drug = drug_group['Tumor Volume (mm3)']

tumor_mean = tumor_volume_by_drug.mean()
tumor_median = tumor_volume_by_drug.median()
tumor_variance = tumor_volume_by_drug.var()
tumor_std = tumor_volume_by_drug.std()

# Standard error of the mean must be computed per regimen over all of that
# regimen's rows. A single value taken from a pooled random subsample would
# repeat one number on every row and change between runs.
# GroupBy.sem() uses ddof=1 by default, matching scipy.stats.sem.
tumor_sem = tumor_volume_by_drug.sem()

# Every input is a Series indexed by regimen, so the columns align row by row.
drug_summary_table = pd.DataFrame(
    {
        'Mean': tumor_mean,
        'Median': tumor_median,
        'Variance': tumor_variance,
        'Standard Deviation': tumor_std,
        'Standard Error': tumor_sem,
    }
)

# Drop the 'Drug Regimen' index label so the rendered table has no index header.
drug_summary_table.index.name = None

# Display the table ordered by ascending variance (the stored table keeps its order).
drug_summary_table.sort_values(by=['Variance'])

In [None]:
# Non-null 'Mouse ID' entries per regimen, i.e. one count per row of the cleaned data.
# NOTE(review): a mouse with several rows is counted once per row; confirm
# whether a count of unique mice was intended instead.
drug_counts = drug_group['Mouse ID'].agg('count')

# Render the resulting Series.
drug_counts

In [None]:
# Bar chart of the per-regimen counts, drawn through pandas' plotting accessor.
counts_ax = drug_counts.plot.bar()

# Label the returned Axes directly (overrides the x label pandas derives from the index).
counts_ax.set_title('Total Subjects per Treatment')
counts_ax.set_xlabel('Treatment')
counts_ax.set_ylabel('# of Subjects')

plt.show()

In [None]:
# Regimen labels (x) and their counts (y) as plain Python lists.
x_index = drug_counts.index
x_axis = x_index.tolist()

y_axis = drug_counts.tolist()

In [None]:
# Same bar chart, built with matplotlib directly from the x/y lists.
bar_fig, bar_ax = plt.subplots()
bar_ax.bar(x_axis, y_axis)

bar_ax.set_title('Total Subjects per Treatment')
bar_ax.set_xlabel('Treatment')
bar_ax.set_ylabel('# of Subjects')

# Turn the regimen names vertical so they do not overlap.
bar_ax.tick_params(axis='x', labelrotation=90)

plt.show()