In [1]:
import warnings

# The workhorses
import pandas as pd
import numpy as np

# Plotting modules
import matplotlib.pyplot as plt
import matplotlib.patches

# This is to enable inline displays for the purposes of the tutorial
%matplotlib inline

# This enables SVG graphics inline
%config InlineBackend.figure_formats = {'png', 'retina'}

# Seaborn makes plots look nice
import seaborn as sns
sns.set_context('notebook', font_scale=1.5, rc={'lines.linewidth': 2.5})
sns.set_style('darkgrid', {'axes.facecolor': '(0.875, 0.875, 0.9)'})

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
# Load data
df = pd.read_csv('data/mean_rest_bouts.csv', comment='#')

# Pull out wild type and mutant and take NaNs to be zero
df = df[df['genotype'].isin(['wt', 'mut'])].fillna(0)

In [7]:
df.head()

Unnamed: 0,fish,genotype,mean_rest_bout_length
2,FISH11,mut,2.255556
3,FISH12,mut,1.529412
4,FISH13,mut,2.373626
5,FISH14,wt,2.352941
7,FISH18,wt,2.111111


## Problem 5.1a Frequentist parameter estimation

We are assuming that both the wild-type and mutant bout-lengths have Gaussian distributions and want unbiased estimators for $\mu_i$ and $\sigma_i^2$, where $i \in \{w,m\}$. These are, respectively, the sample mean:
$$\bar{x_i} = \frac{1}{n_i}\sum_{x_j \in i} x_j$$
and the sample variance:
$$s_i^ 2 = \frac{1}{n_i-1}\sum_{x_j \in i} (x_j - \bar{x_i})^ 2$$

We will use the built-in functions in Pandas to calculate these. 

In [16]:
x_bar_w = df[df['genotype']=='wt']['mean_rest_bout_length'].mean()
x_bar_m = df[df['genotype']=='mut']['mean_rest_bout_length'].mean()
var_w = df[df['genotype']=='wt']['mean_rest_bout_length'].var()
var_m = df[df['genotype']=='mut']['mean_rest_bout_length'].var()
print ('            mean (min)   variance (min²)')
print ('wild-type:  %.3f        %.3f' %(x_bar_w, var_w))
print ('mutant:     %.3f        %.3f' %(x_bar_m, var_m))

            mean (min)   variance (min²)
wild-type:  2.209        0.273
mutant:     1.727        0.648


We also want to calculate Cohen's d as an estimate of the effect size. 

In [19]:
def cohen_d(x, y, return_abs=False):
    """
    Cohen's d
    """
    diff = y.mean() - x.mean()
    pooled_variance = (len(x) * x.var() + len(y) * y.var()) / (len(x) + len(y))

    if return_abs:
        return np.abs(diff) / np.sqrt(pooled_variance)
    return diff / np.sqrt(pooled_variance)

actual_d = cohen_d (df[df['genotype']=='wt']['mean_rest_bout_length'],
        df[df['genotype']=='mut']['mean_rest_bout_length'])

print ("Cohen's d = %.4f" %actual_d)

Cohen's d = -0.6927
