In [88]:
import numpy as np
import scipy.stats as stats
import pandas as pd

In [89]:
# Twin-pair handedness counts, one row per study. Row order is shared by
# `datasets_names`, `table_mono` and `table_di`. The three columns are the
# pair counts that the summary tables further down label RR, RL and LL.
# NOTE(review): "mono"/"di" presumably mean monozygotic/dizygotic twins — confirm.
datasets_names = [
    "Wilson & Jones (1932)",
    "Stocks (1933)",
    "Newman et al. (1937)",
    "Bouterwek (1938)",
    "Rife (1940)",
    "Thyss (1946)",
    "Rife (1950)",
    "Dechaume (1957)",
    "Zazzo (1960)",
    "Carter-Saltzmann et al. (1976)",
    "Loehlin & Nichols (1976)",
    "Springer & Searleman (1978)",
    "NCDS (unpublished)",
    "Neale (1988)",
]

table_mono = np.array([
    [ 56,  13,  1],  # Wilson & Jones (1932)
    [ 35,   6,  1],  # Stocks (1933)
    [ 34,  13,  3],  # Newman et al. (1937)
    [ 80,  38,  4],  # Bouterwek (1938)
    [176,  41,  6],  # Rife (1940)
    [ 72,  24,  7],  # Thyss (1946)
    [261,  76,  6],  # Rife (1950)
    [ 19,  12,  2],  # Dechaume (1957)
    [199,  51,  9],  # Zazzo (1960)
    [132,  46,  9],  # Carter-Saltzmann et al. (1976)
    [380, 123, 11],  # Loehlin & Nichols (1976)
    [ 53,  19,  3],  # Springer & Searleman (1978)
    [ 32,   9,  2],  # NCDS (unpublished)
    [655, 158, 23],  # Neale (1988)
])

table_di = np.array([
    [ 97,  24,  2],  # Wilson & Jones (1932)
    [ 76,  16,  2],  # Stocks (1933)
    [ 39,  11,  0],  # Newman et al. (1937)
    [ 23,  12,  0],  # Bouterwek (1938)
    [104,  39,  3],  # Rife (1940)
    [ 60,  24,  2],  # Thyss (1946)
    [164,  45,  2],  # Rife (1950)
    [ 21,  11,  1],  # Dechaume (1957)
    [264,  69,  2],  # Zazzo (1960)
    [115,  54,  7],  # Carter-Saltzmann et al. (1976)
    [261,  70,  2],  # Loehlin & Nichols (1976)
    [ 35,   9,  3],  # Springer & Searleman (1978)
    [ 66,  18,  4],  # NCDS (unpublished)
    [626, 183, 23],  # Neale (1988)
])

In [90]:
def compute_t(ρ, α, β):
    """Return t, the true incidence of left-handedness (eq. 3 of the main text).

    Per the main text, F_DR is the final equilibrium frequency of
    right-handers with allele D fixed; the value returned here is
    F_DL = 1 - F_DR.

    Parameters
    ----------
    ρ, α, β : float
        Model parameters. When β is numerically zero (``np.isclose(β, 0)``)
        the closed form labelled eq. 3a is used, otherwise eq. 3.

    Returns
    -------
    float
        1 - F_DR.

    Raises
    ------
    AssertionError
        Only on the β != 0 path, when the result is not strictly inside
        (0, 1). The β ≈ 0 path is not range-checked.
    """
    if not np.isclose(β, 0):
        # General case: F_DR is a root of a quadratic with discriminant Δ.
        discriminant = 4 * α * α - 4 * α + 4 * β * β + 1 + 8 * β * ρ
        freq_right = (2 * α + 2 * β - 1 + np.sqrt(discriminant)) / (4 * β)  # eq 3
        assert 0 < 1 - freq_right < 1, ("t is the problem! t = ", 1 - freq_right)
        return 1 - freq_right

    # β ≈ 0: eq. 3 would divide by 4β, so the separate form eq. 3a is used.
    freq_right = (1 - 2 * α + 2 * ρ) / (2 * (1 - 2 * α))  # eq 3a
    return 1 - freq_right

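Given that the only genotype is DD, MZ and DZ twins are modeled identically: each twin independently has the same probability of being left-handed given the parents' phenotypes $ H_{p1} \times H_{p2} $. The twin-pair phenotype $ H_1 \times H_2 $ (i.e. the number of left-handed twins in the pair) can therefore be modeled using a binomial distribution: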

$ H_1 \times H_2 \sim \text{Bin}(2, p(L \mid H_{p1} \times H_{p2})) $


In [91]:
def compute_T_twins(ρ, α, β):
    """Return the probabilities of the three true twin-pair phenotypes.

    The original notes reference column DD of Table 1 and Appendix 3.
    For each parental mating type the twin pair is split binomially into
    [RR, RL, LL] using a per-twin probability of 0.5 + ρ + α (R x R parents),
    0.5 + ρ + β (R x L parents) or 0.5 + ρ - α (L x L parents). The three
    distributions are then mixed with weights (1 - t)^2, 2 (1 - t) t and t^2,
    where t = compute_t(ρ, α, β).

    Returns
    -------
    numpy.ndarray
        1-D array of length 3, ordered [RR, RL, LL].
    """
    def pair_distribution(p_right):
        # Two draws with the same per-twin probability -> [RR, RL, LL].
        p_left = 1 - p_right
        return np.array([np.power(p_right, 2), 2 * p_right * p_left, np.power(p_left, 2)])

    t = compute_t(ρ, α, β)

    from_RxR = pair_distribution(0.5 + ρ + α)
    from_RxL = pair_distribution(0.5 + ρ + β)
    from_LxL = pair_distribution(0.5 + ρ - α)

    # Frequencies of the parental mating types given incidence t.
    weight_RxR = np.power(1 - t, 2)
    weight_RxL = 2 * (1 - t) * t
    weight_LxL = np.power(t, 2)

    return weight_RxR * from_RxR + weight_RxL * from_RxL + weight_LxL * from_LxL



In [92]:
def compute_O_twins(mo, t):
    """Build the 3 x 3 matrix that maps true twin-pair types to measured ones.

    Exactly one of two rates is non-zero, depending on how the measured
    incidence ``mo`` compares with the true incidence ``t``:

    * ``mo > t``: w = (mo - t) / (1 - t), x = 0
    * ``mo < t``: x = 1 - mo / t,        w = 0
    * otherwise:  x = w = 0 (identity matrix)

    Rows index the pair type the matrix is applied to and columns the
    resulting pair type, both ordered [RR, RL, LL] (the matrix is used as
    ``T @ O`` in compute_M_twins).
    NOTE(review): w and x look like the R->L and L->R misclassification
    rates respectively — confirm against the appendix.

    Returns
    -------
    numpy.ndarray
        Array of shape (3, 3).
    """
    # Integer zeros on purpose: when mo == t the result stays an integer
    # identity matrix, exactly as before.
    w = 0
    x = 0
    if mo > t:
        w = (mo - t) / (1 - t)
    elif mo < t:
        x = 1 - (mo / t)

    keep_R = 1 - w
    keep_L = 1 - x

    row_RR = [np.power(keep_R, 2), 2 * w * keep_R, np.power(w, 2)]
    row_RL = [x * keep_R, keep_L * keep_R, keep_L * w]
    row_LL = [np.power(x, 2), 2 * x * keep_L, np.power(keep_L, 2)]

    return np.array([row_RR, row_RL, row_LL])

In [93]:
def compute_M_twins(T, O):
    """Return M = T @ O and check that every entry lies in [0, 1].

    Parameters
    ----------
    T, O : numpy.ndarray
        Operands of the matrix product; in this notebook T comes from
        compute_T_twins and O from compute_O_twins.

    Raises
    ------
    AssertionError
        If any entry of the product is outside [0, 1].
    """
    M = np.matmul(T, O)
    within_unit_interval = np.logical_and(M >= 0, M <= 1)
    assert within_unit_interval.all(), "M is the problem"
    return M

## Measure mo, the observed proportion of left-handers

In [94]:
def compute_mo(D):
    """Return mo = (D[1] + 2 * D[2]) / (2 * D.sum()) for one dataset.

    With D laid out as the [RR, RL, LL] pair counts used by the tables in
    this notebook, this is the share of left-handed individuals among all
    twins in the dataset.

    Parameters
    ----------
    D : numpy.ndarray
        Counts for a single dataset (must support ``.sum()``).
    """
    left_handers = D[1] + 2 * D[2]
    individuals = 2 * D.sum()
    return left_handers / individuals

## Expected table

In [95]:
# Generate expected table for the data using the manipulations in appendix 3:

def expected_table(θ, Observed):
    """Return the expected twin-pair counts for every observed dataset.

    Parameters
    ----------
    θ : sequence of length 2 or 3
        Model parameters ``(ρ, α, β)``, or ``(ρ, α)`` in which case β = 0.
    Observed : iterable of numpy.ndarray
        One array of three pair counts per dataset (a 2-D array with one
        row per dataset works).

    Returns
    -------
    numpy.ndarray
        One row of three expected counts per dataset; each row is the
        measured pair distribution M scaled by that dataset's total count.

    Raises
    ------
    ValueError
        If θ has neither 2 nor 3 entries.
    AssertionError
        If an entry of O or M falls outside [0, 1].
    """
    if len(θ) == 3:
        ρ, α, β = θ
    elif len(θ) == 2:
        ρ, α = θ
        β = 0
    else:
        raise ValueError(f"θ must have 2 or 3 entries, got {len(θ)}")

    # Pass the unpacked values rather than *θ: splatting a two-element θ
    # would call these three-argument functions with only two arguments.
    t = compute_t(ρ, α, β)
    T = compute_T_twins(ρ, α, β)

    Exp_table = []
    for D in Observed:
        # O depends on the dataset through its measured incidence mo.
        mo = compute_mo(D)
        O = compute_O_twins(mo, t)
        assert ((0 <= O) & (O <= 1)).all()

        # compute_M_twins already asserts that 0 <= M <= 1.
        M = compute_M_twins(T, O)

        # Scale the probabilities to this dataset's total number of pairs.
        Exp_table.append(M.reshape(3) * np.sum(D))

    return np.array(Exp_table)

# Test statistic (G-test)

In [96]:
def compute_gtest(θ, observed):
    """Run a G-test (log-likelihood ratio) of each dataset against the model.

    Expected counts come from ``expected_table(θ, observed)``. Both tables
    are transposed so that scipy's default ``axis=0`` reduces over the three
    pair categories, giving one statistic per dataset. ``ddof=1`` lowers
    the degrees of freedom by one relative to scipy's default.

    Parameters
    ----------
    θ : sequence
        Model parameters, forwarded to expected_table.
    observed : numpy.ndarray
        2-D array of observed counts, one row per dataset.

    Returns
    -------
    numpy.ndarray
        One row per dataset with columns (G, p-value), rounded to 2 decimals.
    """
    expected = expected_table(θ, observed)
    result = stats.power_divergence(
        observed.T, expected.T, ddof=1, lambda_='log-likelihood'
    )
    g_and_p = np.array([result.statistic, result.pvalue])
    return np.around(g_and_p.T, 2)


# Table:

In [97]:
# Parameter values passed as θ to compute_gtest / expected_table, which
# unpack a 3-tuple as (ρ, α, β).
# NOTE(review): this module-level `t` is the parameter tuple, not the
# incidence `t` returned by compute_t — the name is reused with a different
# meaning inside the functions above.
t = (0.277, 0.138, 0)

In [101]:
# compute_gtest returns one (G, p) row per dataset; transpose so the G column
# and the p-value column can be unpacked as two separate 1-D arrays.
mono_G, mono_p = np.transpose(compute_gtest(t, table_mono))
di_G, di_p = np.transpose(compute_gtest(t, table_di))

In [102]:
# Per-study summary for `table_mono`: measured incidence, observed counts,
# expected counts (rounded to 1 decimal), G statistic and p-value.
mono_data = np.column_stack([
    [compute_mo(ds) for ds in table_mono],
    table_mono,
    np.around(expected_table(t, table_mono), 1),
    mono_G,
    mono_p,
])
pd.DataFrame(
    mono_data,
    index=datasets_names,
    columns=[
        "mo",
        "obs RR", "obs RL", "obs LL",
        "exp RR", "exp RL", "exp LL",
        "G", "p-value",
    ],
)

Unnamed: 0,mo,obs RR,obs RL,obs LL,exp RR,exp RL,exp LL,G,p-value
Wilson & Jones (1932),0.107143,56.0,13.0,1.0,56.0,12.9,1.0,0.0,0.97
Stocks (1933),0.095238,35.0,6.0,1.0,34.5,7.0,0.5,0.57,0.45
Newman et al. (1937),0.19,34.0,13.0,3.0,33.0,15.1,2.0,0.79,0.37
Bouterwek (1938),0.188525,80.0,38.0,4.0,80.7,36.5,4.7,0.19,0.66
Rife (1940),0.118834,176.0,41.0,6.0,174.0,44.9,4.0,1.22,0.27
Thyss (1946),0.184466,72.0,24.0,7.0,68.9,30.3,3.9,3.62,0.06
Rife (1950),0.12828,261.0,76.0,6.0,262.0,74.1,7.0,0.19,0.66
Dechaume (1957),0.242424,19.0,12.0,2.0,19.0,11.9,2.0,0.0,0.97
Zazzo (1960),0.133205,199.0,51.0,9.0,195.6,57.8,5.6,2.66,0.1
Carter-Saltzmann et al. (1976),0.171123,132.0,46.0,9.0,129.1,51.7,6.1,1.9,0.17


In [103]:
# Per-study summary for `table_di`: measured incidence, observed counts,
# expected counts (rounded to 1 decimal), G statistic and p-value.
di_data = np.column_stack([
    [compute_mo(ds) for ds in table_di],
    table_di,
    np.around(expected_table(t, table_di), 1),
    di_G,
    di_p,
])
pd.DataFrame(
    di_data,
    index=datasets_names,
    columns=[
        "mo",
        "obs RR", "obs RL", "obs LL",
        "exp RR", "exp RL", "exp LL",
        "G", "p-value",
    ],
)

Unnamed: 0,mo,obs RR,obs RL,obs LL,exp RR,exp RL,exp LL,G,p-value
Wilson & Jones (1932),0.113821,97.0,24.0,2.0,97.0,23.9,2.0,0.0,0.97
Stocks (1933),0.106383,76.0,16.0,2.0,75.4,17.3,1.4,0.35,0.55
Newman et al. (1937),0.11,39.0,11.0,0.0,39.8,9.4,0.8,1.82,0.18
Bouterwek (1938),0.171429,23.0,12.0,0.0,24.2,9.7,1.2,2.86,0.09
Rife (1940),0.15411,104.0,39.0,3.0,105.0,37.0,4.0,0.39,0.53
Thyss (1946),0.162791,60.0,24.0,2.0,60.6,22.8,2.6,0.21,0.65
Rife (1950),0.116114,164.0,45.0,2.0,165.7,41.7,3.7,1.18,0.28
Dechaume (1957),0.19697,21.0,11.0,1.0,21.4,10.2,1.4,0.18,0.67
Zazzo (1960),0.108955,264.0,69.0,2.0,267.1,62.8,5.1,3.11,0.08
Carter-Saltzmann et al. (1976),0.193182,115.0,54.0,7.0,115.1,53.7,7.1,0.0,0.94


In [104]:
# Total G over all studies in both tables. The per-study values were already
# rounded to 2 decimals by compute_gtest, so this is a sum of rounded values.
(mono_G + di_G).sum()

35.68