In [43]:
import time
import random
import functools
import numpy as np
from causality_functions import jitter, dependent, simulations_data
from causality_functions import compute_regression, compute_correlation
from causality_functions import add_confidence_stats, confidence_graphs
from causality_functions import stats_graphs, check_generation
import collections

import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()

# ---------------------------------------------------------------------------
# Simulation settings: module-level globals read by the pathway functions.
# ---------------------------------------------------------------------------

# Second argument handed to simulations_data() for every simulated data set.
samplesize = 500

# Mean (..m) and standard deviation (..std) of the normal distribution from
# which each root variable's starting value is drawn.
Am, Bm, Cm = 60, 10, 30
Xm, Ym = 1, 1
Astd, Bstd, Cstd = 100, 15, 21
Xstd, Ystd = 1, 1

# Slope (m) and additive constant (k) of each dependency.
m1, k1 = 1, 0
m2, k2 = 1.25, 0
m3, k3 = 0.5, 0
m4, k4 = 1.5, 0

# Delays, counted in generations, used for the history look-ups.
d1, d2, d3, d4 = 0, 0, 0, 0

# Error magnitude handed to jitter()/dependent() for each variable.
errorA, errorB, errorC = 2, 1.5, 0.5
errorX, errorY = 0.75, 0.6

# (lower, upper) bounds from which update_slopes_errors() re-draws each slope.
mrange1 = (0, 2)
mrange2 = (0, 2)
mrange3 = (0, 2)
mrange4 = (0, 2)

# (lower, upper) bounds from which update_slopes_errors() re-draws each error.
erangeA = (0.2, 100)
erangeB = (0.2, 2000)
erangeC = (0.2, 2000)
erangeX = (0.2, 2)
erangeY = (0.2, 2)



def linear(n=100):
    """Simulate the chain A -> B -> C and return its final generation.

    A starts as a draw from a normal distribution (mean ``Am``, standard
    deviation ``Astd``) and is passed through ``jitter(..., errorA)`` once
    per generation.  B is ``dependent`` on A (``m1``, ``k1``, ``errorB``) and
    C is ``dependent`` on B (``m2``, ``k2``, ``errorC``).  Whenever
    ``check_generation(generation, d)`` is true, the source value is read
    from the stored history instead of the value computed in this generation.

    Args:
        n: number of generations to run; ``max(d1, d2)`` further generations
            are added on top.

    Returns:
        ``np.array`` built from the last ``(A, B, C)`` triple.
    """
    longest_delay = max([d1, d2])
    past = collections.deque(maxlen=longest_delay + 1)
    # Only A has a starting value; B and C are placeholders in the first row.
    past.append([np.random.normal(loc=Am, scale=Astd), 0, 0])

    def advance(generation):
        """Produce the next (A, B, C) triple from the stored history."""
        a_new = jitter(past[-1][0], errorA)  # previous A + error

        # NOTE(review): `1 - d` is 0 (the oldest stored row) for d == 1 and
        # -1 (the newest row) for d == 2, which does not look like a uniform
        # "d generations back" look-up -- confirm the intended index.
        a_source = past[1 - d1][0] if check_generation(generation, d1) else a_new
        b_new = dependent(a_source, m1, k1, error=errorB)

        b_source = past[1 - d2][1] if check_generation(generation, d2) else b_new
        c_new = dependent(b_source, m2, k2, error=errorC)

        return a_new, b_new, c_new

    for generation in range(n + longest_delay):
        past.append(advance(generation))

    return np.array(past[-1])


def radiating(n=100):
    """Simulate B driving both A and C (A <- B -> C); return the last generation.

    B starts as a draw from a normal distribution (mean ``Bm``, standard
    deviation ``Bstd``) and is passed through ``jitter(..., errorB)`` once
    per generation.  A is ``dependent`` on B (``m1``, ``k1``, ``errorA``) and
    C is ``dependent`` on B (``m2``, ``k2``, ``errorC``).  Whenever
    ``check_generation(generation, d)`` is true, B is read from the stored
    history instead of the value computed in this generation.

    Args:
        n: number of generations to run; ``max(d1, d2)`` further generations
            are added on top.

    Returns:
        ``np.array`` built from the last ``(A, B, C)`` triple.
    """

    # NOTE(review): in the look-ups below `1 - d` is 0 (the oldest stored row)
    # for d == 1 and -1 (the newest row) for d == 2, which does not look like
    # a uniform "d generations back" index -- confirm the intended index.

    def compute_B(history, generation):
        _, B, _ = history[-1]
        return jitter(B, errorB)

    def compute_A(B, history, generation):
        if check_generation(generation, d1):
            _, B, _ = history[1 - d1]
        return dependent(B, m1, k1, errorA)

    def compute_C(B, history, generation):
        # Exactly one dependent() call per generation: the previous version
        # also evaluated dependent() into an unused local on the delayed path.
        if check_generation(generation, d2):
            _, B, _ = history[1 - d2]
        return dependent(B, m2, k2, errorC)

    def next_generation(history, generation):
        B_ = compute_B(history, generation)
        A_ = compute_A(B_, history, generation)
        C_ = compute_C(B_, history, generation)
        return A_, B_, C_

    # Only B has a starting value; A and C are placeholders in the first row.
    B = np.random.normal(loc=Bm, scale=Bstd)
    longest_delay = max([d1, d2])
    history = collections.deque(maxlen=longest_delay + 1)
    history.append((0, B, 0))
    for i in range(n + longest_delay):
        history.append(next_generation(history, i))

    return np.array(history[-1])


def convergent(n=100):
    """Simulate A and C jointly driving B (A -> B <- C); return the last generation.

    A and C start as draws from normal distributions (``Am``/``Astd`` and
    ``Cm``/``Cstd``) and are each passed through ``jitter`` once per
    generation.  B is ``jitter(m1*A + m2*C + k1, errorB)`` where A and C come
    from the newest stored row, or from another stored row when
    ``check_generation(generation, d)`` is true.

    Args:
        n: number of generations to run; ``max(d1, d2)`` further generations
            are added on top.

    Returns:
        ``np.array`` built from the last ``(A, B, C)`` triple.
    """
    longest_delay = max([d1, d2])

    def advance(past, generation):
        """Produce the next (A, B, C) triple from the stored history."""
        newest = past[-1]

        # NOTE(review): `1 - d` is 0 (the oldest stored row) for d == 1 and
        # -1 (the newest row) for d == 2, which does not look like a uniform
        # "d generations back" look-up -- confirm the intended index.
        a_source = past[1 - d1][0] if check_generation(generation, d1) else newest[0]
        c_source = past[1 - d2][2] if check_generation(generation, d2) else newest[2]

        b_next = jitter(m1 * a_source + m2 * c_source + k1, errorB)

        # A and C themselves always evolve from the newest stored row.
        a_next = jitter(newest[0], errorA)
        c_next = jitter(newest[2], errorC)
        return a_next, b_next, c_next

    # B has no starting value; 0 is a placeholder in the first row.
    a_start = np.random.normal(loc=Am, scale=Astd)
    c_start = np.random.normal(loc=Cm, scale=Cstd)
    past = collections.deque(maxlen=longest_delay + 1)
    past.append((a_start, 0, c_start))

    for generation in range(n + longest_delay):
        past.append(tuple(advance(past, generation)))

    return np.array(past[-1])


def common_cause(n=100):
    """Simulate a hidden X driving A, B and C; return the last (A, B, C).

    X starts as a draw from a normal distribution (mean ``Xm``, standard
    deviation ``Xstd``) and is passed through ``jitter(..., errorX)`` once
    per generation.  A, B and C are each ``dependent`` on X with their own
    slope, constant and error (``m1/k1/errorA``, ``m2/k2/errorB``,
    ``m3/k3/errorC``).  Whenever ``check_generation(generation, d)`` is true,
    X is read from the stored history instead of the value computed in this
    generation.

    Args:
        n: number of generations to run; ``max(d1, d2, d3)`` further
            generations are added on top.

    Returns:
        ``np.array([A, B, C])`` from the last generation; X is not returned.
    """

    # NOTE(review): in the look-ups below `1 - d` is 0 (the oldest stored row)
    # for d == 1 and -1 (the newest row) for d == 2, which does not look like
    # a uniform "d generations back" index -- confirm the intended index.

    def compute_A(X, history, generation):
        if check_generation(generation, d1):
            _, _, _, X = history[1 - d1]
        return dependent(X, m1, k1, errorA)

    def compute_B(X, history, generation):
        if check_generation(generation, d2):
            _, _, _, X = history[1 - d2]
        return dependent(X, m2, k2, errorB)

    def compute_C(X, history, generation):
        if check_generation(generation, d3):
            _, _, _, X = history[1 - d3]
        return dependent(X, m3, k3, errorC)

    def next_generation(history, generation):
        _, _, _, X = history[-1]
        X_ = jitter(X, errorX)
        A_ = compute_A(X_, history, generation)
        B_ = compute_B(X_, history, generation)
        C_ = compute_C(X_, history, generation)
        # Store the jittered X_ so the hidden cause evolves from generation
        # to generation; returning the old X froze it at its starting value.
        return A_, B_, C_, X_

    # Only X has a starting value; A, B and C are placeholders in the first row.
    X = np.random.normal(loc=Xm, scale=Xstd)
    longest_delay = max([d1, d2, d3])
    history = collections.deque(maxlen=longest_delay + 1)
    history.append((0, 0, 0, X))
    for i in range(n + longest_delay):
        history.append(tuple(next_generation(history, i)))

    A, B, C, _ = history[-1]
    return np.array([A, B, C])


def single_difference_cause(n=100):
    """Simulate A -> B <- X -> C with hidden X; return the last (A, B, C).

    A and X start as draws from normal distributions (``Am``/``Astd`` and
    ``Xm``/``Xstd``) and are each passed through ``jitter`` once per
    generation.  B is ``jitter(m1*A + m2*X + k1, errorB)`` and C is
    ``dependent(X, m3, k3, errorC)``.  Source values are taken from the
    newest stored row, or from the row ``d`` generations before it when the
    corresponding delay (``d1`` for A->B, ``d2`` for X->B, ``d3`` for X->C)
    is positive and enough generations have elapsed.

    Changes from the previous version:
      * the delayed look-up uses ``history[-1 - d]``; the old ``history[1 - d]``
        returned the oldest stored row for d == 1 and the newest row (i.e. no
        delay at all) for d == 2;
      * C uses slope ``m3`` to go with ``k3``; it previously reused ``m2``,
        which is already the X->B slope.

    Args:
        n: number of generations to run; ``max(d1, d2, d3)`` further
            generations are added on top.

    Returns:
        ``np.array([A, B, C])`` from the last generation; X is not returned.
    """
    longest_delay = max([d1, d2, d3])

    def lagged(history, generation, delay):
        """Return the row `delay` generations before the newest stored row."""
        # `generation >= delay` guarantees at least delay + 1 rows are stored.
        if delay > 0 and generation >= delay:
            return history[-1 - delay]
        return history[-1]

    def next_generation(history, generation):
        A, _, _, X = history[-1]
        A_ = jitter(A, errorA)
        X_ = jitter(X, errorX)

        # B combines (possibly delayed) A and X.
        a_source = lagged(history, generation, d1)[0]
        x_for_b = lagged(history, generation, d2)[3]
        B_ = jitter(m1 * a_source + m2 * x_for_b + k1, errorB)

        # C depends on (possibly delayed) X only.
        x_for_c = lagged(history, generation, d3)[3]
        C_ = dependent(x_for_c, m3, k3, errorC)

        return A_, B_, C_, X_

    # B and C have no starting value; 0 is a placeholder in the first row.
    X = np.random.normal(loc=Xm, scale=Xstd)
    A = np.random.normal(loc=Am, scale=Astd)
    history = collections.deque(maxlen=longest_delay + 1)
    history.append((A, 0, 0, X))
    for i in range(n + longest_delay):
        history.append(tuple(next_generation(history, i)))

    A, B, C = history[-1][:3]
    return np.array([A, B, C])


def double_difference_cause(n=100):
    """Simulate A <- X -> B <- Y -> C with hidden X and Y; return the last (A, B, C).

    X and Y start as draws from normal distributions (``Xm``/``Xstd`` and
    ``Ym``/``Ystd``) and are each passed through ``jitter`` once per
    generation.  A is ``dependent(X, m1, k1, errorA)``, B is
    ``jitter(m2*X + m3*Y + k2, errorB)`` and C is
    ``dependent(Y, m4, k4, errorC)``.  Source values are taken from the
    newest stored row, or from the row ``d`` generations before it when the
    corresponding delay (``d1`` X->A, ``d2`` X->B, ``d3`` Y->B, ``d4`` Y->C)
    is positive and enough generations have elapsed.

    Changes from the previous version:
      * the delayed look-up uses ``history[-1 - d]``; the old ``history[1 - d]``
        returned the oldest stored row for d == 1 and the newest row (i.e. no
        delay at all) for d == 2;
      * all four delays use the same ``d > 0 and generation >= d`` guard;
      * two inner helpers that were never called have been removed.

    Args:
        n: number of generations to run; ``max(d1, d2, d3, d4)`` further
            generations are added on top.

    Returns:
        ``np.array([A, B, C])`` from the last generation; X and Y are not
        returned.
    """
    longest_delay = max([d1, d2, d3, d4])

    def lagged(history, generation, delay):
        """Return the row `delay` generations before the newest stored row."""
        # `generation >= delay` guarantees at least delay + 1 rows are stored.
        if delay > 0 and generation >= delay:
            return history[-1 - delay]
        return history[-1]

    def next_generation(history, generation):
        _, _, _, X, Y = history[-1]
        X_ = jitter(X, errorX)
        Y_ = jitter(Y, errorY)

        # A depends on (possibly delayed) X.
        A_ = dependent(lagged(history, generation, d1)[3], m1, k1, errorA)

        # B combines (possibly delayed) X and Y.
        x_for_b = lagged(history, generation, d2)[3]
        y_for_b = lagged(history, generation, d3)[4]
        B_ = jitter(m2 * x_for_b + m3 * y_for_b + k2, errorB)

        # C depends on (possibly delayed) Y.
        C_ = dependent(lagged(history, generation, d4)[4], m4, k4, errorC)

        return A_, B_, C_, X_, Y_

    # A, B and C have no starting value; 0 is a placeholder in the first row.
    X = np.random.normal(loc=Xm, scale=Xstd)
    Y = np.random.normal(loc=Ym, scale=Ystd)
    history = collections.deque(maxlen=longest_delay + 1)
    history.append((0, 0, 0, X, Y))

    for i in range(n + longest_delay):
        history.append(tuple(next_generation(history, i)))

    A, B, C = history[-1][:3]
    return np.array([A, B, C])

def update_slopes_errors():
    """Re-draw the global slopes and error magnitudes.

    Each of ``m1``..``m4`` is replaced by a uniform draw from the matching
    ``mrange1``..``mrange4`` bounds, then each of ``errorA``, ``errorB``,
    ``errorC``, ``errorX`` and ``errorY`` by a uniform draw from the matching
    ``erange*`` bounds.  Draws come from the ``random`` module in exactly
    that order.  Nothing is returned; the module-level globals are rebound.
    """
    global m1, m2, m3, m4
    global errorA, errorB, errorC, errorX, errorY

    slope_bounds = (mrange1, mrange2, mrange3, mrange4)
    error_bounds = (erangeA, erangeB, erangeC, erangeX, erangeY)

    # random.uniform(a, b) evaluates a + (b - a) * random.random().
    m1, m2, m3, m4 = [random.uniform(low, high) for low, high in slope_bounds]
    errorA, errorB, errorC, errorX, errorY = [
        random.uniform(low, high) for low, high in error_bounds
    ]
    
def overall_simulation(n=100):
    """Run ``n`` simulations, each with freshly drawn slopes and errors.

    Every run calls ``update_slopes_errors()``, obtains a data set from
    ``simulations_data(common_cause, samplesize)`` and merges the results of
    ``compute_regression`` and ``compute_correlation`` for that data set
    into one record.

    Args:
        n: number of runs.

    Returns:
        A ``(stats, data)`` pair: a ``pd.DataFrame`` with one row per run and
        an ``np.array`` stacking the data set of every run.
    """

    def run_once():
        """Perform one run and return its (record, data set) pair."""
        update_slopes_errors()
        sample = simulations_data(common_cause, samplesize)  # you can change pathway here
        record = compute_regression(sample)
        record.update(compute_correlation(sample))
        return record, sample

    runs = [run_once() for _ in range(n)]
    records = [record for record, _ in runs]
    samples = [sample for _, sample in runs]
    return pd.DataFrame(records), np.array(samples)

# Run the default batch of simulations once: `d` receives the per-run
# statistics DataFrame and `ABC_all` the stacked data sets of every run.
d, ABC_all = overall_simulation()

In [44]:
# NOTE(review): the return value is discarded, so add_confidence_stats
# presumably adds its columns to `d` in place (the table printed from `d`
# further down contains confidence_* columns) -- confirm.
add_confidence_stats(d, ABC_all)
# Last expression of the cell: whatever confidence_graphs returns is displayed.
confidence_graphs(d)

In [45]:
# Last expression of the cell: whatever stats_graphs returns for the
# statistics frame `d` is displayed.
stats_graphs(d)

In [46]:
# Pick one simulated data set at random and scatter-plot every pair of its
# three variables, stacked vertically as A-B, B-C and A-C.
A, B, C = random.choice(ABC_all).T

columns = {"A": A, "B": B, "C": C}
AB, BC, AC = [
    alt.Chart(pd.DataFrame({x_name: columns[x_name], y_name: columns[y_name]}))
    .mark_circle()
    .encode(x=x_name, y=y_name)
    for x_name, y_name in (("A", "B"), ("B", "C"), ("A", "C"))
]

alt.vconcat(AB, BC, AC)

In [113]:
# Display the statistics frame (rich repr of the cell's last expression).
d

Unnamed: 0,kAB,kBC,kAC,mAB,mBC,mAC,r_sqrAB,r_sqrBC,r_sqrAC,r_E,...,rAB,rBC,rAC,rAB2*rBC2-rAC2,r_E_BA_C2-rBC2,mAB*mBC-mAC,confidence_rAC,confidence_residual_corr,confidence_corrected_bc_corr,confidence_slope_AC
0,-1.794643,-76.836668,-54.045136,1.661823,1.130116,1.420961,0.042606,0.425623,0.010381,0.009123,...,0.206413,0.652398,0.101888,0.007753,-0.425615,0.457092,within,within,less,within
1,67.666493,37.487525,110.859609,1.141020,1.231430,1.606900,0.105753,0.523343,0.072385,-0.016831,...,0.325196,0.723425,0.269045,-0.017040,-0.516432,-0.201815,within,within,less,within
2,-53.842500,-207.302004,-316.825645,1.571632,1.759806,2.988175,0.226683,0.773668,0.204717,-0.038325,...,0.476112,0.879584,0.452457,-0.029340,-0.769779,-0.222408,within,within,less,within
3,-1.509609,-45.408777,-46.736456,0.830599,2.300848,1.874881,0.962302,0.016285,0.015083,0.012080,...,0.980970,0.127613,0.122813,0.000588,-0.014618,0.036202,within,within,within,within
4,0.281691,4.094682,5.144132,0.868524,1.957798,1.689654,0.188128,0.990736,0.184038,0.013638,...,0.433737,0.995357,0.428996,0.002347,-0.987423,0.010740,within,within,less,within
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-3.896105,-32.008655,-47.575962,0.028256,1.665921,0.154963,0.000105,0.641846,0.000728,-0.000321,...,0.010227,0.801153,0.026972,-0.000660,-0.635531,-0.107890,within,within,less,within
96,-7.582013,-31.536262,-35.319027,1.327027,0.479029,0.638228,0.908367,0.114933,0.105239,-0.004328,...,0.953083,0.339018,0.324405,-0.000837,-0.112303,-0.002544,within,within,less,within
97,3.206236,-4.170976,-2.627108,1.900795,1.693345,3.294989,0.136606,0.821414,0.117592,-0.007473,...,0.369603,0.906319,0.342917,-0.005382,-0.821224,-0.076287,within,within,less,within
98,-4.697098,0.803724,-4.460355,1.893068,1.384208,2.598604,0.086963,0.999047,0.085441,0.024513,...,0.294896,0.999524,0.292303,0.001439,-0.984212,0.021796,within,within,less,within
