In [6]:
import time
import random
import functools
import numpy as np
from causality_functions import jitter, dependent, simulations_data
from causality_functions import compute_regression, compute_correlation
from causality_functions import add_confidence_stats, confidence_graphs
from causality_functions import stats_graphs
import collections

import pandas as pd
import altair as alt
# Turn off Altair's row limit on chart data (see the Altair data-transformer
# documentation for `disable_max_rows`).
alt.data_transformers.disable_max_rows()

# ---------------------------------------------------------------------------
# Simulation settings (module-level; read as globals by the pathway functions)
# ---------------------------------------------------------------------------

# Size argument handed to simulations_data() for every simulated run.
samplesize = 500

# Means of the normal distributions the starting values are drawn from.
Am = 60
Bm = 10
Cm = 30
Xm = 1
Ym = 1

# Standard deviations of those starting distributions.
Astd = 11
Bstd = 15
Cstd = 21
Xstd = 1
Ystd = 1

# Slope (m) / intercept (k) pairs used by the pathway equations.
# The slopes are overwritten on every call to update_slopes_errors().
m1 = 1
k1 = 0
m2 = 1.25
k2 = 0
m3 = 0.5
k3 = 0
m4 = 1.5
k4 = 0

# Look-back settings: the pathways read history[1 - d] and keep
# max(d...) generations in their history buffer.
d1 = 1
d2 = 1
d3 = 1
d4 = 1

# Error arguments passed to jitter() / dependent() for each variable.
# Also overwritten on every call to update_slopes_errors().
errorA = 2
errorB = 1.5
errorC = 0.5
errorX = 0.75
errorY = 0.6

# (lower, upper) bounds from which update_slopes_errors() draws each slope.
mrange1 = (0, 2)
mrange2 = (0, 2)
mrange3 = (0, 2)
mrange4 = (0, 2)

# (lower, upper) bounds from which update_slopes_errors() draws each error.
erangeA = (0.2, 100)
erangeB = (0.2, 2000)
erangeC = (0.2, 2000)
erangeX = (0.2, 2)
erangeY = (0.2, 2)



def linear(n=100):
    """Simulate the linear pathway A -> B -> C and return the last generation.

    A starts as a draw from a normal distribution (mean ``Am``, standard
    deviation ``Astd``) and is passed through ``jitter`` once per generation,
    each time starting from the previous generation's A. B is produced from A
    and C from B via ``dependent``. During the first generations the freshly
    computed values are used directly; afterwards the look-back settings
    ``d1`` / ``d2`` select which stored generation feeds B / C.

    Args:
        n: Number of generations to run in addition to the ``max(d1, d2)``
            initial generations.

    Returns:
        numpy array ``[A, B, C]`` holding the final generation.
    """
    lookback = max(d1, d2)

    def next_generation(A, history, generation):
        A_ = jitter(A, errorA)  # previous A + error

        # While too few generations are stored, B follows the fresh A_;
        # afterwards it follows the A stored at history[1 - d1].
        # NOTE(review): for d == 1 the index 1 - d is 0, the oldest retained
        # entry; that equals the newest entry only when every delay is 1.
        if generation <= d1:
            B_ = dependent(A_, m1, k1, error=errorB)
        else:
            delayed_A, _, _ = history[1 - d1]
            B_ = dependent(delayed_A, m1, k1, error=errorB)

        if generation <= d2:
            C_ = dependent(B_, m2, k2, error=errorC)
        else:
            _, delayed_B, _ = history[1 - d2]
            C_ = dependent(delayed_B, m2, k2, error=errorC)

        return A_, B_, C_

    # normal distribution, Am - mean and Astd - standard deviation
    A = np.random.normal(loc=Am, scale=Astd)
    history = collections.deque(maxlen=lookback)
    for i in range(n + lookback):
        latest = next_generation(A, history, i)
        history.append(latest)
        # Carry A forward so the next generation jitters the *previous* A.
        # Previously the initial draw was re-used every time, so A never
        # evolved and `n` had no real effect on the result.
        A = latest[0]

    return np.array(history[-1])


def radiating(n=100):
    """Simulate the radiating pathway, where B feeds both A and C.

    B starts as a draw from a normal distribution (mean ``Bm``, standard
    deviation ``Bstd``). ``max(d1, d2)`` seed generations are produced from
    the running B value; after that, ``n`` further generations are produced
    from the stored history, with ``d1`` / ``d2`` choosing which stored B
    feeds A / C.

    Args:
        n: Number of history-driven generations to run after seeding.

    Returns:
        numpy array ``[A, B, C]`` holding the final generation.
    """
    depth = max([d1, d2])

    def seed_step(b_prev):
        # B moves first; A and C are both derived from the new B.
        b_new = jitter(b_prev, errorB)
        a_new = dependent(b_new, m1, k1, errorA)
        c_new = dependent(b_new, m2, k2, errorC)
        return a_new, b_new, c_new

    def delayed_step(past):
        # New B continues from the most recent stored B.
        b_new = jitter(past[-1][1], errorB)
        # NOTE(review): for d == 1 the index 1 - d is 0, the oldest retained
        # entry; it matches the newest only when all delays are 1.
        a_new = dependent(past[1 - d1][1], m1, k1, errorA)
        c_new = dependent(past[1 - d2][1], m2, k2, errorC)
        return a_new, b_new, c_new

    # normal distribution, Bm - mean and Bstd - standard deviation
    b_current = np.random.normal(loc=Bm, scale=Bstd)
    past = collections.deque(maxlen=depth)

    for _ in range(depth):
        seeded = seed_step(b_current)
        past.append(seeded)
        b_current = seeded[1]

    for _ in range(n):
        past.append(delayed_step(past))

    return np.array(past[-1])


def convergent(n=100):
    """Simulate the convergent pathway, where A and C jointly feed B.

    A and C start as draws from normal distributions (``Am``/``Astd`` and
    ``Cm``/``Cstd``). B is ``jitter(m1*A + m2*C + k1, errorB)``; A and C are
    each passed through ``jitter`` once per generation. ``max(d1, d2)`` seed
    generations are produced first, then ``n`` generations driven by the
    stored history, with ``d1`` / ``d2`` choosing which stored A / C feed B.

    Args:
        n: Number of history-driven generations to run after seeding.

    Returns:
        numpy array ``[A, B, C]`` holding the final generation.
    """
    depth = max([d1, d2])

    def seed_step(a_prev, c_prev):
        # B is computed from the incoming A and C before they are jittered.
        b_new = jitter(m1 * a_prev + m2 * c_prev + k1, errorB)
        a_new = jitter(a_prev, errorA)
        c_new = jitter(c_prev, errorC)
        return a_new, b_new, c_new

    def delayed_step(past):
        # NOTE(review): for d == 1 the index 1 - d is 0, the oldest retained
        # entry; it matches the newest only when all delays are 1.
        a_source = past[1 - d1][0]
        c_source = past[1 - d2][2]
        b_new = jitter(m1 * a_source + m2 * c_source + k1, errorB)
        # A and C always continue from the most recent generation.
        a_last, _, c_last = past[-1]
        a_new = jitter(a_last, errorA)
        c_new = jitter(c_last, errorC)
        return a_new, b_new, c_new

    # normal distributions: (Am, Astd) for A and (Cm, Cstd) for C
    a_current = np.random.normal(loc=Am, scale=Astd)
    c_current = np.random.normal(loc=Cm, scale=Cstd)
    past = collections.deque(maxlen=depth)

    for _ in range(depth):
        seeded = seed_step(a_current, c_current)
        past.append(seeded)
        a_current, _, c_current = seeded

    for _ in range(n):
        past.append(delayed_step(past))

    return np.array(past[-1])


def common_cause(n=100):
    """Simulate the common-cause pathway, where hidden X feeds A, B and C.

    X starts as a draw from a normal distribution (mean ``Xm``, standard
    deviation ``Xstd``) and is passed through ``jitter`` each generation.
    A, B and C are each produced from X by ``dependent`` with their own
    slope/intercept/error. ``max(d1, d2, d3)`` seed generations are produced
    first, then ``n`` generations driven by the stored history, with
    ``d1`` / ``d2`` / ``d3`` choosing which stored X feeds A / B / C.

    Args:
        n: Number of history-driven generations to run after seeding.

    Returns:
        numpy array ``[A, B, C]`` of the final generation (X is dropped).
    """
    depth = max([d1, d2, d3])

    def seed_step(x_prev):
        x_new = jitter(x_prev, errorX)
        a_new = dependent(x_new, m1, k1, errorA)
        b_new = dependent(x_new, m2, k2, errorB)
        c_new = dependent(x_new, m3, k3, errorC)
        return a_new, b_new, c_new, x_new

    def delayed_step(past):
        # X continues from the most recent stored X.
        x_new = jitter(past[-1][3], errorX)
        # NOTE(review): for d == 1 the index 1 - d is 0, the oldest retained
        # entry; it matches the newest only when all delays are 1.
        a_new = dependent(past[1 - d1][3], m1, k1, errorA)
        b_new = dependent(past[1 - d2][3], m2, k2, errorB)
        c_new = dependent(past[1 - d3][3], m3, k3, errorC)
        return a_new, b_new, c_new, x_new

    # normal distribution, Xm - mean and Xstd - standard deviation
    x_current = np.random.normal(loc=Xm, scale=Xstd)
    past = collections.deque(maxlen=depth)

    for _ in range(depth):
        seeded = seed_step(x_current)
        past.append(seeded)
        x_current = seeded[3]

    for _ in range(n):
        past.append(delayed_step(past))

    final_a, final_b, final_c, _ = past[-1]
    return np.array([final_a, final_b, final_c])


def single_difference_cause(n=100):
    """Simulate the pathway where A and hidden X feed B, and X alone feeds C.

    X and A start as draws from normal distributions (``Xm``/``Xstd`` and
    ``Am``/``Astd``) and are each passed through ``jitter`` per generation.
    B is ``jitter(m1*A + m2*X + k1, errorB)`` and C is
    ``dependent(X, m3, k3, errorC)``. ``max(d1, d2, d3)`` seed generations
    are produced first, then ``n`` generations driven by the stored history,
    with ``d1`` / ``d2`` choosing which stored A / X feed B and ``d3`` which
    stored X feeds C.

    Args:
        n: Number of history-driven generations to run after seeding.

    Returns:
        numpy array ``[A, B, C]`` of the final generation (X is dropped).
    """

    def next_generation(A, X):
        X = jitter(X, errorX)
        A = jitter(A, errorA)
        B = jitter(m1*A + m2*X + k1, errorB)
        C = dependent(X, m3, k3, errorC)
        return A, B, C, X

    def next_generation_delayed(history):
        # A and X continue from the most recent stored generation.
        A, _, _, X = history[-1]
        A_ = jitter(A, errorA)
        X_ = jitter(X, errorX)

        # NOTE(review): for d == 1 the index 1 - d is 0, the oldest retained
        # entry; it matches the newest only when all delays are 1.
        A, _, _, _ = history[1-d1]
        _, _, _, X = history[1-d2]
        B_ = jitter(m1*A + m2*X + k1, errorB)

        _, _, _, X = history[1-d3]
        # C uses slope m3, matching next_generation above. This step used to
        # pass m2 (X's slope in the B equation), so seeded and delayed
        # generations followed different X -> C relationships.
        C_ = dependent(X, m3, k3, errorC)

        return A_, B_, C_, X_

    X = np.random.normal(loc=Xm, scale=Xstd) # normal distribution, Xm - mean and Xstd - standard deviation
    A = np.random.normal(loc=Am, scale=Astd) # normal distribution, Am - mean and Astd - standard deviation
    history = collections.deque(maxlen=max([d1, d2, d3]))
    for i in range(max([d1, d2, d3])):
        A, B, C, X = next_generation(A, X)
        history.append((A, B, C, X))

    for i in range(n):
        history.append(next_generation_delayed(history))

    A, B, C, X = history[-1]
    return np.array([A, B, C])


def double_difference_cause(n=100):
    """Simulate the pathway with two hidden causes: X -> A, (X, Y) -> B, Y -> C.

    X and Y start as draws from normal distributions (``Xm``/``Xstd`` and
    ``Ym``/``Ystd``) and are each passed through ``jitter`` per generation.
    A is ``dependent(X, m1, k1, errorA)``, B is
    ``jitter(m2*X + m3*Y + k2, errorB)`` and C is
    ``dependent(Y, m4, k4, errorC)``. ``max(d1, d2, d3, d4)`` seed
    generations are produced first, then ``n`` generations driven by the
    stored history, with ``d1``..``d4`` choosing the stored X / Y used.

    Args:
        n: Number of history-driven generations to run after seeding.

    Returns:
        numpy array ``[A, B, C]`` of the final generation (X, Y are dropped).
    """
    depth = max([d1, d2, d3, d4])

    def seed_step(x_prev, y_prev):
        x_new = jitter(x_prev, errorX)
        y_new = jitter(y_prev, errorY)
        a_new = dependent(x_new, m1, k1, errorA)
        b_new = jitter(m2 * x_new + m3 * y_new + k2, errorB)
        c_new = dependent(y_new, m4, k4, errorC)
        return a_new, b_new, c_new, x_new, y_new

    def delayed_step(past):
        # X and Y continue from the most recent stored generation.
        _, _, _, x_last, y_last = past[-1]
        x_new = jitter(x_last, errorX)
        y_new = jitter(y_last, errorY)

        # NOTE(review): for d == 1 the index 1 - d is 0, the oldest retained
        # entry; it matches the newest only when all delays are 1.
        a_new = dependent(past[1 - d1][3], m1, k1, errorA)

        x_for_b = past[1 - d2][3]
        y_for_b = past[1 - d3][4]
        b_new = jitter(m2 * x_for_b + m3 * y_for_b + k2, errorB)

        c_new = dependent(past[1 - d4][4], m4, k4, errorC)
        return a_new, b_new, c_new, x_new, y_new

    # normal distributions: (Xm, Xstd) for X and (Ym, Ystd) for Y
    x_current = np.random.normal(loc=Xm, scale=Xstd)
    y_current = np.random.normal(loc=Ym, scale=Ystd)
    past = collections.deque(maxlen=depth)

    for _ in range(depth):
        seeded = seed_step(x_current, y_current)
        past.append(seeded)
        x_current, y_current = seeded[3], seeded[4]

    for _ in range(n):
        past.append(delayed_step(past))

    final_a, final_b, final_c, _, _ = past[-1]
    return np.array([final_a, final_b, final_c])

def update_slopes_errors():
    """Re-draw the global slopes and error settings uniformly at random.

    Each of ``m1``..``m4`` is drawn from its ``mrangeN`` bounds and each of
    ``errorA``..``errorY`` from its ``erange*`` bounds, in that order, using
    the ``random`` module. The module-level globals are overwritten in place;
    nothing is returned.
    """
    global m1, m2, m3, m4
    global errorA, errorB, errorC, errorX, errorY

    slope_bounds = (mrange1, mrange2, mrange3, mrange4)
    error_bounds = (erangeA, erangeB, erangeC, erangeX, erangeY)

    # Generators are consumed left to right, so the draw order stays
    # m1, m2, m3, m4, errorA, errorB, errorC, errorX, errorY.
    m1, m2, m3, m4 = (random.uniform(low, high) for low, high in slope_bounds)
    errorA, errorB, errorC, errorX, errorY = (
        random.uniform(low, high) for low, high in error_bounds
    )
    
def overall_simulation(n=100, pathway=None):
    """Run ``n`` independent simulations and collect their statistics.

    For each run the global slopes/errors are re-drawn with
    ``update_slopes_errors()``, a dataset is produced by
    ``simulations_data(pathway, samplesize)``, and the results of
    ``compute_regression`` and ``compute_correlation`` on that dataset are
    merged into one record.

    Args:
        n: Number of simulation runs.
        pathway: Pathway function handed to ``simulations_data`` (for example
            ``linear``, ``radiating``, ``convergent``). Defaults to
            ``linear`` when omitted, which matches the previous hard-coded
            behaviour.

    Returns:
        Tuple ``(stats, data)``: a DataFrame with one row of statistics per
        run, and a numpy array stacking the dataset of every run.
    """
    if pathway is None:
        # Resolved at call time so the default keeps tracking `linear`.
        pathway = linear

    stats = []
    data = []
    for _ in range(n):
        update_slopes_errors()
        ABC = simulations_data(pathway, samplesize)
        record = compute_regression(ABC)
        record.update(compute_correlation(ABC))
        stats.append(record)
        data.append(ABC)
    return pd.DataFrame(stats), np.array(data)

# Run the batch of simulations with the default number of runs:
# `d` is the DataFrame of per-run statistics and `ABC_all` the array holding
# every run's simulated dataset.
d, ABC_all = overall_simulation()

In [7]:
# Helpers imported from causality_functions. The return value of
# add_confidence_stats is discarded, so it presumably adds its columns to `d`
# in place — TODO confirm against the helper.
add_confidence_stats(d, ABC_all)
# Last expression of the cell, so whatever confidence_graphs returns is displayed.
confidence_graphs(d)

In [8]:
# Helper imported from causality_functions; its return value is displayed as
# the cell output (presumably charts of the per-run statistics in `d`).
stats_graphs(d)

In [9]:
# Pick one simulated run at random and split it into its A, B and C series.
A, B, C = random.choice(ABC_all).transpose()


def _scatter(x_name, x_values, y_name, y_values):
    """Return a circle-mark scatter chart of `y_name` against `x_name`."""
    frame = pd.DataFrame({x_name: x_values, y_name: y_values})
    return alt.Chart(frame).mark_circle().encode(x=x_name, y=y_name)


# One chart per pair of variables.
AB = _scatter("A", A, "B", B)
BC = _scatter("B", B, "C", C)
AC = _scatter("A", A, "C", C)

# Stack the three charts vertically; as the last expression it is displayed.
alt.vconcat(AB, BC, AC)

In [5]:
# Display the statistics DataFrame (one row per simulation run).
d

Unnamed: 0,kAB,kBC,kAC,mAB,mBC,mAC,r_sqrAB,r_sqrBC,r_sqrAC,r_E,...,rAB,rBC,rAC,rAB2*rBC2-rAC2,r_E_BA_C2-rBC2,mAB*mBC-mAC,confidence,confidence_residual_corr,confidence_corrected_bc_corr,confidence_slope_AC
0,60.683624,-100.651652,42.908000,2.414466,0.650059,-0.185284,0.061610,0.915933,0.000786,0.234716,...,0.248214,0.957044,-0.028043,0.055644,-0.915847,1.754829,within,less,less,within
1,11.424355,-4.325276,1.276327,1.878003,0.241719,0.406638,0.010738,0.991997,0.008547,0.012527,...,0.103625,0.995990,0.092452,0.002105,-0.983370,0.047312,within,within,less,within
2,64.939200,-70.883552,44.955280,1.716585,0.628239,-0.135778,0.079157,0.890334,0.001117,0.264629,...,0.281348,0.943575,-0.033424,0.069359,-0.888623,1.214204,within,less,less,within
3,4.913883,28.654438,40.474295,3.982118,0.020773,-0.114641,0.988654,0.000582,0.001105,0.534243,...,0.994311,0.024117,-0.033234,-0.000529,0.002931,0.197360,within,less,within,within
4,84.803577,-80.549124,26.096605,4.568833,0.297109,0.006430,0.063502,0.928842,0.000001,0.235958,...,0.251997,0.963765,0.001150,0.058982,-0.928827,1.351009,within,less,less,within
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.865546,-106.950196,11.929205,0.895537,2.484560,0.294240,0.764408,0.280844,0.003754,0.854015,...,0.874304,0.529947,0.061272,0.210925,-0.279440,1.930775,more,less,less,within
96,130.020440,-32.012696,30.878944,3.739193,0.173912,-0.022364,0.269115,0.715391,0.000228,0.516233,...,0.518763,0.845808,-0.015090,0.192295,-0.705148,0.672654,more,less,less,within
97,32.519756,-1.294983,14.919079,0.995186,0.358392,0.280059,0.002451,0.991709,0.001499,0.005765,...,0.049509,0.995846,0.038714,0.000932,-0.972474,0.076608,within,within,less,within
98,-28.145597,-34.633820,-4.470664,5.585680,0.239830,0.729880,0.054149,0.979381,0.015743,0.174654,...,0.232700,0.989637,0.125471,0.037290,-0.979041,0.609734,within,less,less,within
