In [11]:
import time
import random
import numpy as np

import altair as alt
import pandas as pd
import collections
from causality_functions import jitter, dependent, simulations_data
from causality_functions import compute_regression, compute_correlation
from causality_functions import add_confidence_stats, confidence_graphs
from causality_functions import stats_graphs, check_generation

# Lift Altair's default row limit so the full simulated datasets can be charted.
alt.data_transformers.disable_max_rows()

# Seed both global generators so that "Restart Kernel -> Run All" reproduces
# the same simulation. This cell draws from `random` (update_slopes /
# update_errors) and from `np.random` (initial A); the helpers imported from
# causality_functions presumably draw from one of the two -- TODO confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Passed to simulations_data() for every simulated dataset.
samplesize = 500

# Means and standard deviations. Only Am/Astd are read in this cell (initial
# draw of A in the pathway functions).
Am, Bm, Cm = 40, 10, 30
Xm, Ym = 1, 1
Astd, Bstd, Cstd = 7, 5, 11
Xstd, Ystd = 1, 1

# Linear links: mN is the multiplier (slope) and kN the additive constant.
# m1..m4 are redrawn by update_slopes(); m4/k3/k4 are not read in this cell.
m1, k1 = 1, -1.0
m2, k2 = 1.5, 2.0
m3, k3 = 1, -1.5
m4, k4 = .75, 0.0

# Delays: how many generations back (relative to the latest history entry) a
# dependency is read. d4 is not read in this cell.
d1 = 1
d2 = 3
d3 = 1
d4 = 1

# Error magnitudes handed to jitter()/dependent(); redrawn by update_errors().
# errorX/errorY are not otherwise read in this cell.
errorA = 4
errorB = 8
errorC = 2
errorX = 0.75
errorY = 0.6

# (lower, upper) bounds from which update_slopes() draws m1..m4.
mrange1 = (0, 5)
mrange2 = (0, 5)
mrange3 = (0, 5)
mrange4 = (0, 5)

# (lower, upper) bounds from which update_errors() draws the error magnitudes.
erangeA = (0.2, 2)
erangeB = (0.2, 2)
erangeC = (0.2, 10)
erangeX = (0.2, 2)
erangeY = (0.2, 2)


def linear_positive_C_to_B(n=100):
    """Simulate one (A, B, C) sample in which B depends on A and C, and C on B.

    The state starts as ``(A, 0, 0)`` with A drawn from a normal distribution
    (mean ``Am``, standard deviation ``Astd``) and is advanced for
    ``n + max(d1, d2, d3)`` generations using:

    * ``A_next = jitter(A_latest, errorA)``
    * ``B_next = jitter(A[d1 back]*m1 + m3*C[d3 back] + k1, errorB)``
    * ``C_next = dependent(B[d2 back], m2, k2, error=errorC)``

    "``d`` back" means ``d`` entries before the latest history entry. Slopes,
    intercepts, delays and error sizes are module globals read at call time,
    so ``update_slopes``/``update_errors`` affect subsequent calls.

    Args:
        n: Number of generations to run on top of the ``max(d1, d2, d3)``
            warm-up generations.

    Returns:
        ``np.ndarray`` holding the final ``(A, B, C)`` triple.
    """
    max_delay = max(d1, d2, d3)

    def lagged(history, generation, delay):
        # Use the delayed entry only once check_generation() allows it;
        # until then fall back to the latest entry.
        # NOTE(review): assumes check_generation(generation, delay) is truthy
        # exactly when `delay` earlier entries exist -- confirm in
        # causality_functions.
        if check_generation(generation, delay):
            return history[-1 - delay]
        return history[-1]

    def compute_A(history, generation):
        previous_A, _, _ = history[-1]
        return jitter(previous_A, errorA)  # previous A + error

    def compute_B(history, generation):
        # A and C are each guarded by their own delay, so d3 > d1 cannot index
        # past the start of the history during the first generations.
        A, _, _ = lagged(history, generation, d1)
        _, _, C = lagged(history, generation, d3)
        return jitter(A*m1 + m3*C + k1, errorB)

    def compute_C(history, generation):
        _, B, _ = lagged(history, generation, d2)
        return dependent(B, m2, k2, error=errorC)

    def next_generation(history, generation):
        # All three values are computed from the same (old) history before the
        # new triple is appended by the caller.
        return (
            compute_A(history, generation),
            compute_B(history, generation),
            compute_C(history, generation),
        )

    # Initial A: normal distribution with mean Am and standard deviation Astd.
    A = np.random.normal(loc=Am, scale=Astd)
    # Only the last max_delay + 1 states are ever read, so older ones may drop.
    history = collections.deque(maxlen=max_delay + 1)
    history.append((A, 0, 0))
    for generation in range(n + max_delay):
        history.append(next_generation(history, generation))

    return np.array(history[-1])


def linear_positive_A_to_C(n=100):
    """Simulate one (A, B, C) sample in which B depends on A, and C on A and B.

    The state starts as ``(A, 0, 0)`` with A drawn from a normal distribution
    (mean ``Am``, standard deviation ``Astd``) and is advanced for
    ``n + max(d1, d2, d3)`` generations using:

    * ``A_next = jitter(A_latest, errorA)``
    * ``B_next = dependent(A[d1 back], m1, k1, errorB)``
    * ``C_next = jitter(A[d3 back]*m3 + B[d2 back]*m2 + k2, errorC)``

    "``d`` back" means ``d`` entries before the latest history entry. Slopes,
    intercepts, delays and error sizes are module globals read at call time,
    so ``update_slopes``/``update_errors`` affect subsequent calls.

    Args:
        n: Number of generations to run on top of the ``max(d1, d2, d3)``
            warm-up generations.

    Returns:
        ``np.ndarray`` holding the final ``(A, B, C)`` triple.
    """
    max_delay = max(d1, d2, d3)

    def lagged(history, generation, delay):
        # Use the delayed entry only once check_generation() allows it;
        # until then fall back to the latest entry.
        # NOTE(review): assumes check_generation(generation, delay) is truthy
        # exactly when `delay` earlier entries exist -- confirm in
        # causality_functions.
        if check_generation(generation, delay):
            return history[-1 - delay]
        return history[-1]

    def compute_A(history, generation):
        previous_A, _, _ = history[-1]
        return jitter(previous_A, errorA)  # previous A + error

    def compute_B(history, generation):
        A, _, _ = lagged(history, generation, d1)
        return dependent(A, m1, k1, errorB)

    def compute_C(history, generation):
        _, B, _ = lagged(history, generation, d2)
        A, _, _ = lagged(history, generation, d3)
        return jitter(A*m3 + B*m2 + k2, errorC)

    def next_generation(history, generation):
        # All three values are computed from the same (old) history before the
        # new triple is appended by the caller.
        return (
            compute_A(history, generation),
            compute_B(history, generation),
            compute_C(history, generation),
        )

    # Initial A: normal distribution with mean Am and standard deviation Astd.
    A = np.random.normal(loc=Am, scale=Astd)
    # Only the last max_delay + 1 states are ever read, so older ones may drop.
    history = collections.deque(maxlen=max_delay + 1)
    # Seed the history with the initial state; without it the very first
    # history[-1] lookup would hit an empty deque and raise IndexError.
    history.append((A, 0, 0))
    for generation in range(n + max_delay):
        history.append(next_generation(history, generation))

    return np.array(history[-1])


def update_slopes():
    """Redraw the global slopes m1..m4, each from its own ``mrangeN`` bounds.

    Every slope becomes ``lower + u*(upper - lower)`` with ``u`` taken from
    ``random.random()``; the draws happen in the order m1, m2, m3, m4.
    """
    global m1, m2, m3, m4

    def draw(low, high):
        # Scale one uniform [0, 1) draw onto the [low, high) interval.
        return low + random.random()*(high-low)

    all_bounds = (mrange1, mrange2, mrange3, mrange4)
    m1, m2, m3, m4 = [draw(*bounds) for bounds in all_bounds]

def update_errors():
    """Redraw the global error sizes errorA..errorY from their ``erange*`` bounds.

    Every error becomes ``lower + u*(upper - lower)`` with ``u`` taken from
    ``random.random()``; the draws happen in the order A, B, C, X, Y.
    """
    global errorA, errorB, errorC, errorX, errorY

    def draw(low, high):
        # Scale one uniform [0, 1) draw onto the [low, high) interval.
        return low + random.random()*(high-low)

    all_bounds = (erangeA, erangeB, erangeC, erangeX, erangeY)
    errorA, errorB, errorC, errorX, errorY = [draw(*bounds) for bounds in all_bounds]

def overall_simulation(n=100, pathway=None):
    """Run ``n`` simulations, each with freshly drawn slopes and error sizes.

    For every run the global slopes and errors are redrawn, a dataset is
    generated with ``simulations_data(pathway, samplesize)`` and its
    regression and correlation statistics are merged into one record.

    Args:
        n: Number of simulated datasets to generate.
        pathway: Generator function handed to ``simulations_data`` (for
            example ``linear_positive_A_to_C``). Defaults to
            ``linear_positive_C_to_B`` when omitted.

    Returns:
        Tuple ``(stats, data)``: a ``pd.DataFrame`` with one row of statistics
        per simulation, and an ``np.ndarray`` stacking the simulated datasets
        in the same order.
    """
    if pathway is None:
        # Resolved at call time so a later redefinition of the default
        # pathway in the notebook is picked up.
        pathway = linear_positive_C_to_B

    stats = []
    data = []
    for _ in range(n):
        update_slopes()
        update_errors()
        ABC = simulations_data(pathway, samplesize)
        record = compute_regression(ABC)
        record.update(compute_correlation(ABC))
        stats.append(record)
        data.append(ABC)
    return pd.DataFrame(stats), np.array(data)

# Run the whole batch once with the default settings: `d` is the DataFrame with
# one row of statistics per simulation, `ABC_all` the array of simulated datasets.
d, ABC_all = overall_simulation()

In [12]:
# The return value is not captured, so add_confidence_stats presumably extends
# `d` in place (the `confidence_*` columns appear when `d` is displayed later)
# -- TODO confirm in causality_functions.
add_confidence_stats(d, ABC_all)
# Last expression of the cell: whatever confidence_graphs returns is rendered.
confidence_graphs(d)

In [13]:
# Render the graphs built from the per-simulation statistics in `d`
# (stats_graphs is imported from causality_functions).
stats_graphs(d)

In [16]:
# Pick one simulated dataset at random and split it into its A, B and C columns.
A, B, C = random.choice(ABC_all).transpose()


def _pair_scatter(x_name, x_values, y_name, y_values):
    """Build an Altair circle-mark scatter chart of ``y_name`` against ``x_name``."""
    frame = pd.DataFrame({x_name: x_values, y_name: y_values})
    return alt.Chart(frame).mark_circle().encode(x=x_name, y=y_name)


AB = _pair_scatter("A", A, "B", B)
BC = _pair_scatter("B", B, "C", C)
AC = _pair_scatter("A", A, "C", C)

# Last expression of the cell: the three panels side by side.
alt.hconcat(AB, BC, AC)

In [17]:
# Display the statistics table; pandas elides the middle rows and columns of a
# large frame in the rendered output.
d

Unnamed: 0,kAB,kBC,kAC,mAB,mBC,mAC,r_sqrAB,r_sqrBC,r_sqrAC,r_E,...,rAB2*rBC2-rAC2,r_E_BA_C2-rBC2,mAB*mBC-mAC,mAB*mBC,confidence_rAC,confidence_residual_corr,confidence_corrected_bc_corr,confidence_slope_AC,L,U
0,1.362061e+05,3.805115e+04,3.895976e+04,7.240244e+02,0.083957,3.205496e+02,0.056641,0.038615,0.060823,-0.049945,...,-0.058636,-0.037919,-2.597630e+02,6.078664e+01,less,within,within,less,-0.049243,0.125881
1,-3.680962e-01,3.549741e+00,2.273464e+00,3.915498e+00,0.442121,1.758623e+00,0.968354,0.708608,0.708155,-0.134835,...,-0.021972,-0.708095,-2.749765e-02,1.731125e+00,within,more,less,within,0.662056,0.749714
2,1.084848e+11,1.775022e+09,9.103487e+10,3.049863e+09,0.831177,2.558048e+09,0.512155,0.983001,0.512655,-0.050752,...,-0.009206,-0.982294,-2.307295e+07,2.534975e+09,within,within,less,within,0.979766,0.985722
3,6.737997e+00,2.022291e+00,2.075555e+00,4.855644e+00,0.007361,3.565036e-02,0.982750,0.073780,0.072136,0.005413,...,0.000371,-0.072700,9.162026e-05,3.574198e-02,within,within,within,within,-0.014003,0.160434
4,4.588160e+06,9.483264e+04,4.570235e+06,1.507070e+05,0.980807,1.484260e+05,0.592981,0.993865,0.594231,-0.048951,...,-0.004887,-0.993537,-6.116125e+02,1.478144e+05,within,within,less,within,0.992690,0.994852
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2.774277e+08,-1.240313e+07,5.747968e+08,3.080022e+06,2.139943,6.755787e+06,0.327938,0.985216,0.339440,-0.081607,...,-0.016350,-0.980788,-1.647154e+05,6.591072e+06,within,within,less,within,0.982398,0.987585
96,1.237039e+16,1.199424e+16,2.633318e+15,2.839505e+15,0.251123,1.024890e+15,0.675539,0.082803,0.115556,-0.155827,...,-0.059620,-0.072975,-3.118256e+14,7.130640e+14,less,more,within,less,-0.004925,0.169266
97,1.616067e+16,3.872317e+14,4.782851e+15,5.881161e+14,0.276570,1.645135e+14,0.602768,0.591240,0.364570,-0.013140,...,-0.008189,-0.591031,-1.858186e+12,1.626553e+14,within,within,less,within,0.531082,0.645466
98,3.325513e+20,1.556409e+19,1.165037e+20,5.369636e+18,0.325903,1.936794e+18,0.413008,0.582631,0.294750,-0.067992,...,-0.054119,-0.581501,-1.868159e+17,1.749978e+18,within,within,less,within,0.521588,0.637740
