In [1]:
import time
import random
import numpy as np

import altair as alt
import pandas as pd
import collections
from causality_functions import jitter, dependent, simulations_data
from causality_functions import compute_regression, compute_correlation
from causality_functions import add_confidence_stats, confidence_graphs
from causality_functions import stats_graphs

# Lift Altair's default row limit so charts built from large DataFrames
# do not fail with MaxRowsError.
alt.data_transformers.disable_max_rows()

# --- Reproducibility ---------------------------------------------------------
# Seed both global RNGs used in this notebook (`random` and `np.random`) so a
# Restart & Run All reproduces the same simulations. Change SEED for a new draw.
# NOTE(review): this assumes the helpers in causality_functions draw from these
# same global generators - confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# --- Simulation size ---------------------------------------------------------
samplesize = 500  # handed to simulations_data() for every simulation run

# --- Distribution parameters: mean (*m) and standard deviation (*std) --------
# Only Am / Astd are read by the pathway functions in this cell (initial draw
# of A); the others are kept for the remaining variables / pathways.
Am, Bm, Cm = 40, 10, 30
Xm, Ym = 1, 1
Astd, Bstd, Cstd = 7, 5, 11
Xstd, Ystd = 1, 1

# --- Linear links: slope m and intercept k (effect = m * cause + k) ----------
m1, k1 = 1, -1.0
m2, k2 = 1.5, 2.0
m3, k3 = 1, -1.5
m4, k4 = .75, 0.0

# --- Delays, in generations, with which each link reads its cause ------------
d1 = 1
d2 = 1
d3 = 1
d4 = 1

# --- Noise levels passed as the `error` argument of jitter() / dependent() ---
errorA = 4
errorB = 8
errorC = 2
errorX = 0.75
errorY = 0.6

# --- (lower, upper) bounds from which update_slopes_errors() re-draws slopes --
mrange1 = 0, 5
mrange2 = 0, 5
mrange3 = 0, 5
mrange4 = 0, 5

# --- (lower, upper) bounds from which update_slopes_errors() re-draws errors --
erangeA = 0.2, 2
erangeB = 0.2, 2
erangeC = 0.2, 10
erangeX = 0.2, 2
erangeY = 0.2, 2


def linear_positive_C_to_B(n=100):
    """Simulate one (A, B, C) sample for the pathway A -> B -> C with C feeding back into B.

    The system is first seeded with ``max(d1, d2, d3)`` undelayed generations
    and then advanced ``n`` generations. In every delayed generation:

    * A is the previous A passed through ``jitter`` with ``errorA``;
    * B is ``m1 * A + m3 * C + k1`` passed through ``jitter`` with ``errorB``,
      where A is taken ``d1`` and C ``d3`` generations back;
    * C is ``dependent(B, m2, k2, error=errorC)`` with B taken ``d2``
      generations back.

    All slopes, intercepts, delays and error levels are read from module
    globals at call time.

    NOTE(review): B and C feed into each other, so (assuming ``dependent`` is
    linear in its first argument) the loop gain is ``m2 * m3``; when that
    exceeds 1 the values can grow geometrically with ``n``. This is presumably
    why some fitted statistics come out astronomically large - confirm.

    Parameters
    ----------
    n : int
        Number of delayed generations to run after seeding.

    Returns
    -------
    numpy.ndarray
        The final generation as ``array([A, B, C])``.

    Raises
    ------
    ValueError
        If any of ``d1``, ``d2``, ``d3`` is smaller than 1.
    """
    delays = (d1, d2, d3)
    if min(delays) < 1:
        raise ValueError("delays d1, d2 and d3 must all be >= 1")
    depth = max(delays)  # how many past generations must stay available

    def next_generation(A):
        # Undelayed step, used only to seed the history buffer.
        A_ = jitter(A, errorA)  # previous A + error
        B_ = dependent(A_, m1, k1, error=errorB)
        C_ = dependent(B_, m2, k2, error=errorC)
        return A_, B_, C_

    def next_generation_delayed(history):
        # history[-1] is the previous generation, so history[-d] is the state
        # d generations back. (The former index `1 - d` pointed at the oldest
        # entry for d == 1 and the newest for d == 2; it was only correct when
        # every delay was 1, where both forms select the same element.)
        A_prev, _, _ = history[-1]
        A_ = jitter(A_prev, errorA)  # previous A + error
        A_src, _, _ = history[-d1]
        _, _, C_src = history[-d3]
        B_ = jitter(A_src * m1 + m3 * C_src + k1, errorB)
        _, B_src, _ = history[-d2]
        C_ = dependent(B_src, m2, k2, error=errorC)
        return A_, B_, C_

    # Initial A: normal distribution with mean Am and standard deviation Astd.
    A = np.random.normal(loc=Am, scale=Astd)
    history = collections.deque(maxlen=depth)
    for _ in range(depth):
        history.append(next_generation(A))

    for _ in range(n):
        history.append(next_generation_delayed(history))

    return np.array(history[-1])


def linear_positive_A_to_C(n=100):
    """Simulate one (A, B, C) sample for the pathway A -> B -> C with a direct A -> C link.

    The system is first seeded with ``max(d1, d2, d3)`` undelayed generations
    and then advanced ``n`` generations. In every delayed generation:

    * A is the previous A passed through ``jitter`` with ``errorA``;
    * B is ``dependent(A, m1, k1, errorB)`` with A taken ``d1`` generations
      back;
    * C is ``m3 * A + m2 * B + k2`` passed through ``jitter`` with ``errorC``,
      where A is taken ``d3`` and B ``d2`` generations back.

    All slopes, intercepts, delays and error levels are read from module
    globals at call time.

    Parameters
    ----------
    n : int
        Number of delayed generations to run after seeding.

    Returns
    -------
    numpy.ndarray
        The final generation as ``array([A, B, C])``.

    Raises
    ------
    ValueError
        If any of ``d1``, ``d2``, ``d3`` is smaller than 1.
    """
    delays = (d1, d2, d3)
    if min(delays) < 1:
        raise ValueError("delays d1, d2 and d3 must all be >= 1")
    depth = max(delays)  # how many past generations must stay available

    def next_generation(A):
        # Undelayed step, used only to seed the history buffer.
        A_ = jitter(A, errorA)  # previous A + error
        B_ = dependent(A_, m1, k1, error=errorB)
        C_ = dependent(B_, m2, k2, error=errorC)
        return A_, B_, C_

    def next_generation_delayed(history):
        # history[-1] is the previous generation, so history[-d] is the state
        # d generations back. (The former index `1 - d` pointed at the oldest
        # entry for d == 1 and the newest for d == 2; it was only correct when
        # every delay was 1, where both forms select the same element.)
        A_prev, _, _ = history[-1]
        A_ = jitter(A_prev, errorA)  # previous A + error
        A_for_B, _, _ = history[-d1]
        B_ = dependent(A_for_B, m1, k1, errorB)
        _, B_src, _ = history[-d2]
        A_for_C, _, _ = history[-d3]
        C_ = jitter(A_for_C * m3 + B_src * m2 + k2, errorC)
        return A_, B_, C_

    # Initial A: normal distribution with mean Am and standard deviation Astd.
    A = np.random.normal(loc=Am, scale=Astd)
    history = collections.deque(maxlen=depth)
    for _ in range(depth):
        history.append(next_generation(A))

    for _ in range(n):
        history.append(next_generation_delayed(history))

    return np.array(history[-1])


def update_slopes_errors():
    """Re-draw the global slopes and error levels for the next simulation run.

    Each of ``m1``..``m4`` is replaced by a uniform draw from the matching
    ``mrange*`` bounds, and each of ``errorA``..``errorY`` by a uniform draw
    from the matching ``erange*`` bounds. The module globals are overwritten
    in place; nothing is returned.
    """
    global m1, m2, m3, m4
    global errorA, errorB, errorC, errorX, errorY

    # Draw order (slopes first, then errors) is kept fixed so that a seeded
    # `random` generator yields the same values run after run.
    slope_bounds = (mrange1, mrange2, mrange3, mrange4)
    m1, m2, m3, m4 = [random.uniform(low, high) for low, high in slope_bounds]

    error_bounds = (erangeA, erangeB, erangeC, erangeX, erangeY)
    errorA, errorB, errorC, errorX, errorY = [
        random.uniform(low, high) for low, high in error_bounds
    ]

def overall_simulation(n=100, pathway=None):
    """Run ``n`` independent simulations and collect their statistics and raw data.

    For every run the global slopes and error levels are re-drawn with
    ``update_slopes_errors()``, a dataset is generated with
    ``simulations_data(pathway, samplesize)``, and the results of
    ``compute_regression`` and ``compute_correlation`` on that dataset are
    merged into one record.

    Parameters
    ----------
    n : int
        Number of simulation runs.
    pathway : callable, optional
        Generator of a single sample, forwarded to ``simulations_data``
        (e.g. ``linear_positive_C_to_B`` or ``linear_positive_A_to_C``).
        Defaults to ``linear_positive_C_to_B``, the previously hard-coded
        choice, so existing calls behave as before.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        One row of statistics per run, and the stacked datasets of all runs.
    """
    if pathway is None:
        # Resolved at call time so the default follows any later redefinition.
        pathway = linear_positive_C_to_B

    stats = []
    data = []
    for _ in range(n):
        update_slopes_errors()
        ABC = simulations_data(pathway, samplesize)
        record = compute_regression(ABC)
        record.update(compute_correlation(ABC))
        stats.append(record)
        data.append(ABC)
    return pd.DataFrame(stats), np.array(data)

# Run the whole batch with default settings: `d` is the DataFrame with one row
# of statistics per simulation run, `ABC_all` the array of every run's dataset.
d, ABC_all = overall_simulation()

In [2]:
# Add confidence statistics for each run to the table and plot them.
# NOTE(review): the return value of add_confidence_stats is discarded, so it
# presumably modifies `d` in place (and prints the numbers shown below) -
# confirm in causality_functions.
add_confidence_stats(d, ABC_all)
confidence_graphs(d)

0.633093666355916 0.7270300867906914 0.4986987782614957
0.5614439145020682 0.6700177348596775 0.6190938465455993
0.6850922112687011 0.7676102322530653 0.7284487457926881
0.13118507032405471 0.2984175134631023 0.22051855938499876
0.05814740704358162 0.22986705999593685 0.14610791407425547
0.11753661183109565 0.28573732931894646 0.2088385385804371
0.8536499988264543 0.8947579045976995 0.7960780867268309
0.30426870941112827 0.45425863204711076 0.3884275639325376
0.18998353420310388 0.3523778359742653 0.2740753775643365
0.16652082469442736 0.33097441318345244 0.25765383914117296
0.09259438622845319 0.26241142912588067 0.17919139568768538
0.2253768769186538 0.384346505761217 0.30585328546498836
0.05583718514989101 0.2276706097869606 0.025812462659243293
0.9528411730240108 0.9665896833779103 0.9612866367127602
0.565067085638428 0.672931802207456 0.6223001661462848
0.11119618663143371 0.27982661410275833 0.1843937873110015
0.174333875144804 0.3381205653473699 0.25731248398264295
0.18026698119

In [3]:
# Plot the per-run statistics collected in `d` (charts are defined in causality_functions).
stats_graphs(d)

In [4]:
# Pick one simulation run at random and scatter every pair of its variables.
chosen_run = random.choice(ABC_all)
A, B, C = chosen_run.transpose()


def _pair_scatter(x_name, x_values, y_name, y_values):
    """Return a circle-mark scatter chart of `y_values` against `x_values`."""
    frame = pd.DataFrame({x_name: x_values, y_name: y_values})
    return alt.Chart(frame).mark_circle().encode(x=x_name, y=y_name)


AB = _pair_scatter("A", A, "B", B)
BC = _pair_scatter("B", B, "C", C)
AC = _pair_scatter("A", A, "C", C)

# Last expression of the cell: the three charts stacked vertically.
alt.vconcat(AB, BC, AC)

In [5]:
# Display the statistics table (one row per simulation run).
d

Unnamed: 0,kAB,kBC,kAC,mAB,mBC,mAC,r_sqrAB,r_sqrBC,r_sqrAC,r_E,...,rAB,rBC,rAC,rAB2*rBC2-rAC2,r_E_BA_C2-rBC2,mAB*mBC-mAC,confidence,confidence_residual_corr,confidence_corrected_bc_corr,confidence_slope_AC
0,3.744881e+00,3.754708e+00,3.343844e+00,7.586468e-01,0.312435,2.773644e-01,0.866365,0.575622,0.682874,-0.469713,...,0.930787,0.758698,0.826362,-0.184176,-0.575203,-4.033652e-02,less,more,less,within
1,3.229903e+70,2.367720e+69,1.564781e+71,1.574463e+69,4.768242,7.504889e+69,0.621586,0.995991,0.618676,0.005375,...,0.788407,0.997994,0.786560,0.000418,-0.993816,2.533592e+66,within,within,less,within
2,2.924978e+47,5.660280e+45,1.386777e+48,2.364456e+46,4.727479,1.118206e+47,0.729019,0.999218,0.728989,-0.018558,...,0.853826,0.999609,0.853808,-0.000540,-0.997833,-4.142527e+43,within,within,less,within
3,5.458144e+37,1.762939e+36,1.959233e+38,3.848697e+35,3.547929,1.352641e+36,0.224655,0.981587,0.216388,0.017529,...,0.473978,0.990751,0.465175,0.004131,-0.975658,1.284918e+34,within,within,less,within
4,2.770972e+64,3.727733e+61,1.249748e+65,1.315853e+62,4.505788,5.908462e+62,0.146129,0.999857,0.145100,0.045736,...,0.382268,0.999929,0.380920,0.001008,-0.998565,2.049123e+60,within,within,less,within
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2.155884e+61,2.520883e+60,8.689229e+61,1.325638e+60,4.004427,5.357692e+60,0.680202,0.986959,0.683852,-0.097128,...,0.824744,0.993458,0.826954,-0.012521,-0.980046,-4.927297e+58,within,more,less,within
96,2.022927e+18,1.730707e+17,1.809770e+18,1.073765e+16,0.820027,9.355483e+15,0.199306,0.862815,0.194131,-0.034912,...,0.446437,0.928878,0.440603,-0.022167,-0.862702,-5.503214e+14,within,within,less,within
97,1.586748e+01,7.681867e+00,4.132633e+01,1.312962e+01,2.050786,2.689855e+01,0.983681,0.997888,0.979601,0.170894,...,0.991807,0.998943,0.989748,0.002003,-0.997540,2.749040e-02,within,less,less,within
98,2.199715e+01,5.978302e+00,3.655098e+01,9.168258e+00,1.350374,1.235945e+01,0.968476,0.995430,0.960763,0.137375,...,0.984112,0.997712,0.980185,0.003287,-0.982883,2.112626e-02,within,less,less,within
