In [1]:
import time
import random
import numpy as np

import altair as alt
import pandas as pd
from causality_functions import jitter, dependent, simulations_data
from causality_functions import compute_regression, compute_correlation
from causality_functions import add_confidence_stats, confidence_graphs
from causality_functions import stats_graphs

# Turn off Altair's maximum-row check so charts built from the (potentially
# large) simulation frames below are not rejected for exceeding the row limit.
alt.data_transformers.disable_max_rows()

# ---------------------------------------------------------------------------
# Simulation configuration (module-level; read by the pathway functions below)
# ---------------------------------------------------------------------------

# Sample size handed to simulations_data() for every simulated data set.
samplesize = 100

# Means (``loc``) of the normal distributions the starting values are drawn from.
Am = 1
Bm = 1
Cm = 1
Xm = 1
Ym = 1

# Standard deviations (``scale``) of those normal distributions.
Astd = 1
Bstd = 1
Cstd = 1
Xstd = 1
Ystd = 1

# (m, k) pairs passed to dependent() / used in the linear combinations.
# m is the slope (see the "slope ranges" below); k is presumably the
# intercept -- TODO confirm against causality_functions.dependent.
m1 = 0.1
k1 = -1.0
m2 = 0.25
k2 = 2.0
m3 = 0.5
k3 = -1.5
m4 = 1.5
k4 = 0.0

# NOTE(review): d1..d4 are not referenced anywhere in the visible cells.
d1 = 5
d2 = 3
d3 = 7
d4 = 4

# Error magnitudes passed to jitter() / dependent() for each variable.
# These starting values are overwritten by update_slopes_errors().
errorA = 2
errorB = 1.5
errorC = 0.5
errorX = 0.75
errorY = 0.6

# (lower, upper) bounds from which update_slopes_errors() draws the slopes.
mrange1 = (0, 5)
mrange2 = (0, 5)
mrange3 = (0, 5)
mrange4 = (0, 5)

# (lower, upper) bounds from which update_slopes_errors() draws the errors.
erangeA = (0.2, 5)
erangeB = (0.2, 20)
erangeC = (0.2, 10)
erangeX = (0.2, 2)
erangeY = (0.2, 2)

def linear(n=100):
    """Simulate the chain pathway A -> B -> C and return its final state.

    A starts as a draw from a normal distribution (mean ``Am``, standard
    deviation ``Astd``); B is derived from A and C from B via ``dependent``.
    The chain is then advanced ``n`` generations: each generation jitters A
    (previous A + error) and re-derives B and C from the new values.

    Args:
        n: Number of generations to advance after the initial state.

    Returns:
        ``np.array([A, B, C])`` holding the values of the last generation.
    """

    def step(previous_a):
        # One generation: perturb the root cause, then propagate down the chain.
        new_a = jitter(previous_a, errorA)
        new_b = dependent(new_a, m1, k1, error=errorB)
        new_c = dependent(new_b, m2, k2, error=errorC)
        return new_a, new_b, new_c

    start_a = np.random.normal(loc=Am, scale=Astd)
    start_b = dependent(start_a, m1, k1, errorB)
    start_c = dependent(start_b, m2, k2, errorC)

    state = (start_a, start_b, start_c)
    for _ in range(n):
        state = step(state[0])

    return np.array(state)


def radiating(n=100):
    """Simulate the pathway A <- B -> C (B drives both A and C).

    B starts as a draw from a normal distribution (mean ``Bm``, standard
    deviation ``Bstd``). Every generation jitters B and derives fresh A and C
    values from it via ``dependent``.

    Args:
        n: Number of generations to advance after the initial state.

    Returns:
        ``np.array([A, B, C])`` holding the values of the last generation.
    """

    def derive(b_value):
        # A is derived before C, matching the order of the random draws.
        a_value = dependent(b_value, m1, k1, errorA)
        c_value = dependent(b_value, m2, k2, errorC)
        return a_value, b_value, c_value

    triple = derive(np.random.normal(loc=Bm, scale=Bstd))
    for _ in range(n):
        triple = derive(jitter(triple[1], errorB))

    return np.array(triple)


def common_cause(n=100):
    """Simulate A, B and C all depending on one hidden variable X.

    X starts as a draw from a normal distribution (mean ``Xm``, standard
    deviation ``Xstd``). Every generation jitters X and derives A, B and C
    from it via ``dependent`` with their own (m, k, error) settings. X itself
    is not part of the result.

    Args:
        n: Number of generations to advance after the initial state.

    Returns:
        ``np.array([A, B, C])`` holding the values of the last generation.
    """

    def effects_of(x_value):
        # Derived in the order A, B, C so the sequence of random draws is stable.
        settings = ((m1, k1, errorA), (m2, k2, errorB), (m3, k3, errorC))
        return [dependent(x_value, slope, offset, err)
                for slope, offset, err in settings]

    hidden = np.random.normal(loc=Xm, scale=Xstd)
    observed = effects_of(hidden)

    for _ in range(n):
        hidden = jitter(hidden, errorX)
        observed = effects_of(hidden)

    return np.array(observed)


def single_difference_cause(n=100):
    """Simulate A -> B <- X -> C, where X is a hidden variable.

    A and X start as draws from normal distributions (``Am``/``Astd`` and
    ``Xm``/``Xstd``). Each generation jitters X and A, computes B as the
    jittered linear combination ``m1*A + m2*X + k1`` and derives C from X via
    ``dependent``. There is no separate initial B/C, so the loop runs
    ``n + 1`` times.

    Args:
        n: Number of generations beyond the first one.

    Returns:
        ``np.array([A, B, C])`` holding the values of the last generation.
    """
    a_val = np.random.normal(loc=Am, scale=Astd)
    x_val = np.random.normal(loc=Xm, scale=Xstd)

    for _ in range(n + 1):
        # Order matters for the random stream: X, then A, then B, then C.
        x_val = jitter(x_val, errorX)
        a_val = jitter(a_val, errorA)
        b_mean = m1 * a_val + m2 * x_val + k1
        b_val = jitter(b_mean, errorB)
        c_val = dependent(x_val, m3, k3, errorC)

    return np.array([a_val, b_val, c_val])


def double_difference_cause(n=100):
    """Simulate A <- X -> B <- Y -> C with two hidden variables X and Y.

    X and Y start as draws from normal distributions (``Xm``/``Xstd`` and
    ``Ym``/``Ystd``), like the starting values of every other pathway.
    Each generation jitters X and Y, derives A from X and C from Y via
    ``dependent``, and computes B as the jittered linear combination
    ``m2*X + m3*Y + k2``. There is no separate initial A/B/C, so the loop
    runs ``n + 1`` times.

    Fix: the starting X and Y used to be overwritten with ``random.random()``
    right after the normal draws, which silently ignored ``Xm``, ``Xstd``,
    ``Ym`` and ``Ystd``. The overwrite has been removed.

    Args:
        n: Number of generations beyond the first one.

    Returns:
        ``np.array([A, B, C])`` holding the values of the last generation.
    """
    def next_generation(X, Y):
        # Perturb both hidden causes first, then derive the observed variables.
        X = jitter(X, errorX)
        Y = jitter(Y, errorY)
        A = dependent(X, m1, k1, errorA)
        B = jitter(m2*X + m3*Y + k2, errorB)
        C = dependent(Y, m4, k4, errorC)
        return A, B, C, X, Y

    X = np.random.normal(loc=Xm, scale=Xstd)  # normal distribution: mean Xm, standard deviation Xstd
    Y = np.random.normal(loc=Ym, scale=Ystd)  # normal distribution: mean Ym, standard deviation Ystd

    for _ in range(n + 1):
        A, B, C, X, Y = next_generation(X, Y)

    return np.array([A, B, C])

def convergent(n=100):
    """Simulate the pathway A -> B <- C (B depends on both A and C).

    A and C start as draws from normal distributions (``Am``/``Astd`` and
    ``Cm``/``Cstd``); B is the jittered linear combination
    ``m1*A + m2*C + k1``. Each generation jitters A and C and recomputes B
    from the new values.

    Fix: B used to be computed from A and C *before* they were jittered, so
    the returned B belonged to the previous generation's A and C. That was
    inconsistent with the initial state and with the other pathways, which
    derive effects from the current-generation causes.

    Args:
        n: Number of generations to advance after the initial state.

    Returns:
        ``np.array([A, B, C])`` holding the values of the last generation.
    """

    def next_generation(A, C):
        # Update the causes first so that B is derived from the values
        # that are actually returned alongside it.
        A_ = jitter(A, errorA)
        C_ = jitter(C, errorC)
        B_ = jitter(m1*A_ + m2*C_ + k1, errorB)
        return A_, B_, C_

    A = np.random.normal(loc=Am, scale=Astd)  # normal distribution: mean Am, standard deviation Astd
    C = np.random.normal(loc=Cm, scale=Cstd)  # normal distribution: mean Cm, standard deviation Cstd

    B = jitter(m1*A + m2*C + k1, errorB)
    for _ in range(n):
        A, B, C = next_generation(A, C)

    return np.array([A, B, C])
   
    
def update_slopes_errors():
    """Re-draw the global slopes and error magnitudes.

    Each of ``m1``..``m4`` is replaced by a value drawn from its
    ``mrange*`` bounds, and each of ``errorA``, ``errorB``, ``errorC``,
    ``errorX`` and ``errorY`` by a value drawn from its ``erange*`` bounds.
    Draws use ``random.random()`` scaled into ``[lower, upper)``.

    The globals are modified in place; nothing is returned.
    """
    global m1, m2, m3, m4
    global errorA, errorB, errorC, errorX, errorY

    def draw(lower, upper):
        # Scale a [0, 1) draw into the requested interval.
        span = upper - lower
        return lower + random.random() * span

    slope_bounds = (mrange1, mrange2, mrange3, mrange4)
    error_bounds = (erangeA, erangeB, erangeC, erangeX, erangeY)

    # Slopes are drawn before errors, each group in declaration order,
    # so the sequence of random draws is unchanged.
    m1, m2, m3, m4 = [draw(*bounds) for bounds in slope_bounds]
    errorA, errorB, errorC, errorX, errorY = [draw(*bounds) for bounds in error_bounds]

    
def overall_simulation(n=100, pathway=None):
    """Run ``n`` simulations with freshly drawn slopes/errors and collect statistics.

    For every run the global slopes and errors are re-drawn with
    ``update_slopes_errors()``, a data set is generated with
    ``simulations_data(pathway, samplesize)``, and the results of
    ``compute_regression`` and ``compute_correlation`` are merged into one
    record.

    Args:
        n: Number of simulation runs.
        pathway: Pathway function used to generate the data (e.g. ``linear``,
            ``radiating``, ``common_cause``, ``convergent``). Defaults to
            ``linear``, which was previously hard-coded, so existing calls
            behave as before.

    Returns:
        A tuple ``(stats, data)``: ``stats`` is a ``pd.DataFrame`` with one
        row per run, ``data`` is ``np.array`` of the generated data sets in
        run order.
    """
    if pathway is None:
        # Resolved at call time so the default follows the notebook's `linear`.
        pathway = linear

    stats = []
    data = []
    for _ in range(n):
        update_slopes_errors()
        ABC = simulations_data(pathway, samplesize)
        r = compute_regression(ABC)
        r.update(compute_correlation(ABC))
        stats.append(r)
        data.append(ABC)
    return pd.DataFrame(stats), np.array(data)

# Run the default batch of simulations: `d` is the DataFrame of per-run
# statistics, `ABC_all` the array of generated data sets (one entry per run).
d, ABC_all = overall_simulation()

In [2]:
# Add confidence statistics for the simulated runs, then plot them.
# NOTE(review): the return value of add_confidence_stats is discarded, so it
# presumably mutates `d` in place -- confirm in causality_functions. If so,
# re-running this cell operates on an already-extended frame.
add_confidence_stats(d, ABC_all)
confidence_graphs(d)

-0.9975803956991921 -0.994644283513404 0.996352413691353
-0.944271660141638 -0.8805163929267935 0.9183173141407878
-0.9766447701434321 -0.9489531486957522 0.9660067429662633
-0.9874135181856704 -0.9723112338688885 0.9792433252135104
-0.9962968573342581 -0.9918095986596798 0.9944681054768564
-0.9786017367127958 -0.9531754682127992 0.967901817885466
-0.9843652509891752 -0.9656685900252584 0.9767051755384111
-0.9969526059121427 -0.9932572623914607 0.9952294069074996
-0.9668685945921222 -0.9280078166769173 0.9511165989362138
-0.988383022175451 -0.9744290601790649 0.9828950076689252
-0.9833504123667062 -0.9634625119607311 0.9783179350289289
-0.6659774055381613 -0.38461411165200454 0.5366958570302791
-0.20431068138651842 0.18850707263737274 0.001406256545458111
-0.9881567042182693 -0.9739344595539605 0.9822634716398332
-0.9999832714956765 -0.9999629178646513 0.9999755793195628
-0.44577071165087373 -0.08121470575735452 0.26914265932472914
-0.9987953883112497 -0.9973316612557396 0.998006494643

In [3]:
# Plot the per-run statistics in `d` (helper from causality_functions;
# presumably returns the chart that this cell displays -- TODO confirm).
stats_graphs(d)

In [4]:
# Pick one simulated data set at random and scatter-plot every pair of variables.
A, B, C = random.choice(ABC_all).transpose()
_columns = {"A": A, "B": B, "C": C}


def _scatter(x_name, y_name):
    """Return a circle-mark scatter chart of the two named variables."""
    frame = pd.DataFrame({x_name: _columns[x_name], y_name: _columns[y_name]})
    return alt.Chart(frame).mark_circle().encode(x=x_name, y=y_name)


AB = _scatter("A", "B")
BC = _scatter("B", "C")
AC = _scatter("A", "C")
alt.vconcat(AB, BC, AC)

In [5]:
# Display the statistics table (one row per simulation run). pandas truncates
# the rendering, but `d.head()` would keep the saved output smaller.
d

Unnamed: 0,kAB,kBC,kAC,mAB,mBC,mAC,r_sqrAB,r_sqrBC,r_sqrAC,r_E,...,rAB,rBC,rAC,rAB2*rBC2-rAC2,r_E_BA_C2-rBC2,mAB*mBC-mAC,confidence,confidence_residual_corr,confidence_corrected_bc_corr,confidence_slope_AC
0,-0.947837,2.431525,-0.953781,2.026472,3.571721,7.238164,0.998771,0.997578,0.996400,-0.013710,...,0.999385,0.998788,0.998198,-0.000047,-0.997156,-0.000172,more,within,more,within
1,-1.077520,1.909117,-2.284256,3.116434,3.892784,12.130452,0.918342,0.999973,0.918143,0.059198,...,0.958301,0.999987,0.958198,0.000174,-0.990622,0.001152,more,within,more,within
2,-1.186432,2.154374,0.164467,3.882551,1.678152,6.513548,0.966405,0.999588,0.965425,0.078305,...,0.983059,0.999794,0.982560,0.000582,-0.960986,0.001964,more,within,more,within
3,1.550871,1.614608,3.126527,2.923280,0.973852,2.849856,0.984717,0.994441,0.981318,-0.112794,...,0.992329,0.997217,0.990615,-0.002075,-0.994335,-0.003014,more,within,more,within
4,-1.594317,2.055854,-2.343476,3.079256,2.759288,8.496654,0.994867,0.999599,0.994491,-0.008157,...,0.997430,0.999800,0.997242,-0.000023,-0.980059,-0.000100,more,within,more,within
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-1.059605,1.782316,0.542109,4.287920,1.175309,5.034951,0.990923,0.985727,0.974966,0.080271,...,0.995451,0.992838,0.987404,0.001813,-0.982237,0.004680,more,within,more,within
96,-1.318733,2.132624,-1.440400,1.041171,2.702872,2.810355,0.977097,0.997052,0.971590,0.160164,...,0.988482,0.998525,0.985693,0.002627,-0.988408,0.003796,more,within,more,within
97,-5.679559,1.925206,-12.860903,0.773157,2.595987,2.015176,0.752081,0.989358,0.750074,-0.058562,...,0.867226,0.994665,0.866068,-0.005996,-0.967536,-0.008071,more,within,more,within
98,-1.086172,1.611449,-0.659871,3.562540,2.082601,7.429557,0.706033,0.996307,0.705363,-0.029430,...,0.840258,0.998152,0.839859,-0.001937,-0.991862,-0.010208,more,within,more,within


In [8]:
# Export the `r_sqrAC` column (with the index) to CSV.
# NOTE(review): the absolute /tmp path is hard-coded and platform-specific;
# consider a configurable output directory instead.
d[['r_sqrAC']].to_csv("/tmp/rsqrAC.csv")