In [4]:
import time
import random
import numpy as np

import altair as alt
import pandas as pd
from causality_functions import jitter, dependent, simulations_data
from causality_functions import compute_regression, compute_correlation
from causality_functions import add_confidence_stats, confidence_graphs
from causality_functions import stats_graphs

# Lift Altair's default row limit so charts can embed the full simulated data sets.
alt.data_transformers.disable_max_rows()

# Seed both random number generators used in this notebook (`random` and
# `np.random`) so that Restart Kernel -> Run All reproduces the same results.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Sample size handed to simulations_data() for every simulation run.
samplesize = 500

# Mean (*m) and standard deviation (*std) of the normal distributions from which
# the pathway functions draw their starting values for A, B, C, X and Y.
Am, Bm, Cm = 60, 10, 30
Xm, Ym = 1, 1
Astd, Bstd, Cstd = 1000, 15, 21
Xstd, Ystd = 1, 1

# Slope (m) and intercept (k) of each dependency.
# The slopes are initial values only: update_slopes() redraws m1..m4 before every run.
m1, k1 = 1, 0
m2, k2 = 1.25, 0
m3, k3 = 0.5, 0
m4, k4 = 1.5, 0

# NOTE(review): d1..d4 are not referenced in the visible part of this notebook;
# kept in case another cell or module relies on them.
d1 = 1
d2 = 0
d3 = 0
d4 = 0

# Error magnitudes handed to jitter()/dependent() by the pathway functions.
# Initial values only: update_errors() redraws all five before every run.
errorA = 2
errorB = 0.001
errorC = 0.002
errorX = 0.75
errorY = 0.6

# (lower, upper) bounds from which update_slopes() draws each slope.
mrange1 = 0, 2
mrange2 = 0, 2
mrange3 = 0, 2
mrange4 = 0, 2

# (lower, upper) bounds from which update_errors() draws each error magnitude.
erangeA = 0.2, 2000
erangeB = 0.2, 2000
erangeC = 0.2, 2000
erangeX = 0.2, 2
erangeY = 0.2, 2

def linear(n=100):
    """Simulate one sample of the chain pathway A -> B -> C.

    A starts as a draw from a normal distribution (mean ``Am``, standard
    deviation ``Astd``); B is derived from A and C from B. The chain is then
    advanced ``n`` generations: A is jittered with ``errorA`` and B and C are
    re-derived from the new A.

    ``jitter`` and ``dependent`` come from ``causality_functions``; presumably
    ``jitter`` perturbs a value and ``dependent`` applies slope, intercept and
    noise -- confirm against that module.

    Parameters
    ----------
    n : int
        Number of generations applied after the initial draw.

    Returns
    -------
    numpy.ndarray
        ``[A, B, C]`` of the last generation.
    """
    a = np.random.normal(loc=Am, scale=Astd)
    b = dependent(a, m1, k1, errorB)
    c = dependent(b, m2, k2, errorC)

    for _ in range(n):
        # Each generation starts from the previous A; B and C follow from it.
        a = jitter(a, errorA)
        b = dependent(a, m1, k1, error=errorB)
        c = dependent(b, m2, k2, error=errorC)

    return np.array([a, b, c])


def radiating(n=100):
    """Simulate one sample of the radiating pathway A <- B -> C.

    B starts as a draw from a normal distribution (mean ``Bm``, standard
    deviation ``Bstd``); A and C are both derived from B. For ``n``
    generations B is jittered with ``errorB`` and A and C are re-derived
    from the new B.

    Parameters
    ----------
    n : int
        Number of generations applied after the initial draw.

    Returns
    -------
    numpy.ndarray
        ``[A, B, C]`` of the last generation.
    """
    b = np.random.normal(loc=Bm, scale=Bstd)
    a = dependent(b, m1, k1, errorA)
    c = dependent(b, m2, k2, errorC)

    for _ in range(n):
        # B is the only variable carried over between generations.
        b = jitter(b, errorB)
        a = dependent(b, m1, k1, errorA)
        c = dependent(b, m2, k2, errorC)

    return np.array([a, b, c])


def common_cause(n=100):
    """Simulate one sample where a hidden X drives A, B and C.

    X starts as a draw from a normal distribution (mean ``Xm``, standard
    deviation ``Xstd``). A, B and C are each derived directly from X with
    their own slope, intercept and error. For ``n`` generations X is
    jittered with ``errorX`` and the three effects are re-derived.

    Parameters
    ----------
    n : int
        Number of generations applied after the initial draw.

    Returns
    -------
    numpy.ndarray
        ``[A, B, C]`` of the last generation (X itself is not returned).
    """
    def effects_of(cause):
        # Evaluated left to right: A first, then B, then C.
        return (
            dependent(cause, m1, k1, errorA),
            dependent(cause, m2, k2, errorB),
            dependent(cause, m3, k3, errorC),
        )

    x = np.random.normal(loc=Xm, scale=Xstd)
    a, b, c = effects_of(x)

    for _ in range(n):
        x = jitter(x, errorX)
        a, b, c = effects_of(x)

    return np.array([a, b, c])


def single_difference_cause(n=100):
    """Simulate one sample where B depends on A and a hidden X, and C on X only.

    A and X start as draws from normal distributions (``Am``/``Astd`` and
    ``Xm``/``Xstd``). Every generation jitters X and A, sets B to a jittered
    ``m1*A + m2*X + k1`` and derives C from X.

    Parameters
    ----------
    n : int
        The loop runs ``n + 1`` generations, so B and C are defined even for
        ``n == 0``.

    Returns
    -------
    numpy.ndarray
        ``[A, B, C]`` of the last generation (X itself is not returned).
    """
    a = np.random.normal(loc=Am, scale=Astd)
    x = np.random.normal(loc=Xm, scale=Xstd)

    generations = n + 1
    for _ in range(generations):
        x = jitter(x, errorX)
        a = jitter(a, errorA)
        # B mixes the observed cause A with the hidden cause X.
        b = jitter(m1 * a + m2 * x + k1, errorB)
        c = dependent(x, m3, k3, errorC)

    return np.array([a, b, c])


def double_difference_cause(n=100):
    """Simulate one sample with two hidden causes: X -> A, X and Y -> B, Y -> C.

    X and Y start as draws from the configured normal distributions
    (``Xm``/``Xstd`` and ``Ym``/``Ystd``), in the same way the other pathway
    functions draw their starting values. Every generation jitters X and Y,
    derives A from X, sets B to a jittered ``m2*X + m3*Y + k2`` and derives
    C from Y.

    Parameters
    ----------
    n : int
        The loop runs ``n + 1`` generations, so A, B and C are defined even
        for ``n == 0``.

    Returns
    -------
    numpy.ndarray
        ``[A, B, C]`` of the last generation (X and Y are not returned).
    """
    # Starting values honour Xm/Xstd and Ym/Ystd; they must not be replaced by
    # uniform random.random() draws, which would make those settings dead.
    x = np.random.normal(loc=Xm, scale=Xstd)
    y = np.random.normal(loc=Ym, scale=Ystd)

    for _ in range(n + 1):
        x = jitter(x, errorX)
        y = jitter(y, errorY)
        a = dependent(x, m1, k1, errorA)
        # B is the only variable influenced by both hidden causes.
        b = jitter(m2 * x + m3 * y + k2, errorB)
        c = dependent(y, m4, k4, errorC)

    return np.array([a, b, c])

def convergent(n=100):
    """Simulate one sample of the convergent pathway A -> B <- C.

    A and C start as draws from normal distributions (``Am``/``Astd`` and
    ``Cm``/``Cstd``); B is a jittered ``m1*A + m2*C + k1``. For ``n``
    generations B is recomputed from the current A and C, after which A and C
    are jittered.

    NOTE(review): within a generation B is computed *before* A and C are
    jittered, so the returned B reflects the previous generation's A and C.
    The other pathways derive effects from the already-jittered cause --
    confirm this lag is intended.

    Parameters
    ----------
    n : int
        Number of generations applied after the initial draw.

    Returns
    -------
    numpy.ndarray
        ``[A, B, C]`` of the last generation.
    """
    def joint_effect(cause_a, cause_c):
        return jitter(m1 * cause_a + m2 * cause_c + k1, errorB)

    a = np.random.normal(loc=Am, scale=Astd)
    c = np.random.normal(loc=Cm, scale=Cstd)
    b = joint_effect(a, c)

    for _ in range(n):
        b = joint_effect(a, c)
        a = jitter(a, errorA)
        c = jitter(c, errorC)

    return np.array([a, b, c])
   
    
def update_slopes():
    """Redraw the global slopes ``m1``..``m4``.

    Each slope is drawn uniformly between the (lower, upper) bounds stored in
    the matching ``mrange1``..``mrange4`` global. Draws happen in the order
    m1, m2, m3, m4. Returns nothing; the module-level slopes are overwritten.
    """
    global m1, m2, m3, m4

    # random.uniform(a, b) evaluates a + (b - a) * random.random().
    m1, m2, m3, m4 = (
        random.uniform(low, high)
        for low, high in (mrange1, mrange2, mrange3, mrange4)
    )

def update_errors():
    """Redraw the global error magnitudes ``errorA``..``errorY``.

    Each value is drawn uniformly between the (lower, upper) bounds stored in
    the matching ``erangeA``..``erangeY`` global. Draws happen in the order
    A, B, C, X, Y. Returns nothing; the module-level errors are overwritten.
    """
    global errorA, errorB, errorC, errorX, errorY

    # random.uniform(a, b) evaluates a + (b - a) * random.random().
    errorA, errorB, errorC, errorX, errorY = (
        random.uniform(low, high)
        for low, high in (erangeA, erangeB, erangeC, erangeX, erangeY)
    )
  
def overall_simulation(n=100, pathway=None):
    """Run ``n`` independent simulations of one causal pathway.

    Before every run the global slopes and error magnitudes are redrawn
    (``update_slopes`` / ``update_errors``). ``simulations_data`` then produces
    a data set of ``samplesize`` samples, and the regression and correlation
    statistics of that data set are merged into one record.

    Parameters
    ----------
    n : int
        Number of simulation runs.
    pathway : callable, optional
        Pathway function handed to ``simulations_data`` (for example
        ``linear``, ``radiating``, ``common_cause``). Defaults to
        ``convergent`` when omitted.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        One row of statistics per run, and the stacked data sets of all runs.
    """
    if pathway is None:
        # Resolved at call time so the default keeps pointing at `convergent`.
        pathway = convergent

    stats = []
    data = []
    for _ in range(n):
        update_slopes()
        update_errors()
        ABC = simulations_data(pathway, samplesize)
        # compute_regression's result is used as a dict and extended in place
        # with the correlation statistics.
        record = compute_regression(ABC)
        record.update(compute_correlation(ABC))
        stats.append(record)
        data.append(ABC)
    return pd.DataFrame(stats), np.array(data)

# Run the default 100 simulations: `d` holds one row of statistics per run,
# `ABC_all` the stacked simulated data sets of all runs.
d, ABC_all = overall_simulation()

In [5]:
# Add the confidence statistics to the stats table, then draw the confidence plots.
# NOTE(review): the return value is discarded, so add_confidence_stats presumably
# modifies `d` in place -- confirm; re-running this cell may then not be idempotent.
add_confidence_stats(d, ABC_all)
confidence_graphs(d)

In [6]:
# Plot the per-run statistics collected in `d`.
stats_graphs(d)

In [11]:
# Pick one simulated data set at random and split it into its A, B and C series.
A, B, C = random.choice(ABC_all).transpose()

# The constant column D gives every point the same nominal colour category.
data = pd.DataFrame({"A": A, "B": B, "C": C, "D": "X"})

# Interval selection: points outside the brushed region are drawn light gray.
brush = alt.selection_interval()
AB = (
    alt.Chart(data)
    .mark_circle()
    .encode(
        x="A:Q",
        y="B:Q",
        color=alt.condition(brush, "D:N", alt.value('lightgray')),
    )
    .add_params(brush)
)

# The other two panels reuse the A-vs-B chart and only swap the axes.
BC = AB.encode(x="B", y="C")
AC = AB.encode(x="A", y="C")

AB | BC | AC

In [8]:
# Display the per-run statistics table (one row per simulation run).
d

Unnamed: 0,kAB,kBC,kAC,mAB,mBC,mAC,r_sqrAB,r_sqrBC,r_sqrAC,r_E,...,rAB2*rBC2-rAC2,r_E_BA_C2-rBC2,mAB*mBC-mAC,mAB*mBC,confidence_rAC,confidence_residual_corr,confidence_corrected_bc_corr,confidence_slope_AC,L,U
0,3143.095496,914.317177,1754.753448,1.008251,0.366917,0.031177,0.379302,0.650102,0.001751,0.600940,...,0.244834,-0.649988,0.338767,0.369944,more,less,less,more,0.596411,0.698002
1,22.285251,-155.923610,59.243899,0.983517,0.630925,0.016413,0.347593,0.654774,0.000159,0.576985,...,0.227436,-0.654748,0.604112,0.620526,more,less,less,more,0.601627,0.702150
2,-1557.719156,-1549.927327,-1924.614725,1.738886,0.211125,-0.089650,0.763906,0.157338,0.007167,0.845234,...,0.113024,-0.150498,0.456772,0.367122,more,less,less,more,0.070620,0.241695
3,168.869873,337.895280,113.614293,1.696104,0.337638,-0.075160,0.604489,0.328776,0.003423,0.760995,...,0.195318,-0.319964,0.647829,0.572669,more,less,less,more,0.248240,0.404797
4,135.218012,103.629736,136.762811,1.053416,0.337428,-0.030572,0.405863,0.525405,0.001577,0.601665,...,0.211665,-0.520662,0.386023,0.355452,more,less,less,more,0.458854,0.586094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-585.496768,65.107075,-457.828460,0.356376,0.770503,0.001230,0.139687,0.848799,0.000002,0.355223,...,0.118564,-0.848097,0.273359,0.274589,more,less,less,more,0.822314,0.871614
96,840.154767,198.171351,1121.538755,0.359104,1.049670,0.010494,0.110405,0.881096,0.000075,0.309774,...,0.097202,-0.879433,0.366447,0.376941,more,less,less,more,0.859839,0.899303
97,-171.570960,-231.467993,-223.427212,1.699430,-0.017862,-0.043996,0.986768,0.004504,0.009336,0.259276,...,-0.004892,0.002850,0.013640,-0.030356,within,less,within,within,-0.083221,0.092160
98,1.501807,41.633911,87.889033,0.698194,-0.479566,-0.903879,0.599232,0.002127,0.009290,0.074279,...,-0.008015,-0.001004,0.569049,-0.334830,within,within,within,within,-0.085581,0.089803


In [9]:
# Export the r_sqrAC column (with the run index) to CSV.
# NOTE(review): "/tmp" is a hard-coded absolute path and does not exist on every
# platform -- consider a configurable output directory.
d[['r_sqrAC']].to_csv("/tmp/rsqrAC.csv")

In [10]:
# List the statistic columns available in `d`.
d.columns

Index(['kAB', 'kBC', 'kAC', 'mAB', 'mBC', 'mAC', 'r_sqrAB', 'r_sqrBC',
       'r_sqrAC', 'r_E', 'r_E_BA_C', 'n', 'rAB', 'rBC', 'rAC',
       'rAB2*rBC2-rAC2', 'r_E_BA_C2-rBC2', 'mAB*mBC-mAC', 'mAB*mBC',
       'confidence_rAC', 'confidence_residual_corr',
       'confidence_corrected_bc_corr', 'confidence_slope_AC', 'L', 'U'],
      dtype='object')