In [1]:
using Random, Distributions, LinearAlgebra, Statistics, JLD, DelimitedFiles

In [2]:
# Signed difference between the mean of `vals` over blocks with `labels == 1` and the
# mean over blocks with `labels == 0`. When `labels` sums to zero only the negated
# `labels == 0` mean is returned; when `1 .- labels` sums to zero only the
# `labels == 1` mean is returned.
function _dm_contrast(vals, labels)
    if sum(labels) == 0
        return -mean(vals[labels .== 0])
    elseif sum(1 .- labels) == 0
        return mean(vals[labels .== 1])
    end
    return mean(vals[labels .== 1]) - mean(vals[labels .== 0])
end

"""
    switchback_est(y, z, b, l)

Difference-in-means estimate computed from block means of `y`, together with a
leave-one-block-out jackknife variance estimate.

# Arguments
- `y`: outcome series; block `i` occupies `y[(i-1)*l+1:i*l]`.
- `z`: one label per block; blocks are grouped by `z .== 1` and `z .== 0`.
- `b`: number of leading periods of every block that are skipped before averaging
  (`b = 0` averages the whole block).
- `l`: block length.

# Returns
A tuple `(htau_dmf, htau_dmf_var)`: the point estimate and `(k-1)` times the mean
squared deviation of the `k = length(z)` leave-one-out estimates from the point
estimate.

# Throws
- `ArgumentError` if `b` is not in `0:l-1` (the averaged part of a block would be
  empty or reach into the previous block) or if `y` is shorter than `k*l`.
"""
function switchback_est(y,z,b,l)
    k = length(z)
    0 <= b < l ||
        throw(ArgumentError("b must satisfy 0 <= b < l, got b = $b and l = $l"))
    length(y) >= k*l ||
        throw(ArgumentError("y has $(length(y)) periods, but $k blocks of length $l need $(k*l)"))
    # block means over the periods that follow the first b periods of each block
    y_bar_f = [mean(y[(i-1)*l+b+1:i*l]) for i in 1:k]
    # point estimate
    htau_dmf = _dm_contrast(y_bar_f, z)
    # jackknife: recompute the estimate with block i removed
    htau_dmf_j = zeros(k)
    for i in 1:k
        keep = setdiff(1:k, i) # same index set for the block means and the labels
        htau_dmf_j[i] = _dm_contrast(y_bar_f[keep], z[keep])
    end
    htau_dmf_var = (k-1)*mean((htau_dmf_j.-htau_dmf).^2)
    return htau_dmf, htau_dmf_var
end

switchback_est (generic function with 1 method)

In [3]:
# Signed difference of group means of `vals`. `in_a` / `in_b` are Boolean masks for the
# two groups; a group flagged as absent is left out, so only the other group's
# (signed) mean is returned.
function _bc_signed_contrast(vals, in_a, in_b, a_absent, b_absent)
    a_absent && return -mean(vals[in_b])
    b_absent && return mean(vals[in_a])
    return mean(vals[in_a]) - mean(vals[in_b])
end

# Contrast of focal-period block means: blocks with z == 1 minus blocks with z == 0.
_bc_focal_contrast(vals, z) =
    _bc_signed_contrast(vals, z .== 1, z .== 0, sum(z) == 0, sum(1 .- z) == 0)

# Contrast of burn-in block means over blocks whose label equals the previous block's:
# both 1 (`zz1`) minus both 0 (`zz0`). NaN when neither kind of pair is present.
_bc_burnin_contrast(vals, zz1, zz0) =
    _bc_signed_contrast(vals, zz1 .== 1, zz0 .== 1, sum(zz1) == 0, sum(zz0) == 0)

"""
    switchback_bc(y, z, b, l, nblocks=2)

Bias-corrected estimate that combines a burn-in-period contrast and a focal-period
contrast with weights `b/l` and `(l-b)/l`, plus a delete-`nblocks` jackknife variance
estimate.

# Arguments
- `y`: outcome series; block `i` occupies `y[(i-1)*l+1:i*l]`, its first `b` periods
  being the burn-in part and the rest the focal part.
- `z`: one label per block (compared against 1 and 0).
- `b`: number of burn-in periods per block; must be positive.
- `l`: block length.
- `nblocks`: number of consecutive blocks removed in each jackknife replicate.

# Returns
A tuple `(htau_bc, htau_bc_var)`.

# Throws
- `ErrorException("No burn-in periods!")` if `b <= 0`.
- `ArgumentError` if `b >= l` (no focal period would remain) or if `y` is shorter
  than `length(z)*l`.
"""
function switchback_bc(y,z,b,l,nblocks=2)
    if b <= 0
        error("No burn-in periods!")
    end
    k = length(z)
    b < l || throw(ArgumentError("b must be smaller than l, got b = $b and l = $l"))
    length(y) >= k*l ||
        throw(ArgumentError("y has $(length(y)) periods, but $k blocks of length $l need $(k*l)"))
    # block means over the burn-in and the focal periods
    y_bar_b = [mean(y[(i-1)*l+1:(i-1)*l+b]) for i in 1:k]
    y_bar_f = [mean(y[(i-1)*l+b+1:i*l]) for i in 1:k]
    # entry m describes the pair of blocks (m, m+1)
    zz1 = z[2:k] .* z[1:(k-1)] # z_i=z_{i-1}=1
    zz0 = (1 .- z[2:k]) .* (1 .- z[1:(k-1)]) # z_i=z_{i-1}=0
    # point estimate; the burn-in contrast uses blocks 2..k, i.e. those with a predecessor
    htau_dmf = _bc_focal_contrast(y_bar_f, z)
    htau_bcb = _bc_burnin_contrast(y_bar_b[2:k], zz1, zz0)
    htau_bc = b/l*htau_bcb + (l-b)/l*htau_dmf
    # jackknife: replicate i removes blocks i..i+nblocks-1 from the focal part and the
    # pairs with the same indices (hence burn-in means i+1..i+nblocks)
    htau_bc_j = zeros(k-nblocks)
    for i in 1:(k-nblocks)
        keep_blocks = setdiff(1:k, i:(i+nblocks-1))
        keep_burnin = setdiff(1:k, (i+1):(i+nblocks))
        keep_pairs = setdiff(1:length(zz1), i:(i+nblocks-1))
        z_j = z[keep_blocks]
        # drop the first kept burn-in mean so the remainder lines up with the kept pairs
        y_bar_b_j = y_bar_b[keep_burnin][2:length(z_j)]
        htau_dmf_j = _bc_focal_contrast(y_bar_f[keep_blocks], z_j)
        htau_bcb_j = _bc_burnin_contrast(y_bar_b_j, zz1[keep_pairs], zz0[keep_pairs])
        htau_bc_j[i] = b/l*htau_bcb_j + (l-b)/l*htau_dmf_j
    end
    htau_bc_var = (k-nblocks-1)^2/nblocks/(k-nblocks)*mean((htau_bc_j.-mean(htau_bc_j)).^2)
    return htau_bc, htau_bc_var
end

switchback_bc (generic function with 2 methods)

In [4]:
"""
    sim_switchback_res(T, b, l, y, w, nblocks=2)

Run the three estimators on one series and collect their results.

The first `floor(Int, T/l)` blocks of length `l` are used. The block label passed to
the estimators is the mean of `w` over the block, and `y` is cut to the same number
of periods.

Returns the vector
`[htau_dm, htau_dmb, htau_bc, htau_dm_var, htau_dmb_var, htau_bc_var]`, where
- `dm` is `switchback_est` with no burn-in (`b = 0`),
- `dmb` is `switchback_est` with burn-in `b`,
- `bc` is `switchback_bc` with burn-in `b` and `nblocks`.
"""
function sim_switchback_res(T,b,l,y,w,nblocks=2)
    n_blk = floor(Int, T/l)

    # one label per block: the average of w over that block
    block_label = [mean(w[((j-1)*l+1):(j*l)]) for j in 1:n_blk]

    # keep only the periods covered by complete blocks
    y_used = y[1:(n_blk*l)]

    dm = switchback_est(y_used, block_label, 0, l)
    dmb = switchback_est(y_used, block_label, b, l)
    bc = switchback_bc(y_used, block_label, b, l, nblocks)

    # point estimates first, then the matching variance estimates
    return [dm[1], dmb[1], bc[1], dm[2], dmb[2], bc[2]]
end

sim_switchback_res (generic function with 2 methods)

In [5]:
# Settings read by the simulation cells below. The trailing 4000 / 6000 / 2000 tags
# mark which alternative setting a value belongs to; the commented-out lines hold the
# values for the other two settings.
# NOTE(review): what the 4000 / 6000 / 2000 tag measures is not visible here; confirm.

# T: number of periods handed to sim_switchback_res (it uses floor(T/l) blocks)
T = 990 # 4000, first 10 periods have been thrown away
#T= 985 # 6000
#T= 995 # 2000
# l: block length
l = 10 # 4000
#l = 15 # 6000
#l=5 # 2000
# b_all: burn-in lengths to evaluate, one row of res_table per entry
b_all = [2,4] # may use different b for different l
# tau: reference value the estimates are compared against (bias, MSE, coverage)
tau = -13.97229 # 4000
#tau = -13.90982 # 6000
#tau = -13.84223 # 2000

-13.97229

In [7]:
# Names of the entries in `path` that contain `key`, in the order `readdir` returns them.
function searchdir(path, key)
    return [entry for entry in readdir(path) if occursin(key, entry)]
end
file_list = searchdir("/home/users/yuchenhu/switchback/ride_sharing/rideshare-simulator/output/cleaned_files/","actual")

100-element Vector{String}:
 "actual1.csv"
 "actual10.csv"
 "actual100.csv"
 "actual11.csv"
 "actual12.csv"
 "actual13.csv"
 "actual14.csv"
 "actual15.csv"
 "actual16.csv"
 "actual17.csv"
 "actual18.csv"
 "actual19.csv"
 "actual2.csv"
 ⋮
 "actual89.csv"
 "actual9.csv"
 "actual90.csv"
 "actual91.csv"
 "actual92.csv"
 "actual93.csv"
 "actual94.csv"
 "actual95.csv"
 "actual96.csv"
 "actual97.csv"
 "actual98.csv"
 "actual99.csv"

In [8]:
# One row per burn-in length in b_all; columns come in groups of three (DM, DMB, BC):
# 1-3 absolute bias, 4-6 standard deviation across files, 7-9 mean squared error,
# 10-12 coverage of the +/- 1.96 * sqrt(jackknife variance) interval.
res_table = zeros(length(b_all),12)

# please remember to change directory for different designs
data_dir = "/home/users/yuchenhu/switchback/ride_sharing/rideshare-simulator/output/cleaned_files/"

# Parse every file once up front; the contents do not depend on b, so re-reading them
# inside the b loop only repeats the same I/O.
sim_runs = [readdlm(data_dir*fname,',',header=true)[1] for fname in file_list]

for b_index in 1:length(b_all)
    # row i holds sim_switchback_res for file i:
    # columns 1-3 are the point estimates, columns 4-6 the variance estimates
    est = zeros(length(sim_runs),6)
    for (i, run) in enumerate(sim_runs)
        # column 3 of a file is passed as y, column 2 as w
        est[i,:] = sim_switchback_res(T,b_all[b_index],l,run[:,3],run[:,2])
    end
    for j in 1:3 # DM, DMB, BC
        theta = est[:,j]
        half_width = 1.96 .* sqrt.(est[:,j+3])
        res_table[b_index,j] = abs(mean(theta.-tau)) # bias
        res_table[b_index,j+3] = sqrt(var(theta)) # standard error (truth)
        res_table[b_index,j+6] = mean((theta.-tau).^2) # mean squared error
        res_table[b_index,j+9] = mean((tau .<= theta .+ half_width) .& (tau .>= theta .- half_width)) # coverage
    end
end

In [9]:
# Columns 1-3: absolute value of the mean deviation from tau, one row per entry of b_all.
display(res_table[:,1:3]) # bias(DM), bias(DMB), bias(BC)

2×3 Matrix{Float64}:
 2.64198  0.569046  0.39779
 2.64198  0.155264  0.0869995

In [10]:
# Columns 4-6: square root of the variance of the estimates across the input files.
display(res_table[:,4:6]) # standard error(DM), standard error(DMB), standard error(BC)

2×3 Matrix{Float64}:
 2.59567  2.68625  2.75288
 2.59567  2.88423  3.09174

In [11]:
# Columns 7-9: mean squared deviation of the estimates from tau.
display(res_table[:,7:9]) # MSE(DM), MSE(DMB), MSE(BC)

2×3 Matrix{Float64}:
 13.6502  7.4676   7.66083
 13.6502  8.25972  9.47085

In [12]:
# Columns 10-12: share of files whose interval estimate +/- 1.96*sqrt(variance estimate) contains tau.
display(res_table[:,10:12]) # coverage(DM), coverage(DMB), coverage(BC)

2×3 Matrix{Float64}:
 0.78  0.92  0.92
 0.78  0.9   0.92

In [None]:
# For reproducing Figure 5b: estimates and estimated standard deviations on a single
# dataset (actual1 from the output_4000 directory) for a range of burn-in lengths.

T = 990 # 4000, first 10 periods have been thrown away
l = 10 # 4000
b_all = [1,2,3,4,5,6,7,8]

# one row per b; columns 1-3 hold the DM, DMB and BC estimates, columns 4-6 the
# square roots of their variance estimates
res_table = zeros(length(b_all),6)

# The dataset is the same for every b, so it is read once rather than inside the loop.
actual = readdlm("/home/users/yuchenhu/switchback/ride_sharing/rideshare-simulator/output_4000/cleaned_files/actual1.csv",',',header=true)
y_obs = actual[1][:,3] # passed as y
w_obs = actual[1][:,2] # passed as w

for (b_index, b) in enumerate(b_all)
    result = sim_switchback_res(T,b,l,y_obs,w_obs)
    res_table[b_index,1:3] = result[1:3] # estimator
    res_table[b_index,4:6] = sqrt.(result[4:6]) # estimated sd
end

In [None]:
# Show the table: one row per entry of b_all, estimates in columns 1-3 and their
# estimated standard deviations in columns 4-6.
res_table

In [None]:
# Write res_table to lepski_plot.csv with ',' as the delimiter.
writedlm("/home/users/yuchenhu/switchback/ride_sharing/rideshare-simulator/lepski_plot.csv", res_table, ',')