# Load Data

In [419]:
using DataFrames

# To run this notebook, you need a data file.
# Either run the phenotype preprocessing scripts in the ../preprocessing directory,
# or copy the data with: scp -r username@sherlock.stanford.edu:/scratch/PI/dpwall/DATA/phenotypes/jsonschema ../data

# Data
# Read the filtered sample table; the strings "None" and "" are parsed as NA.
df = readtable("../data/all_samples_filtered.csv", nastrings=["None", ""])
samples = df[:, 1:1] # First column, kept as a one-column DataFrame
df = df[:, 2:end] # Remove identifier

# Recode Data
# Every column is shifted so that 0 is free to mean "missing":
#   value >= 2 -> 3,  1 -> 2,  0 -> 1,  NA -> 0
# The steps run from the largest code downwards so that a value written by one
# step is never matched again by a later step.
for nm in names(df)
    df[df[nm] .>= 2, nm] = 3
    df[df[nm] .== 1, nm] = 2
    df[df[nm] .== 0, nm] = 1
    df[isna(df[nm]), nm] = 0
end

# m = number of samples (rows), n = number of items (columns)
m, n = size(df)


(13434,123)

In [420]:
# Sparse copy of the table with explicit zeros removed, so only non-zero codes are stored
all_data = dropzeros!(sparse(Array(df)))
# p = number of stored entries
p = length(nonzeros(all_data))

1001073

In [421]:
# Column ranges per instrument: columns 1-77 are used as the ADI-R block,
# every column after that (up to n) as the ADOS block.
adir_indices = 1:77
ados_indices = (last(adir_indices) + 1):n

78:123

In [422]:
# ---- Whole-instrument hold-out ----
# The first ~5% of the rows lose all of their ADI-R columns and the next ~5%
# lose all of their ADOS columns.
all_sample_indices = collect(1:m)
break1 = ceil(Integer, 0.05 * m)
break2 = ceil(Integer, 0.1 * m)
adir_heldout_indices = view(all_sample_indices, 1:(break1 - 1))
ados_heldout_indices = view(all_sample_indices, break1:(break2 - 1))

remaining_data = copy(all_data)
remaining_data[adir_heldout_indices, adir_indices] = 0
remaining_data[ados_heldout_indices, ados_indices] = 0
dropzeros!(remaining_data)
p = length(nonzeros(remaining_data))

# ---- Entry-level split ----
# Shuffle the positions of the stored entries and cut them 85% / 5% / 10%
# into train / test / held-out. The shuffle is unseeded.
all_indices = collect(1:p)
shuffle!(all_indices)
break1 = ceil(Integer, 0.85 * p)
break2 = ceil(Integer, 0.9 * p)
train_indices = view(all_indices, 1:(break1 - 1))
test_indices = view(all_indices, break1:(break2 - 1))
held_out_test_indices = view(all_indices, break2:p)

# Copy `source`, zero the stored entries at positions `dropped`, drop them
blank_out = (source, dropped) -> begin
    kept = copy(source)
    nonzeros(kept)[dropped] = 0
    dropzeros!(kept)
    kept
end

# Each split keeps only its own partition of the stored entries
train_data = blank_out(remaining_data, vcat(test_indices, held_out_test_indices))
test_data = blank_out(remaining_data, vcat(train_indices, held_out_test_indices))
heldout_data = blank_out(remaining_data, vcat(train_indices, test_indices))

# Sanity check: partition size next to the stored-entry count of its matrix
for (part, data) in ((held_out_test_indices, heldout_data),
                     (test_indices, test_data),
                     (train_indices, train_data))
    println(size(part), " ", size(nonzeros(data)))
end

(94998,) (94998,)
(47498,) (47498,)
(807477,) (807477,)


In [423]:
# Stored held-out entries per instrument: ADI-R columns first, then ADOS
for cols in (adir_indices, ados_indices)
    println(size(nonzeros(heldout_data[:, cols])))
end

(65030,)
(29968,)


# Train Models

## Both instruments

In [455]:
using LowRankModels

# Factor matrices of the joint model, one (X, Y) pair appended per rank tried
Xs = Any[]
Ys = Any[]
for k in 6:6
    # One MultinomialLoss(3) per column, no regularisation on either factor
    losses = [MultinomialLoss(3) for col in 1:n]
    ry = ZeroReg()
    rx = ZeroReg()

    glrm = GLRM(train_data, losses, rx, ry, k, offset=false, scale=false)
    init_svd!(glrm)

    # fit GLRM with explicitly constructed ProxGradParams
    X, Y, ch = fit!(glrm, ProxGradParams(), verbose=true, max_iter=5000)
    push!(Xs, X)
    push!(Ys, Y)
end


Fitting GLRM
Iteration 10: objective value = 534301.9627493373


LoadError: InterruptException:

In [409]:
using LowRankModels

# Fit a single rank-5 GLRM on the training matrix with a logistic loss and no
# regularisation, then append its factors to the Xs / Ys created in an earlier cell.
# NOTE(review): the recorded run of this cell stopped with a MethodError inside
# LowRankModels (sort_observations got Array{Tuple{Any,Any},1}); the cause is not
# visible in this file — confirm the cell runs before relying on it.
k = 5
losses = LogisticLoss()
rx = ZeroReg()
ry = ZeroReg()
# offset=false / scale=false are passed straight through to the GLRM constructor
glrm = GLRM(train_data, losses, rx, ry, k, offset=false, scale=false);
init_svd!(glrm);

X,Y,ch = fit!(glrm, verbose=true, max_iter=5000); # fit GLRM
push!(Xs, X)
push!(Ys, Y)


LoadError: MethodError: no method matching sort_observations(::Array{Tuple{Any,Any},1}, ::Int64, ::Int64)
Closest candidates are:
  sort_observations(::Array{Tuple{Int64,Int64},1}, ::Int64, ::Int64; check_empty) at /Users/kelley/.julia/v0.5/LowRankModels/src/modify_glrm.jl:6

## ADIR

In [348]:
using LowRankModels

# ADI-R-only models: one logistic-loss GLRM per rank k = 1..10, fitted on the
# ADI-R columns of the training matrix; factors are collected in rank order.
adir_Xs = Any[]
adir_Ys = Any[]
for k in 1:10
    losses = LogisticLoss()
    ry = ZeroReg()
    rx = ZeroReg()

    glrm = GLRM(train_data[:, adir_indices], losses, rx, ry, k, offset=false, scale=false)
    init_svd!(glrm)

    # fit GLRM
    X, Y, ch = fit!(glrm, verbose=true, max_iter=1000)
    push!(adir_Xs, X)
    push!(adir_Ys, Y)
end

LowRankModels.SparseProxGradParams(1.0,100,1,1.0e-5,0.01)
Fitting GLRM
Iteration 10: objective value = 345058.04418069025
Iteration 20: objective value = 260530.07672721613
obj went up to 253953.30022975296; reducing step size to 2.744090396921059
Iteration 30: objective value = 253453.74653149213
obj went up to 253506.13483629696; reducing step size to 1.8293935979473728
obj went up to 253186.70614302575; reducing step size to 1.5565475464105132
Iteration 40: objective value = 253056.75860194003
LowRankModels.SparseProxGradParams(1.0,100,1,1.0e-5,0.01)
Fitting GLRM
Iteration 10: objective value = 236610.52202479122
Iteration 20: objective value = 222986.85860227654
obj went up to 222391.32098451263; reducing step size to 2.150066629142469
obj went up to 221475.34472904255; reducing step size to 1.5802989724197147
Iteration 30: objective value = 221015.046145284
obj went up to 220722.69214762896; reducing step size to 1.4118345092158846
Iteration 40: objective value = 220262.861039521


obj went up to 149796.14913013682; reducing step size to 1.7422796170927357
Iteration 30: objective value = 149681.96681952628
obj went up to 148293.24399742758; reducing step size to 1.5565475464105127
Iteration 40: objective value = 147322.75004107875
obj went up to 146918.8584407962; reducing step size to 1.5331530973071477
Iteration 50: objective value = 145981.09889475923
obj went up to 145847.85194900644; reducing step size to 1.510110259852339
Iteration 60: objective value = 144903.78183171878
LowRankModels.SparseProxGradParams(1.0,100,1,1.0e-5,0.01)
Fitting GLRM
Iteration 10: objective value = 158490.6486418879
Iteration 20: objective value = 143992.7787962751
obj went up to 140959.28231616243; reducing step size to 2.370448458629572
Iteration 30: objective value = 139766.72548253505
obj went up to 140075.12365371245; reducing step size to 1.8293935979473723
obj went up to 138194.61750263488; reducing step size to 1.6343749237310385
Iteration 40: objective value = 137144.386764

## ADOS

In [351]:
using LowRankModels

# ADOS-only models: one logistic-loss GLRM per rank k = 1..10, fitted on the
# ADOS columns of the training matrix; factors are collected in rank order.
ados_Xs = Any[]
ados_Ys = Any[]
for k in 1:10
    losses = LogisticLoss()
    ry = ZeroReg()
    rx = ZeroReg()

    glrm = GLRM(train_data[:, ados_indices], losses, rx, ry, k, offset=false, scale=false)
    init_svd!(glrm)

    # fit GLRM
    X, Y, ch = fit!(glrm, verbose=true, max_iter=1000)
    push!(ados_Xs, X)
    push!(ados_Ys, Y)
end

LowRankModels.SparseProxGradParams(1.0,100,1,1.0e-5,0.01)
Fitting GLRM
obj went up to 154774.06955478943; reducing step size to 0.7000000000000001
obj went up to 145226.50338865927; reducing step size to 0.5402250000000001
Iteration 10: objective value = 141177.8598754878
obj went up to 139566.18198886505; reducing step size to 0.5866463998338959
Iteration 20: objective value = 138021.9940893037
obj went up to 137223.60531795898; reducing step size to 0.5778292780092299
Iteration 30: objective value = 135424.66135644974
obj went up to 134794.2103629715; reducing step size to 0.5691446749169604
Iteration 40: objective value = 133184.82325681017
obj went up to 132751.64673217057; reducing step size to 0.5886201287817338
Iteration 50: objective value = 131211.7616839303
obj went up to 130985.96668364444; reducing step size to 0.579773342394928
Iteration 60: objective value = 129437.27267851081
obj went up to 129290.15535486647; reducing step size to 0.571059520590111
Iteration 70: objecti

obj went up to 47252.36787209618; reducing step size to 0.6511370492615337
Iteration 80: objective value = 46701.84269422681
obj went up to 46603.29934789845; reducing step size to 0.6413506520561335
Iteration 90: objective value = 46065.000519914
obj went up to 46033.23189383051; reducing step size to 0.6317113415053331
Iteration 100: objective value = 45524.69553848929
LowRankModels.SparseProxGradParams(1.0,100,1,1.0e-5,0.01)
Fitting GLRM
Iteration 10: objective value = 59605.71037915898
obj went up to 59569.54955787925; reducing step size to 1.1972375506814201
obj went up to 48376.585029221475; reducing step size to 0.9701664837253055
Iteration 20: objective value = 45290.49774490954
obj went up to 44785.77391221214; reducing step size to 0.8667439170138448
Iteration 30: objective value = 42267.15289500295
obj went up to 42192.81423541402; reducing step size to 0.7743464964856602
Iteration 40: objective value = 40456.896737749
obj went up to 40479.48046984066; reducing step size to 

# Evaluate

## Both Instruments

In [355]:
# Test-split error of the joint model for each fitted rank: overall, ADI-R
# columns only, and ADOS columns only.
error = Any[]
adir_error = Any[]
ados_error = Any[]

# Share of tallied entries that sit in the two opposite-corner cells
# ([1, 3] and [3, 1]) of a 3x3 table.
corner_share = c -> (c[1, 3] + c[3, 1]) / sum(c)

for l in 1:10
    println(l)
    # Threshold the reconstruction at 0 into 1 / -1 predictions
    approx = Xs[l].'*Ys[l]
    approx[approx.>=0] = 1
    approx[approx.<0] = -1
    approx = trunc(Int, approx)

    # 3x3 tables indexed by value + 2: stored value picks the row, prediction the column.
    # NOTE(review): this indexing only fits stored values of -1 / 1 — confirm the
    # encoding of train_data / test_data that was active when this cell ran.
    adir_train_confusion = zeros(Int, 3, 3)
    ados_train_confusion = zeros(Int, 3, 3)
    adir_test_confusion = zeros(Int, 3, 3)
    ados_test_confusion = zeros(Int, 3, 3)

    # Only stored (non-zero) entries are tallied
    for i in 1:m, j in adir_indices
        if train_data[i, j] != 0
            adir_train_confusion[train_data[i, j]+2, approx[i, j]+2] += 1
        end
        if test_data[i, j] != 0
            adir_test_confusion[test_data[i, j]+2, approx[i, j]+2] += 1
        end
    end
    for i in 1:m, j in ados_indices
        if train_data[i, j] != 0
            ados_train_confusion[train_data[i, j]+2, approx[i, j]+2] += 1
        end
        if test_data[i, j] != 0
            ados_test_confusion[test_data[i, j]+2, approx[i, j]+2] += 1
        end
    end

    # Overall rate = rate of the element-wise sum of both instruments' tables
    train_all = corner_share(adir_train_confusion + ados_train_confusion)
    test_all = corner_share(adir_test_confusion + ados_test_confusion)
    push!(error, test_all)
    println("Error ", train_all, " ", test_all)

    adir_test_rate = corner_share(adir_test_confusion)
    push!(adir_error, adir_test_rate)
    println("ADIR ", corner_share(adir_train_confusion), " ", adir_test_rate)

    ados_test_rate = corner_share(ados_test_confusion)
    push!(ados_error, ados_test_rate)
    println("ADOS ", corner_share(ados_train_confusion), " ", ados_test_rate)
end

1
Error 0.2420465226873335 0.24415764874310497
ADIR 0.23155070655716098 0.23138703963842913
ADOS 0.2649707800657744 0.2725054229934924
2
Error 0.6042212397380978 0.21485115162743695
ADIR 0.2026167981324576 0.20888047395101692
ADOS 0.20135728763298816 0.22810466377440347
3
Error 0.18611427941600814 0.20775611604699146
ADIR 0.18290144308455034 0.19776461247175228
ADOS 0.1931315409670576 0.2299349240780911
4
Error 0.59139995690279722 0.20198745210324645
ADIR 0.1696134382170964 0.19251206254198985
ADOS 0.17848608405561645 0.22302060737527116
5
Error 0.16364924326017954 0.20120847193566044
ADIR 0.16331246851771944 0.19300067183778172
ADOS 0.16438480405687833 0.21942787418655096
6
Error 0.67373564819802918 0.20287169986104678
ADIR 0.15213321224879622 0.19153484395040615
ADOS 0.15723558128346884 0.22803687635574837
7
Error 0.1463880704961256 0.20337698429407555
ADIR 0.14856928268244377 0.19275636718988579
ADOS 0.14162401318643186 0.2269522776572668
8
Error 0.77288256507615695 0.20493494462924

## ADIR

In [349]:
# Test-split error of the ADI-R-only models, one entry per fitted rank
only_adir_error = Any[]

# Share of tallied entries that sit in the two opposite-corner cells
# ([1, 3] and [3, 1]) of a 3x3 table.
corner_share = c -> (c[1, 3] + c[3, 1]) / sum(c)

for l in 1:10
    println(l)
    # Threshold the reconstruction at 0 into 1 / -1 predictions
    approx = adir_Xs[l].'*adir_Ys[l]
    approx[approx.>=0] = 1
    approx[approx.<0] = -1
    approx = trunc(Int, approx)

    # 3x3 tables indexed by value + 2: stored value picks the row, prediction the column
    adir_train_confusion = zeros(Int, 3, 3)
    adir_test_confusion = zeros(Int, 3, 3)

    # Only stored (non-zero) entries are tallied
    for i in 1:m, j in adir_indices
        if train_data[i, j] != 0
            adir_train_confusion[train_data[i, j]+2, approx[i, j]+2] += 1
        end
        if test_data[i, j] != 0
            adir_test_confusion[test_data[i, j]+2, approx[i, j]+2] += 1
        end
    end

    adir_test_rate = corner_share(adir_test_confusion)
    push!(only_adir_error, adir_test_rate)
    println("Error ", corner_share(adir_train_confusion), " ", adir_test_rate)
end

1
Error 0.23133947061021912 0.23193672509619495
2
Error 0.7844433806417601 0.20448299028889025
3
Error 0.17282711330732303 0.19574909912661087
4
Error 0.7407325012683184 0.19474134245404018
5
Error 0.1528571918618192 0.1946497282110792
6
Error 0.46592145814188195 0.19620717034141574
7
Error 0.1329269899960822 0.19816160752458314
8
Error 0.72276175473881669 0.19889452146827094
9
Error 0.11161201914483745 0.19981066389788066
10
Error 0.08173926442949142 0.20484944726073415


## ADOS

In [352]:
# Test-split error of the ADOS-only models, one entry per fitted rank
only_ados_error = []

# The ADOS-only reconstruction has one column per ADOS item, so column j of the
# full matrix is column j - ados_offset of approx. The offset is derived from
# ados_indices instead of the former hard-coded 77, so it follows any change
# to the column layout.
ados_offset = first(ados_indices) - 1

for l=1:10
    println(l)
    # Threshold the reconstruction at 0 into 1 / -1 predictions
    approx = ados_Xs[l].'*ados_Ys[l]
    approx[approx.>=0] = 1
    approx[approx.<0] = -1
    approx = trunc(Int, approx)

    # 3x3 tables indexed by value + 2: stored value picks the row, prediction the column
    ados_train_confusion = zeros(Int, 3, 3)
    ados_test_confusion = zeros(Int, 3, 3)

    # Only stored (non-zero) entries are tallied
    for i=1:m
        for j=ados_indices
            if train_data[i, j] != 0
                ados_train_confusion[train_data[i, j]+2, approx[i, j-ados_offset]+2] += 1
            end
            if test_data[i, j] != 0
                ados_test_confusion[test_data[i, j]+2, approx[i, j-ados_offset]+2] += 1
            end
        end
    end

    # Rate = share of tallied entries in the opposite-corner cells [1, 3] and [3, 1]
    push!(only_ados_error, (ados_test_confusion[1, 3]+ados_test_confusion[3, 1])/(sum(ados_test_confusion)))
    println("Error ", (ados_train_confusion[1, 3]+ados_train_confusion[3, 1])/(sum(ados_train_confusion)), " ",
                    (ados_test_confusion[1, 3]+ados_test_confusion[3, 1])/(sum(ados_test_confusion)))
end

1
Error 0.266966095412352 0.28782537960954446
2
Error 0.249344361459656 0.22674891540130152
3
Error 0.16835966150618706 0.23386659436008678
4
Error 0.94489930361128417 0.24017082429501085
5
Error 0.13606788804151518 0.25935466377440347
6
Error 0.0615493836604967 0.24857646420824295
7
Error 0.09444200809167409 0.255558568329718
8
Error 0.025915224334960605 0.2533215835140998
9
Error 0.06298122983982271 0.2584056399132321
10
Error 0.097624369661742784 0.2723698481561822


## Tune Parameters

In [356]:
using Plots
plotly() # Choose the Plotly.jl backend for web interactivity
# One legend entry per series, laid out as a 1x3 row
labels = ["All Items" "ADI-R Items" "ADOS Items"]
# Joint-model error per rank: overall, ADI-R columns, ADOS columns
plot(
    [error, adir_error, ados_error],
    xticks = 0:1:20,
    linewidth=2,
    title="Imputation Error as a function of k",
    ylabel="Validation Error",
    xlabel="k",
    label=labels,
    palette=["#ce93d8", "#4db6ac", "#ef6c00"])

#labels[1] = ""
#labels[2] = "ADI-R"
#labels[3] = "ADOS"

#plot!([[], only_adir_error, only_ados_error], linewidth=2, 
#    title="Single Instrument Imputation Error <br>as a function of k", ylabel="Validation Error", legend=:right, 
#    xlabel="k", xticks = 0:1:10, label=labels, palette = ["#ce93d8", "#4db6ac", "#ef6c00"])


In [353]:
using Plots
plotly() # Choose the Plotly.jl backend for web interactivity
# 1x3 row of legend entries; the first series is an empty placeholder
labels = ["" "ADI-R" "ADOS"]

# Error per rank of the models fitted on one instrument at a time
plot(
    [[], only_adir_error, only_ados_error],
    linewidth=2,
    title="Single Instrument Imputation Error <br>as a function of k",
    ylabel="Validation Error",
    legend=:right,
    xlabel="k",
    xticks = 0:1:10,
    label=labels,
    palette = ["#ce93d8", "#4db6ac", "#ef6c00"])


## ROC

In [359]:
# ROC points for the sixth stored joint model on the rows whose ADI-R block
# was held out; scores come from the reconstruction, labels from all_data.
approx = Xs[6].'*Ys[6]
#i_values = approx[heldout_data .!= 0]
#a_values = heldout_data[heldout_data .!= 0]
i_values = approx[adir_heldout_indices, adir_indices][:]
a_values = all_data[adir_heldout_indices, adir_indices][:]
#i_values = approx[ados_heldout_indices, ados_indices][:]
#a_values = all_data[ados_heldout_indices, ados_indices][:]

# (score, label) pairs in ascending score order
roc_values = sort(collect(zip(i_values, a_values)))

# Start with the cutoff below every score: everything is called positive
tp = count(x -> x[2] > 0, roc_values)
fp = count(x -> x[2] < 0, roc_values)
tn = 0
fn = 0
sensitivity = Float64[1]
specificity = Float64[0]
cutoffs = Float64[0]

# Raise the cutoff past one pair at a time; pairs with label 0 change nothing
for v in roc_values
    if v[2] > 0
        tp -= 1
        fn += 1
    elseif v[2] < 0
        fp -= 1
        tn += 1
    end
    sens_now = tp/(tp+fn)
    spec_now = tn/(tn+fp)
    # Record a point only once either rate has moved by more than 0.1
    if abs(sens_now - sensitivity[end]) > .1 || abs(spec_now - specificity[end]) > .1
        push!(sensitivity, sens_now)
        push!(specificity, spec_now)
        push!(cutoffs, v[1])
    end
end

# Closing corner of the curve (no cutoff is recorded for it)
push!(sensitivity, 0)
push!(specificity, 1)
# Index of the first recorded cutoff above 0
baseline_index = findfirst(x -> x > 0, cutoffs)

13

In [380]:
# ROC points for the fifth stored joint model on the held-out entries
approx = Xs[5].'*Ys[5]
held_mask = heldout_data .!= 0
i_values = approx[held_mask]
a_values = heldout_data[held_mask]

# (score, label) pairs in ascending score order
roc_values = sort(collect(zip(i_values, a_values)))

# Start with the cutoff below every score: everything is called positive
tp = count(x -> x[2] > 0, roc_values)
fp = count(x -> x[2] < 0, roc_values)
tn = 0
fn = 0
sensitivity = Float64[1]
specificity = Float64[0]
cutoffs = Float64[0]

# Raise the cutoff past one pair at a time
for v in roc_values
    if v[2] > 0
        tp -= 1
        fn += 1
    elseif v[2] < 0
        fp -= 1
        tn += 1
    end
    sens_now = tp/(tp+fn)
    spec_now = tn/(tn+fp)
    # Record a point only once either rate has moved by more than 0.1
    if abs(sens_now - sensitivity[end]) > .1 || abs(spec_now - specificity[end]) > .1
        push!(sensitivity, sens_now)
        push!(specificity, spec_now)
        push!(cutoffs, v[1])
    end
end

# Closing corner of the curve (no cutoff is recorded for it)
push!(sensitivity, 0)
push!(specificity, 1)
# Index of the first recorded cutoff above 0
baseline_index = findfirst(x -> x > 0, cutoffs)

9

In [361]:
# ROC points for the sixth ADI-R-only model on the held-out ADI-R entries
approx = adir_Xs[6].'*adir_Ys[6]
adir_heldout_data = heldout_data[:, adir_indices]
held_mask = adir_heldout_data .!= 0
i_values = approx[held_mask]
a_values = adir_heldout_data[held_mask]

# (score, label) pairs in ascending score order
roc_values = sort(collect(zip(i_values, a_values)))

# Start with the cutoff below every score: everything is called positive
tp = count(x -> x[2] > 0, roc_values)
fp = count(x -> x[2] < 0, roc_values)
tn = 0
fn = 0
adir_sensitivity = Float64[1]
adir_specificity = Float64[0]
adir_cutoffs = Float64[0]

# Raise the cutoff past one pair at a time
for v in roc_values
    if v[2] > 0
        tp -= 1
        fn += 1
    elseif v[2] < 0
        fp -= 1
        tn += 1
    end
    sens_now = tp/(tp+fn)
    spec_now = tn/(tn+fp)
    # Record a point only once either rate has moved by more than 0.1
    if abs(sens_now - adir_sensitivity[end]) > .1 || abs(spec_now - adir_specificity[end]) > .1
        push!(adir_sensitivity, sens_now)
        push!(adir_specificity, spec_now)
        push!(adir_cutoffs, v[1])
    end
end

# Closing corner of the curve (no cutoff is recorded for it)
push!(adir_sensitivity, 0)
push!(adir_specificity, 1)
# Index of the first recorded cutoff above 0
adir_baseline_index = findfirst(x -> x > 0, adir_cutoffs)

9

In [340]:
# ROC points for the third ADOS-only model on the held-out ADOS entries
approx = ados_Xs[3].'*ados_Ys[3]
ados_heldout_data = heldout_data[:, ados_indices]
held_mask = ados_heldout_data .!= 0
i_values = approx[held_mask]
a_values = ados_heldout_data[held_mask]

# (score, label) pairs in ascending score order
roc_values = sort(collect(zip(i_values, a_values)))

# Start with the cutoff below every score: everything is called positive
tp = count(x -> x[2] > 0, roc_values)
fp = count(x -> x[2] < 0, roc_values)
tn = 0
fn = 0
ados_sensitivity = Float64[1]
ados_specificity = Float64[0]
ados_cutoffs = Float64[0]

# Raise the cutoff past one pair at a time
for v in roc_values
    if v[2] > 0
        tp -= 1
        fn += 1
    elseif v[2] < 0
        fp -= 1
        tn += 1
    end
    sens_now = tp/(tp+fn)
    spec_now = tn/(tn+fp)
    # Record a point only once either rate has moved by more than 0.1
    if abs(sens_now - ados_sensitivity[end]) > .1 || abs(spec_now - ados_specificity[end]) > .1
        push!(ados_sensitivity, sens_now)
        push!(ados_specificity, spec_now)
        push!(ados_cutoffs, v[1])
    end
end

# Closing corner of the curve (no cutoff is recorded for it)
push!(ados_sensitivity, 0)
push!(ados_specificity, 1)
# Index of the first recorded cutoff above 0
ados_baseline_index = findfirst(x -> x > 0, ados_cutoffs)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [406]:
# Baseline
# Sensitivity / specificity of a baseline imputation on the held-out entries.
# NOTE(review): `baseline_impute` is not defined anywhere in the visible file —
# it must already exist in the kernel; confirm how it is built (the plot below
# labels this point "Median Impute").
# NOTE(review): `zero_impute` is assigned but never read in the visible file.
zero_impute = zeros()
# Collapse the baseline to -1 / 1 by sign, in place; exact zeros are left as they are
baseline_impute[baseline_impute.<0] = -1
baseline_impute[baseline_impute.>0] = 1

# Baseline predictions and true values at the held-out (non-zero) positions
i_values = baseline_impute[heldout_data .!= 0]
a_values = heldout_data[heldout_data .!= 0]

# Confusion counts over (prediction, truth) pairs; pairs where either value is
# not exactly 1 or -1 are counted nowhere
tp = count(x -> x[1] == 1 && x[2] == 1, zip(i_values, a_values))
tn = count(x -> x[1] == -1 && x[2] == -1, zip(i_values, a_values))
fp = count(x -> x[1] == 1 && x[2] == -1, zip(i_values, a_values))
fn = count(x -> x[1] == -1 && x[2] == 1, zip(i_values, a_values))

baseline_sensitivity = tp/(tp+fn)
baseline_specificity = tn/(tn+fp)

0.6393071571523873

In [405]:
using Plots
plotly() # Choose the Plotly.jl backend for web interactivity
# ROC curve of the GLRM imputation from the recorded sensitivity / specificity
# points. `linewidth` was previously passed twice in this call; it is now
# given exactly once.
plot(1-specificity, sensitivity, linewidth=2,
    title="Imputation ROC", ylabel="Sensitivity", xlabel="1-Specificity", label="GLRM Impute<br>k=5", primary=true,
    palette=["#ce93d8", "#4db6ac", "#ef6c00"], xticks = 0:.2:1, yticks = 0:.2:1, size=(500, 400), legend=:right, margin=5mm)
# Marker at baseline_index, i.e. the first recorded cutoff above 0
plot!([1-specificity[baseline_index]], [sensitivity[baseline_index]], markersize=5, markershape = :hexagon, primary=false)

# Single marker for the baseline imputation
plot!([1-baseline_specificity], [baseline_sensitivity], markersize=5, markershape = :hexagon, primary=true, label="Median Impute")


#plot!(1-adir_specificity, adir_sensitivity, linewidth=2, 
#    linewidth=2, label="ADI-R", primary=true,
#    palette=["#ce93d8", "#4db6ac", "#ef6c00"], xticks = 0:.2:1, yticks = 0:.2:1)
#plot!([1-adir_specificity[adir_baseline_index]], [adir_sensitivity[adir_baseline_index]], markersize=5, markershape = :hexagon, primary=false, label="ADI-R")

#plot!(1-ados_specificity, ados_sensitivity, linewidth=2, 
#    linewidth=2, label="ADOS",primary=true,
#    palette=["#ce93d8", "#4db6ac", "#ef6c00"], xticks = 0:.2:1, yticks = 0:.2:1)
#plot!([1-ados_specificity[ados_baseline_index]], [ados_sensitivity[ados_baseline_index]], markersize=5, markershape = :hexagon, primary=false, label="ADOS")

# Output

In [410]:
# Export the joint model stored at position k: both factors plus a completed 0/1 table
k = 5

# Threshold the reconstruction at 0 (>= 0 becomes 1, < 0 becomes 0)
approx = Xs[k].'*Ys[k]
approx[approx.>=0] = 1
approx[approx.<0] = 0
approx = trunc(Int, approx)

# Replace imputed values with real values if we have them
known_pos = all_data.>0
known_neg = all_data.<0
approx[known_pos] = 1
approx[known_neg] = 0

# Restore the item names and put the identifier column back in front
new_df = convert(DataFrame, approx)
names!(new_df, names(df))
new_df = hcat(samples, new_df)

writecsv("../data/impute_logloss_X$(k).csv", Xs[k])
writecsv("../data/impute_logloss_Y$(k).csv", Ys[k])
writetable("../data/impute_logloss_realfill_Z$(k).csv", new_df, separator = ',', header = true)

In [65]:

# Export the ADI-R-only model stored at position k: both factors plus a completed 0/1 table
k = 6

# Threshold the reconstruction at 0 (>= 0 becomes 1, < 0 becomes 0)
approx = adir_Xs[k].'*adir_Ys[k]
approx[approx.>=0] = 1
approx[approx.<0] = 0
approx = trunc(Int, approx)

# Replace imputed values with real values if we have them
known_pos = all_data[:, adir_indices].>0
known_neg = all_data[:, adir_indices].<0
approx[known_pos] = 1
approx[known_neg] = 0

# Name the columns after the leading items of df and put the identifier column back in front
new_df = convert(DataFrame, approx)
names!(new_df, names(df)[1:size(approx, 2)])
new_df = hcat(samples, new_df)

writecsv("../data/impute_logloss_adir_realfill_X$(k).csv", adir_Xs[k])
writecsv("../data/impute_logloss_adir_realfill_Y$(k).csv", adir_Ys[k])
writetable("../data/impute_logloss_adir_realfill_Z$(k).csv", new_df, separator = ',', header = true)

In [286]:
# Export the ADOS-only model stored at position k: both factors plus a completed 0/1 table
k = 6

# Threshold the reconstruction at 0 (>= 0 becomes 1, < 0 becomes 0)
approx = ados_Xs[k].'*ados_Ys[k]
approx[approx.>=0] = 1
approx[approx.<0] = 0
approx = trunc(Int, approx)

# Replace imputed values with real values if we have them
approx[all_data[:, ados_indices].>0] = 1
approx[all_data[:, ados_indices].<0] = 0

# Name the columns after the trailing items of df and put the identifier column back in front
new_df = convert(DataFrame, approx)
names!(new_df, names(df)[(end-size(approx, 2)+1):end])
new_df = hcat(samples, new_df)

# Write the ADOS factors. These two lines previously wrote adir_Xs / adir_Ys
# into the ADOS files, so the saved factors did not match the saved table.
writecsv("../data/impute_logloss_ados_realfill_X$(k).csv", ados_Xs[k])
writecsv("../data/impute_logloss_ados_realfill_Y$(k).csv", ados_Ys[k])
writetable("../data/impute_logloss_ados_realfill_Z$(k).csv", new_df, separator = ',', header = true)