In [None]:
using PyPlot
using AlfvenDetectors
using BSON
using Flux
using ValueHistories

In [None]:
# Directory holding the sample shot files (machine-specific absolute path).
datapath = "/home/vit/vyzkum/alfven/cdb_data/data_sample"
# Keep only the files whose full path contains one of the selected shot numbers.
shots = filter(joinpath.(datapath, readdir(datapath))) do shotfile
    any(shotno -> occursin(shotno, shotfile),
        ["10370", "10514", "10800", "10866", "10870", "10893"])
end
# Patch edge length passed to the patch-splitting helpers throughout the notebook.
patchsize = 128
# Reader applied to each shot file (also reused for the validation and test shots).
readfun = AlfvenDetectors.readnormlogupsd
# Collected patches; indexed as a 4-D array later in the notebook.
data = AlfvenDetectors.collect_conv_signals(shots, readfun, patchsize);
println(size(data))

In [None]:
# Shot file(s) set aside for computing a validation loss of the trained models.
validshots = ["Jldata10000.h5"]
validdata = AlfvenDetectors.collect_conv_signals(
    map(shot -> joinpath(datapath, shot), validshots), readfun, patchsize);
println(size(validdata))

In [None]:
# Read one shot with the same reader as the training data and plot it in full.
testdata = readfun(joinpath(datapath, "Jldata10893.h5"));
pcolormesh(testdata)
title("full data")
# Hand-picked window: rows 129:384 (256 rows) and columns 1537:2816 (1280 columns),
# i.e. whole multiples of the 128-sample patch size in both directions.
subtestdata = testdata[129:384, 1537:2816]
# Second figure shows only the window.
figure()
title("subset")
pcolormesh(subtestdata)

In [None]:
# Cut the full test shot and its sub-window into patches with `split_reshape`.
# Both results are indexed as 4-D arrays (`[:,:,:,i:i]`) in later cells.
convtestdata = AlfvenDetectors.split_reshape(testdata,patchsize);
println(size(convtestdata))
convsubtestdata = AlfvenDetectors.split_reshape(subtestdata,patchsize);
size(convsubtestdata)

In [None]:
# Directory with the saved models to inspect; the commented lines are alternative
# experiment folders (machine-specific absolute paths).
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/"
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/ae-test";
modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/batchnorm-test"
# Full paths of every entry in the directory (no filtering by extension is done here).
filenames = joinpath.(modelpath, readdir(modelpath))
println("$(length(filenames)) models found in modelpath")

Add the AE models as well.

In [None]:
# Second experiment folder; its files are appended to the list built in the previous cell.
aepath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/ae-test"
aefilenames = joinpath.(aepath, readdir(aepath))
# NOTE(review): re-running this cell appends the same files again, since `filenames` is reused.
filenames = vcat(filenames, aefilenames);
println("working with a total of $(length(filenames)) files")

In [None]:
"""
    parse_params(filename::String)

Recover the experiment settings encoded in the name of a saved-model file.

The base name of `filename` is split on `_`. The first piece is stored under
`:model`, the last piece (with everything from `.bson` on removed) under `:time`,
and every piece in between is read as `key-value` and stored under `Symbol(key)`.

Each value is evaluated as Julia code when possible, so `128` becomes an `Int`,
`(128,128,1)` a tuple, `true` a `Bool`, and a name that is bound in the defining
module becomes the bound object. Text that cannot be parsed or evaluated is kept
as a string.

# Returns
A `Dict{Symbol,Any}` with the keys described above.
"""
function parse_params(filename::String)
    segments = split(basename(filename), "_")
    params = Dict{Symbol,Any}()
    # `basename` has already removed the directories, so the first piece is the model name.
    params[:model] = segments[1]
    for segment in segments[2:end-1]
        # Split on the first dash only, so that values containing a dash themselves
        # (e.g. `1e-3` or `-0.5`) are not truncated.
        key, raw = split(segment, "-"; limit=2)
        # NOTE(review): this evaluates text taken from a file name. It is kept because
        # other cells rely on the evaluated values (tuples, booleans, bound names),
        # but it must only be used on trusted files.
        value = try
            eval(Meta.parse(raw))
        catch
            # Best effort: anything that is not valid, evaluable Julia stays a string.
            raw
        end
        params[Symbol(key)] = value
    end
    params[:time] = split(segments[end], ".bson")[1]
    return params
end

In [None]:
"""
    plothistory(h, val; label=nothing, inds=nothing)

Plot the series stored under key `val` in the history `h`.

`get(h, val)` must return two indexable sequences; the first is used for the
x-axis and the second for the y-axis (for a ValueHistories history these are
presumably the iteration numbers and the recorded values).

# Keywords
- `label`: legend label for the line; no label is passed when `nothing`.
- `inds`: indices of the points to draw; all points when `nothing`.
"""
function plothistory(h, val; label=nothing, inds=nothing)
    is, xs = get(h, val)
    # Identity comparison: `==` can be overloaded and is not the idiomatic
    # way to test for `nothing`.
    if inds === nothing
        inds = 1:length(xs)
    end
    if label === nothing
        plot(is[inds], xs[inds])
    else
        plot(is[inds], xs[inds], label=label)
    end
end
"""
    plotvae(h, inds=nothing)

Draw the `:loss`, `:loglikelihood` and `:KL` series of history `h` into the
current axes, optionally restricted to the points `inds`.
"""
function plotvae(h, inds=nothing)
    plothistory(h, :loss; label="loss", inds=inds)
    plothistory(h, :loglikelihood; label="-loglikelihood", inds=inds)
    return plothistory(h, :KL; label="KL", inds=inds)
end
"""
    plotae(h, inds=nothing)

Draw the `:loss` series of history `h`, optionally restricted to the points `inds`.
"""
plotae(h, inds=nothing) = plothistory(h, :loss; label="loss", inds=inds)
"""
    plotloss(h, inds=nothing)

Plot the loss curves of history `h`: the three-curve plot of `plotvae` when `h`
has a `:KL` entry, the single-curve plot of `plotae` otherwise.
"""
function plotloss(h, inds=nothing)
    return (:KL in keys(h)) ? plotvae(h, inds) : plotae(h, inds)
end
"""
    plotlosses(h, inds=nothing)

Open a new figure with the loss curves and a legend. When `length(h) == 1`, `h`
itself is plotted; otherwise `h` is iterated and each element gets its own figure.
"""
function plotlosses(h, inds=nothing)
    if length(h) != 1
        # Several histories: one figure per element.
        for history in h
            figure()
            plotloss(history, inds)
            legend()
        end
        return nothing
    end
    figure()
    plotloss(h, inds)
    return legend()
end

Let's focus on the AE models that have the lowest loss first.

In [None]:
# Load every saved model and collect, index-aligned with `filenames`:
# its training-loss curve, the parameters parsed from its file name, and the model.
loss_list = []
model_params_list = []
model_list = []
for filename in filenames
    model_data = BSON.load(filename)
    # Only the recorded loss values are needed, not their iteration indices.
    _, ls = get(model_data[:history], :loss)
    push!(loss_list, ls)
    model_params = parse_params(filename)
    push!(model_params_list, model_params)
    # Models whose file name carries `batchnorm-true` are switched to test mode.
    model = if get(model_params, :batchnorm, false)
        Flux.testmode!(model_data[:model])
    else
        model_data[:model]
    end
    push!(model_list, model)
end
# Last recorded training loss of each model.
final_losses = [last(ls) for ls in loss_list];

Filter out some models

In [None]:
# Requirements a model has to meet to be kept, as `parameter => required value` pairs.
filter_list = [
    :xdim => (patchsize,patchsize,1),
    :model => "ConvAE"
]
# One flag per loaded model: true when every requirement matches its parsed parameters.
filter_inds = Bool[all(p[key] == val for (key, val) in filter_list) for p in model_params_list]
model_params_list = model_params_list[filter_inds]
loss_list = loss_list[filter_inds]
model_list = model_list[filter_inds]
final_losses = final_losses[filter_inds]
# `filenames` has to be filtered together with the other lists: later cells index
# it with positions computed from `final_losses` (`filenames[sortinds[...]]`) and
# would otherwise pick up the wrong file.
filenames = filenames[filter_inds];

In [None]:
# Loss of every remaining model on the validation patches, index-aligned with `model_list`.
valid_loss = []
for (model, params) in zip(model_list, model_params_list)
    # Models with "VAE" in their name use the four-argument form of the loss.
    # NOTE(review): the meaning of the extra arguments `1, 1.0` is defined in
    # AlfvenDetectors — confirm there.
    tracked = occursin("VAE", params[:model]) ?
        AlfvenDetectors.loss(model, validdata, 1, 1.0) :
        AlfvenDetectors.loss(model, validdata)
    # Store the plain value rather than the tracked wrapper.
    push!(valid_loss, tracked.data)
end

In [None]:
"""
    pretty_params(params)

Format the `(key, value)` pairs of `params` as text, one `key = value ` entry per
line, in iteration order. Returns an empty string for an empty collection.
"""
function pretty_params(params)
    return join("$key = $val \n" for (key, val) in params)
end

In [None]:
# Rebuilds the test arrays exactly as in the earlier cells (same file, same window,
# same patch size), so this part of the notebook can be run without the plotting cells.
testdata = readfun(joinpath(datapath, "Jldata10893.h5"));
subtestdata = testdata[129:384, 1537:2816]
convtestdata = AlfvenDetectors.split_reshape(testdata,patchsize);
convsubtestdata = AlfvenDetectors.split_reshape(subtestdata,patchsize);

In [None]:
# Entry of `filenames` at the index ranked second by `sortinds`.
# NOTE(review): `sortinds` is only assigned in a later cell
# (`sortinds = sortperm(final_losses)`), so that cell has to be run first.
filenames[sortinds[2]]

In [None]:
# Parsed parameters of the model ranked second by `sortinds`.
# NOTE(review): `sortinds` is only assigned in a later cell
# (`sortinds = sortperm(final_losses)`), so that cell has to be run first.
model_params_list[sortinds[2]]

In [None]:
# Rank the models by their final training loss, lowest first.
sortinds = sortperm(final_losses);
isample = 10
# The commented lines are alternative sources for the patch under inspection.
#sample = data[:,:,:,isample:isample]
# `isample:isample` keeps the fourth dimension, so `sample` stays a one-patch 4-D array.
sample = convsubtestdata[:,:,:,isample:isample]
#sample = validdata[:,:,:,isample:isample]
# The original patch, drawn into the current figure.
pcolormesh(sample[:,:,1,1])
# One new figure per model, in ranking order, showing that model's output for the patch.
for imodel in sortinds
    figure()
    ns = model_list[imodel](sample).data
    # Mean squared error between the patch and the model output.
    cl = Flux.mse(sample,ns)
    title("model: $imodel, final training loss: $(round(final_losses[imodel],digits=5)),
        validation loss: $(round(valid_loss[imodel],digits=5)),
        patch loss: $(round(cl,digits=5))")
    pcolormesh(ns[:,:,1,1])
    # Parameter listing anchored at data coordinates (135, 30) — presumably chosen to
    # sit just right of a 128-wide patch; confirm.
    text(135, 30, pretty_params(model_params_list[imodel]))
end

Now let's look at a particular AE model.

In [None]:
# Reload the top-ranked model from disk to get both the model and its training history.
# NOTE(review): `imodel` comes from `sortperm(final_losses)`, so this assumes that
# `filenames` is index-aligned with `final_losses` — verify after any filtering.
imodel = sortinds[1]
model_data = BSON.load(filenames[imodel])
model = model_data[:model]
hist = model_data[:history]
println(filenames[imodel])
params = parse_params(filenames[imodel])
# Models saved with `batchnorm-true` in their file name are switched to test mode.
if get(params, :batchnorm, false)
    model = Flux.testmode!(model)
end

In [None]:
# Loss curves recorded in the history of the model loaded in the previous cell.
plotlosses(hist)

In [None]:
# Show patch `i` of the test window and, in a second figure, the model output for it.
i = 15
figure()
pcolormesh(convsubtestdata[:,:,1,i])
# Range indices (`1:1`, `i:i`) keep all four dimensions for the model call.
sample = convsubtestdata[:,:,1:1,i:i];
figure()
pcolormesh(model(sample).data[:,:,1,1])

Now let's look at the 2D UMAP projections.

In [None]:
# UMAP is used through Python: import the `umap` Python module via PyCall.
using PyCall
umap = pyimport("umap")

In [None]:
# UMAP reducer with two output components; it is fitted on the latent codes in a later cell.
umap_model = umap.UMAP(n_components = 2, n_neighbors=15, min_dist=0.1)

In [None]:
# Encode all training patches with the selected model. The encoder is chosen once:
# for "ConvTSVAE" it is reached through `model.m1`, otherwise it is `model.encoder`.
encoder = params[:model] == "ConvTSVAE" ? model.m1.encoder : model.encoder
# Patches are encoded ten at a time (presumably to limit memory use).
# `Iterators.partition` also yields the final, shorter batch, so sample counts that
# are not a multiple of ten no longer raise an `InexactError` and no patch is
# dropped. This keeps the columns of `zdata` aligned with the patches in `data`.
zdata = [encoder(data[:,:,:,batch]).data for batch in Iterators.partition(1:size(data,4), 10)]
# Concatenate the per-batch codes column-wise without splatting the whole list.
zdata = reduce(hcat, zdata);
size(zdata)

In [None]:
# Fit UMAP on the latent codes. The input is transposed for the Python call and the
# result transposed back, so each column of `zdata2D` is one sample and its two rows
# are the projected coordinates (later cells plot `zdata2D[1,:]` against `zdata2D[2,:]`).
zdata2D = Array(umap_model.fit_transform(zdata')')

In [None]:
# Scatter plot of the two projected coordinates of every encoded patch.
scatter(zdata2D[1,:], zdata2D[2,:])
title("all data transformed into 2D")

In [None]:
# Hand-picked patches of the test window; the commented lines are other selections
# that were tried.
#selectinds = [17,15,10,9,8,6,4,2,13]
#selectinds = [10,9,4]
selectinds = [17,15,13,9]
chirpdata = convsubtestdata[:,:,:,selectinds]
# One figure per selected patch.
for k in axes(chirpdata, 4)
    figure()
    pcolormesh(chirpdata[:,:,1,k])
end

In [None]:
# Encode the selected patches with the same encoder that produced `zdata`
# (reached through `model.m1` for "ConvTSVAE", `model.encoder` otherwise).
chirpzdata = (params[:model] == "ConvTSVAE" ? model.m1.encoder : model.encoder)(chirpdata).data
# Project them with the already fitted UMAP model; columns are samples, as in `zdata2D`.
chirpzdata2D = Array(umap_model.transform(chirpzdata')')

In [None]:
# All projected patches as small markers, with the selected patches drawn larger on top.
scatter(zdata2D[1,:], zdata2D[2,:],s=3)
scatter(chirpzdata2D[1,:], chirpzdata2D[2,:],s=20)
title("all data transformed into 2D")

Find some shots with chirp modes and use them as validation data.

In [None]:
#lims = [6.5 8.3; 0.0 2.1]
# Selection rectangle in the 2-D projection: row 1 holds the lower/upper limit of
# the first coordinate, row 2 those of the second coordinate.
lims = [6.5 8.3; 0.0 2.1]
xlo, xhi = lims[1, 1], lims[1, 2]
ylo, yhi = lims[2, 1], lims[2, 2]
# Closed outline of the rectangle (first corner repeated at the end):
# row 1 are the x values, row 2 the y values of the corners.
box = [xlo xhi xhi xlo xlo; ylo ylo yhi yhi ylo]

Instead of boxes, search for N nearest samples in the original latent space.

In [None]:
# Per-coordinate test: is each projected point between the lower and upper limit?
inbox = lims[:,1] .<= zdata2D .<= lims[:,2]
# A point is selected only when both of its coordinates are inside the limits.
clusterinds = inbox[1,:] .& inbox[2,:];

In [None]:
# All projected points, the selected ones drawn again on top, and the rectangle outline in black.
scatter(zdata2D[1,:], zdata2D[2,:],s=3)
scatter(zdata2D[1,clusterinds], zdata2D[2,clusterinds],s=3)
plot(box[1,:], box[2,:],c="k")

In [None]:
# Training patches whose projection fell inside the rectangle. The mask is applied to
# the fourth dimension of `data`, so it must have one entry per patch.
clusterconvdata = data[:,:,:,clusterinds];
size(clusterconvdata)

In [None]:
# One figure per selected patch (first channel only).
for k in axes(clusterconvdata, 4)
    figure()
    pcolormesh(clusterconvdata[:,:,1,k])
end

How does the model encode a picture in the individual channels?

In [None]:
#sample = batch[:,:,1:1,2:2]
# Second of the selected patches, kept as a one-patch 4-D array.
sample = chirpdata[:,:,:,2:2];
# First element of the encoder's layer list (reached through `model.m1` for "ConvTSVAE").
firstblock = params[:model] == "ConvTSVAE" ? model.m1.encoder.layers[1] : model.encoder.layers[1]
# Output of that element's first two layers applied to the patch.
m = firstblock.layers[2](firstblock.layers[1](sample))

In [None]:
# The input patch whose intermediate activations are shown in the next cell.
pcolormesh(sample[:,:,1,1])

In [None]:
# One figure per index of the third dimension of `m` (presumably the channels — confirm).
for i in 1:size(m,3)
    figure()
    pcolormesh(m.data[:,:,i,1])
end

And how does the decoding work?

In [None]:
# Latent code of the patch.
# NOTE(review): unlike the earlier cells this always uses `model.encoder`, with no
# special case for "ConvTSVAE".
z = model.encoder(sample);
# Run the code through the first two decoder layers and then the first layer of the
# third element; the commented line would also apply that element's second layer.
# NOTE(review): `mx` is only assigned for "ConvAE" models, so the plotting cell
# below fails (or reuses a stale `mx`) for any other model type.
if params[:model] == "ConvAE"
    mx = model.decoder.layers[2](model.decoder.layers[1](z))
    mx = model.decoder.layers[3].layers[1](mx)
    #mx = model.decoder.layers[3].layers[2](mx)
end

In [None]:
# One figure per index of the third dimension of `mx` (presumably the channels — confirm).
for i in 1:size(mx,3)
    figure()
    pcolormesh(mx.data[:,:,i,1])
end

Now test a selected model

In [None]:
modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/"
filenames = readdir(modelpath)
# Settings to look for in the file names. Note that `params` is reused here as a
# list of pairs; a later cell assigns the parsed dictionary to it again.
params = [
#    :nepochs => 200
    :opt => "NADAM"
]
# File-name fragments of the form `key-value`, matching how settings are encoded.
fstrings = ["$(key)-$(val)" for (key, val) in params]
# Keep the files containing at least one fragment and turn them into full paths.
filenames = joinpath.(modelpath, filter(f -> any(occursin(s, f) for s in fstrings), filenames));
println("working with a list of $(length(filenames)) files")

In [None]:
# Load the second file of the selection (hard-coded index) together with its history
# and the parameters parsed from its name.
# NOTE(review): unlike the earlier loading cells, the model is not switched to test
# mode here even if its name says `batchnorm-true` — confirm this is intended.
filename = filenames[2]
model_data = BSON.load(filename)
model = model_data[:model]
hist = model_data[:history]
params = parse_params(filename)

In [None]:
# Loss curves recorded in the history of the selected model.
plotlosses(hist)

In [None]:
# Display the path of the selected model file.
filename

In [None]:
# Show one training patch and, in a second figure, the selected model's output for it.
isample = 1
#sample = convsubtestdata[:,:,:,isample:isample]
# `isample:isample` keeps the fourth dimension for the model call.
sample = data[:,:,:,isample:isample]
pcolormesh(sample[:,:,1,1])
ns = model(sample).data
figure()
pcolormesh(ns[:,:,1,1])

Check the different loss development for optimizers/batchnorm.

In [None]:
# Rebuild the complete file list from both experiment folders (same as at the start
# of the notebook), replacing the selection made in the previous section.
modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/batchnorm-test"
filenames = joinpath.(modelpath, readdir(modelpath))
aepath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/ae-test"
aefilenames = joinpath.(aepath, readdir(aepath))
filenames = vcat(filenames, aefilenames);
println("working with a total of $(length(filenames)) files")

In [None]:
# Load every saved model again and collect, index-aligned with `filenames`:
# its training-loss curve, the parameters parsed from its file name, and the model.
loss_list = []
model_params_list = []
model_list = []
for filename in filenames
    model_data = BSON.load(filename)
    # Only the recorded loss values are needed, not their iteration indices.
    _, ls = get(model_data[:history], :loss)
    push!(loss_list, ls)
    model_params = parse_params(filename)
    push!(model_params_list, model_params)
    # Models whose file name carries `batchnorm-true` are switched to test mode.
    model = if get(model_params, :batchnorm, false)
        Flux.testmode!(model_data[:model])
    else
        model_data[:model]
    end
    push!(model_list, model)
end
# Last recorded training loss of each model.
final_losses = [last(ls) for ls in loss_list];

In [None]:
# Predicates on the parsed parameters; a model is kept only when all of them hold.
# The `:opt` test compares against the `RMSProp` binding in scope (presumably
# Flux's), which only matches if `parse_params` evaluated the file-name text into
# that object.
filter_list = [
    p -> p[:xdim] == (patchsize, patchsize, 1),
    p -> p[:model] == "ConvAE",
    p -> p[:opt] == RMSProp
]
filter_inds = [all(pred(p) for pred in filter_list) for p in model_params_list]
# The filtered copies get new names, so the full lists stay available for other filters.
filtered_params = model_params_list[filter_inds]
filtered_models = model_list[filter_inds]
filtered_losses = loss_list[filter_inds]
filtered_filenames = filenames[filter_inds]

In [None]:
# Window of the loss curves to compare (positions within each recorded loss vector).
imin = 200
imax = 550
for (loss, params) in zip(filtered_losses, filtered_params)
    bn = get(params, :batchnorm, false)
    # Clamp the upper bound to the length of the curve, so runs with fewer than
    # `imax` recorded values are drawn as far as they go instead of raising a
    # `BoundsError`.
    plot(loss[imin:min(imax, end)], label = "batchnorm: $bn")
end
legend()
title("AE - convergence rate depending on the use of batch normalization")

In [None]:
# Predicates on the parsed parameters: no batch normalisation, learning-rate entry
# `eta` equal to 0.001, and model type "ConvAE".
filter_list = [
    p -> !get(p, :batchnorm, false),
    p -> p[:eta] == 0.001,
    p -> p[:model] == "ConvAE"
]
filter_inds = [all(pred(p) for pred in filter_list) for p in model_params_list]
# The filtered copies get new names, so the full lists stay available for other filters.
filtered_params = model_params_list[filter_inds]
filtered_models = model_list[filter_inds]
filtered_losses = loss_list[filter_inds]
filtered_filenames = filenames[filter_inds]

In [None]:
# Compare at most the first four matching models. `min(4, end)` avoids a
# `BoundsError` when fewer than four models passed the filter.
filtered_losses = filtered_losses[1:min(4, end)]
filtered_params = filtered_params[1:min(4, end)]
# Window of the loss curves to compare (positions within each recorded loss vector).
imin = 5
imax = 500
for (loss, params) in zip(filtered_losses, filtered_params)
    opt = get(params, :opt, false)
    # Clamp the upper bound so curves shorter than `imax` are still drawn.
    plot(loss[imin:min(imax, end)], label = "optimiser: $opt")
end
legend()
title("AE - Convergence rate depending on the used optimiser")