In [None]:
using PyPlot
using AlfvenDetectors
using BSON
using Flux
using ValueHistories
using StatsBase
using GenModels

In [None]:
# Location of the raw U-probe shot files and the labeled patch metadata.
datapath = "/home/vit/vyzkum/alfven/cdb_data/uprobe_data"
# Full path of every entry found in the data directory.
shots = map(name -> joinpath(datapath, name), readdir(datapath))
# Shot numbers, labels and patch start coordinates, as returned by `labeled_patches`.
shotnos, labels, tstarts, fstarts = AlfvenDetectors.labeled_patches()
# Patch size passed to `get_patch`, the reader function handed to it, and the plotting colormap.
patchsize = 128
readfun = AlfvenDetectors.readnormlogupsd
cmap = "plasma"

In [None]:
# Extract one patch per labeled (shot, tstart, fstart) triple and time the whole pass.
@time patchdata = map(
    ((shotno, tstart, fstart),) -> AlfvenDetectors.get_patch(
        datapath, shotno, tstart, fstart, patchsize, readfun; memorysafe=true),
    zip(shotnos, tstarts, fstarts))
# Keep the first element of every result and stack them along the 4th dimension.
data = cat(first.(patchdata)..., dims=4);
println(size(data))

In [None]:
# Split the patches along the 4th dimension by their label (1 / 0).
alfvendata = data[:,:,:,findall(l -> l == 1, labels)]
noalfvendata = data[:,:,:,findall(l -> l == 0, labels)];

In [None]:
# Directory holding the saved models. Alternative experiment folders, kept for quick switching:
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/"
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/ae-test";
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/batchnorm-test"
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/benchmark-runs"
#modelpath = "/home/vit/vyzkum/alfven/experiments/conv_old_library/uprobe/benchmark-runs"
modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/data_augmentation/"
# Full path of every entry in the selected directory.
filenames = map(name -> joinpath(modelpath, name), readdir(modelpath))
println("$(length(filenames)) models found in modelpath")

Let's focus first on the AE models that have the lowest loss.
First, get the model parameters.

In [None]:
# Parse the parameters of every model file; entries stay aligned with `filenames`.
model_params_list = Any[parse_params(path) for path in filenames]

Now filter some of them out.

In [None]:
# Required parameter values; a model is kept only when all of them match.
filter_list = [
    :xdim => (patchsize,patchsize,1),
    :model => "ConvAE",
    :nepochs => 1000,
]
# Every comparison is evaluated (no short-circuit), so a missing key still raises an error.
filter_inds = map(model_params_list) do p
    all([p[key] == wanted for (key, wanted) in filter_list])
end
# NOTE: `model_params_list` is overwritten here, so re-running this cell alone
# breaks its alignment with `filenames`.
model_params_list = model_params_list[filter_inds]
filename_list = filenames[filter_inds]
println("working with $(length(filename_list)) models")

In [None]:
# Load every selected model together with its recorded `:loss` history.
loss_list = []
model_list = []
for (p, path) in zip(model_params_list, filename_list)
    model_data = BSON.load(path)
    # `get` on the history returns two values; only the second (the loss values) is kept.
    _, ls = get(model_data[:history], :loss)
    push!(loss_list, ls)
    model = model_data[:model]
    # Models flagged with :batchnorm are switched to test mode before use.
    if get(p, :batchnorm, false)
        model = Flux.testmode!(model)
    end
    push!(model_list, model)
end
# Last recorded loss value of every model.
final_losses = map(last, loss_list);

In [None]:
# Evaluate every model on the two label groups.
alfven_loss = []
noalfven_loss = []
for (m, p) in zip(model_list, model_params_list)
    # Models whose name contains "VAE" are called with two extra positional loss arguments.
    # NOTE(review): the meaning of `1, 1.0` is defined by AlfvenDetectors.loss — confirm there.
    extra = occursin("VAE", p[:model]) ? (1, 1.0) : ()
    push!(alfven_loss, AlfvenDetectors.loss(m, alfvendata, extra...).data)
    push!(noalfven_loss, AlfvenDetectors.loss(m, noalfvendata, extra...).data)
end

#### Sort the models by their loss on the Alfvén data (the ordering the cell below actually uses)

In [None]:
# Rank the models by their loss on the label-1 patches (lowest first).
# The earlier `sortperm(final_losses)` was overwritten immediately and had no effect;
# to rank by the final training loss instead, use `sortperm(final_losses)` here.
sortinds = sortperm(alfven_loss);
isample = 80
patch = data[:,:,:,isample:isample]
# Original patch first; each model's reconstruction then gets its own figure.
pcolormesh(patch[:,:,1,1],cmap=cmap)
for imodel in sortinds
    figure()
    ns = model_list[imodel](patch).data
    # Reconstruction error on this single patch.
    cl = Flux.mse(patch,ns)
    title("model: $imodel, final training loss: $(round(final_losses[imodel],digits=5)),
        alfven data loss: $(round(alfven_loss[imodel],digits=5)),
        no alfven data loss: $(round(noalfven_loss[imodel],digits=5)),
        patch loss: $(round(cl,digits=5))")
    pcolormesh(ns[:,:,1,1],cmap=cmap)
    # Parameter summary placed at x = 135 in data coordinates.
    text(135, 30, pretty_params(model_params_list[imodel]))
end

Now let's look at a particular AE model.

In [None]:
# Take the first model of the current ordering and gather everything stored for it.
imodel = sortinds[1]
model, params = model_list[imodel], model_params_list[imodel]
loss, file = loss_list[imodel], filename_list[imodel]
# Summary: file path, the three loss figures, then the parameter listing.
println(file)
println("")
println("final training loss: $(round(final_losses[imodel],digits=5))")
println("alfven data loss: $(round(alfven_loss[imodel],digits=5))")
println("no alfven data loss: $(round(noalfven_loss[imodel],digits=5))")
println("")
print(pretty_params(params))

In [None]:
# Loss history of the selected model, skipping the first 499 recorded values.
#plotlosses(hist)
plot(loss[500:end])

In [None]:
# Reconstruct patch `i` with the selected model and show input and output side by side.
i = 15
patch = data[:,:,1:1,i:i];
rp = model(patch).data[:,:,1,1]
l = Flux.mse(rp,patch)
# First figure: the input patch.
figure()
pcolormesh(patch[:,:,1,1],cmap=cmap)
# Second figure: the reconstruction, titled with its loss.
figure()
title("loss $l")
pcolormesh(rp,cmap=cmap)

Now let's look at the 2D UMAP projections.

In [None]:
# Import the Python `umap` module through PyCall.
using PyCall
umap = pyimport("umap")

In [None]:
# UMAP reducer configured for a 2-component embedding.
umap_model = umap.UMAP(n_components = 2, n_neighbors=15, min_dist=0.1)

In [None]:
# Encode all patches in batches of at most 10 and join the batch outputs column-wise.
N = size(data,4)
zdata = map(1:10:N) do lo
    # "ConvTSVAE" models keep the encoder inside their `m1` component.
    encoder = params[:model] == "ConvTSVAE" ? model.m1.encoder : model.encoder
    encoder(data[:,:,:,lo:min(lo+9,N)]).data
end
zdata = hcat(zdata...);
size(zdata)

In [None]:
# `zdata` is transposed before fitting and the result is transposed back, so `zdata2D`
# has one column per column of `zdata`.
zdata2D = Array(umap_model.fit_transform(zdata')')

In [None]:
# One scatter series per label value, in the 2D embedding.
for (labelvalue, labelname) in ((1, "alfven"), (0, "no alfven"))
    mask = labels .== labelvalue
    scatter(zdata2D[1,mask], zdata2D[2,mask],label=labelname,s=5)
end
title("all data transformed into 2D")
legend()

Find some shots with chirp modes and use them as validation data.

In [None]:
# Selection box in the embedding: row 1 = limits of coordinate 1, row 2 = limits of coordinate 2.
lims = [-8 -2.5; 1 4]
# Closed outline of the box (five corner points, the first one repeated).
plotbox = vcat(lims[1:1, [1,2,2,1,1]], lims[2:2, [1,1,2,2,1]])
# A point is selected when both of its coordinates lie strictly inside the limits.
zinds = vec(all((lims[:,1] .< zdata2D) .& (zdata2D .< lims[:,2]), dims=1));

In [None]:
# Selected points drawn first (larger, black); the labelled series are drawn over them.
scatter(zdata2D[1,zinds], zdata2D[2,zinds],label="selected patches",s=15,c="k")
for (labelvalue, labelname) in ((1, "alfven"), (0, "no alfven"))
    mask = labels .== labelvalue
    scatter(zdata2D[1,mask], zdata2D[2,mask],label=labelname,s=5)
end
# Outline of the selection box.
plot(plotbox[1,:], plotbox[2,:])
title("all data transformed into 2D")
legend()

Compute the loss on the individual patches.

In [None]:
# Loss of the selected model on each patch separately (batches of size one).
data_loss = [AlfvenDetectors.loss(model,data[:,:,:,i:i]).data for i in 1:size(data,4)]

In [None]:
# For every patch inside the selection box: original on the left, reconstruction on the right.
for i in findall(zinds)
    figure(figsize=(10,5))
    suptitle("shot $(shotnos[i]), label $(labels[i]), loss $(data_loss[i])")
    subplot(1,2,1)
    pcolormesh(data[:,:,1,i],cmap=cmap)
    subplot(1,2,2)
    pcolormesh(model(data[:,:,:,i:i]).data[:,:,1,1],cmap=cmap)
end

Instead of boxes, search for N nearest samples in the original latent space.

In [None]:
# Points whose two embedding coordinates both lie within `lims` (bounds inclusive).
clusterinds = vec(all(lims[:,1] .<= zdata2D .<= lims[:,2], dims=1));

In [None]:
# All embedded points, the selected cluster on top, and the outline of the selection box.
scatter(zdata2D[1,:], zdata2D[2,:],s=3)
scatter(zdata2D[1,clusterinds], zdata2D[2,clusterinds],s=3)
# The outline is stored in `plotbox`; `box` is not a variable defined in this notebook.
plot(plotbox[1,:], plotbox[2,:],c="k")

In [None]:
# Patches that fall inside the selected cluster.
clusterconvdata = data[:,:,:,findall(clusterinds)];
size(clusterconvdata)

In [None]:
# One figure per patch of the selected cluster (first channel only).
for i in axes(clusterconvdata, 4)
    figure()
    pcolormesh(clusterconvdata[:,:,1,i])
end

How does the model encode a picture in the individual channels?

In [None]:
# Pick one sample and push it through the first two layers of the encoder's first block.
# NOTE(review): `chirpdata` is never defined in this notebook. It is used when it exists in
# the session; otherwise the patches selected in the latent space (`clusterconvdata`) are used.
# Confirm which data set was intended.
sample = (@isdefined(chirpdata) ? chirpdata : clusterconvdata)[:,:,:,2:2];
if params[:model] == "ConvTSVAE"
    # "ConvTSVAE" models keep the encoder inside their `m1` component.
    m = model.m1.encoder.layers[1].layers[1](sample);
    m = model.m1.encoder.layers[1].layers[2](m);
else
    m = model.encoder.layers[1].layers[1](sample);
    m = model.encoder.layers[1].layers[2](m);
end

In [None]:
# Show the first channel of the selected sample.
pcolormesh(sample[:,:,1,1])

In [None]:
# One figure per slice of `m` along its third dimension.
foreach(1:size(m,3)) do channel
    figure()
    pcolormesh(m.data[:,:,channel,1])
end

And how does the decoding work?

In [None]:
# Encode the sample, then run only the first part of the decoder.
z = model.encoder(sample);
if params[:model] == "ConvAE"
    # Decoder layers 1 and 2, then the first layer of decoder block 3.
    # To go one layer further, also apply `model.decoder.layers[3].layers[2]`.
    # NOTE: `mx` is only (re)assigned for "ConvAE" models.
    mx = z |> model.decoder.layers[1] |> model.decoder.layers[2] |>
        model.decoder.layers[3].layers[1]
end

In [None]:
# One figure per slice of `mx` along its third dimension.
foreach(1:size(mx,3)) do channel
    figure()
    pcolormesh(mx.data[:,:,channel,1])
end

Now test a selected model.

In [None]:
modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/"
# Parameter values to look for in the file names.
params = [
#    :nepochs => 200
    :opt => "NADAM"
]
# Each pair becomes a "key-value" fragment; a file is kept when it contains any fragment.
fstrings = ["$(key)-$(value)" for (key, value) in params]
filenames = map(
    name -> joinpath(modelpath, name),
    filter(name -> any(occursin(fragment, name) for fragment in fstrings), readdir(modelpath))
);
println("working with a list of $(length(filenames)) files")

In [None]:
# Load the second matching file together with its training history and parsed parameters.
filename = filenames[2]
model_data = BSON.load(filename)
hist = model_data[:history]
params = parse_params(filename)
# Same rule as the bulk-loading cells: models flagged with :batchnorm are switched to
# test mode before being used for reconstruction.
model = get(params, :batchnorm, false) ? Flux.testmode!(model_data[:model]) : model_data[:model]
# Show the parsed parameters as the cell output.
params

In [None]:
# Pass the loaded model's history to `plotlosses`.
plotlosses(hist)

In [None]:
# Display the path of the currently selected model file.
filename

In [None]:
# Reconstruct one patch with the loaded model: input on the current figure, output on a new one.
isample = 1
sample = data[:,:,:,isample:isample]
ns = model(sample).data
pcolormesh(sample[:,:,1,1])
figure()
pcolormesh(ns[:,:,1,1])

Compare how the loss develops for different optimizers and with/without batch normalization.

In [None]:
# Gather the model files of two experiment folders into one list (batchnorm-test first).
modelpath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/batchnorm-test"
aepath = "/home/vit/vyzkum/alfven/experiments/conv/uprobe/ae-test"
aefilenames = map(name -> joinpath(aepath, name), readdir(aepath))
filenames = vcat(map(name -> joinpath(modelpath, name), readdir(modelpath)), aefilenames);
println("working with a total of $(length(filenames)) files")

In [None]:
# Load every file: loss history, parsed parameters and the model itself, kept in parallel lists.
loss_list = []
model_params_list = []
model_list = []
for path in filenames
    model_data = BSON.load(path)
    # `get` on the history returns two values; only the second (the loss values) is kept.
    _, ls = get(model_data[:history], :loss)
    push!(loss_list, ls)
    parsed = parse_params(path)
    push!(model_params_list, parsed)
    model = model_data[:model]
    # Models flagged with :batchnorm are switched to test mode before use.
    if get(parsed, :batchnorm, false)
        model = Flux.testmode!(model)
    end
    push!(model_list, model)
end
# Last recorded loss value of every model.
final_losses = map(last, loss_list);

In [None]:
# Predicates on the parsed parameters; a model is kept only when all of them hold.
# NOTE(review): `:opt` is compared with the `RMSProp` object, not a string — confirm that
# `parse_params` returns it in that form.
filter_list = [
    x->x[:xdim] == (patchsize,patchsize,1),
    x->x[:model] == "ConvAE",
    x->x[:opt] == RMSProp
]
# Every predicate is evaluated for every model (no short-circuit).
filter_inds = map(model_params_list) do p
    all([accept(p) for accept in filter_list])
end
# Apply the same mask to all parallel lists.
filtered_losses = loss_list[filter_inds]
filtered_models = model_list[filter_inds]
filtered_params = model_params_list[filter_inds]
filtered_filenames = filenames[filter_inds]

In [None]:
# Window of the loss history to compare (indices into the recorded values).
imin, imax = 200, 550
# One curve per filtered model, labelled by whether it used batch normalization.
for (trace, p) in zip(filtered_losses, filtered_params)
    bn = get(p, :batchnorm, false)
    plot(trace[imin:imax], label = "batchnorm: $bn")
end
legend()
title("AE - convergence rate depending on the use of batch normalization")

In [None]:
# Predicates: no batch normalization, eta of 0.001, and the "ConvAE" model.
filter_list = [
    x->!get(x,:batchnorm,false),
    x->x[:eta]==0.001,
    x->x[:model] == "ConvAE"
]
# Every predicate is evaluated for every model (no short-circuit).
filter_inds = map(model_params_list) do p
    all([accept(p) for accept in filter_list])
end
# Apply the same mask to all parallel lists.
filtered_losses = loss_list[filter_inds]
filtered_models = model_list[filter_inds]
filtered_params = model_params_list[filter_inds]
filtered_filenames = filenames[filter_inds]

In [None]:
# Compare at most the first four matching runs; `min(4, end)` avoids a BoundsError when
# fewer than four models pass the filter.
filtered_losses = filtered_losses[1:min(4, end)]
filtered_params = filtered_params[1:min(4, end)]
# Window of the loss history to compare (indices into the recorded values).
imin = 5
imax = 500
for (loss, params) in zip(filtered_losses, filtered_params)
    opt = get(params, :opt, false)
    # Clamp the window so histories shorter than `imax` are still plotted.
    plot(loss[imin:min(imax, end)], label = "optimiser: $opt")
end
legend()
title("AE - Convergence rate depending on the used optimiser")