### In this notebook we shall look at some of the trained models on different types of data to see whether there is some kind of structure in the latent space. UMAP will be needed in those cases where the dimensionality of the latent space is > 2. Use only TSVAE pretrained models, as they should contain about the same information as AE and definitely the same as VAE.

In [None]:
using AlfvenDetectors
using Flux
using ValueHistories
using BSON
using PyPlot
using Dates
using PyCall
using GaussianMixtures
umap = pyimport("umap")

In [None]:
# Directory with the sample shot files; every entry in it is treated as one shot.
datapath = "/home/vit/vyzkum/alfven/cdb_data/data_sample/"
shots = readdir(datapath)
# Full path of each shot file, in the order returned by `readdir`.
shotlist = String[joinpath(datapath, shot) for shot in shots]

In [None]:
# Kind of measurement the inspected models were trained on; one of
# "mscamp", "mscphase", "mscampphase" or "uprobe".
measurement_type = "uprobe"
basemodelpath = "/home/vit/vyzkum/alfven/experiments/single_col/"
# Trained models live in a subdirectory named after the measurement type.
modelpath = joinpath(basemodelpath, measurement_type)
# Pick the reader matching the measurement type. An unrecognised type raises
# immediately instead of leaving `readfun` undefined (or stale from an earlier run).
readfun = if measurement_type == "mscamp"
    AlfvenDetectors.readmscamp
elseif measurement_type == "mscphase"
    AlfvenDetectors.readnormmscphase
elseif measurement_type == "mscampphase"
    AlfvenDetectors.readmscampphase
elseif measurement_type == "uprobe"
    AlfvenDetectors.readnormlogupsd
else
    throw(ArgumentError("unknown measurement_type: $(repr(measurement_type))"))
end
# Coils passed to `collect_signals` for the non-uprobe measurement types.
coils = [12,13,14];
# also if using uprobe, only use a few shots
# The subset is taken from `shots` rather than from `shotlist` itself, so that
# re-running this cell does not slice an already shortened list.
if measurement_type == "uprobe"
    shotlist = joinpath.(datapath, shots[9:12])
end

In [None]:
# uprobe data is collected without a coil selection; all other measurement
# types are collected for the configured `coils`.
rawdata = measurement_type == "uprobe" ?
    AlfvenDetectors.collect_signals(shotlist, readfun) :
    AlfvenDetectors.collect_signals(shotlist, readfun, coils)
data = rawdata;
# Dimensions of the collected data matrix.
M, N = size(data)

In [None]:
# Names of all entries stored in the model directory of the selected measurement type.
models = readdir(modelpath)

### Now let's look at a particular model

In [None]:
# Which model family, latent dimension and which of the matching files to inspect.
modelname = "TSVAE"
ldim = 32
imodel = 1
# Keep files whose name starts with the model name. `startswith` is used instead
# of slicing `x[1:length(modelname)]`, which throws on names shorter than the prefix.
submodels = filter(x -> startswith(x, modelname), models)
# Of those, keep files whose name mentions the requested latent dimension.
# NOTE(review): this is a substring match, so "ldim-32" would also match e.g.
# "ldim-320" — confirm against the file naming scheme.
submodels = filter(x -> occursin("ldim-$ldim", x), submodels)
# Fail with a readable message when the requested model does not exist.
imodel in eachindex(submodels) ||
    error("no $modelname model with ldim-$ldim at index $imodel in $modelpath")
bsonpath = joinpath(modelpath, submodels[imodel])
submodels

In [None]:
"""
    showtime(time)

Return `time` (a number of seconds) as a canonicalized `Dates.CompoundPeriod`.
Fractional seconds are dropped by flooring to a whole number of seconds.
"""
function showtime(time)
    whole_seconds = Dates.Second(floor(Int, time))
    return Dates.canonicalize(Dates.CompoundPeriod(whole_seconds))
end

In [None]:
# Load the saved training artefacts of the selected model from its BSON file.
modeldata = BSON.load(bsonpath)
model = modeldata[:model]
history = modeldata[:history]
# NOTE(review): this global has the same name as `Base.time`; presumably the
# assignment only works while `time()` has not been used in this session — confirm.
time = modeldata[:time]
println("Training time: $(showtime(time))" )

In [None]:
# Encode all collected data with the encoder of the first-stage model (`m1`).
# NOTE(review): `.data` presumably unwraps a tracked array — confirm.
z = model.m1.encoder(data).data;
# Sample from the full encoder output; must happen before `z` is truncated below.
sz = model.m1.sampler(z);
# Keep only the first `ldim` rows of the encoder output.
# NOTE(review): presumably these are the latent means — confirm against the model definition.
z = z[1:ldim,:]
# Force a garbage collection pass after the large intermediate arrays.
GC.gc()

In [None]:
# 2-D UMAP embedder. The object is kept so that further samples can be projected
# with its `transform` method after it has been fitted here.
z_umap_model = umap.UMAP(n_components = 2, n_neighbors = 15, min_dist = 0.1)
# Only reduce the dimension when the latent space has more than two dimensions;
# UMAP works with samples as rows, hence the transposes on the way in and out.
z2D = if ldim > 2
    @time Matrix(z_umap_model.fit_transform(z')')
else
    z
end

In [None]:
# Scatter plot of the 2-D codes: first row on the x axis, second row on the y axis.
scatter(z2D[1,:], z2D[2,:], s= 2, alpha= 0.1)
title("UMAP projection of the model1 latent space ($ldim -> 2)")

#### Fit a GMM model to the 2D data

In [None]:
# The GMM is fitted on a samples-as-rows matrix, so flip the 2-D codes.
tz2D = permutedims(z2D)
# Covariance kind of the mixture components; also read by the plotting cell.
kind = :full
# Four-component Gaussian mixture on the 2-D projection.
gmm2D = GaussianMixtures.GMM(4, tz2D; kind = kind)

In [None]:
# for covariance isolines drawing
# Provides the `BivariateNormal` module whose `getEllipsis` is used for the ellipse plot.
# NOTE(review): absolute, machine-specific path.
include("/home/vit/Dropbox/vyzkum/cisco/kod/lib/julia/BivariateNormal.jl")

In [None]:
# Overlay one covariance ellipse per GMM component on the 2-D latent scatter plot.
scatter(z2D[1,:], z2D[2,:], s= 2, alpha= 0.1)
covarmats = covars(gmm2D)
# `gmm2D.n` is the number of components for every covariance kind.
# `length(gmm2D.Σ)` only equals it for full covariances; for diagonal ones Σ is
# an n×d matrix, so the old loop would run past the last component.
for i in 1:gmm2D.n
    if kind == :diag
        # diagonal kind: row i holds the per-dimension variances of component i
        Σ = [covarmats[i,1] 0; 0 covarmats[i,2]]
    else
        Σ = covarmats[i]
    end
    μ = gmm2D.μ[i,:]
    # NOTE(review): presumably returns 50 points of the 1.0-level ellipse as a
    # two-column (x, y) matrix — confirm in BivariateNormal.jl.
    el = BivariateNormal.getEllipsis(Σ, μ, 50, 1.0);
    plot(el[:,1], el[:,2], label = "component $i")
end
legend()
title("UMAP projection of the model1 latent space ($ldim -> 2)")

### Try to select shots where there should be an Alfven mode and project them into the latent space.

In [None]:
"""
    get_shot_data_code_and_projection(shot, coil, readfun, umap_model)

Load the data of one shot, encode it and project the code to 2-D.

Reads the file `Jldata<shot>.h5` from the global `datapath` with `readfun`
(`coil` is only used when the global `measurement_type` is not `"uprobe"`),
encodes it with the encoder of the global `model.m1` and keeps the first `ldim`
rows of the code. If `ldim > 2` the code is projected with
`umap_model.transform`, otherwise it is returned unchanged as the projection.

Returns the tuple `(data, code, projection)`.
"""
function get_shot_data_code_and_projection(shot, coil, readfun, umap_model)
    shotfile = joinpath(datapath, "Jldata$(shot).h5")
    signal = if measurement_type == "uprobe"
        AlfvenDetectors.get_ft_signal(shotfile, readfun)
    else
        AlfvenDetectors.get_ft_signals(shotfile, readfun, coil)
    end
    encoded = model.m1.encoder(signal).data
    code = encoded[1:ldim, :]
    # UMAP expects samples as rows; transpose back to samples-as-columns afterwards
    projection = ldim > 2 ? Matrix(umap_model.transform(code')') : code
    return signal, code, projection
end

In [None]:
# (shot number, coil) pairs to inspect. Note that this rebinds `shotlist`, which
# held the shot file paths up to this point.
shotlist = [(10000,12), (10893,14), (10370,12)]
# One (data, code, 2-D projection) tuple per selected shot.
alldata = [
    get_shot_data_code_and_projection(shot, coil, readfun, z_umap_model)
    for (shot, coil) in shotlist
];

In [None]:
# One figure per selected shot showing its input data (first element of the tuple).
foreach(zip(shotlist, alldata)) do (shotinfo, shotdata)
    spectrogram = first(shotdata)
    figure()
    title("shot $(shotinfo[1]), coil $(shotinfo[2])")
    pcolormesh(spectrogram)
end

In [None]:
# Column range of the second selected shot that is singled out as chirp data.
inds = 500:1000
chirpx = alldata[2][1][:, inds]
pcolormesh(chirpx)
# Encode the selected columns and keep the first `ldim` rows of the code.
chirpz = model.m1.encoder(chirpx).data[1:ldim, :]
# Project with the already fitted UMAP model when the code has more than two dimensions.
chirpz2D = ldim > 2 ? Matrix(z_umap_model.transform(chirpz')') : chirpz

In [None]:
# Background: 2-D codes of all collected data.
scatter(z2D[1,:], z2D[2,:], s=3, alpha= 0.1, label="all data")
# Overlay the 2-D projection (third tuple element) of every selected shot, labelled by shot number.
for (shotinfo,shotdata) in zip(shotlist, alldata)
    scatter(shotdata[3][1,:],shotdata[3][2,:], label=shotinfo[1], s=0.5,alpha=0.3)
end
# The chirp subset was cut from the second selected shot, hence the hard-coded label.
scatter(chirpz2D[1,:], chirpz2D[2,:], label = "10893 chirps", s=0.5,alpha=1)
legend()
title("Model 1 latent space")

In [None]:
# Samples as rows, the same layout the 2-D GMM was fitted on.
tchirpz2D = permutedims(chirpz2D)
# For every chirp sample, the index of the component with the largest `llpg` value.
maxinds2D = getindex.(argmax(llpg(gmm2D, tchirpz2D), dims = 2), 2);

In [None]:
# How many chirp samples were assigned to each component of the 2-D GMM.
plt.hist(maxinds2D)
title("Histogram of component pertinence for the chirp data")
xlabel("component index")
ylabel("number of pertinent samples in chirp data")

## Let's also try to fit a GMM to the original latent space

In [None]:
# Samples as rows for the GMM fit, this time in the full latent dimension.
tz = permutedims(z)
kind = :full
# Six-component Gaussian mixture on the unprojected latent codes.
@time gmmOrig = GaussianMixtures.GMM(6, tz; kind = kind)

In [None]:
# Chirp codes with samples as rows, matching the layout used to fit `gmmOrig`.
tchirpz = permutedims(chirpz)
# For every chirp sample, the index of the component with the largest `llpg` value.
maxindsOrig = getindex.(argmax(llpg(gmmOrig, tchirpz), dims = 2), 2);

In [None]:
# How many chirp samples were assigned to each component of the GMM fitted in the original latent space.
plt.hist(maxindsOrig)
title("Histogram of component pertinence for the chirp data")
xlabel("component index")
ylabel("number of pertinent samples in chirp data")

Clearly, the chirping data are more likely to belong to one of the Gaussian mixture model components than to the rest, even in the original latent space dimension.

### What if we try to fit data from a whole shot?

In [None]:
# For every selected shot, assign each sample of the whole shot to the component
# of `gmmOrig` with the largest `llpg` value and plot the assignment counts.
for (shotinfo,shotdata) in zip(shotlist, alldata)
    # second tuple element is the latent code; the GMM expects samples as rows
    _tz = Array(shotdata[2]')
    maxinds = map(x->x[2], argmax(llpg(gmmOrig, _tz), dims = 2));
    figure()
    plt.hist(maxinds)
    title("Shot $(shotinfo[1]) - histogram of GMM model component pertinence in the latent space")
    xlabel("component index")
    # the samples here come from the whole shot, not from the chirp subset
    ylabel("number of pertinent samples in shot data")
end
