# Notebook of experiment

First we need to download the experiment result data.

You need to provide some information for the script:

1. The experiment id
2. If the experiment is publicly shared, that is all you need; you can run the notebook as is.
3. Otherwise, you also need to provide a token that grants access to the experiment.

In [None]:
# User-supplied inputs for this notebook; fill these in before running the cells below.
experiment_id = ""  # The experiment ID
token = ""  # leave empty if the experiment is publicly shared

# Base URL that the experiment data request below is built from.
endpoint = "https://dev.coinfer.ai"

Install dependencies.

In [None]:
using Pkg
Pkg.add("CSV")
Pkg.add("HTTP")
Pkg.add("CodecZlib")
Pkg.add("DataFrames")
Pkg.add("Plots")
Pkg.add("JSON")

using JSON
using HTTP
using CSV
using Downloads
using CodecZlib
using Statistics
using DataFrames
using Plots

Then we can download the experiment result data. The data is generated asynchronously, so we need to wait using a loop until the data is ready.
The data is downloaded as a CSV file compressed using gzip. We decompress it after download, and then save it to "/tmp/<experiment_id>.csv".

In [None]:
# HTTP headers passed to the request in `get_sample_data_sync`. An Authorization
# header is included only when a token was supplied.
headers = if isempty(token)
    Dict{String, String}()
else
    Dict{String, String}("Authorization" => "Bearer $(token)")
end

"""
    get_sample_data_sync(experiment_id; poll_interval=1, timeout=Inf)

Poll the server until the sample data export of `experiment_id` reports the
progress `"done"`, then download the exported file and return the local path
produced by `Downloads.download`.

The request URL is built from the global `endpoint` and sent with the global
`headers`.

# Keywords
- `poll_interval`: seconds to sleep between two polls (default `1`).
- `timeout`: maximum number of seconds to keep polling. The default `Inf`
  polls until the export is ready. When the limit is exceeded an error is
  thrown instead of looping forever.
"""
function get_sample_data_sync(experiment_id; poll_interval::Real=1, timeout::Real=Inf)
    println("Preparing download file")
    # The URL does not change between polls, so build it once.
    url = "$(endpoint)/api/object/$(experiment_id)?sampledata=true&fmt=csv"
    started_at = time()
    while true
        rsp = HTTP.get(url, headers=headers)
        rsp_data = JSON.parse(String(rsp.body))
        if rsp_data["data"]["progress"] == "done"
            return Downloads.download(rsp_data["data"]["url"])
        end
        # Give up once the caller-provided time budget is used up.
        if time() - started_at >= timeout
            error("Timed out after $(timeout) seconds waiting for sample data of experiment $(experiment_id)")
        end
        sleep(poll_interval)
    end
end

"""
    decompress_gzip_file(input_path::String, output_path::String)

Decompress the gzip file at `input_path` and store the result at `output_path`.

The decompressed bytes are first written to `output_path * ".part"` and moved to
`output_path` only after decompression succeeded. If anything fails, the partial
file is removed and the error is rethrown, so `output_path` never holds a
truncated result. An existing file at `output_path` is replaced.

Returns `nothing`.
"""
function decompress_gzip_file(input_path::String, output_path::String)
    partial_path = output_path * ".part"
    try
        # The do-block form closes the decompressor stream when the block exits.
        open(GzipDecompressorStream, input_path) do gzip_stream
            open(partial_path, "w") do output
                buffer = Vector{UInt8}(undef, 8192)
                while !eof(gzip_stream)
                    bytes_read = readbytes!(gzip_stream, buffer)
                    # Only the first `bytes_read` bytes of the buffer are valid.
                    write(output, view(buffer, 1:bytes_read))
                end
            end
        end
        # Publish the result only after everything was written successfully.
        mv(partial_path, output_path; force=true)
    catch
        rm(partial_path; force=true)
        rethrow()
    end
    return nothing
end

# Samples grouped as chain name => (variable name => values); filled in a later cell.
by_chain_name = Dict()

# The decompressed CSV acts as a cache: download and decompress only when it is
# not already present.
isfile("/tmp/$(experiment_id).csv") || begin
    downloaded_file = get_sample_data_sync(experiment_id)
    decompress_gzip_file(downloaded_file, "/tmp/$(experiment_id).csv")
end

Now we have the experiment result data in CSV format. Each line of the CSV data contains a chain name, a variable name, and a variable value.
Let's reorganize the data by chain name and variable name, which will make the following steps easier.

In [None]:
# Read the CSV and group the values by chain name and then by variable name.
# The third (value) column is forced to `String`: the cells below call
# `parse(Float64, ...)` / `tryparse(Bool, ...)` on these values, which would fail
# if CSV type inference turned an all-numeric column into numbers.
csv_file = CSV.File("/tmp/$(experiment_id).csv"; types=Dict(3 => String))
for (chain, name, val) in csv_file
    # `get!` inserts the empty container on first sight of a chain / variable.
    chain_data = get!(() -> Dict(), by_chain_name, chain)
    samples = get!(() -> Vector(), chain_data, name)
    push!(samples, val)
end

Now we can calculate the summary statistics for each variable.

In [None]:
"""
    rnd3(f)

Round `f` to three digits after the decimal point.
"""
rnd3(f) = round(f; digits=3)

# Per-chain summary: one row for each (chain, variable) pair whose variable name
# has exactly two "/"-separated parts with "val" as the second part.
cells = []
for (chain, chain_data) in by_chain_name, (name, val) in chain_data
    parts = split(name, "/")
    (length(parts) == 2 && parts[2] == "val") || continue
    f_val = parse.(Float64, val)
    push!(cells, (chain=chain, name=name, mean=rnd3(mean(f_val)), std=rnd3(std(f_val)), num=length(f_val)))
end
println(DataFrame(cells))


# Per-variable summary: pool the values of every chain by variable name, then
# report mean, standard deviation and sample count for each variable.
cells = []
by_name = Dict()
for chain_data in values(by_chain_name), (name, val) in chain_data
    parts = split(name, "/")
    # Only names with exactly two "/"-separated parts ending in "val" are pooled.
    (length(parts) == 2 && parts[2] == "val") || continue
    append!(get!(() -> Vector(), by_name, name), val)
end

for (name, val) in by_name
    f_val = parse.(Float64, val)
    push!(cells, (name=name, mean=rnd3(mean(f_val)), std=rnd3(std(f_val)), num=length(f_val)))
end
println(DataFrame(cells))

Let's make a plot for one variable, with one subplot per chain.

In [None]:
# Plot the values of one variable (the first key of `by_name`), one subplot per chain.
plots = []
if isempty(by_name)
    # No variable was collected above, so there is nothing to pick.
    println("No variables available to plot")
else
    a_name = first(keys(by_name))
    for (chain, chain_data) in by_chain_name
        for (name, val) in chain_data
            name == a_name || continue

            # The first value decides how the whole series is interpreted.
            is_float_value = tryparse(Float64, val[1]) !== nothing
            is_bool_value = tryparse(Bool, val[1]) !== nothing
            if !is_float_value && !is_bool_value
                continue
            end

            # Boolean text such as "true" cannot be parsed as Float64 directly,
            # so boolean series are parsed as Bool and then converted.
            f_val = if is_float_value
                [rnd3(parse(Float64, _val)) for _val in val]
            else
                [Float64(parse(Bool, _val)) for _val in val]
            end
            push!(plots, plot(f_val, title=chain))
        end
    end

    if isempty(plots)
        println("No plottable values found for $(a_name)")
    else
        # NOTE(review): `title` here may replace the per-chain titles set above;
        # confirm whether `plot_title` was intended for the overall title.
        plot(plots...; layout=(length(plots), 1), title=a_name)
    end
end