In [None]:
using Pkg

Pkg.activate()

inlcude("generate_ivt_fields.jl")

using preprocessing

In [None]:
"""This function loads data in given geographic bounds. It supports loading values going 'over the end' like the lon rage from 270-40 NOTE: It is expected that the longitude is given in values from 0-360 deg and lat in range from -90:90""" 
function load_data_in_geo_bounds_typed(dataset, field_id::Union{String, Symbol, Missing}, geo_bnds::GeographicBounds, indices...; precision = Float32)::Array

  lon_normal_range = geo_bnds.lon_bounds[1] < geo_bnds.lon_bounds[2]
  lat_normal_range = geo_bnds.lat_bounds[1] < geo_bnds.lat_bounds[2]

  result_dim = length(indices) + 2


  if lon_normal_range & lat_normal_range
    return dataset[field_id][geo_bnds.lon_indices[1]:geo_bnds.lon_indices[2], geo_bnds.lat_indices[1]:geo_bnds.lat_indices[2], indices...]::Array{Union{precision, Missing}, result_dim}
  elseif !lon_normal_range & lat_normal_range
    
    lon_first = dataset[field_id][geo_bnds.lon_indices[1]:end, geo_bnds.lat_indices[1]:geo_bnds.lat_indices[2],indices...]::Array{Union{precision, Missing}, result_dim}
    lon_second = dataset[field_id][1:geo_bnds.lon_indices[2], geo_bnds.lat_indices[1]:geo_bnds.lat_indices[2],indices...]::Array{Union{precision, Missing}, result_dim}
    
    return vcat(lon_first, lon_second)::Array{Union{precision, Missing}, result_dim}
  elseif lon_normal_range & !lat_normal_range
    
    lat_first = dataset[field_id][geo_bnds.lon_indices[1]:geo_bnds.lon_indices[2], geo_bnds.lat_indices[1]:end,indices...]::Array{Union{precision, Missing}, result_dim}
    lat_second = dataset[field_id][geo_bnds.lon_indices[1]:geo_bnds.lon_indices[2], 1:geo_bnds.lat_indices[2],indices...]::Array{Union{precision, Missing}, result_dim}
    
    return hcat(lat_first, lat_second)::Array{Union{precision, Missing}, result_dim}
  else
    # last case is both are over 
    lon_f_lat_f = dataset[field_id][geo_bnds.lon_indices[1]:end, geo_bnds.lat_indices[1]:end,indices...]::Array{Union{precision, Missing}, result_dim}
    lon_f_lat_s = dataset[field_id][geo_bnds.lon_indices[1]:end, 1:geo_bnds.lat_indices[2],indices...]::Array{Union{precision, Missing}, result_dim}

    lon_s_lat_f = dataset[field_id][1:geo_bnds.lon_indices[2], geo_bnds.lat_indices[1]:end,indices...]::Array{Union{precision, Missing}, result_dim}
    lon_s_lat_s = dataset[field_id][1:geo_bnds.lon_indices[2], 1:geo_bnds.lat_indices[2],indices...]::Array{Union{precision, Missing}, result_dim}

    return vcat(hcat(lon_f_lat_f, lon_f_lat_s), hcat(lon_s_lat_f, lon_s_lat_s))::Array{Union{precision, Missing}, result_dim}
  end
end

In [1]:
using NCDatasets
using DataStructures

function create_benchmark_ds(path, varname, data)
    
    vertical_size = size(data, 3)

    ps = rand(size(data, 1), size(data, 2), size(data, 4))

    ap = rand(vertical_size)

    b = rand(vertical_size)

    NCDataset(path,"c",attrib = OrderedDict("title" => "this is a test file")) do ds
        # Define the variable temperature. The dimension "lon" and "lat" with the
        # size 100 and 110 resp are implicitly created
        defVar(ds,varname,data,("lon","lat", "lev", "time"))
        defVar(ds,"ps",ps,("lon","lat", "time"))
        defVar(ds,"ap",ap,("lev",))
        defVar(ds,"b",b,("lev",))
    end
    
end

create_benchmark_ds (generic function with 1 method)

In [2]:
data = [Float32(l+i+j/k) for i = 1:60, j = 1:50, k = 1:42, l = 1:5000]

println("Data size in mem: $(sizeof(data)/1000000) MB")
for id in ["hus", "ua", "va"]
    path = "sample_data/benchmark_$id.nc"
    create_benchmark_ds(path, id, data)
end



Data size in mem: 2520.0


In [1]:
using BenchmarkTools
using NCDatasets


bmable = @benchmarkable  NCDataset(["sample_data/benchmark_hus.nc", "sample_data/benchmark_va.nc", "sample_data/benchmark_ua.nc"]; aggdim = "") do ds

    data_hus = ds[:hus][:, :, :, :]
    data_ua = ds[:ua][:, :, :, :]
    data_va = ds[:va][:, :, :, :]
    data_ps = ds[:ps][:, :, :]

end

tune!(bmable)

run(bmable)

BenchmarkTools.Trial: 2 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m4.199 s[22m[39m … [35m   4.380 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m13.76% … 17.42%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m4.289 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m15.63%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m4.289 s[22m[39m ± [32m128.221 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m15.63% ±  2.59%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁

In [2]:
bmable = @benchmarkable  NCDataset(["sample_data/benchmark_hus.nc", "sample_data/benchmark_va.nc", "sample_data/benchmark_ua.nc"]; aggdim = "") do ds

    data_hus = ds[:hus][1:40, 1:20, :, :]
    data_ua = ds[:ua][1:40, 1:20, :, :]
    data_va = ds[:va][1:40, 1:20, :, :]
    data_ps = ds[:ps][1:40, 1:20, :]

end samples = 15

tune!(bmable)

run(bmable)

BenchmarkTools.Trial: 3 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.710 s[22m[39m … [35m   1.932 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 6.43% … 17.18%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.753 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 9.31%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.799 s[22m[39m ± [32m117.734 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m11.22% ±  5.56%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁

In [15]:
bmable = @benchmarkable  NCDataset(["sample_data/benchmark_hus.nc", "sample_data/benchmark_va.nc", "sample_data/benchmark_ua.nc"]; aggdim = "") do ds

    data_hus = ds[:hus][:, :, :, :]::Array{Float32, 4}
    # data_ua = ds[:ua][:, :, :, :]::Array{Float32, 4}
    # data_va = ds[:va][:, :, :, :]::Array{Float32, 4}
    # data_ps = ds[:ps][:, :, :]::Array{Float64, 3}

end

tune!(bmable)

run(bmable)

BenchmarkTools.Trial: 4 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.155 s[22m[39m … [35m   1.416 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.09% … 18.81%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.311 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m7.74%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.298 s[22m[39m ± [32m119.586 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m9.06% ±  9.76%

  [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[34m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [39m█[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[

In [20]:
bmable = @benchmarkable  NCDataset(["sample_data/benchmark_hus.nc", "sample_data/benchmark_va.nc", "sample_data/benchmark_ua.nc"]; aggdim = "") do ds

    data_hus = ds[:hus][1:40, 1:20, :, :]::Array{Float32, 4}
    data_ua = ds[:ua][1:40, 1:20, :, :]::Array{Float32, 4}
    data_va = ds[:va][1:40, 1:20, :, :]::Array{Float32, 4}
    data_ps = ds[:ps][1:40, 1:20, :]::Array{Float64, 3}

end

tune!(bmable)

run(bmable)

BenchmarkTools.Trial: 3 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.843 s[22m[39m … [35m   2.071 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 7.60% … 18.19%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.899 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m10.83%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.937 s[22m[39m ± [32m118.653 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m12.43% ±  5.43%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁

In [23]:
bmable = @benchmarkable  NCDataset(["sample_data/benchmark_hus.nc", "sample_data/benchmark_va.nc", "sample_data/benchmark_ua.nc"]; aggdim = "") do ds
# bmable = @benchmarkable  NCDataset("sample_data/benchmark_hus.nc") do ds


    data = Array{Float32, 5}

    

    for (i, id) in enumerate([:hus, :ua, :va])
        # NCDatasets.load!(ds[id], data[i, :, :, :, :], :, :, :, :)
        push!(data,ds[id][:, :, :, :])
    end
    data_ps = ds[:ps][:, :, :]::Array{Float64, 3}

end

tune!(bmable)

run(bmable)

MethodError: MethodError: no method matching push!(::Type{Array{Float32, 5}}, ::Array{Float32, 4})
Closest candidates are:
  push!(::Any, ::Any, !Matched::Any) at ~/.julia/juliaup/julia-1.7.0+0.x64.linux.gnu/share/julia/base/abstractarray.jl:2952
  push!(::Any, ::Any, !Matched::Any, !Matched::Any...) at ~/.julia/juliaup/julia-1.7.0+0.x64.linux.gnu/share/julia/base/abstractarray.jl:2953
  push!(!Matched::DataStructures.MutableLinkedList{T}, ::Any) where T at ~/.julia/packages/DataStructures/b0JVf/src/mutable_list.jl:199
  ...

In [2]:
using Pkg

Pkg.activate(".")


[32m[1m  Activating[22m[39m project at `~/Documents/Uni/Master/MA/preprocessing`


In [6]:
include("generate_ivt_fields.jl")

using NCDatasets
using .preprocessing
using BenchmarkTools


bmable = @benchmarkable NCDataset(["sample_data/sample_hus_dataset_200_timesteps.nc", "sample_data/sample_ua_dataset_200_timesteps.nc", "sample_data/sample_va_dataset_200_timesteps.nc"]; aggdim = "") do dataset
    println("Threads available: $(Threads.nthreads())")
    
    hus_data = dataset[:hus][:, :, :, :]
    ua_data = dataset[:ua][:, :, :, :]
    va_data = dataset[:va][:, :, :, :]
    ps_data = dataset[:ps][:, :, :]
    
    lon_size = size(hus_data, 1)
    lat_size = size(hus_data, 2)

    # these variables are used for calculation of pressure levels at each specific lat, lon, time coordinate: p = ap + b * ps
    ap = dataset[:ap][:]
    b = dataset[:b][:]
    time_size = size(dataset[:time], 1)
      
end
tune!(bmable)

run(bmable)

Threads available: 8
Threads available: 8
Threads available: 8
Threads available: 8
Threads available: 8
Threads available: 8
Threads available: 8




BenchmarkTools.Trial: 3 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m2.375 s[22m[39m … [35m   2.617 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 6.51% … 14.81%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m2.504 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m10.21%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m2.499 s[22m[39m ± [32m121.308 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m10.65% ±  4.16%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁



In [3]:
using NCDatasets

function load_data_to_var!(path, variable_name, var, indices...)
    NCDataset(path) do ds
        NCDatasets.load!(variable(ds, variable_name),var, indices...)
    end
end

load_data_to_var! (generic function with 1 method)

In [5]:
using BenchmarkTools
@benchmark begin
    hus_data = zeros(Float32, 192, 96, 47, 200)
    load_data_to_var!("sample_data/sample_hus_dataset_200_timesteps.nc", "hus", hus_data, :, :, :, :)
end

BenchmarkTools.Trial: 14 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m344.915 ms[22m[39m … [35m430.961 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.13% … 18.53%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m366.535 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m5.01%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m375.182 ms[22m[39m ± [32m 22.545 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m6.94% ±  4.77%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[34m█[39m[39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▆[39m▁[39m▁[39

In [9]:
@benchmark begin
    NCDataset("sample_data/sample_hus_dataset_200_timesteps.nc") do ds
        hus_data = ds["hus"][:, :, :, :]::Array{Union{Missing, Float32}, 4}
    end
end

BenchmarkTools.Trial: 7 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m696.190 ms[22m[39m … [35m814.759 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.24% … 14.97%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m746.732 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m6.02%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m753.605 ms[22m[39m ± [32m 37.367 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m7.53% ±  4.78%

  [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[34m▁[39m[39m [39m█[39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m 
  [39m█[39m▁[39m▁[39m

In [18]:
id_var_mapping = Dict("hus" => hus_data, "ua" => ua_data, "va" => va_data)

ids = ["hus", "ua", "va"]
data = [zeros(Float32, 192, 96, 47, 200), zeros(Float32, 192, 96, 47, 200), zeros(Float32, 192, 96, 47, 200)]

bmable = @benchmarkable begin 
    Threads.@threads for i in 1:length(ids)
        id = ids[i]
        var = data[i]
        path = "sample_data/sample_$(id)_dataset_200_timesteps.nc"
        load_data_to_var!(path, id, var, :, :, :, :)
    end
end

tune!(bmable)

run(bmable)


signal (11): Segmentation fault
in expression starting at In[18]:19

signal (11): Segmentation fault
in expression starting at In[18]:19
unknown function (ip: 0x70c1c1062bfb)
H5SL_close at /home/denis/.julia/artifacts/2829a1f6a9ca59e5b9b53f52fa6519da9c9fd7d3/lib/libhdf5.so (unknown line)
H5P_copy_plist at /home/denis/.julia/artifacts/2829a1f6a9ca59e5b9b53f52fa6519da9c9fd7d3/lib/libhdf5.so (unknown line)
unknown function (ip: 0x70c1c0f24674)
H5F_open at /home/denis/.julia/artifacts/2829a1f6a9ca59e5b9b53f52fa6519da9c9fd7d3/lib/libhdf5.so (unknown line)
H5VL__native_file_open at /home/denis/.julia/artifacts/2829a1f6a9ca59e5b9b53f52fa6519da9c9fd7d3/lib/libhdf5.so (unknown line)
H5VL_file_open at /home/denis/.julia/artifacts/2829a1f6a9ca59e5b9b53f52fa6519da9c9fd7d3/lib/libhdf5.so (unknown line)
unknown function (ip: 0x70c1c0f151a6)
H5Fopen at /home/denis/.julia/artifacts/2829a1f6a9ca59e5b9b53f52fa6519da9c9fd7d3/lib/libhdf5.so (unknown line)
H5SL_search at /home/denis/.julia/artifacts/2829a

: 