In [1]:
using Pkg

Pkg.activate(".")

[32m[1m  Activating[22m[39m project at `~/Documents/Uni/Master/MA/visual_analysis`


In [2]:
using BenchmarkTools

In [3]:
include("eof.jl")

eof (generic function with 1 method)

In [4]:
include("utils.jl")

[32m[1m    CondaPkg [22m[39m[0mFound dependencies: /home/denis/Documents/Uni/Master/MA/visual_analysis/CondaPkg.toml
[32m[1m    CondaPkg [22m[39m[0mFound dependencies: /home/denis/.julia/packages/PythonCall/S5MOg/CondaPkg.toml
[32m[1m    CondaPkg [22m[39m[0mDependencies already up to date


get_mean_of_multiple_arrays (generic function with 1 method)

In [5]:
lon_dim = 100  # number of longitude points
lat_dim = 50   # number of latitude points
time_dim = 10  # number of time points

# Create a sample data array
data = rand(lon_dim, lat_dim, time_dim) .* 10000

# Generate latitude values (for example, from -90 to 90 degrees)
latitudes = LinRange(-90, 90, lat_dim)

# Calculate the weights as the cosine of the latitude values
weights = cos.(deg2rad.(latitudes))

nmodes = 5

my_impl = eof(data; nmodes = nmodes)

julia_pkg = get_eof_of_datachunk(data; nmodes = nmodes)

python_eofs = pyeof_of_datachunk(data, nmodes)

EOFResult([-0.02975749790745718 0.025766651213249236 … -0.018613003984060274 0.0006820080672172965; -0.000518205891377481 -0.010070042190505097 … 0.014283417504793729 0.004180595417967614; … ; 0.007836946581987187 -0.007168477251689969 … 0.00166384159633363 -0.02298320949479792; -0.013460625396911315 0.014432659797960988 … -0.01430866479356181 0.014877900868258596;;; 0.008422720896754905 0.012448467452380978 … -0.00316032444001148 0.009738263421264763; 0.006352334245111765 0.0197443922058457 … -0.010536684018112053 0.029037281040790733; … ; 0.03701746660402534 -0.01034406372660909 … -0.025226121468933032 -0.006860068993103382; 0.02491767792985136 -0.013958597570848103 … -0.0260079135773255 -0.016290824304030498;;; -0.014758156313129827 0.012751180114528023 … -0.0061476737098104545 0.023993406609096976; 0.01939475302684876 0.0009665519378135906 … -0.005708346956852046 0.017625033767341335; … ; -0.013971766311233865 -0.006916737027596677 … 0.006633314352169199 0.022370755282360696; 0.016

In [6]:
python_eofs.modes_variability

5-element Vector{Float64}:
 11.909623025252817
 11.506878564851117
 11.31934753149192
 11.248778289580537
 11.127075394818563

In [7]:
my_impl.spatial_modes == julia_pkg.spatial_modes

true

In [8]:
diffs = abs.(my_impl.spatial_modes .- python_eofs.spatial_modes)

maximum(diffs)

4.7878367936959876e-15

In [17]:
function calculate_eofs_of_ensemble_multithreaded(
    ensemble::EnsembleSimulation,
    chunking,
    nmodes;
    center=true,
    align_eofs_with_mean=true,
    norm_withsqrt_timedim=false,
    geoweights=true,
    scale_mode=nothing,
    saving_filepath=nothing
)::Dict{String,Vector{EOFResult}}

    result = Dict{String,Vector{EOFResult}}()

    if geoweights
        weights = sqrt.(cos.(deg2rad.(ensemble.lats)))
    else
        weights = nothing
    end

    for (i, member) in enumerate(ensemble.members)
        # Predefined array for EOFResult

        # Function to handle EOF calculation


        # Decide whether to use threading based on CONDITION
        @time "Time it took for eof calculation for member $(member.id)" begin
            eofs = Vector{EOFResult}(undef, length(chunking))
            for idx in eachindex(chunking)
                eofs[idx] = eof(member.data[:, :, chunking[idx]]; nmodes=nmodes, center=center, align_eofs_with_mean=align_eofs_with_mean, norm_withsqrt_timedim=norm_withsqrt_timedim, weights=weights, scaling=scale_mode)
            end


            result[member.id] = eofs
        end
        flush(stdout)
    end

    if !isnothing(saving_filepath)
        try
            save(saving_filepath, result)
        catch e
            println("Couldn't save to filepath $saving_filepath: $e")
        end

    end

    return result

end


calculate_eofs_of_ensemble_multithreaded (generic function with 1 method)

In [10]:
function calculate_eofs_of_ensemble_single_thread(
    ensemble::EnsembleSimulation,
    chunking,
    nmodes;
    center=true,
    align_eofs_with_mean=true,
    norm_withsqrt_timedim=false,
    geoweights=true,
    scale_mode=nothing,
    saving_filepath=nothing
)::Dict{String,Vector{EOFResult}}

    result = Dict{String,Vector{EOFResult}}()

    if geoweights
        weights = sqrt.(cos.(deg2rad.(ensemble.lats)))
    else
        weights = nothing
    end

    for (i, member) in enumerate(ensemble.members)
        # Predefined array for EOFResult

        # Function to handle EOF calculation


        # Decide whether to use threading based on CONDITION
        @time "Time it took for eof calculation for member $(member.id)" begin
            eofs = Vector{EOFResult}(undef, length(chunking))
            Threads.@threads for idx in eachindex(chunking)
                eofs[idx] = eof(member.data[:, :, chunking[idx]]; nmodes=nmodes, center=center, align_eofs_with_mean=align_eofs_with_mean, norm_withsqrt_timedim=norm_withsqrt_timedim, weights=weights, scaling=scale_mode)
            end


            result[member.id] = eofs
        end
        flush(stdout)
    end

    if !isnothing(saving_filepath)
        try
            save(saving_filepath, result)
        catch e
            println("Couldn't save to filepath $saving_filepath: $e")
        end

    end

    return result

end


calculate_eofs_of_ensemble_single_thread (generic function with 1 method)

In [11]:
function filter_winter_season(time_element)
    winter_months = [12, 1, 2, 3]
    for wm in winter_months
        if month(time_element) == wm
            return true
        end
    end
    return false
end

filter_winter_season (generic function with 1 method)

In [13]:
ps_data_monthly_path = "/mnt/bigdrive/Datasets/master_thesis_data/ps_data_monthly"
ivt_data_monthly_path = "/mnt/bigdrive/Datasets/master_thesis_data/ivt_fields_monthly"

"/mnt/bigdrive/Datasets/master_thesis_data/ivt_fields_monthly"

In [14]:
(ivt_ssp585_monthly,) = build_ensemble_data(ivt_data_monthly_path, "ssp585"; file_range_selection=:, data_field_id="ivt", member_range=1:50, filterfun = filter_winter_season)

Handling scenario ssp585 ...


1-element Vector{EnsembleSimulation}:
 EnsembleSimulation("ssp585", Union{Missing, AbstractFloat}[-90.0, -88.125, -86.25, -84.375, -82.5, -80.625, -78.75, -76.875, -75.0, -73.125  …  22.5, 24.375, 26.25, 28.125, 30.0, 31.875, 33.75, 35.625, 37.5, 39.375], Union{Missing, AbstractFloat}[21.450475037398185, 23.31573072614093, 25.180985581270594, 27.04623949994481, 28.91149236871774, 30.77674406172325, 32.64199443851768, 34.50724334150103, 36.37249059281224, 38.23773599056483  …  62.48557052203639, 64.35073040887207, 66.2158721139987, 68.08099098565125, 69.94608064698343, 71.81113211427447, 73.67613231320912, 75.54106145287895, 77.4058880820788, 79.27055903485967], Union{Missing, DateTime}[DateTime("2015-01-16T12:00:00"), DateTime("2015-02-14T21:00:00"), DateTime("2015-03-16T09:00:00"), DateTime("2015-12-16T09:00:00"), DateTime("2016-01-16T09:00:00"), DateTime("2016-02-15T09:00:00"), DateTime("2016-03-16T09:00:00"), DateTime("2016-12-16T09:00:00"), DateTime("2017-01-16T09:00:00"), DateTime

In [15]:
chunks_50_seasons_scenario = get_sliding_time_scopes_by_threshold(ivt_ssp585_monthly.time, 50)

37-element Vector{UnitRange{Int64}}:
 1:201
 4:205
 8:209
 12:213
 16:217
 20:221
 24:225
 28:229
 32:233
 36:237
 ⋮
 113:314
 117:318
 121:322
 125:327
 129:331
 133:335
 137:339
 141:343
 145:347

In [16]:
@benchmark calculate_eofs_of_ensemble_single_thread(
    $ivt_ssp585_monthly,
    $chunks_50_seasons_scenario,
    2;
    center=true,
    align_eofs_with_mean=true,
    norm_withsqrt_timedim=false,
    geoweights=true,
    scale_mode=:singularvals)

Time it took for eof calculation for member r1i1p1f1: 8.791490 seconds (35.22 M allocations: 4.189 GiB, 0.54% gc time, 222.32% compilation time)
Time it took for eof calculation for member r2i1p1f1: 4.951817 seconds (33.56 M allocations: 4.080 GiB, 8.05% gc time)
Time it took for eof calculation for member r3i1p1f1: 7.378953 seconds (33.56 M allocations: 4.080 GiB, 6.04% gc time)
Time it took for eof calculation for member r4i1p1f1: 5.670071 seconds (33.56 M allocations: 4.080 GiB)
Time it took for eof calculation for member r5i1p1f1: 4.865262 seconds (33.56 M allocations: 4.080 GiB, 11.87% gc time)
Time it took for eof calculation for member r6i1p1f1: 6.347048 seconds (33.56 M allocations: 4.080 GiB)
Time it took for eof calculation for member r7i1p1f1: 4.636049 seconds (33.56 M allocations: 4.080 GiB, 11.94% gc time)
Time it took for eof calculation for member r8i1p1f1: 6.010152 seconds (33.56 M allocations: 4.080 GiB, 10.36% gc time)
Time it took for eof calculation for member r9i1p

BenchmarkTools.Trial: 1 sample with 1 evaluation.
 Single result which took [34m284.075 s[39m (5.76% GC) to evaluate,
 with a memory estimate of [33m204.02 GiB[39m, over [33m1677803794[39m allocations.

In [18]:
@benchmark calculate_eofs_of_ensemble_multithreaded(
    $ivt_ssp585_monthly,
    $chunks_50_seasons_scenario,
    2;
    center=true,
    align_eofs_with_mean=true,
    norm_withsqrt_timedim=false,
    geoweights=true,
    scale_mode=:singularvals
)

Time it took for eof calculation for member r1i1p1f1: 4.524882 seconds (33.56 M allocations: 4.080 GiB, 10.45% gc time)
Time it took for eof calculation for member r2i1p1f1: 4.288469 seconds (33.56 M allocations: 4.080 GiB, 9.17% gc time)
Time it took for eof calculation for member r3i1p1f1: 4.282391 seconds (33.56 M allocations: 4.080 GiB, 8.34% gc time)
Time it took for eof calculation for member r4i1p1f1: 4.225646 seconds (33.56 M allocations: 4.080 GiB, 8.44% gc time)
Time it took for eof calculation for member r5i1p1f1: 4.303004 seconds (33.56 M allocations: 4.080 GiB, 9.26% gc time)
Time it took for eof calculation for member r6i1p1f1: 4.244764 seconds (33.56 M allocations: 4.080 GiB, 8.06% gc time)
Time it took for eof calculation for member r7i1p1f1: 4.321233 seconds (33.56 M allocations: 4.080 GiB, 9.09% gc time)
Time it took for eof calculation for member r8i1p1f1: 4.296857 seconds (33.56 M allocations: 4.080 GiB, 7.95% gc time)
Time it took for eof calculation for member r9i

BenchmarkTools.Trial: 1 sample with 1 evaluation.
 Single result which took [34m217.035 s[39m (7.41% GC) to evaluate,
 with a memory estimate of [33m204.02 GiB[39m, over [33m1677799181[39m allocations.