In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import xarray
import dask

import psutil
import time

# Performance analysis

This notebook compares the performance of remote data access versus next-to-data access.

In [2]:
# Look up, in the inventory, the first OPeNDAP entry for the monthly CMIP6
# ssp585 variable "t"; its "location" value is the URL used by the benchmarks.
df = pd.read_csv("../../inventory.csv")
subset = df.query(
    'type == "opendap" & variable == "t" & project == "CMIP6" '
    '& frequency == "mon" & experiment == "ssp585"'
)
# Fail early with a readable message instead of the opaque IndexError that
# `.iloc[0]` raises when the query matches no rows.
assert not subset.empty, "no matching OPeNDAP entry found in ../../inventory.csv"
location = subset["location"].iloc[0]
location

'https://hub.climate4r.ifca.es/thredds/dodsC/ipcc/ar6/atlas/ia-monthly/CMIP6/ssp585/t_CMIP6_ssp585_mon_201501-210012.nc'

## Measurements

First we define the parameters of the performance experiment. We test different numbers of workers and perform several runs for each number of workers.

In [3]:
# Worker counts to benchmark; each value is passed to `measure` and on to
# `compute(num_workers=...)`.
nworkers = [8, 4, 2, 1]
# Number of repeated runs performed for every worker count.
runs = 5
# Accumulates one result dict (as returned by `measure`) per run.
results = []

def measure(op, name, nworkers, run):
    """Compute ``op`` once and record elapsed time and network counter deltas.

    Parameters
    ----------
    op
        Object exposing ``compute(num_workers=..., scheduler=...)``; it is
        computed with the ``"processes"`` scheduler.
    name
        Label stored under the ``"name"`` key of the result.
    nworkers
        Passed to ``compute`` as ``num_workers`` and stored under ``"workers"``.
    run
        Run identifier stored under the ``"run"`` key.

    Returns
    -------
    dict
        ``name``, ``run``, ``time`` (elapsed seconds), the difference of each
        network counter between the end and the start of the computation, and
        ``workers``.

    Notes
    -----
    ``psutil.net_io_counters()`` is called without arguments; per the psutil
    documentation this yields system-wide totals, so unrelated traffic on the
    machine is included in the deltas.
    """
    # Counter fields to report, listed in the order they appear in the result.
    counters = (
        "bytes_recv", "bytes_sent",
        "packets_recv", "packets_sent",
        "errin", "errout",
        "dropin", "dropout",
    )

    start_net = psutil.net_io_counters()
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or become negative) if the system wall clock is adjusted mid-run.
    start_time = time.perf_counter()

    op.compute(num_workers=nworkers, scheduler="processes")

    elapsed = time.perf_counter() - start_time
    end_net = psutil.net_io_counters()

    result = {"name": name, "run": run, "time": elapsed}
    result.update(
        (field, getattr(end_net, field) - getattr(start_net, field))
        for field in counters
    )
    result["workers"] = nworkers

    return result

### OPeNDAP without compression (hub)

In [4]:
# Rewrite DEFLATE=1 to DEFLATE=0 in ~/.dodsrc (edited in place) ahead of the uncompressed runs.
# NOTE(review): sed changes nothing if ~/.dodsrc has no DEFLATE=1 entry — confirm the file contains it.
!sed -i 's|DEFLATE=1|DEFLATE=0|' ~/.dodsrc

In [5]:
# Benchmark the spatial/member mean of "t" for every worker count, `runs`
# times each, and append one measurement per run to `results`.
for w in nworkers:
    for r in range(runs):
        # The dataset is re-opened for every run; the context manager closes
        # the remote handle explicitly once the run has been measured instead
        # of leaving it to garbage collection.
        with xarray.open_dataset(location) as remote:
            ds = remote.chunk(member=-1, time=1)
            op = ds["t"].mean(["lat", "lon", "member"])
            results.append(measure(op, "opendap-hub-uncompressed", w, r))

### OPeNDAP with compression (hub)

In [6]:
# Rewrite DEFLATE=0 back to DEFLATE=1 in ~/.dodsrc (edited in place) ahead of the compressed runs.
# NOTE(review): sed changes nothing if ~/.dodsrc has no DEFLATE=0 entry — confirm the file contains it.
!sed -i 's|DEFLATE=0|DEFLATE=1|' ~/.dodsrc

In [7]:
# Benchmark the spatial/member mean of "t" for every worker count, `runs`
# times each, and append one measurement per run to `results`.
for w in nworkers:
    for r in range(runs):
        # The dataset is re-opened for every run; the context manager closes
        # the remote handle explicitly once the run has been measured instead
        # of leaving it to garbage collection.
        with xarray.open_dataset(location) as remote:
            ds = remote.chunk(member=-1, time=1)
            op = ds["t"].mean(["lat", "lon", "member"])
            results.append(measure(op, "opendap-hub-compressed", w, r))

In [8]:
# Turn the collected measurement records into a table and write it to
# "home.csv", leaving out the index column.
results_frame = pd.DataFrame.from_records(results)
results_frame.to_csv("home.csv", index=False)