# Accessing data from the Troll Observing Network (TONe) Integrated Cloud Observatory (ICO)

## Use a Dask cluster for higher performance when reading data

- However, this is not essential.

In [None]:
# Start a local Dask client. It is optional and only there to speed up reading data.
from dask.distributed import Client
# processes=False requests a thread-based cluster inside this Python process
# rather than separate worker processes (see the dask.distributed Client docs).
client = Client(processes=False)
# Leaving `client` as the last expression makes the notebook display its summary.
client

In [None]:
import icechunk
import pandas as pd
import xarray as xr
import hvplot.xarray

## Define a function that accesses the Level-0 data repositories

In [None]:
def read_level0_data_repo(repo_L0_bucket):
    """Open one TONe-ICO Level-0 Icechunk repository as an xarray object.

    The repository is reached anonymously through the S3 endpoint
    hard-coded below, and a read-only session on its ``main`` branch is
    opened.

    Parameters
    ----------
    repo_L0_bucket : str
        Name of the S3 bucket holding the Level-0 repository.

    Returns
    -------
    xarray.DataTree or xarray.Dataset
        The result of ``xr.open_datatree`` when ``'radiosonde'`` occurs in
        the bucket name, otherwise the result of ``xr.open_zarr``.
    """
    # Connection settings shared by every Level-0 repository.
    s3_settings = {
        'bucket': repo_L0_bucket,
        'prefix': '',
        'endpoint_url': 'https://tone-ico-gws-o.s3-ext.jc.rl.ac.uk/',
        'region': 'auto',
        'anonymous': True,
    }

    # Storage -> repository -> read-only session on 'main' -> Zarr store.
    store = (
        icechunk.Repository.open(icechunk.s3_storage(**s3_settings))
        .readonly_session(branch='main')
        .store
    )

    # ....All TONe-ICO data except radiosondes are stored as xarray DataSets
    if 'radiosonde' not in repo_L0_bucket:
        return xr.open_zarr(store, consolidated=False)

    # ....Radiosonde data are stored as an xarray DataTree
    return xr.open_datatree(store, engine='zarr', consolidated=True)


### Current list of TONe-ICO Level-0 data repos

In [None]:
# Bucket names of the TONe-ICO Level-0 repositories.
# Later cells pick a repository by its position in this list, so keep the order stable.
repo = [
    'automatic-weather-station-troll-station.level0.repo',  # repo[0]
    'automatic-weather-station-troll-airfield.level0.repo',  # repo[1]
    'depolarisation-lidar-ceilometer.level0.repo',  # repo[2]
    'radiosonde.level0.repo',  # repo[3]
]

### Read automatic weather station data!

In [None]:
# Open the two automatic weather station repositories:
# Troll station (repo[0]) and Troll airfield (repo[1]).
aws_station = read_level0_data_repo(repo[0])
aws_airfield = read_level0_data_repo(repo[1])

In [None]:
# Draw one variable from each weather station on a shared plot; `*` overlays the two hvplot outputs.
# NOTE(review): presumably T_air_2_vent and T_air_2 are comparable air-temperature variables -- confirm against the dataset metadata.
aws_station.T_air_2_vent.hvplot() * aws_airfield.T_air_2.hvplot()

## Read the Lidar data

In [None]:
# Open the depolarisation lidar ceilometer repository (repo[2]).
lidar = read_level0_data_repo(repo[2])
# Leaving `lidar` as the last expression makes the notebook display the dataset summary.
lidar

In [None]:
# ....Place the lowest cloud base height into your local computer's memory.
#     This speeds up the plotting below!
#     [:, 0] keeps every entry along the first dimension and only index 0 of the second.
#     NOTE(review): presumably the dimensions are (time, cloud layer) with the lowest layer first -- confirm.
cbh = lidar.cloud_base_heights[:,0].compute()

In [None]:
# ....Plot a time series of the lowest cloud base height as a scatter plot
cbh.hvplot.scatter()

In [None]:
# ....Create a violin plot summarising the distribution of the lowest cloud base height
cbh.hvplot.violin()

## Read the sonde profiles

- Unfortunately, this takes about 2 minutes, so be PATIENT.

In [None]:
# Open the radiosonde repository (repo[3]). For this bucket name the reader
# returns an xarray DataTree rather than a Dataset.
sonde_profiles = read_level0_data_repo(repo[3])

In [None]:
# %% - Each level-0 radiosonde profile is accessed like an entry in a Python dictionary.
#      `groups` holds the profile keys, which are the names of the raw radiosonde *.cor files.
#      Only one key is printed here as an example.
#      NOTE(review): index 0 of `groups` is presumably the tree root, making index 1 the first profile -- confirm.

sonde = sonde_profiles.groups
print(sonde[1])

In [None]:
# Plot variable `T` of the selected profile with height on the y-axis.
# NOTE(review): `T` is presumably air temperature -- confirm against the profile metadata.
sonde_profiles[sonde[1]].T.hvplot(y='height')

## Don't forget to close the client to the Dask cluster!

In [None]:
# Close the Dask client that was created at the top of the notebook.
client.close()