# Accessing big data in the cloud

In [None]:
#pragma cling add_library_path("$CONDA_PREFIX/lib")

In [None]:
#pragma cling load("storage_client")

In [None]:
#include "xtensor/xio.hpp"
#include "xtensor/xview.hpp"
#include "xwidgets/ximage.hpp"
#include "xtensor/xchunked_array.hpp"
#include "xtensor-io/xio_blosc.hpp"
#include "xtensor-io/xchunk_store_manager.hpp"
#include "xtensor-io/ximage.hpp"
#include "xtensor-zarr/xzarr_hierarchy.hpp"
#include "xtensor-zarr/xzarr_gcs_store.hpp"
#include "xtensor-zarr/xtensor_zarr_config_cling.hpp"
#include "xtl/xbase64.hpp"
#include "xcpp/xdisplay.hpp"
#include <cppcolormap.h>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>

In this notebook, we will access some precipitation data hosted in Google Cloud Storage.

In [None]:
// Register the Blosc codec configuration for arrays accessed through the GCS store
// (presumably required so that compressed chunks can be decoded -- TODO confirm).
xt::xzarr_register_compressor<xt::xzarr_gcs_store, xt::xio_blosc_config>();
// Google Cloud Storage client built with anonymous credentials.
// NOTE(review): the extra parentheses around the argument presumably keep this
// line from being parsed as a function declaration -- confirm before removing them.
gcs::Client client((gcs::ClientOptions(gcs::oauth2::CreateAnonymousCredentials())));
// Store rooted at this bucket path, accessed through the client above.
xt::xzarr_gcs_store s("pangeo-data/gpm_imerg/late/chunk_time/precipitationCal", client);
// "2" is presumably the Zarr specification version of the hierarchy -- TODO confirm.
auto h = xt::get_zarr_hierarchy(s, "2");
// The empty path presumably designates the array at the root of the store -- TODO confirm.
auto z = h.get_array("");
auto metadata = z.get_metadata();
// No trailing semicolon on purpose: the value returned by dump() is shown as the cell output.
metadata.dump()

Let's see how much data we have in the store.

In [None]:
// Print the shape of the array, then the size of its content expressed in "TB"
// (powers of 1024), counting sizeof(float) bytes per element.
const double bytes_per_tb = 1024. * 1024. * 1024. * 1024.;
float size = 1.;  // running product of the dimensions, i.e. the element count
std::cout << "shape: ";
for (auto dim: z.shape())
{
    std::cout << dim << " ";
    size *= dim;
}
std::cout << std::endl;
size = size * sizeof(float) / bytes_per_tb;
std::cout << size << " TB" << std::endl;

We cannot load 8 TB of data in memory, so let's access the element at index 1000 along the time dimension.

In [None]:
// Select index 1000 along the first dimension and everything along the other two.
xt::xstrided_slice_vector sv({1000, xt::all(), xt::all()});
auto z_slice = xt::strided_view(z, sv);
// Get the selection as an array of float
// (presumably this is where the chunks are actually fetched from the store -- TODO confirm).
auto a = z_slice.get_array<float>();

The absence of data in this dataset is represented by a negative value (which is impossible for precipitation), so we will simply clip negative values to zero.

In [None]:
// Largest value of the slice; the trailing () evaluates the reduction down to a scalar.
auto a_max = xt::amax(a)();
// Clamp every value to [0, a_max]: negative values become 0, the others are kept.
xt::xarray<float> a_clip = xt::clip(a, 0, a_max);

Now let's map an RGB color to every value in our dataset, using the jet colormap.

In [None]:
// Jet colormap provided by cppcolormap.
auto cmap = cppcolormap::jet();
// Color assigned to each value of the clipped field
// (presumably scaled between the minimum and maximum of the data -- TODO confirm).
auto a_cmap = cppcolormap::as_colors(a_clip, cmap);

We are now ready to show an image of the precipitation field.

In [None]:
// Read the whole content of a file into a byte buffer.
//
// filename: path of the file to read; the file is opened in binary mode so
//           that the bytes are returned untouched.
// Returns the bytes of the file, in order (an empty vector for an empty file).
// Throws std::runtime_error when the file cannot be opened, instead of
// silently returning an empty buffer.
std::vector<char> read_file(const char* filename)
{
    std::ifstream file(filename, std::ios::binary);
    if (!file.is_open())
    {
        throw std::runtime_error(std::string("read_file: cannot open ") + filename);
    }
    // The default-constructed iterator is the end-of-stream marker.
    return std::vector<char>((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
}

In [None]:
// Encode an expression as an image file and return the bytes of that file.
//
// The expression is written with xt::dump_image to a temporary ".png" file,
// which is read back into memory with read_file and then deleted.
//
// e: expression holding the pixel data handed to xt::dump_image.
// Returns the raw bytes of the file written by xt::dump_image.
//
// NOTE(review): the temporary path is fixed, so two kernels calling this
// function at the same time would overwrite each other's file.
template <class E>
std::vector<char> to_png_buffer(const xt::xexpression<E>& e)
{
    const char* temp_filename = "/tmp/xio_image.png";
    xt::dump_image(temp_filename, e);
    std::vector<char> buffer = read_file(temp_filename);
    // The bytes are in memory now: do not leave the file behind in /tmp.
    std::remove(temp_filename);
    return buffer;
}

In [None]:
// Image widget fed with the bytes returned by to_png_buffer for the colored field.
xw::image img;
img.value = to_png_buffer(a_cmap);
// No trailing semicolon on purpose: the widget is displayed as the cell output.
img

Now let's plot the mean of slices 1000 to 1100 along the time dimension.

In [None]:
// NOTE(review): this cell is commented out. It would declare `sv` and `z_slice`
// again, names already declared by an earlier cell -- presumably why it is
// disabled; confirm that the kernel accepts redefinitions before enabling it.
//xt::xstrided_slice_vector sv({xt::range(1000, 1100), xt::all(), xt::all()});
//auto z_slice = xt::strided_view(z, sv);

In [None]:
// NOTE(review): no axis is passed to xt::mean here; a mean along the time
// dimension presumably needs the axis argument (e.g. {0}) -- TODO confirm.
//auto z_mean = xt::mean(z_slice);

In [None]:
//auto a = z_mean.get_array<float>();

In [None]:
// NOTE(review): the element type here is uint8_t, whereas the single-slice cell
// earlier in the notebook stores the clipped values as float -- presumably the
// values would be truncated; confirm the intent before enabling this cell.
//auto a_max = xt::amax(a)();
//xt::xarray<uint8_t> a_clip = xt::clip(a, 0, a_max);

In [None]:
//auto a_cmap = cppcolormap::as_colors(a_clip, cmap);

In [None]:
//xw::image img;
//img.value = to_png_buffer(a_cmap);
//img