In [1]:
import os
import pandas as pd
import sys
import re
import numpy as np

sys.path.append("../")
from nexra_settings.filenames import ParquetFileNames

sys.path.append("../../")
from common.constants import DATAFOLDER

In [2]:
root_dir = DATAFOLDER.data_root_path
nexra_data_dir = os.path.join(root_dir, "nexra_data")

In [6]:
rain_max, rain_min = 0, 0
humidity_max, humidity_min = 0, 0
pressure_max, pressure_min = 1000, 1000
wind_max, wind_min = 0, 0
cloud_max, cloud_min = 0, 0
temp_max, temp_min = 273, 273
slp_max, slp_min = 1000, 1000

for year in os.listdir(os.path.join(nexra_data_dir)):
    datetime_dirs = os.listdir(os.path.join(nexra_data_dir, year))
    datetime_dirs = [i for i in datetime_dirs if re.match("[0-9]{8}", i) is not None]
    for datetime_dir in datetime_dirs:
        data_files = os.listdir(os.path.join(nexra_data_dir, year, datetime_dir))
        data_files = [i for i in data_files if re.match(".+\.parquet\.gzip$", i) is not None]

        uwind_file = None
        vwind_file = None
        for data_file in data_files:
            if ParquetFileNames.rainfall_filename in data_file:
                rain_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, data_file))
                rain_arr = rain_df.to_numpy()
                rain_arr *= 3600
                _rain_max, _rain_min = rain_arr.max(), rain_arr.min()

                if _rain_max > rain_max:
                    rain_max = _rain_max
                if _rain_min < rain_min:
                    rain_min = _rain_min

            if ParquetFileNames.humidity_filename in data_file:
                humidity_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, data_file))
                humidity_arr = humidity_df.to_numpy()
                _humidity_max, _humidity_min = humidity_arr.max(), humidity_arr.min()

                if _humidity_max > humidity_max:
                    humidity_max = _humidity_max

                if _humidity_min < humidity_min:
                    humidity_min = _humidity_min

            if ParquetFileNames.pressure_filename in data_file:
                press_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, data_file))
                press_arr = press_df.to_numpy()
                press_arr /= 100
                _press_max, _press_min = press_arr.max(), press_arr.min()

                if _press_max > pressure_max:
                    pressure_max = _press_max

                if _press_min < pressure_min:
                    pressure_min = _press_min

            if ParquetFileNames.uwind_filename in data_file:
                uwind_file = data_file

            if ParquetFileNames.vwind_filename in data_file:
                vwind_file = data_file

            if ParquetFileNames.cloud_amount_filename in data_file:
                cloud_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, data_file))
                cloud_arr = cloud_df.to_numpy()
                _cloud_max, _cloud_min = cloud_arr.max(), cloud_arr.min()

                if _cloud_max > cloud_max:
                    cloud_max = _cloud_max
                if _cloud_min < cloud_min:
                    cloud_min = _cloud_min

            if ParquetFileNames.sealevel_press_filename in data_file:
                slp_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, data_file))
                slp_arr = slp_df.to_numpy()
                slp_arr /= 100
                _slp_max, _slp_min = slp_arr.max(), slp_arr.min()

                if _slp_max > slp_max:
                    slp_max = _slp_max

                if _slp_min < slp_min:
                    slp_min = _slp_min

            if ParquetFileNames.temperature_filename in data_file:
                temp_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, data_file))
                temp_arr = temp_df.to_numpy()
                _temp_max, _temp_min = temp_arr.max(), temp_arr.min()

                if _temp_max > temp_max:
                    temp_max = _temp_max
                if _temp_min < temp_min:
                    temp_min = _temp_min

        # Absolute wind
        uwind_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, uwind_file))
        vwind_df = pd.read_parquet(os.path.join(nexra_data_dir, year, datetime_dir, vwind_file))
        wind_speed_arr = np.stack((uwind_df.to_numpy(), vwind_df.to_numpy()), axis=-1)

        def abs_val(a):
            return np.sqrt(np.square(a).sum())

        abs_wind_arr = np.apply_along_axis(func1d=abs_val, axis=-1, arr=wind_speed_arr)
        _wind_max, _wind_min = abs_wind_arr.max(), abs_wind_arr.min()

        if _wind_max > wind_max:
            wind_max = _wind_max

        if _wind_min < wind_min:
            wind_min = _wind_min


In [7]:
print(f"Rainfall max: {rain_max}, min: {rain_min}")
print(f"humidity max: {humidity_max}, min: {humidity_min}")
print(f"sealevel presure max: {pressure_max}, min: {pressure_min}")
print(f"wind max: {wind_max}, min: {wind_min}")
print(f"cloud amount max: {cloud_max}, min: {cloud_min}")
print(f"slp max: {slp_max}, min: {slp_min}")
print(f"temp max: {temp_max}, min: {temp_min}")

Rainfall max: 20.649250030517578, min: 0
humidity max: 93.0313720703125, min: 0
sealevel presure max: 1065.231689453125, min: 534.627197265625
wind max: 35.910099029541016, min: 0
cloud amount max: 1.0, min: 0
slp max: 1126.4339599609375, min: 928.0443725585938
temp max: 339.24066162109375, min: 194.80801391601562
