In [None]:
from datetime import datetime, timedelta, timezone

import pandas as pd
import numpy as np
import plotly.express as px

from src.io import (
    get_techtide_hf,
    get_techtide_ionosondes,
    get_gfz_f107,
    get_gfz_hp30,
    get_noaa_l1,
    get_fmi_iu_ie,
)
from src.preprocess import (
    resample_time_series,
    get_categories,
    get_solar_position,
    get_moving_avg,
)

In [None]:
# from src.cert import check_and_update_cert
# from src import TECHTIDE_CERT_PATH

# check_and_update_cert(
#     hostname="techtide-srv-pub.space.noa.gr",
#     port=8443,
#     cert_path=TECHTIDE_CERT_PATH,
# )

In [None]:
# Length of the look-back window, in hours, ending at the current UTC time.
LOOKBACK_HOURS = 6
# Layout of the START/STOP timestamp strings handed to the data loaders.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# `datetime.utcnow()` is deprecated since Python 3.12, so an aware UTC "now" is
# taken instead. The format above carries no offset field, so the rendered
# strings are the same as before.
_stop_dt = datetime.now(timezone.utc)
_start_dt = _stop_dt - timedelta(hours=LOOKBACK_HOURS)

# The public names are bound once, directly to their final string form, so they
# never hold a datetime at any point in the cell.
STOP_UTC_NOW = _stop_dt.strftime(TIMESTAMP_FORMAT)
START_UTC = _start_dt.strftime(TIMESTAMP_FORMAT)

# TechTIDE

#### HF-INT + 2-h moving average

In [None]:
# Pull the raw HF-INT records for the configured START/STOP window.
df_hf = get_techtide_hf(
    start=START_UTC,
    stop=STOP_UTC_NOW,
)

# Resample with mean aggregation, then keep two decimals.
df_hf_30 = resample_time_series(
    df_hf,
    aggregation_function='mean',
).round(2)

In [None]:
# Apply get_moving_avg to the 'hf' column with a window of 2 (the section title
# calls this a 2-h moving average; the window unit is defined by the helper
# — TODO confirm).
# NOTE(review): the result is assigned back to df_hf_30, so re-running this cell
# alone feeds the already-processed frame back into the helper.
df_hf_30 = get_moving_avg(df_hf_30, ['hf'], [2])

#### Ionosondes

In [None]:
# Ionosonde station codes requested from TechTIDE.
iono_stations = ['AT138', 'FF051', 'JR055', 'PQ052', 'RO041', 'VT139']

# Raw ionosonde records for the configured START/STOP window.
df_iono = get_techtide_ionosondes(START_UTC, STOP_UTC_NOW, iono_list=iono_stations)

# Resample with median aggregation, then keep two decimals.
df_iono_30 = resample_time_series(df_iono, aggregation_function='median').round(2)

# Solar Zenith Angle (pvlib)

In [None]:
# Last index entry of the resampled HF frame.
latest_hf_index = df_hf_30.index[-1]

# Zenith column at altitude 0 for that entry; the rounded value is the cell output.
solar_zenith = get_solar_position(latest_hf_index, columns='zenith', altitude=0)
solar_zenith.round(1)

# GFZ

#### Hp-30

In [None]:
# Load the GFZ Hp30 index, relying on the loader's default arguments.
df_hp_30 = get_gfz_hp30()

#### F10.7 (adj)

In [None]:
# Display only the last entry left after dropping missing values
# (presumably the most recent F10.7 reading — assumes chronological order).
get_gfz_f107().dropna().tail(1)

# NOAA

#### L1 data (propagated) + Newell coupling, defined by
$$v^{4/3} \left(B_y^2 + B_z^2\right)^{1/3} \sin^{8/3}\left(\frac{1}{2}\arctan\left(\frac{|B_y|}{B_z}\right)\right)$$

The available history is limited; it is better to **hook into the internal DB**

In [None]:
# Fetch the NOAA L1 data, passing the end of the analysis window
# (STOP_UTC_NOW, a formatted timestamp string) as `end_propagated_datetime`.
df_l1 = get_noaa_l1(end_propagated_datetime=STOP_UTC_NOW)

In [None]:
# Resample the L1 data with median aggregation (no rounding is applied here).
df_l1_30 = resample_time_series(df_l1, aggregation_function='median')

# FMI

#### IU & IE + moving avgs + variations

In [None]:
# FMI columns processed by the moving-average and variation cells below.
fmi_cols = ['ie', 'iu']

In [None]:
# Load the FMI IU/IE series, relying on the loader's default arguments.
df_fmi = get_fmi_iu_ie()

In [None]:
# Resample the FMI series with median aggregation, then keep two decimals.
df_fmi_30 = resample_time_series(
    df_fmi,
    aggregation_function='median',
).round(2)

In [None]:
# Apply get_moving_avg to every column in fmi_cols with windows 3 and 12
# (window unit is defined by the helper — TODO confirm).
# NOTE(review): the result is assigned back to df_fmi_30, so re-running this
# cell alone feeds the already-processed frame back into the helper.
df_fmi_30 = get_moving_avg(df_fmi_30, fmi_cols, [3, 12])

In [None]:
hours = 6
# The window passed to get_categories is twice the number of hours.
window_steps = 2*hours

for col_ in fmi_cols:
    series = df_fmi_30[col_]
    # Only the second element returned by get_categories is used.
    _, labels = get_categories(series, window=window_steps, zero_phase=False)

    # A leading 0 is prepended to the labels before storing them as a column
    # (presumably because labels is one element shorter than the frame — TODO confirm).
    padded_labels = np.insert(labels, 0, 0, axis=0)
    df_fmi_30[f'{col_}_variation'] = padded_labels

# SMR (SuperMAG partial ring current index) - TODO

In [None]:
# df_smr = read_time_series(
#     Path(DATA_IN, 'SMR.csv'),
#     column_names=['datetime','smr'],
# ).loc[START_DATE:END_DATE]

In [None]:
# df_smr_30 = resample_time_series(df_smr, aggregation_function='median')

## Dataset creation

In [None]:
# df_j = df_ejet_30.merge(
#     df_tid_30_['quality_index'],
#     how='left',
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_hf_30,
#     how='left',
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_solar['f_107_adj'],
#     how='left',
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_hp_30,
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_smr_30,
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_l1_30,
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_newell_30,
#     left_index=True,
#     right_index=True,
# ).merge(
#     df_ionosondes_30,
#     left_index=True,
#     right_index=True,
# )

# # Solar data need to be repeated, since they're provided on a daily basis only
# df_j['f_107_adj'] = df_j['f_107_adj'].ffill()

# # Solar zenith angle
# df_j['solar_zenith_angle'] = get_solar_position(
#     df_j.index, columns='zenith', altitude=0,
# ).round(1)

In [None]:
# assert df_j.reset_index().duplicated('datetime').sum() == 0

Construct the actual **target**, a boolean column which is set to 1 whenever a **TID event** is reported **within a 3-hour timeframe**

In [None]:
# steps = 2 * FORECAST_HOURS_IN_ADVANCE

# df_j[f'tid_within_{FORECAST_HOURS_IN_ADVANCE}h'] = df_j['quality_index'].rolling(
#     window=steps+1, min_periods=1
# ).sum().gt(0).shift(
#     -steps, fill_value=False
# ).replace(
#     {True: 1, False: 0}
# )

## Dump

In [None]:
# df_j.drop(
#     columns=['quality_index'],
# ).to_pickle(
#     Path(DATA_OUT, 'df_dataset.pickle')
# )