### Importando bibliotecas

In [1]:
# desabilita os warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

### Importando dados tratados

In [3]:
df_solar_wind = pd.read_parquet(r"..\data\df_solar_wind.parquet")
df_satellite_pos = pd.read_parquet(r"..\data\df_satellite_pos.parquet")
df_sunspots = pd.read_parquet(r"..\data\df_sunspots.parquet")
df_labels = pd.read_parquet(r"..\data\df_labels.parquet")

### Consolidando base de dados

##### Preparando df_solar_wind e df_satellite_pos

In [4]:
df_solar_wind["dia"] = df_solar_wind["timedelta"].dt.days
df_satellite_pos["dia"] = df_satellite_pos["timedelta"].dt.days

In [5]:
df_satellite_pos = pd.merge(df_satellite_pos, df_solar_wind[["period", "dia", "source"]], on=["period", "dia"], how="left")

In [6]:
df_satellite_pos["gse_x_satellite"] = np.nan
df_satellite_pos["gse_y_satellite"] = np.nan
df_satellite_pos["gse_z_satellite"] = np.nan
df_satellite_pos.head(3)

Unnamed: 0,period,timedelta,gse_x_ace,gse_y_ace,gse_z_ace,gse_x_dscovr,gse_y_dscovr,gse_z_dscovr,dia,source,gse_x_satellite,gse_y_satellite,gse_z_satellite
0,train_a,0 days,1522376.9,143704.6,149496.7,,,,0,ac,,,
1,train_a,0 days,1522376.9,143704.6,149496.7,,,,0,ac,,,
2,train_a,0 days,1522376.9,143704.6,149496.7,,,,0,ac,,,


In [7]:
df_satellite_pos["gse_x_satellite"] = np.where(df_satellite_pos["gse_x_satellite"].isnull() & (df_satellite_pos["source"]  == "ac"),
                                               df_satellite_pos["gse_x_ace"],
                                               df_satellite_pos["gse_x_dscovr"]
                                               )
df_satellite_pos["gse_y_satellite"] = np.where(df_satellite_pos["gse_y_satellite"].isnull() & (df_satellite_pos["source"]  == "ac"),
                                               df_satellite_pos["gse_y_ace"],
                                               df_satellite_pos["gse_y_dscovr"]
                                               )
df_satellite_pos["gse_z_satellite"] = np.where(df_satellite_pos["gse_z_satellite"].isnull() & (df_satellite_pos["source"]  == "ac"),
                                               df_satellite_pos["gse_z_ace"],
                                               df_satellite_pos["gse_z_dscovr"]
                                               )

df_satellite_pos = df_satellite_pos[['period', 
                                     'timedelta', 
                                     'dia', 
                                     'source',
                                     'gse_x_satellite', 
                                     'gse_y_satellite', 
                                     'gse_z_satellite']]

df_satellite_pos["gse_x_satellite"] = df_satellite_pos["gse_x_satellite"].interpolate(method='linear', limit_direction='both')
df_satellite_pos["gse_y_satellite"] = df_satellite_pos["gse_y_satellite"].interpolate(method='linear', limit_direction='both')
df_satellite_pos["gse_z_satellite"] = df_satellite_pos["gse_z_satellite"].interpolate(method='linear', limit_direction='both')

df_satellite_pos["gse_x_satellite"] = df_satellite_pos["gse_x_satellite"].apply(pd.to_numeric, downcast ="float")
df_satellite_pos["gse_y_satellite"] = df_satellite_pos["gse_y_satellite"].apply(pd.to_numeric, downcast ="float")
df_satellite_pos["gse_z_satellite"] = df_satellite_pos["gse_z_satellite"].apply(pd.to_numeric, downcast ="float")

df_satellite_pos.isna().sum()

period             0
timedelta          0
dia                0
source             0
gse_x_satellite    0
gse_y_satellite    0
gse_z_satellite    0
dtype: int64

##### Consolidando df_solar_wind e df_satellite_pos

In [8]:
df_solar_wind.shape

(8392320, 11)

In [9]:
df_solar_wind["gse_x_satellite"] = np.nan
df_solar_wind["gse_y_satellite"] = np.nan
df_solar_wind["gse_z_satellite"] = np.nan

df_solar_wind["gse_x_satellite"] = np.where(df_solar_wind["gse_x_satellite"].isna() 
                         & (df_solar_wind["source"]  == df_satellite_pos["source"])
                         & (df_solar_wind["period"]  == df_satellite_pos["period"])
                         & (df_solar_wind["dia"]  == df_satellite_pos["dia"]),
                          df_satellite_pos["gse_x_satellite"],
                          np.nan
                          )

df_solar_wind["gse_y_satellite"] = np.where(df_solar_wind["gse_y_satellite"].isna() 
                         & (df_solar_wind["source"]  == df_satellite_pos["source"])
                         & (df_solar_wind["period"]  == df_satellite_pos["period"])
                         & (df_solar_wind["dia"]  == df_satellite_pos["dia"]),
                          df_satellite_pos["gse_y_satellite"],
                          np.nan
                          )

df_solar_wind["gse_z_satellite"] = np.where(df_solar_wind["gse_z_satellite"].isna() 
                         & (df_solar_wind["source"]  == df_satellite_pos["source"])
                         & (df_solar_wind["period"]  == df_satellite_pos["period"])
                         & (df_solar_wind["dia"]  == df_satellite_pos["dia"]),
                          df_satellite_pos["gse_z_satellite"],
                          np.nan
                          )

df_solar_wind.head(3)

Unnamed: 0,period,timedelta,bx_gse,by_gse,bz_gse,bt,density,speed,temperature,source,dia,gse_x_satellite,gse_y_satellite,gse_z_satellite
0,train_a,0 days 00:00:00,-5.55,3.0,1.25,6.8,1.53,383.920013,110237.0,ac,0,1522376.9,143704.6,149496.7
1,train_a,0 days 00:01:00,-5.58,3.16,1.17,6.83,1.69,381.790009,123825.0,ac,0,1522376.9,143704.6,149496.7
2,train_a,0 days 00:02:00,-5.15,3.66,0.85,6.77,1.97,389.109985,82548.0,ac,0,1522376.9,143704.6,149496.7


##### Preparando df_solar_wind e df_sunspots

In [10]:
df_sunspots["dia"] = df_sunspots["timedelta"].dt.days

In [11]:
df_sunspots

Unnamed: 0,period,timedelta,smoothed_ssn,dia
0,train_a,0 days,65.400002,0
1,train_a,13 days,72.000000,13
2,train_a,44 days,76.900002,44
3,train_a,74 days,80.800003,74
4,train_a,105 days,85.400002,105
...,...,...,...,...
187,train_c,2283 days,26.400000,2283
188,train_c,2314 days,29.500000,2314
189,train_c,2344 days,34.500000,2344
190,train_c,2375 days,39.099998,2375


##### Consolidando df_solar_wind e df_sunspots

In [15]:
df_solar_wind["smoothed_ssn"] = np.nan

df_solar_wind["smoothed_ssn"] = np.where(df_solar_wind["smoothed_ssn"].isna()
                         & (df_solar_wind["period"]  == df_sunspots["period"])
                         & (df_solar_wind["dia"]  == df_sunspots["dia"]),
                          df_sunspots["smoothed_ssn"],
                          np.nan
                          )

df_solar_wind.head(3)

ValueError: Can only compare identically-labeled Series objects

##### Preparando df_solar_wind e df_labels

##### Consolidando df_solar_wind e df_labels