In [9]:
import pandas as pd
import numpy as np
import math
import plotly.express as px
import plotly.graph_objects as go
import random
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
import pickle
import os

import plotly.io as pio

# Read the station

In [10]:
# Select station to train the models
stationCode = 'CA91'
stationPath = './all data murcia/' + stationCode + '.csv'

In [11]:
def convertirComa(x):
    if type(x) == str:
        return x.replace(",", ".")
    else:
        return x
def leerEstacionDatos(path):
    estacionDatas = pd.read_csv(path, encoding='ISO-8859-1', sep=";")
    estacionDatas.columns = ['ESTACION', 'MUNICIPIO', 'PARAJE', 'HORAS', 'FECHA', 'ETO','TMAX', 'TMIN', 'HRMAX', 'HRMIN', 'RADMED','VVMED', '-']
    estacionDatas = estacionDatas.drop(columns=['ESTACION', 'MUNICIPIO', 'PARAJE', 'HORAS', '-'])
    estacionDatas = estacionDatas.reset_index().drop(columns='index')
    estacionDatas['FECHA'] = pd.to_datetime(estacionDatas['FECHA'], format="%d/%m/%y")
    estacionDatas.index = estacionDatas['FECHA']
    estacionDatas.drop(columns='FECHA', inplace=True)
    estacionDatas.dropna(inplace=True)
    for i in estacionDatas.columns:
        estacionDatas[i] = pd.to_numeric(estacionDatas[i].apply(lambda x : convertirComa(x)))
    return estacionDatas
estacionDatas = leerEstacionDatos(stationPath)
estacionDatas

Unnamed: 0_level_0,ETO,TMAX,TMIN,HRMAX,HRMIN,RADMED,VVMED
FECHA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-01,1.83,16.60,5.10,80.60,39.80,109.17,2.20
2010-01-02,0.86,19.00,4.40,95.50,39.90,106.13,0.46
2010-01-03,0.84,16.80,4.30,96.30,54.40,89.29,0.60
2010-01-04,0.64,15.40,7.00,95.50,79.00,50.25,0.45
2010-01-05,0.87,17.40,7.80,95.50,53.20,63.21,0.58
...,...,...,...,...,...,...,...
2024-06-23,5.73,28.82,17.59,76.78,35.62,348.35,0.91
2024-06-24,5.25,27.94,17.55,77.99,43.34,317.53,0.90
2024-06-25,5.77,30.19,17.52,84.05,31.34,344.81,0.90
2024-06-26,5.44,29.79,17.69,86.10,35.11,308.05,1.10


In [12]:
# Split train and test set
train = estacionDatas[estacionDatas.index < '2020-01-01']
test = estacionDatas[estacionDatas.index >= '2020-01-01']
print(train)
print(test)

             ETO   TMAX  TMIN  HRMAX  HRMIN  RADMED  VVMED
FECHA                                                     
2010-01-01  1.83  16.60  5.10  80.60  39.80  109.17   2.20
2010-01-02  0.86  19.00  4.40  95.50  39.90  106.13   0.46
2010-01-03  0.84  16.80  4.30  96.30  54.40   89.29   0.60
2010-01-04  0.64  15.40  7.00  95.50  79.00   50.25   0.45
2010-01-05  0.87  17.40  7.80  95.50  53.20   63.21   0.58
...          ...    ...   ...    ...    ...     ...    ...
2019-12-27  0.74  17.05  6.27  91.30  60.71  116.96   0.43
2019-12-28  0.74  15.70  4.63  93.13  61.92  123.14   0.54
2019-12-29  0.72  15.33  6.96  91.92  56.11  100.36   0.43
2019-12-30  0.71  14.64  5.68  90.52  56.52   77.56   0.43
2019-12-31  0.72  14.19  4.01  91.67  59.56   87.39   0.53

[3628 rows x 7 columns]
             ETO   TMAX   TMIN  HRMAX  HRMIN  RADMED  VVMED
FECHA                                                      
2020-01-01  0.79  15.45   4.17  92.48  58.49  125.94   0.62
2020-01-02  0.60  12.71   4.

In [13]:
# Graphic of each variable
fig = go.Figure()
for c in estacionDatas.columns:
    fig.add_trace(go.Scatter(x= estacionDatas.index, y=estacionDatas[c],
                        name=c, mode='lines'))
fig.show()

In [14]:
import plotly.io as pio
pio.templates.default = "simple_white"
corr = estacionDatas.corr(method='pearson')
mask = np.triu(np.ones_like(corr, dtype=bool))
corr = corr.mask(mask)
corr = round(corr,2)
corr.columns = ['ET0', 'Tmax', 'Tmin', 'RHmax', 'RHmin', 'Rs', 'U2']
corr.index = ['ET0', 'Tmax', 'Tmin', 'RHmax', 'RHmin', 'Rs', 'U2']
fig = px.imshow(corr, text_auto=True)

fig.update_coloraxes(showscale=False)
fig.update_layout(
    

    margin=dict(l=0, r=0, t=0, b=0),
    font=dict(
        size=18,
    )
)
'''fig.update_layout(
        title={
            'text': '<b>Correlation of variables ('+stationCode+') </b>',
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'})'''
fig.show()


In [16]:
fig.write_image('./Images/cor'+stationCode+'.pdf')

# Map plot

In [2]:
from folium import plugins
import folium

In [None]:
def getMetricPlot(NoTlFiles, TlFiles, TLPercentages, national, metric):

    df_noTL = pd.DataFrame()
    df_TL = pd.DataFrame()
    for i in range(4):
        no_TL = pd.read_csv(NoTlFiles[i])
        no_TL = no_TL[(no_TL['Model'] == 'M'+str(4-i)) & (no_TL['TLPercentage'] == TLPercentages[i])].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', metric, 'Model']]
        df_noTL = pd.concat([df_noTL, no_TL])
        tL = pd.read_csv(TlFiles[i])
        tL = tL[(tL['Model'] == 'M'+str(4-i)) & (tL['TLPercentage'] == TLPercentages[i])].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', metric, 'Model']]
        df_TL = pd.concat([df_TL, tL])
        #display(df_noTL)
        #display(df_noTL['MAPE'].mean())

    df_merged = pd.merge(df_noTL, df_TL, how='outer', on=['Station', 'Model'])
    df_merged['Station'] = df_merged['Station'].apply(lambda x: x.split(' ')[0]) # '-'
    display(df_merged.groupby('Model').describe())

    locs = pd.read_excel('./locations.xlsx', sheet_name='Sheet1' if national==False else 'Sheet2', usecols='B:F').dropna()
    m = folium.Map([35.714444, -6.916666],zoom_start=5, tiles='https://{s}.basemaps.cartocdn.com/rastertiles/voyager_labels_under/{z}/{x}/{y}{r}.png', 
                attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>', control_scale=True)
    for i in range(len(locs)):
        
        coords = locs['LatLon'][i].split(',')
        name = locs['Name'][i].split(' ')[0]
        try:
            station_df= df_merged.groupby('Station').get_group(name).iloc[::-1].reset_index()
        except:
            print(name, 'not in list.')
            continue
        datos = '<tspan x="15" dy="1em" stroke="blue">'+name+ '</tspan>'
        maxNumberRowText = 0
        for i in range(4):  
            model_df = station_df[station_df['Model'] == 'M'+str(4-i)].reset_index()
            if len(model_df) == 0:
                textNoTl = '*'
                textTl = '*'
            else:
                textNoTl = str(round(model_df[metric+'_x'][0], 2))
                textTl = str(round(model_df[metric+'_y'][0], 2))
            rowtext = '<tspan x="2" dy="1em">' + textNoTl+ ' &#8594; ' + textTl + '</tspan>' #'<tspan x="2" dy="1em">' + str(round(float(textNoTl)-float(textTl),3)) +'</tspan>'
            if len(rowtext) > maxNumberRowText:
                maxNumberRowText = len(rowtext)
            datos = datos + rowtext

        height = '90' 
        folium.Marker(
        location=[float(coords[0]), float(coords[1])],
        tooltip=locs['Name'][i],
        popup=locs['Name'][i],
        
        icon=folium.DivIcon(html=('<svg height="100" width="115" ">'
                                    '<rect  x="1" y="1" width= "' + str(round(maxNumberRowText*2.3)) + '" height="'+height+'" stroke="#38a3a5" stroke-width="1" fill=#c7f9cc />'
                                    '<text x="1" y="1" fill="black" font-size="17" stroke="black" stroke-width="0.5">'+ datos.replace('nan', '*')+'</text>'
                                    '</svg>'))
    ).add_to(m)

    return m

In [208]:
# Murcia estimacion
#NoTlFiles = ['./CI42/MurciaEstimation_NoTL.csv', './CI42/MurciaEstimation_NoTL.csv', './CI42/MurciaEstimation_NoTL.csv', './CR12/MurciaEstimation_NoTL.csv']
#TlFiles = ['./CI42/MurciaEstimation_TL-TrainLayers.csv', './CI42/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-NoTrainLayers.csv']
#TLPercentages = [0.8, 0.8, 0.8, 0.8]

# Murcia forecast
#NoTlFiles = ['./CI42/MurciaForecast_NoTL.csv', './CA91/MurciaForecast_NoTL.csv', './CI42/MurciaForecast_NoTL.csv', './CR12/MurciaForecast_NoTL.csv']
#TlFiles = ['./CA91/MurciaForecast_TL-NoTrainLayers.csv', './CI42/MurciaForecast_TL-TrainLayers.csv', './CI42/MurciaForecast_TL-NoTrainLayers.csv', './CR12/MurciaForecast_TL-NoTrainLayers.csv']
#TLPercentages = [0.9, 0.8, 0.8, 0.6]

'''
# Spain estimatcion
NoTlFiles = ['./CR12/EspEstimation_NoTL.csv', './CR12/EspEstimation_NoTL.csv', './CI42/EspEstimation_NoTL.csv', './CR12/EspEstimation_NoTL.csv']
TlFiles = ['./CR12/EspEstimation_TL-TrainLayers.csv', './CI42/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-NoTrainLayers.csv']
TLPercentages = [0.8, 0.8, 0.8, 0.8]
'''


# Spain forecast
NoTlFiles = ['./CI42/EspForecast_NoTL.csv', './CA91/EspForecast_NoTL.csv', './CA91/EspForecast_NoTL.csv', './CA91/EspForecast_NoTL.csv']
TlFiles = ['./CA91/EspForecast_TL-NoTrainLayers.csv', './CR12/EspForecast_TL-TrainLayers.csv', './CA91/EspForecast_TL-NoTrainLayers.csv', './CR12/EspForecast_TL-NoTrainLayers.csv']
TLPercentages = [0.6, 0.6, 0.6, 0.6]

getMetricPlot(NoTlFiles, TlFiles, TLPercentages, national=True, metric='MAPE')

Unnamed: 0_level_0,MAPE_x,MAPE_x,MAPE_x,MAPE_x,MAPE_x,MAPE_x,MAPE_x,MAPE_x,MAPE_y,MAPE_y,MAPE_y,MAPE_y,MAPE_y,MAPE_y,MAPE_y,MAPE_y
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
M1,27.0,28.815381,9.235909,16.471516,22.079671,28.152677,31.980451,51.582483,27.0,38.993063,12.943658,18.494656,28.801774,39.704013,50.194262,61.332787
M2,27.0,26.632227,6.730975,14.618999,21.135694,27.756164,32.289338,37.154743,27.0,27.497454,10.801665,14.182845,18.494295,26.77374,34.020131,53.206076
M3,27.0,54.249416,18.762107,22.62508,42.361277,55.796123,64.771894,90.178468,27.0,21.086635,5.583519,12.1168,16.955573,21.031484,23.458305,36.634822
M4,27.0,42.015458,14.395357,13.909594,30.731439,44.701447,51.728965,68.530731,27.0,23.0233,5.338396,13.727124,19.694381,21.713882,25.805479,35.127897


In [144]:

path= './CI42/EspForecast_' + 'NoTL' + '.csv'
l = pd.read_csv(path).groupby(['Model', 'TLPercentage'])
l = l.get_group(('M4', 0.6))
display(l.reset_index(drop=True).drop(columns=['Season']).groupby(['Station', 'Model']).describe()[[(    'R2',  'mean'),
            (   'MAE',  'mean'),
            (  'MAPE',  'mean'),
            (  'RMSE',  'mean'),
            ]].iloc[::-1].droplevel(1,1).describe())

Unnamed: 0,R2,MAE,MAPE,RMSE
count,27.0,27.0,27.0,27.0
mean,0.648821,0.826299,42.015458,0.957514
std,0.116926,0.230262,14.395357,0.226052
min,0.263728,0.459669,13.909594,0.564132
25%,0.584206,0.667121,30.731439,0.794232
50%,0.69413,0.769023,44.701447,0.895292
75%,0.735907,0.952638,51.728965,1.087964
max,0.784941,1.40529,68.530731,1.501584


In [129]:
no_TL = pd.read_csv(path)
no_TL = no_TL[(no_TL['Model'] == 'M4') & (no_TL['TLPercentage'] == 0.6)].reset_index(drop=True).drop(columns=['Season'])
display(no_TL.groupby(['Station', 'Model']).mean())

Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE,TLPercentage
Station,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A19 Villena,M4,0.718276,0.668851,27.166247,0.821054,0.6
AB05 Albacete,M4,0.60059,0.891343,44.701447,1.033343,0.6
AL02 Almería,M4,0.535656,1.078387,47.928203,1.184729,0.6
AL12 Tíjola,M4,0.573028,0.58649,19.61001,0.719397,0.6
BA09 Villafranca de los Barros,M4,0.660198,0.721396,32.732529,0.865178,0.6
BU04 Tardajos,M4,0.678055,0.67572,53.641061,0.79208,0.6
BU07 Santa Gadea del Cid,M4,0.740364,0.628102,37.77815,0.751407,0.6
C02 Boimorto,M4,0.740014,0.459669,28.730349,0.564132,0.6
CA07 Jimena de la Frontera,M4,0.512795,0.769023,38.642482,0.895292,0.6
CR03 Porzuna,M4,0.697125,0.691767,35.831037,0.833628,0.6


In [171]:
for f in ['NoTL']:
    path= './CI42/EspForecast_' + f + '.csv'
    print(f)
    df = pd.read_csv(path).groupby(['Model', 'TLPercentage'])

    display(df.describe()[[(    'R2',  'mean'),
            (   'MAE',  'mean'),
            (  'MAPE',  'mean'),
            (  'RMSE',  'mean'),
            ]].iloc[::-1].droplevel(1,1))
    
no_TL = pd.read_csv(path)
no_TL = no_TL[(no_TL['Model'] == 'M4') & (no_TL['TLPercentage'] == 0.6)].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAPE', 'Model']]
display(no_TL.groupby(['Station', 'Model']).mean().describe())

NoTL


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.9,0.718423,0.854564,41.753355,0.994673
M4,0.8,0.697847,0.838906,41.708926,0.972341
M4,0.7,0.685912,0.826114,41.440358,0.959117
M4,0.6,0.65242,0.825823,42.143503,0.956156
M3,0.9,0.656417,1.22617,58.739721,1.404452
M3,0.8,0.651516,1.21627,58.936829,1.388129
M3,0.7,0.638284,1.201711,58.544598,1.372343
M3,0.6,0.592055,1.200364,59.356848,1.361658
M2,0.9,0.616075,0.661726,32.631949,0.79139
M2,0.8,0.56081,0.654201,32.988199,0.780586


Unnamed: 0,MAPE
count,27.0
mean,42.015458
std,14.395357
min,13.909594
25%,30.731439
50%,44.701447
75%,51.728965
max,68.530731


In [94]:
for f in ['NoTL']:
    path= './CI42/EspForecast_' + f + '.csv'
    print(f)
    l = pd.read_csv(path).groupby(['Model', 'TLPercentage'])
    l = l.get_group(('M4', 0.6)).drop(columns='Season')
    l = l.groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
    display(l[l['Model'] == 'M4']['MAE'].mean())

NoTL


0.8262989158587075

In [44]:
for f in ['NoTL', 'TL-TrainLayers', 'TL-NoTrainLayers']:
    path= './CI42/EspForecast_' + f + '.csv'
    print(f)
    display(pd.read_csv(path).groupby(['Model', 'TLPercentage']).describe()[[(    'R2',  'mean'),
            (   'MAE',  'mean'),
            (  'MAPE',  'mean'),
            (  'RMSE',  'mean'),
            ]].iloc[::-1].droplevel(1,1))

NoTL


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.9,0.718423,0.854564,41.753355,0.994673
M4,0.8,0.697847,0.838906,41.708926,0.972341
M4,0.7,0.685912,0.826114,41.440358,0.959117
M4,0.6,0.65242,0.825823,42.143503,0.956156
M3,0.9,0.656417,1.22617,58.739721,1.404452
M3,0.8,0.651516,1.21627,58.936829,1.388129
M3,0.7,0.638284,1.201711,58.544598,1.372343
M3,0.6,0.592055,1.200364,59.356848,1.361658
M2,0.9,0.616075,0.661726,32.631949,0.79139
M2,0.8,0.56081,0.654201,32.988199,0.780586


TL-TrainLayers


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.9,0.680384,0.631419,33.52362,0.768078
M4,0.8,0.6609,0.541077,27.681782,0.672812
M4,0.7,0.670881,0.523815,25.958216,0.649312
M4,0.6,0.660502,0.530735,25.608264,0.647958
M3,0.9,0.680462,0.593091,30.380576,0.740918
M3,0.8,0.66988,0.530781,28.834682,0.669394
M3,0.7,0.677279,0.496331,25.433691,0.624027
M3,0.6,0.662726,0.479267,23.171112,0.605092
M2,0.9,0.548872,0.735021,45.965186,0.891325
M2,0.8,0.551743,0.641486,39.023688,0.78081


TL-NoTrainLayers


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.9,0.761007,0.483678,22.951391,0.605329
M4,0.8,0.743098,0.462336,22.229394,0.58099
M4,0.7,0.732951,0.465943,23.850225,0.582124
M4,0.6,0.707538,0.462923,24.569355,0.579729
M3,0.9,0.689664,0.562239,27.631028,0.712841
M3,0.8,0.67996,0.56424,27.885016,0.709643
M3,0.7,0.664829,0.526062,25.907887,0.664114
M3,0.6,0.630216,0.527292,26.623403,0.666349
M2,0.9,0.619202,0.56044,26.964401,0.707655
M2,0.8,0.574076,0.532391,27.189077,0.67111


In [43]:
ci42_M4 = pd.read_csv('./CI42/MurciaEstimation_NoTL.csv')
ci42_M4

Unnamed: 0,Season,R2,MAE,MAPE,RMSE,Station,Model,TLPercentage
0,Spring,0.981101,0.140533,3.215428,0.246893,AL41,M4,0.99
1,Summer,0.983379,0.132512,2.467908,0.197066,AL41,M4,0.99
2,Autumn,0.975834,0.136245,6.862111,0.175337,AL41,M4,0.99
3,Winter,0.968557,0.139000,6.505582,0.194698,AL41,M4,0.99
4,Spring,0.973950,0.151219,4.178047,0.199056,CA73,M4,0.99
...,...,...,...,...,...,...,...,...
443,Winter,0.416615,0.658382,24.491565,0.960083,LO51,M1,0.80
444,Spring,0.784305,0.587072,13.777503,0.745221,MO22,M1,0.80
445,Summer,0.812312,0.688466,14.362792,0.902754,MO22,M1,0.80
446,Autumn,0.492172,0.638675,26.438352,0.839192,MO22,M1,0.80


In [44]:
ci42_M4_tl = pd.read_csv('./CI42/MurciaEstimation_TL-TrainLayers.csv')
ci42_M4_tl

Unnamed: 0,Season,R2,MAE,MAPE,RMSE,Station,Model,TLPercentage
0,Spring,0.988383,0.252688,5.293570,0.295556,AL41,M4,0.99
1,Summer,0.980331,0.295818,6.318607,0.384549,AL41,M4,0.99
2,Autumn,0.960212,0.162367,9.415139,0.218089,AL41,M4,0.99
3,Winter,0.968383,0.227932,10.455058,0.284998,AL41,M4,0.99
4,Spring,0.962399,0.233236,6.074124,0.271300,CA73,M4,0.99
...,...,...,...,...,...,...,...,...
443,Winter,0.411833,0.721090,37.280338,0.901497,LO51,M1,0.80
444,Spring,0.809484,0.814969,16.448130,0.997147,MO22,M1,0.80
445,Summer,0.798982,0.423259,8.601061,0.612339,MO22,M1,0.80
446,Autumn,0.540365,0.471485,27.060519,0.583974,MO22,M1,0.80


In [45]:
noTL = ci42_M4[ci42_M4['TLPercentage'] == 0.8].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
noTL

Unnamed: 0,Station,MAE,Model
0,AL41,0.768705,M1
1,AL41,0.6129,M2
2,AL41,0.220259,M3
3,AL41,0.134857,M4
4,CA73,0.518342,M1
5,CA73,0.585851,M2
6,CA73,0.458796,M3
7,CA73,0.265828,M4
8,CA91,0.553557,M1
9,CA91,0.47892,M2


In [47]:
tl = ci42_M4_tl[ (ci42_M4_tl['TLPercentage'] == 0.8)].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
tl

Unnamed: 0,Station,MAE,Model
0,AL41,0.818375,M1
1,AL41,0.428664,M2
2,AL41,0.198255,M3
3,AL41,0.11162,M4
4,CA73,0.390744,M1
5,CA73,0.238358,M2
6,CA73,0.182423,M3
7,CA73,0.106112,M4
8,CA91,0.39219,M1
9,CA91,0.200217,M2


In [102]:
NoTlFiles = ['./CI42/EsEstimation_NoTL.csv', './CI42/MurciaEstimation_NoTL.csv', './CI42/MurciaEstimation_NoTL.csv', './CR12/MurciaEstimation_NoTL.csv']
TlFiles = ['./CI42/MurciaEstimation_TL-TrainLayers.csv', './CI42/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-NoTrainLayers.csv']
TLPercentages = [0.8, 0.8, 0.8, 0.8]

df_noTL = pd.DataFrame()
df_TL = pd.DataFrame()
for i in range(4):
    no_TL = pd.read_csv(NoTlFiles[i])
    no_TL = no_TL[(no_TL['Model'] == 'M'+str(4-i)) & (no_TL['TLPercentage'] == TLPercentages[i])].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
    df_noTL = pd.concat([df_noTL, no_TL])
    tL = pd.read_csv(TlFiles[i])
    tL = tL[(tL['Model'] == 'M'+str(4-i)) & (tL['TLPercentage'] == TLPercentages[i])].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
    df_TL = pd.concat([df_TL, tL])

df_merged = pd.merge(df_noTL, df_TL, how='outer', on=['Station', 'Model'])
df_merged

Unnamed: 0,Station,MAE_x,Model,MAE_y
0,A19 Villena,0.154497,M4,0.109001
1,AB05 Albacete,0.208174,M4,0.122621
2,AL02 Almería,0.196800,M4,0.124590
3,AL12 Tíjola,0.159662,M4,0.126801
4,BA09 Villafranca de los Barros,0.190229,M4,0.128482
...,...,...,...,...
103,SG01 Gomezserracín,0.602833,M1,0.519110
104,TE05 Teruel,0.499600,M1,0.361633
105,V05 Cheste,0.496919,M1,0.382261
106,Z04 Fabara,0.723489,M1,0.555163


In [55]:
station_df= df_merged.groupby('Station').get_group('CI42').iloc[::-1].reset_index()
station_df

Unnamed: 0,index,Station,MAE_x,Model,MAE_y
0,28,CI42,,M2,0.321544
1,24,CI42,0.52579,M1,0.437592


In [67]:
station_df[station_df['Model'] == 'M4'].fillna('*')

Unnamed: 0,index,Station,MAE_x,Model,MAE_y


In [50]:
pd.merge(noTL, tl, how='outer', on=['Station', 'Model'])

Unnamed: 0,Station,MAE_x,Model,MAE_y
0,AL41,0.768705,M1,0.818375
1,AL41,0.6129,M2,0.428664
2,AL41,0.220259,M3,0.198255
3,AL41,0.134857,M4,0.11162
4,CA73,0.518342,M1,0.390744
5,CA73,0.585851,M2,0.238358
6,CA73,0.458796,M3,0.182423
7,CA73,0.265828,M4,0.106112
8,CA91,0.553557,M1,0.39219
9,CA91,0.47892,M2,0.200217


In [74]:
df_merged

Unnamed: 0,Station,MAE_x,Model,MAE_y
0,AL41,0.134857,M4,0.11162
1,CA73,0.265828,M4,0.106112
2,CA91,0.183023,M4,0.11448
3,CR12,0.161389,M4,0.104409
4,JU71,0.139979,M4,0.12498
5,LO51,0.227251,M4,0.139616
6,MO22,0.142305,M4,0.111902
7,AL41,0.220259,M3,0.198255
8,CA73,0.458796,M3,0.182423
9,CA91,0.172905,M3,0.125564


In [109]:
df_merged['Station'] = df_merged['Station'].apply(lambda x: x.split(' ')[0])
df_merged

Unnamed: 0,Station,MAE_x,Model,MAE_y
0,A19,0.154497,M4,0.109001
1,AB05,0.208174,M4,0.122621
2,AL02,0.196800,M4,0.124590
3,AL12,0.159662,M4,0.126801
4,BA09,0.190229,M4,0.128482
...,...,...,...,...
103,SG01,0.602833,M1,0.519110
104,TE05,0.499600,M1,0.361633
105,V05,0.496919,M1,0.382261
106,Z04,0.723489,M1,0.555163


In [None]:
NoTlFiles = ['./CI42/EspEstimation_NoTL.csv', './CI42/EspEstimation_NoTL.csv', './CI42/EspEstimation_NoTL.csv', './CR12/EspEstimation_NoTL.csv']
TlFiles = ['./CI42/EspEstimation_TL-TrainLayers.csv', './CI42/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-NoTrainLayers.csv']
TLPercentages = [0.8, 0.8, 0.8, 0.8]

df_noTL = pd.DataFrame()
df_TL = pd.DataFrame()
for i in range(4):
    no_TL = pd.read_csv(NoTlFiles[i])
    no_TL = no_TL[(no_TL['Model'] == 'M'+str(4-i)) & (no_TL['TLPercentage'] == TLPercentages[i])].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
    df_noTL = pd.concat([df_noTL, no_TL])
    tL = pd.read_csv(TlFiles[i])
    tL = tL[(tL['Model'] == 'M'+str(4-i)) & (tL['TLPercentage'] == TLPercentages[i])].drop(columns=['Season']).groupby(['Station', 'Model']).mean().reset_index()[['Station', 'MAE', 'Model']]
    df_TL = pd.concat([df_TL, tL])

df_merged = pd.merge(df_noTL, df_TL, how='outer', on=['Station', 'Model'])
df_merged['Station'] = df_merged['Station'].apply(lambda x: x.split(' ')[0])

national=True

locs = pd.read_excel('./locations.xlsx', sheet_name='Sheet1' if national==False else 'Sheet2', usecols='B:F').dropna()
m = folium.Map([35.714444, -6.916666],zoom_start=5, tiles='https://{s}.basemaps.cartocdn.com/rastertiles/voyager_labels_under/{z}/{x}/{y}{r}.png', 
               attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>')
for i in range(len(locs)):
    
    coords = locs['LatLon'][i].split(',')
    name = locs['Name'][i].split(' ')[0]

    try:
        station_df= df_merged.groupby('Station').get_group(name).iloc[::-1].reset_index()
    except:
        print(name, 'not in list.')
        continue
    datos = '<tspan x="15" dy="1em" stroke="blue">'+name+ '</tspan>'
    
    for i in range(4):  
        model_df = station_df[station_df['Model'] == 'M'+str(4-i)].reset_index()
        if len(model_df) == 0:
            textNoTl = '*'
            textTl = '*'
        else:
            textNoTl = str(round(model_df['MAE_x'][0], 2))
            textTl = str(round(model_df['MAE_y'][0], 2))
        datos = datos + '<tspan x="2" dy="1em">' + textNoTl+ ' &#8594; ' + textTl + '</tspan>'


    height = '90' 
    folium.Marker(
    location=[float(coords[0]), float(coords[1])],
    tooltip=locs['Name'][i],
    popup=locs['Name'][i],
    
    # nacional: 95, 95, 17.5
    # regional: 92, 92, 17 -> normalized height 55
    icon=folium.DivIcon(html=('<svg height="100" width="100" ">'
                                '<rect  x="1" y="1" width= "95" height="'+height+'" stroke="#38a3a5" stroke-width="1" fill=#c7f9cc />'
                                '<text x="1" y="1" fill="black" font-size="17" stroke="black" stroke-width="0.5">'+ datos.replace('nan', '*')+'</text>'
                                '</svg>'))
).add_to(m)

m

In [15]:
ci42_M4[(ci42_M4['Model'] == 'M4') & (ci42_M4['TLPercentage'] == 0.8)].drop(columns=['Season', 'Model']).groupby('Station').mean().reset_index()[['Station', 'MAE']]

Unnamed: 0,Station,MAE
0,AL41,0.134857
1,CA73,0.265828
2,CA91,0.183023
3,CR12,0.161389
4,JU71,0.139979
5,LO51,0.227251
6,MO22,0.142305


In [16]:

for f in ['NoTL', 'TL-TrainLayers', 'TL-NoTrainLayers']:
    path= './CI42/MurciaEstimation_' + f + '.csv'
    print(f)
    display(pd.read_csv(path).groupby(['Model', 'TLPercentage']).describe()[[(    'R2',  'mean'),
            (   'MAE',  'mean'),
            (  'MAPE',  'mean'),
            (  'RMSE',  'mean'),
            ]].iloc[::-1].droplevel(1,1))

NoTL


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.99,0.962152,0.177947,7.721265,0.238565
M4,0.95,0.96173,0.178076,7.728514,0.239208
M4,0.9,0.962214,0.179331,7.790133,0.241051
M4,0.8,0.96216,0.179233,7.888593,0.238052
M3,0.99,0.925801,0.253149,10.642592,0.329535
M3,0.95,0.924673,0.25387,10.68228,0.330835
M3,0.9,0.926061,0.254671,10.774697,0.332097
M3,0.8,0.92542,0.255821,10.946213,0.332965
M2,0.99,0.767043,0.492352,18.132713,0.632211
M2,0.95,0.764067,0.494716,18.18853,0.635678


TL-TrainLayers


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.99,0.965288,0.224582,9.082879,0.275766
M4,0.95,0.968665,0.178904,7.370501,0.226313
M4,0.9,0.974505,0.145372,6.114789,0.186251
M4,0.8,0.977921,0.11616,4.760921,0.152744
M3,0.99,0.930871,0.253185,10.729804,0.324545
M3,0.95,0.935524,0.223323,9.101879,0.290862
M3,0.9,0.943502,0.194063,8.105178,0.260095
M3,0.8,0.948306,0.174711,7.132944,0.237923
M2,0.99,0.758829,0.545768,21.466617,0.678741
M2,0.95,0.745933,0.472403,19.607726,0.600835


TL-NoTrainLayers


Unnamed: 0_level_0,Unnamed: 1_level_0,R2,MAE,MAPE,RMSE
Model,TLPercentage,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M4,0.99,0.965288,0.204834,8.627898,0.252473
M4,0.95,0.965536,0.17431,7.719116,0.222608
M4,0.9,0.969346,0.150691,6.614448,0.196626
M4,0.8,0.971879,0.129578,5.444397,0.175055
M3,0.99,0.932469,0.225414,9.380238,0.293271
M3,0.95,0.931783,0.21304,8.672803,0.280011
M3,0.9,0.933352,0.205749,8.677693,0.273495
M3,0.8,0.933551,0.197291,8.12222,0.266359
M2,0.99,0.774264,0.445964,16.368311,0.580936
M2,0.95,0.775569,0.396268,14.665111,0.531353


In [17]:
df = pd.read_csv('./CA91/EspEstimation_NoTL.csv')
for metric in ['R2', 'MAE', 'MAPE']:
    fig = px.box(df, x="Model", y=metric, color="TLPercentage", hover_name='Station', hover_data='Season')
    fig.update_traces(boxmean=True)
    fig.show()

In [18]:
df = pd.read_csv('./CA91/EspEstimation_TL-TrainLayers.csv')
for metric in ['R2', 'MAE', 'MAPE']:
    fig = px.violin(df, x="Model", y=metric, color="TLPercentage", hover_name='Station', hover_data='Season')
    #fig.update_traces(boxmean=True)
    fig.show()

# Errors between WB and real data (Murcia)

In [199]:
def hourlyToDaily(df):
    punto = [l[1] for l in list(df.groupby([df['dates'].dt.date]))]
    FECHA = []
    TMAX = []
    TMIN = []
    HRMAX = []
    HRMIN = []
    VVMED = []
    RADMED = []
    for p in punto:
        FECHA.append(pd.to_datetime(p['dates']).dt.date.iloc[0])
        TMAX.append(p['temp'].max())
        TMIN.append(p['temp'].min())
        HRMAX.append(p['rh'].max())
        HRMIN.append(p['rh'].min())
        VVMED.append(p['wind'].mean())
        RADMED.append(p['solar_rad'].mean())

    return pd.DataFrame({
        "FECHA": pd.to_datetime(FECHA),
        "TMAX": TMAX,
        "TMIN": TMIN,
        "HRMAX": HRMAX,
        "HRMIN": HRMIN,
        "VVMED": VVMED,
        "RADMED": RADMED
    })

def medidasError(y_test, y_pred, txt):
    medidas =[]
    medidas.append(txt)
    medidas.append(np.corrcoef(y_test, y_pred)[0][1]**2)
    medidas.append(mean_absolute_error(y_true=y_test,y_pred=y_pred))
    medidas.append(mean_absolute_percentage_error(y_true=y_test,y_pred=y_pred)*100)
    medidas.append(mean_squared_error(y_true=y_test,y_pred=y_pred,squared=False)/np.mean(y_test)*100)
    medidas.append(mean_squared_error(y_true=y_test,y_pred=y_pred,squared=False))
    return medidas

def compareHourlyDaily(hourly, dfDaily, weatherModel):
    hourly.columns = ['dates', 'temp', 'rh', 'wind', 'solar_rad']
    hourly['dates'] = pd.to_datetime(hourly['dates'])
    hourly = hourlyToDaily(hourly).add_suffix('-'+weatherModel)
    hourly['VVMED-'+weatherModel] = hourly['VVMED-'+weatherModel].apply(lambda x: x*4.87/np.log(67.8*10-5.42))
    hourly = hourly.rename(index=str, columns={'FECHA-'+weatherModel:'FECHA'})
    df_join = pd.merge(dfDaily.reset_index(), hourly, how='inner', on='FECHA')
    
    #fig = go.Figure()
    all_medidas = []
    for c in df_join.columns[1:]:

        var = c.split('-')[0]
        if (var != 'ETO') & (weatherModel not in c):
            all_medidas.append(medidasError(df_join[var], df_join[c+'-'+weatherModel], var))
        #fig.add_trace(go.Scatter(x=df_join['FECHA'], y=df_join[c],
        #                        name=c, mode='lines'))
    #fig.show()
    return pd.DataFrame(all_medidas, columns=['Param', 'R2', 'MAE', 'MAPE', 'CVRMSE', 'RMSE'])
    #return df_join

In [71]:

def convertirComa(x):
    if type(x) == str:
        return x.replace(",", ".")
    else:
        return x
def leerEstacionDatos(path):
    estacionDatas = pd.read_csv(path, encoding='ISO-8859-1', sep=";")
    estacionDatas.columns = ['ESTACION', 'MUNICIPIO', 'PARAJE', 'HORAS', 'FECHA', 'ETO','TMAX', 'TMIN', 'HRMAX', 'HRMIN', 'RADMED','VVMED', '-']
    estacionDatas = estacionDatas.drop(columns=['ESTACION', 'MUNICIPIO', 'PARAJE', 'HORAS', '-'])
    estacionDatas = estacionDatas.reset_index().drop(columns='index')
    estacionDatas['FECHA'] = pd.to_datetime(estacionDatas['FECHA'], format="%d/%m/%y")
    estacionDatas.index = estacionDatas['FECHA']
    estacionDatas.drop(columns='FECHA', inplace=True)
    estacionDatas.dropna(inplace=True)
    for i in estacionDatas.columns:
        estacionDatas[i] = pd.to_numeric(estacionDatas[i].apply(lambda x : convertirComa(x)))
    return estacionDatas

all_errors = pd.DataFrame()
dir = './forecastTest/'
for station in os.listdir(dir):

    print(station)
    pred = pd.read_csv(dir+station+'/WB-'+station+'.csv')[['dates', 'temp', 'rh', 'wind', 'solar_rad']]

    real = leerEstacionDatos('./all data murcia/' + station.split('-')[0] + '.csv') 
    errors = compareHourlyDaily(pred, real, 'WB')
    errors['Station'] = station
    all_errors = pd.concat([all_errors, errors])
all_errors['Station']=all_errors['Station'].apply(lambda x: x.split('-')[0])
all_errors.drop(columns=['MAPE','R2', 'RMSE'],inplace=True)
all_errors

AL41-alhama
CA73-cartagena
CA91-fuenteAlamo
CI42-cieza
CR12-caravaca
JU71-jumilla
LO51-aguilas
MO22-molinaSegura


Unnamed: 0,Param,MAE,CVRMSE,Station
0,TMAX,0.850539,4.331312,AL41
1,TMIN,2.356712,23.876027,AL41
2,HRMAX,7.408976,11.746904,AL41
3,HRMIN,5.656981,22.128825,AL41
4,RADMED,17.802895,12.279699,AL41
5,VVMED,0.598271,43.282332,AL41
0,TMAX,0.75186,4.309794,CA73
1,TMIN,1.200027,9.102686,CA73
2,HRMAX,4.67973,7.367766,CA73
3,HRMIN,5.610701,14.607453,CA73


In [73]:
all_errors.groupby('Param').describe()

Unnamed: 0_level_0,MAE,MAE,MAE,MAE,MAE,MAE,MAE,MAE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Param,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
HRMAX,8.0,7.148452,1.289186,4.67973,6.595255,7.537508,7.774228,8.949973,8.0,11.580677,2.345256,7.367766,10.233107,11.99013,12.994771,14.780987
HRMIN,8.0,5.335667,0.649154,3.828118,5.308427,5.471184,5.700646,5.833602,8.0,19.97085,3.435121,14.607453,17.993251,20.852513,21.912671,24.971286
RADMED,8.0,20.424941,3.192968,17.344831,18.391786,19.276625,21.606689,26.737215,8.0,14.397379,2.349237,12.279699,12.988642,13.465496,15.3056,19.389881
TMAX,8.0,0.932535,0.183968,0.734173,0.800566,0.900724,1.010383,1.252151,8.0,4.82341,0.76213,3.712041,4.325933,4.642985,5.441168,5.972492
TMIN,8.0,1.474561,0.518062,0.829435,1.187372,1.34084,1.607132,2.356712,8.0,15.305685,7.324314,6.894975,11.49703,12.926528,17.975971,28.414305
VVMED,8.0,0.64356,0.253294,0.332846,0.47445,0.56399,0.837635,1.020875,8.0,62.004493,39.317699,22.826998,34.615478,42.498521,95.406088,127.844174


In [None]:
'''
fig = px.scatter(all_errors, x='Station', y='R2', color='Param')
fig.update_traces(marker_size=18)
fig.update_layout(scattermode="group", scattergap=0.7)
fig.show()
'''

In [64]:
results = all_errors.melt(['Station', 'Param'], var_name='Error', value_name='Error values')
results.columns = ['Station', 'Climatic Variable', 'Error', 'Error values']
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('HR', 'RH'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('MAX', 'max'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('MIN', 'min'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('RADMED', 'Rs'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('VVMED', 'U<sub>2</sub>'))
results

Unnamed: 0,Station,Climatic Variable,Error,Error values
0,AL41,Tmax,MAE,0.850539
1,AL41,Tmin,MAE,2.356712
2,AL41,RHmax,MAE,7.408976
3,AL41,RHmin,MAE,5.656981
4,AL41,Rs,MAE,17.802895
...,...,...,...,...
91,MO22,Tmin,CVRMSE,12.295145
92,MO22,RHmax,CVRMSE,12.233355
93,MO22,RHmin,CVRMSE,21.840620
94,MO22,Rs,CVRMSE,13.141720


In [65]:
fig = px.scatter(results, x='Station', y='Error values', color='Climatic Variable', 
                facet_col='Error', facet_col_wrap=1,  height=800, width=1200,)
fig.update_traces(marker_size=16)
fig.update_layout(scattermode="group", scattergap=0.7)
fig.update_yaxes(matches=None)
fig.for_each_xaxis(lambda yaxis: yaxis.update(showticklabels=True))
fig.for_each_annotation(lambda a: a.update(text=''))
fig.update_layout(
    yaxis2=dict(
        title_text="MAE (mm d<sup>-1</sup>)",
    ),
    yaxis=dict(
        title_text="CVRMSE (%)",
    )
)
'''
fig.update_layout(
    yaxis=dict(
        title_text="RMSE (mm d<sup>-1</sup>)",
    ),    
    yaxis2=dict(
        title_text="CVRMSE (%)",
    ),  
    yaxis3=dict(
        title_text="MAE (mm d<sup>-1</sup>)",
    ),
    yaxis4=dict(
        title_text="R<sup>2</sup>",
    ),  
)
'''

fig.show()
fig.write_image('./Images/WBErros.pdf')

# Errors between NOAA and real data (Spain)

In [None]:
locSiar = pd.read_excel('./locations.xlsx', sheet_name='Sheet2', usecols='B:F').dropna()
locSiar

Unnamed: 0,Name,Location,LatLon,Type,Alt
0,AL02 Almería,"36°50'07.5""N 2°24'08.8""W","36.835404,-2.402453",Siar,5.0
1,AL12 Tíjola,"37°22'42.6""N 2°27'34.7""W","37.67851,-2.45965",Siar,776.0
2,AB05 Albacete,"38°56'57.7""N 1°54'06.4""W","39.339356,-2.001778",Siar,689.0
3,GC05 Arucas,"28°07'46.1""N 15°30'50.4""W","28.129458,-15.514004",Siar,220.0
4,GC07 Tinajo,"29°03'00.1""N 13°39'34.5""W","29.350031,-13.659593",Siar,254.0
5,IB04 Son Ferriol,"39°33'44.4""N 2°43'34.0""E","39.562324,2.726102",Siar,8.0
6,IB01 Santa Eulalia,"39°00'36.2""N 1°26'23.7""E","39.010055,1.439925",Siar,120.0
7,M03 Aranjuez,"40°02'29.9""N 3°37'49.5""W","40.041647,-3.630421",Siar,486.0
8,V05 Cheste,"39°31'08.0""N 0°44'40.0""W","39.518889,-0.744439",Siar,315.0
9,A19 Villena,"38°35'43.8""N 0°52'31.3""W","38.595500, -0.875361",Siar,488.0


In [200]:
all_errors = pd.DataFrame()
for index, row in locSiar.iterrows():
    stationCode = row['Name']
    print(stationCode)

    siarPred = pd.read_csv('./forecastSiar/'+stationCode+'.csv')
    
    siarReal = pd.read_csv('./siarRealDataForForecast/'+stationCode+'.csv')
    siarReal['Radiacion'] = siarReal['Radiacion'].apply(lambda x: x / 0.0864)
    siarReal.columns = ['FECHA', 'TMAX', 'TMIN', 'HRMAX', 'HRMIN', 'VVMED', 'RADMED', 'ETO']
    siarReal['FECHA'] = pd.to_datetime(siarReal['FECHA'])
    siarReal.set_index('FECHA', inplace=True)
    siarReal.dropna(inplace=True)

    errors = compareHourlyDaily(siarPred, siarReal, 'NOAA')
    errors['Station'] = stationCode
    all_errors = pd.concat([all_errors, errors])
all_errors['Station']=all_errors['Station'].apply(lambda x: x.split(' ')[0])
all_errors.drop(columns=['MAPE','R2', 'RMSE'],inplace=True)
all_errors

AL02 Almería
AL12 Tíjola
AB05 Albacete
GC05 Arucas
GC07 Tinajo
IB04 Son Ferriol
IB01 Santa Eulalia
M03 Aranjuez
V05 Cheste
A19 Villena
TE05 Teruel
GR02 Puebla de Don Fadrique
SA01 Ciudad Rodrigo
BA09 Villafranca de los Barros
CA07 Jimena de la Frontera
SE19 IFAPA Centro Las Torres-Tomejil
BU04 Tardajos
LE09 Santas Martas
C02 Boimorto
Z04 Fabara
HU14 Banastón
CR03 Porzuna
J06 Alcaudete
Z07 Sádaba
SG01 Gomezserracín
GC09 Antigua - Pozo Negro
BU07 Santa Gadea del Cid


Unnamed: 0,Param,MAE,CVRMSE,Station
0,TMAX,1.391798,6.669603,AL02
1,TMIN,3.253075,24.651003,AL02
2,HRMAX,14.938486,20.192777,AL02
3,HRMIN,7.192784,21.856666,AL02
4,VVMED,1.566222,174.014879,AL02
...,...,...,...,...
1,TMIN,2.562131,44.489977,BU07
2,HRMAX,8.902674,12.876762,BU07
3,HRMIN,7.595134,22.613456,BU07
4,VVMED,0.759471,54.317033,BU07


In [201]:
all_errors.groupby('Param').describe()

Unnamed: 0_level_0,MAE,MAE,MAE,MAE,MAE,MAE,MAE,MAE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE,CVRMSE
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Param,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
HRMAX,27.0,12.52609,4.395629,2.499015,9.579591,12.297273,14.980233,23.766925,27.0,17.585974,5.0194,5.713848,14.587808,17.213979,20.826093,29.829505
HRMIN,27.0,7.813011,3.435207,4.602674,5.962941,7.286257,7.973543,22.610738,27.0,25.453565,9.286565,16.377984,20.32723,22.613456,26.40282,58.850179
RADMED,27.0,23.609483,6.634608,15.092787,18.826714,22.073666,27.946162,38.854935,27.0,17.638894,4.828975,10.978428,14.612763,16.357187,20.53134,30.058125
TMAX,27.0,1.680696,0.862415,0.94354,1.225076,1.391798,1.677901,5.087292,27.0,9.00699,3.853691,4.660855,6.616265,8.432547,9.408799,23.575759
TMIN,27.0,2.875941,1.063368,0.869227,2.192884,2.931353,3.38586,4.713963,27.0,39.613446,23.10773,7.95549,25.071151,36.621242,53.210197,119.882651
VVMED,27.0,1.250889,0.440198,0.391167,0.90452,1.213171,1.59119,1.977933,27.0,111.155015,66.450162,30.693487,57.882247,91.230269,167.671697,251.910128


In [202]:
results = all_errors.melt(['Station', 'Param'], var_name='Error', value_name='Error values')
results.columns = ['Station', 'Climatic Variable', 'Error', 'Error values']
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('HR', 'RH'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('MAX', 'max'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('MIN', 'min'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('RADMED', 'Rs'))
results['Climatic Variable'] = results['Climatic Variable'].apply(lambda x: x.replace('VVMED', 'U<sub>2</sub>'))
results

Unnamed: 0,Station,Climatic Variable,Error,Error values
0,AL02,Tmax,MAE,1.391798
1,AL02,Tmin,MAE,3.253075
2,AL02,RHmax,MAE,14.938486
3,AL02,RHmin,MAE,7.192784
4,AL02,U<sub>2</sub>,MAE,1.566222
...,...,...,...,...
319,BU07,Tmin,CVRMSE,44.489977
320,BU07,RHmax,CVRMSE,12.876762
321,BU07,RHmin,CVRMSE,22.613456
322,BU07,U<sub>2</sub>,CVRMSE,54.317033


In [82]:
def compareTLNoTL(type, TLPercentage, name):
    df_total = pd.DataFrame()
    for i in range(4):
        df_reg = pd.read_csv('./CR12/Murcia'+type+'_TL-NoTrainLayers.csv')
        df_reg = pd.concat([df_reg, pd.read_csv('./CI42/Murcia'+type+'_TL-NoTrainLayers.csv')])
        df_reg = pd.concat([df_reg, pd.read_csv('./CA91/Murcia'+type+'_TL-NoTrainLayers.csv')])
        df_reg = df_reg[df_reg['TLPercentage'] == TLPercentage]
        df_reg = df_reg[df_reg['Model'] == 'M'+str(4-i)]

        df_nat = pd.read_csv('./CR12/Esp'+type+'_TL-NoTrainLayers.csv')
        df_nat = pd.concat([df_nat, pd.read_csv('./CI42/Esp'+type+'_TL-NoTrainLayers.csv')])
        df_nat = pd.concat([df_nat, pd.read_csv('./CA91/Esp'+type+'_TL-NoTrainLayers.csv')])
        df_nat = df_nat[df_nat['TLPercentage'] == TLPercentage]
        df_nat = df_nat[df_nat['Model'] == 'M'+str(4-i)]

        df_total = pd.concat([df_reg, df_nat, df_total])
    df_total['Method'] = 'Fr'

    df_total_TL = pd.DataFrame()
    for i in range(4):
        df_reg = pd.read_csv('./CR12/Murcia'+type+'_TL-TrainLayers.csv')
        df_reg = pd.concat([df_reg, pd.read_csv('./CI42/Murcia'+type+'_TL-TrainLayers.csv')])
        df_reg = pd.concat([df_reg, pd.read_csv('./CA91/Murcia'+type+'_TL-TrainLayers.csv')])
        df_reg = df_reg[df_reg['TLPercentage'] == TLPercentage]
        df_reg = df_reg[df_reg['Model'] == 'M'+str(4-i)]

        df_nat = pd.read_csv('./CR12/Esp'+type+'_TL-TrainLayers.csv')
        df_nat = pd.concat([df_nat, pd.read_csv('./CI42/Esp'+type+'_TL-TrainLayers.csv')])
        df_nat = pd.concat([df_nat, pd.read_csv('./CA91/Esp'+type+'_TL-TrainLayers.csv')])
        df_nat = df_nat[df_nat['TLPercentage'] == TLPercentage]
        df_nat = df_nat[df_nat['Model'] == 'M'+str(4-i)]

        df_total_TL = pd.concat([df_reg, df_nat, df_total_TL])
    df_total_TL['Method'] = 'FT'

    df_total = pd.concat([df_total, df_total_TL])
    df_total = df_total.drop(columns=['RMSE', 'TLPercentage', 'Season', 'TLPercentage', 'Station'])

    results = df_total.melt(['Model', 'Method'], var_name='Error', value_name='Error values')
    fig = px.box(results, x="Model", y='Error values', color="Method", hover_data='Model',
                facet_col='Error', facet_col_wrap=1,  height=700, width=600,)
    fig.update_traces(boxmean=True)
    fig.update_yaxes(matches=None)
    fig.for_each_xaxis(lambda yaxis: yaxis.update(showticklabels=True))
    fig.for_each_annotation(lambda a: a.update(text=''))
    fig.update_layout(
        yaxis3=dict(
            title_text="R<sup>2</sup>",
        ),
        yaxis2=dict(
            title_text="MAE (mm d<sup>-1</sup>)",
        ),
        yaxis=dict(
            title_text="MAPE (%)",
        )
    )
    fig.show()
    fig.write_image('./Images/'+name+'.pdf')


In [83]:
compareTLNoTL('Estimation', 0.8, 'compareTLNO-TLEstimation')

In [84]:
compareTLNoTL('Forecast', 0.6, 'compareTLNO-TLForecast')

In [47]:
def compareTLNoTL(station, type, TLPercentage):
    df_total = pd.DataFrame()
    for i in range(4):
        df_reg = pd.read_csv('./'+station+'/Murcia'+type+'_TL-NoTrainLayers.csv')
        #df_reg = df_reg[df_reg['TLPercentage'] == TLPercentage]
        df_reg = df_reg[df_reg['Model'] == 'M'+str(4-i)]

        df_nat = pd.read_csv('./'+station+'/Esp'+type+'_TL-NoTrainLayers.csv')
        #df_nat = df_nat[df_nat['TLPercentage'] == TLPercentage]
        df_nat = df_nat[df_nat['Model'] == 'M'+str(4-i)]

        df_total = pd.concat([df_reg, df_nat, df_total])
    df_total['TL'] = 'NoTL'

    df_total_TL = pd.DataFrame()
    for i in range(4):
        df_reg = pd.read_csv('./'+station+'/Murcia'+type+'_TL-TrainLayers.csv')
        #df_reg = df_reg[df_reg['TLPercentage'] == TLPercentage]
        df_reg = df_reg[df_reg['Model'] == 'M'+str(4-i)]

        df_nat = pd.read_csv('./'+station+'/Esp'+type+'_TL-TrainLayers.csv')
        #df_nat = df_nat[df_nat['TLPercentage'] == TLPercentage]
        df_nat = df_nat[df_nat['Model'] == 'M'+str(4-i)]

        df_total_TL = pd.concat([df_reg, df_nat, df_total_TL])
    df_total_TL['TL'] = 'TL'

    df_total = pd.concat([df_total, df_total_TL])
    df_total = df_total.drop(columns=['RMSE', 'TLPercentage', 'Season', 'TLPercentage', 'Station'])

    results = df_total.melt(['Model', 'TL'], var_name='Error', value_name='Error values')
    fig = px.box(results, x="Model", y='Error values', color="TL", hover_data='Model',
                facet_col='Error', facet_col_wrap=1,  height=700, width=650,)
    fig.update_traces(boxmean=True)
    fig.update_yaxes(matches=None)
    fig.for_each_xaxis(lambda yaxis: yaxis.update(showticklabels=True))
    fig.for_each_annotation(lambda a: a.update(text=''))
    fig.update_layout(
        yaxis3=dict(
            title_text="R<sup>2</sup>",
        ),
        yaxis2=dict(
            title_text="MAE (mm d<sup>-1</sup>)",
        ),
        yaxis=dict(
            title_text="MAPE (%)",
        )
    )
    fig.show()

compareTLNoTL('CR12', 'Forecast', 0.8)

# Boxplots

In [12]:
def getBoxplot(regFiles, natFiles, TLPercentagesReg, TLPercentagesNat, name):
    df_total = pd.DataFrame()
    for i in range(4):
        df_reg = pd.read_csv(regFiles[i])
        df_reg = df_reg[df_reg['TLPercentage'] == TLPercentagesReg[i]]
        df_reg = df_reg[df_reg['Model'] == 'M'+str(4-i)]

        df_nat = pd.read_csv(natFiles[i])
        df_nat = df_nat[df_nat['TLPercentage'] == TLPercentagesNat[i]]
        df_nat = df_nat[df_nat['Model'] == 'M'+str(4-i)]

        df_total = pd.concat([df_reg, df_nat, df_total])

    #display(df_total)
    df_total = df_total.drop(columns=['RMSE', 'TLPercentage'])
    results = df_total.melt(['Season', 'Station', 'Model'], var_name='Error', value_name='Error values')
    fig = px.box(results, x="Season", y='Error values', color="Model", hover_name='Station', hover_data='Model',
                facet_col='Error', facet_col_wrap=1,  height=700, width=650,)
    fig.update_traces(boxmean=True)
    fig.update_yaxes(matches=None)
    fig.for_each_xaxis(lambda yaxis: yaxis.update(showticklabels=True))
    fig.for_each_annotation(lambda a: a.update(text=''))
    fig.update_layout(
        yaxis3=dict(
            title_text="R<sup>2</sup>",
        ),
        yaxis2=dict(
            title_text="MAE (mm d<sup>-1</sup>)",
        ),
        yaxis=dict(
            title_text="MAPE (%)",
        )
    )
    fig.show()
    fig.write_image('./Images/'+name+'.pdf')


In [13]:
getBoxplot(['./CI42/MurciaEstimation_TL-TrainLayers.csv', './CI42/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-NoTrainLayers.csv'],
           ['./CR12/EspEstimation_TL-TrainLayers.csv', './CI42/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-NoTrainLayers.csv'],
           [0.8, 0.8, 0.8, 0.8], 
           [0.8, 0.8, 0.8, 0.8], 'estimationBox')

In [14]:
getBoxplot(['./CA91/MurciaForecast_TL-NoTrainLayers.csv', './CI42/MurciaForecast_TL-TrainLayers.csv', './CI42/MurciaForecast_TL-NoTrainLayers.csv', './CR12/MurciaForecast_TL-NoTrainLayers.csv'],
           ['./CA91/EspForecast_TL-NoTrainLayers.csv', './CR12/EspForecast_TL-TrainLayers.csv', './CA91/EspForecast_TL-NoTrainLayers.csv', './CR12/EspForecast_TL-NoTrainLayers.csv'],
           [0.9, 0.8, 0.8, 0.6], 
           [0.6, 0.6, 0.6, 0.6], 'forecastBox')

In [None]:
# Murcia estimacion
#NoTlFiles = ['./CI42/MurciaEstimation_NoTL.csv', './CI42/MurciaEstimation_NoTL.csv', './CI42/MurciaEstimation_NoTL.csv', './CR12/MurciaEstimation_NoTL.csv']
#TlFiles = ['./CI42/MurciaEstimation_TL-TrainLayers.csv', './CI42/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-TrainLayers.csv', './CR12/MurciaEstimation_TL-NoTrainLayers.csv']
#TLPercentages = [0.8, 0.8, 0.8, 0.8]

# Murcia forecast
#NoTlFiles = ['./CI42/MurciaForecast_NoTL.csv', './CA91/MurciaForecast_NoTL.csv', './CI42/MurciaForecast_NoTL.csv', './CR12/MurciaForecast_NoTL.csv']
#TlFiles = ['./CA91/MurciaForecast_TL-NoTrainLayers.csv', './CI42/MurciaForecast_TL-TrainLayers.csv', './CI42/MurciaForecast_TL-NoTrainLayers.csv', './CR12/MurciaForecast_TL-NoTrainLayers.csv']
#TLPercentages = [0.9, 0.8, 0.8, 0.6]

'''
# Spain estimatcion
NoTlFiles = ['./CR12/EspEstimation_NoTL.csv', './CR12/EspEstimation_NoTL.csv', './CI42/EspEstimation_NoTL.csv', './CR12/EspEstimation_NoTL.csv']
TlFiles = ['./CR12/EspEstimation_TL-TrainLayers.csv', './CI42/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-TrainLayers.csv', './CR12/EspEstimation_TL-NoTrainLayers.csv']
TLPercentages = [0.8, 0.8, 0.8, 0.8]
'''


# Spain forecast
NoTlFiles = ['./CI42/EspForecast_NoTL.csv', './CA91/EspForecast_NoTL.csv', './CA91/EspForecast_NoTL.csv', './CA91/EspForecast_NoTL.csv']
TlFiles = ['./CA91/EspForecast_TL-NoTrainLayers.csv', './CR12/EspForecast_TL-TrainLayers.csv', './CA91/EspForecast_TL-NoTrainLayers.csv', './CR12/EspForecast_TL-NoTrainLayers.csv']
TLPercentages = [0.6, 0.6, 0.6, 0.6]

In [210]:
df = pd.read_csv('./CA91/EspEstimation_NoTL.csv')

for metric in ['R2', 'MAE', 'MAPE']:
    fig = px.box(df, x="Season", y=metric, color="TLPercentage", hover_name='Station', hover_data='Model')
    fig.update_traces(boxmean=True)
    fig.show()

In [18]:
import plotly.express as px
for metric in ['R2', 'MAE', 'CVRMSE']:
    fig = px.violin(all_errors, x="Param", y=metric)
    #fig.update_traces(boxmean=True)
    fig.show()

# Show U2 errors

In [48]:
def hourlyToDaily(df):
    punto = [l[1] for l in list(df.groupby([df['dates'].dt.date]))]
    FECHA = []
    TMAX = []
    TMIN = []
    HRMAX = []
    HRMIN = []
    VVMED = []
    RADMED = []
    for p in punto:
        FECHA.append(pd.to_datetime(p['dates']).dt.date.iloc[0])
        TMAX.append(p['temp'].max())
        TMIN.append(p['temp'].min())
        HRMAX.append(p['rh'].max())
        HRMIN.append(p['rh'].min())
        VVMED.append(p['wind'].mean())
        RADMED.append(p['solar_rad'].mean())

    return pd.DataFrame({
        "FECHA": pd.to_datetime(FECHA),
        "TMAX": TMAX,
        "TMIN": TMIN,
        "HRMAX": HRMAX,
        "HRMIN": HRMIN,
        "VVMED": VVMED,
        "RADMED": RADMED
    })

df_forecast = pd.read_csv('./forecastMurcia/CA91-fuenteAlamo/WB-CA91-fuenteAlamo.csv')
df_forecast['dates'] = pd.to_datetime(df_forecast['dates'])
df_forecast = hourlyToDaily(df_forecast)
df_forecast['VVMED'] = df_forecast['VVMED'].apply(lambda x: x*4.87/np.log(67.8*10-5.42))
df_forecast

Unnamed: 0,FECHA,TMAX,TMIN,HRMAX,HRMIN,VVMED,RADMED
0,2023-06-18,29.9,20.7,68,26,2.223908,301.693922
1,2023-06-19,28.8,20.8,73,38,1.743038,331.847141
2,2023-06-20,30.4,21.1,78,37,1.911327,334.152405
3,2023-06-21,29.9,20.2,96,54,1.203266,323.182376
4,2023-06-22,30.4,20.2,96,45,1.666684,343.223174
...,...,...,...,...,...,...,...
368,2024-06-24,27.9,19.2,87,44,2.141010,345.373094
369,2024-06-25,29.2,18.5,92,36,1.831234,346.764835
370,2024-06-26,29.7,19.1,87,39,3.358300,341.446248
371,2024-06-27,29.7,20.1,74,38,1.832168,292.635635


In [49]:
import pandas as pd

def convertirComa(x):
    if type(x) == str:
        return x.replace(",", ".")
    else:
        return x
def leerEstacionDatos(path):
    estacionDatas = pd.read_csv(path, encoding='ISO-8859-1', sep=";")
    estacionDatas.columns = ['ESTACION', 'MUNICIPIO', 'PARAJE', 'HORAS', 'FECHA', 'ETO','TMAX', 'TMIN', 'HRMAX', 'HRMIN', 'RADMED','VVMED', '-']
    estacionDatas = estacionDatas.drop(columns=['ESTACION', 'MUNICIPIO', 'PARAJE', 'HORAS', '-'])
    estacionDatas = estacionDatas.reset_index().drop(columns='index')
    estacionDatas['FECHA'] = pd.to_datetime(estacionDatas['FECHA'], format="%d/%m/%y")
    estacionDatas.index = estacionDatas['FECHA']
    estacionDatas.drop(columns='FECHA', inplace=True)
    estacionDatas.dropna(inplace=True)
    for i in estacionDatas.columns:
        estacionDatas[i] = pd.to_numeric(estacionDatas[i].apply(lambda x : convertirComa(x)))
    return estacionDatas

df_real = leerEstacionDatos('./all data murcia/CA91.csv')
df_real = df_real[df_real.index > str(df_forecast['FECHA'][0])]
df_real

Unnamed: 0_level_0,ETO,TMAX,TMIN,HRMAX,HRMIN,RADMED,VVMED
FECHA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-06-19,5.90,30.91,19.80,72.92,29.77,322.48,1.15
2023-06-20,5.85,31.69,18.90,87.82,27.60,328.48,1.01
2023-06-21,5.22,30.07,19.92,85.00,39.22,294.67,0.88
2023-06-22,6.09,32.09,20.24,85.63,31.74,340.80,1.02
2023-06-23,5.93,30.80,19.07,79.41,34.94,348.05,0.90
...,...,...,...,...,...,...,...
2024-06-23,5.73,28.82,17.59,76.78,35.62,348.35,0.91
2024-06-24,5.25,27.94,17.55,77.99,43.34,317.53,0.90
2024-06-25,5.77,30.19,17.52,84.05,31.34,344.81,0.90
2024-06-26,5.44,29.79,17.69,86.10,35.11,308.05,1.10


In [50]:
fig = go.Figure()
fig.add_trace(go.Scatter(x= df_real.index, y=df_real['VVMED'],
                        name='U2-Real'))
fig.add_trace(go.Scatter(x= df_forecast['FECHA'], y=df_forecast['VVMED'],
                        name='U2-WeatherBit'))
fig.update_layout(
    title=dict(
        text="Real and WeatherBit forecast of U2 at CA91."
    ))
fig.show()

In [52]:
df_forecast = pd.read_csv('./forecastSiar/V05 Cheste.csv')
df_forecast['dates'] = pd.to_datetime(df_forecast['dates'])
df_forecast = hourlyToDaily(df_forecast)
df_forecast['VVMED'] = df_forecast['VVMED'].apply(lambda x: x*4.87/np.log(67.8*10-5.42))
df_forecast

Unnamed: 0,FECHA,TMAX,TMIN,HRMAX,HRMIN,VVMED,RADMED
0,2023-06-17,32.8485,21.4485,55.0,32.0,2.106472,346.375000
1,2023-06-18,26.6485,18.8485,72.0,40.0,1.413389,196.541667
2,2023-06-19,29.1485,19.5485,73.0,40.0,2.044795,251.250000
3,2023-06-20,31.0485,19.2485,77.0,31.0,1.968076,317.708333
4,2023-06-21,29.8485,20.1485,79.0,40.0,2.384452,263.250000
...,...,...,...,...,...,...,...
371,2024-06-22,32.8485,18.5485,58.0,21.0,1.958586,318.458333
372,2024-06-23,30.7485,18.4485,80.0,28.0,2.012074,345.166667
373,2024-06-24,27.4485,17.5485,81.0,44.0,2.035509,260.958333
374,2024-06-25,31.4485,17.4485,86.0,31.0,2.140915,346.500000


In [53]:
df_real = pd.read_csv('./siarRealDataForForecast/V05 Cheste.csv')
df_real['Fecha'] = pd.to_datetime(df_real['Fecha'])
df_real

Unnamed: 0,Fecha,TempMax,TempMin,HumedadMax,HumedadMin,VelViento,Radiacion,EtPMon
0,2023-06-18,27.89,14.49,92.7,36.80,0.419,11.734,2.659299
1,2023-06-19,29.07,14.22,82.7,34.70,0.569,18.800,3.837167
2,2023-06-20,33.14,15.36,91.4,31.34,0.590,23.614,4.790765
3,2023-06-21,29.61,17.43,92.3,40.97,0.690,13.721,3.232424
4,2023-06-22,34.15,14.76,88.7,25.43,0.712,25.250,5.226812
...,...,...,...,...,...,...,...,...
369,2024-06-21,30.48,12.22,87.8,26.38,0.679,26.750,5.019356
370,2024-06-22,33.21,14.03,86.8,28.05,0.862,25.470,5.325703
371,2024-06-23,31.01,14.83,94.9,33.96,0.686,26.790,5.135767
372,2024-06-24,28.33,15.95,92.6,51.29,0.750,20.880,4.133180


In [55]:
fig = go.Figure()
fig.add_trace(go.Scatter(x= df_real['Fecha'], y=df_real['VelViento'],
                        name='U2-Real'))
fig.add_trace(go.Scatter(x= df_forecast['FECHA'], y=df_forecast['VVMED'],
                        name='U2-NOAA GFS'))
fig.update_layout(
    title=dict(
        text="Real and NOAA GFS forecast of U2 at V05."
    ))
fig.show()