In [1]:
import pandas as pd
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
from utils import * 
import xarray


In [2]:
coords_mantua = {'lon' : 39.766038926962466, 'lat' : -75.14202657295235}
coords_west = {'lon' : 41.13935584322975, 'lat' : -73.52551929903115}
coords_water = {'lon' : 39.75493159954764, 'lat' : -74.88289845779185}

In [None]:
# Pull in ERA5 data for the 3 sites
uid = '155088'
key = 'e0da1563-6dd2-49b3-b565-8dd373c8bc49'

import cdsapi
from urllib.request import urlopen

# start the client
c = cdsapi.Client( url="https://cds.climate.copernicus.eu/api/v2", key=f"{uid}:{key}",verify=True)

fl = c.retrieve(
    'reanalysis-era5-land',
    {
        'format': 'netcdf',
        'variable': 'surface_solar_radiation_downwards',
        'year': [
            '2021','2022',
        ],
        'month': [
            '01','02','03','04','05','06','07','08','09','10','11','12'
        ],
        'day': [
            '01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
            '16', '17', '18',
            '19', '20', '21',
            '22', '23', '24',
            '25', '26', '27',
            '28', '29', '30',
            '31',
        ],
        'time': [
            '00:00', '01:00', '02:00',
            '03:00', '04:00', '05:00',
            '06:00', '07:00', '08:00',
            '09:00', '10:00', '11:00',
            '12:00', '13:00', '14:00',
            '15:00', '16:00', '17:00',
            '18:00', '19:00', '20:00',
            '21:00', '22:00', '23:00',
        ],
        'area': [            
            coords_mantua["lat"] + .1, #north
            coords_mantua["lon"] - .1, #west
            coords_mantua["lat"] - .1, #south
            coords_mantua["lon"] + .1, #east
        ],
    },
    'data/inputs_mantua.nc')

2022-10-23 18:53:18,312 INFO Welcome to the CDS
2022-10-23 18:53:18,315 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-land
2022-10-23 18:53:18,468 INFO Request is queued


In [52]:
y = xr.open_dataset('data/inputs_mantua.nc')
y

In [10]:
#Prepare output data in the form of localized physical weather stations on 2 sites
#Convert from .csv to .nc
train_path = "data/train/"

df_mantua = pd.read_csv(train_path+"mantua.csv").drop("Unnamed: 0",axis=1)
df_mantua.date = pd.to_datetime(df_mantua.date)
df_mantua = df_mantua.rename(columns={"date":"time"}).set_index("time")

df_west = pd.read_csv(train_path+"west.csv").drop("Unnamed: 0",axis=1)
df_mantua.date = pd.to_datetime(df_west.date)
df_west = df_west.rename(columns={"date":"time"}).set_index("time")


df_water = pd.read_csv(train_path+"waterford.csv").drop("Unnamed: 0",axis=1)
df_mantua.date = pd.to_datetime(df_water.date)
df_water = df_water.rename(columns={"date":"time"}).set_index("time")

xr_mantua = xarray.Dataset.from_dataframe(df_mantua).assign_coords(coords=coords_mantua).expand_dims(['lat','lon'])
xr_west = xarray.Dataset.from_dataframe(df_west).assign_coords(coords=coords_west).expand_dims(['lat','lon'])
xr_water = xarray.Dataset.from_dataframe(df_water).assign_coords(coords=coords_water).expand_dims(['lat','lon'])

xr_mantua.to_netcdf("data/outputs_mantua.nc")
xr_west.to_netcdf("data/outputs_west.nc")
xr_water.to_netcdf("data/outputs_waterford.nc")


  df_mantua.date = pd.to_datetime(df_west.date)


In [15]:
# Training set
train_files    = ["mantua", "water"]
X_train_xr, _  = prepare_predictor(train_files,train_path)
y_train_xr, _  = prepare_predictand(train_files,train_path)


# Test set
test_path = "data/test/"
X_test_xr, _ = prepare_predictor('west', data_path=test_path,time_reindex=False)
y_test_xr, _ = prepare_predictand('west',data_path=test_path,time_reindex=False)

In [35]:
X_train_xr["ssrd"].data

array([[[           nan,  3.7027388e+07,            nan, ...,
                    nan,  3.7026152e+07,            nan],
        [           nan,  3.6738744e+07,            nan, ...,
                    nan,  3.6725736e+07,            nan],
        [           nan,            nan,            nan, ...,
                    nan,            nan,            nan],
        [           nan,            nan,            nan, ...,
                    nan,            nan,            nan]],

       [[           nan,  7.1914200e+05,            nan, ...,
                    nan,  7.2533600e+05,            nan],
        [           nan,  7.1976200e+05,            nan, ...,
                    nan,  7.2595600e+05,            nan],
        [           nan,            nan,            nan, ...,
                    nan,            nan,            nan],
        [           nan,            nan,            nan, ...,
                    nan,            nan,            nan]],

       [[           nan,  1.6711820e

In [19]:
X_train_df = pd.DataFrame({"ssrd": X_train_xr["ssrd"].data,
                          }, index=X_train_xr["ssrd"].coords['time'].data)

X_test_df  = pd.DataFrame({"ssrd": X_test_xr["ssrd"].data,
                          }, index=X_test_xr["ssrd"].coords['time'].data)


y_train_df = y_train_xr["planeOfArrayIrradiance"].stack(dim=["latitude", "longitude"])
y_train_df = pd.DataFrame(y_train_df.to_pandas())

ValueError: Data must be 1-dimensional