# Download data from AROME-Arctic and save as ncfile

The following code is a guide to downloading data from the NWP model AROME-Arctic, extracted at one location given by its coordinates (lat/lon) for a chosen time period, and saving it as an nc-file. The data are available at https://thredds.met.no/thredds/catalog/aromearcticarchive/catalog.html. This script uses the arome_arctic_full files, which contain several variables.

Choose a date span (years, months, days) and save it as an nc-file.

Data available from: 2015.10.21 to current date


- AROME-Arctic issues deterministic forecasts 4 times a day with a lead time of 66 hours

- The model utilises a three-dimensional variational data assimilation with 3-hourly cycling to assimilate conventional observations, scatterometer ocean surface winds, satellite radiances and atmospheric motion vectors.

(Source and more information about the model: https://www.met.no/en/projects/The-weather-model-AROME-Arctic/about)


In [None]:
# Importing necessary modules

# __future__ imports must be the first statement in the cell
from __future__ import print_function

import datetime as dt
from datetime import date, timedelta

import matplotlib.pylab as plt
import netCDF4 as nc
import numpy as np
import pandas as pd
import pyproj
import requests
import xarray as xr
from IPython.display import display, HTML

In [None]:
"""
The coordinates in get_coordinates is at the center of the offshore wind sites
"""

def get_coordinates(location_name):
    """Return the centre coordinates of a named offshore wind site.

    Parameters
    ----------
    location_name : str
        Site key: 'Nordavind_A', 'Nordavind_B', 'Nordavind_C' or
        'Nordavind_D'.

    Returns
    -------
    tuple of float or None
        ``(latitude, longitude)`` for a known site, ``None`` for any
        other name.
    """
    # (latitude, longitude) per site
    site_centres = {
        'Nordavind_A': (71.1314956, 32.048109),
        'Nordavind_B': (71.7880587, 27.7221338),
        'Nordavind_C': (71.7471898, 19.9808019),
        'Nordavind_D': (71.473272, 18.7614613),
    }

    # dict.get yields None for names that are not in the table
    return site_centres.get(location_name)

## Seklima station information retrieval with the Frost API

[Link to frost webpage](https://frost.met.no/index.html)

1. Create client id for yourself 
    * [Create client id here](https://frost.met.no/auth/requestCredentials.html)
    
This is used to retrieve the longitude and latitude of a measurement station.

PS: Not all stations are included in the model area. AROME-Arctic covers northern areas only.

In [None]:
# Insert your own client ID here.
# It is passed as the user name of the HTTP basic-auth pair in the Frost
# request (the password is left empty).
client_id = 'insert client ID'

In [None]:
'''
Getting the data for the meta dataframe

Using the Frost API for seklima
'''

endpoint = 'https://frost.met.no/sources/v0.jsonld'

# Dictionary with elements to retrieve
parameters = {
    'fields': 'name,id,geometry,masl'
}

# Issue an HTTP GET request. The timeout keeps the notebook from hanging
# forever if the server never answers.
r = requests.get(endpoint, parameters, auth=(client_id, ''), timeout=60)

# Extract JSON data. An error response is not guaranteed to carry a JSON
# body, so a decoding failure must not hide the status code reported below.
try:
    json = r.json()
except ValueError:
    json = {}

# Check if the request worked, print out any errors
if r.status_code == 200:
    data_exp = json['data']
    print('Data retrieved from frost.met.no!')
else:
    error = json.get('error', {})
    print('Error! Returned status code %s' % r.status_code)
    print('Message: %s' % error.get('message'))
    print('Reason: %s' % error.get('reason'))

In [None]:
'''
Transforming the data from the raw json format retrieved by requests
to a pandas df format

When redefining the query, the new columns need to be specified for the dataframe
'''

# Collect one row per station, then build the frame in a single call.
# Appending to a DataFrame row by row copies the data on every insert.
rows = []
ignored_values = 0
for source in data_exp:
    try:
        coordinates = source['geometry']['coordinates']
        rows.append([
            source['id'],
            source['name'],
            coordinates[0],  # stored in the 'lon' column
            coordinates[1],  # stored in the 'lat' column
            source['masl'],
        ])
    except (KeyError, IndexError, TypeError):
        # Entry lacks one of the requested fields (or has a malformed
        # geometry): count it and move on.
        ignored_values += 1

meta_df = pd.DataFrame(rows, columns=['id', 'name', 'lon', 'lat', 'heigh-asl (m)'])

print(f'Number of discarded values {ignored_values}')

# Setting the station id as row index
meta_df = meta_df.set_index('id')

display(meta_df)

- Change station ID to match measurement station to get station coordinates
    * Find station ID at ([stations](https://seklima.met.no/stations/))
 
or 

- Insert site_name to get coordinates from center of OWS

In [None]:

#################

# Insert station ID, e.g. 'SN76956' (Goliat) or 'SN20926' (Hjelmsøybanken)
station_id = "SN76956"

################

# Pick the station's row from the metadata table (indexed by station id)
# and read both coordinates from it.
station_row = meta_df.loc[str(station_id)]
longitude = station_row["lon"]
latitude = station_row["lat"]

print(f"{station_id}: longitude = {longitude}, latitude = {latitude}")

In [None]:
# Insert the name of the site you want the data extracted from

################

site_name = 'Nordavind_C'

################

# get_coordinates returns (latitude, longitude) for a known site and None
# otherwise; fail with a clear message instead of an unpacking error.
site_coordinates = get_coordinates(site_name)
if site_coordinates is None:
    raise ValueError(f"Unknown site name: {site_name!r}")

latitude, longitude = site_coordinates
print(f"{site_name}: longitude = {longitude}, latitude = {latitude}")

In [None]:
# Optional: choose latitude and longitude by hand.
# Replace both None values with numbers to override the coordinates
# selected in the cells above; leaving them as None keeps those values.

manual_latitude = None
manual_longitude = None

if manual_latitude is not None and manual_longitude is not None:
    latitude, longitude = manual_latitude, manual_longitude
    print(f"Manual: longitude = {longitude}, latitude = {latitude}")

## Projecting coordinates
Start by projecting the chosen latitude and longitude coordinates to the x and y coordinates used in the model
- No changes needed in this part

In [None]:

# An arbitrary archive file; it is opened only to read the x/y grid axes
filename = ("https://thredds.met.no/thredds/dodsC/aromearcticarchive/"
            "2022/01/01/arome_arctic_full_2_5km_20220101T00Z.nc")

crs_AA = pyproj.CRS.from_cf(
    {
        "grid_mapping_name": "lambert_conformal_conic",
        "standard_parallel": [77.5, 77.5],
        "longitude_of_central_meridian": -25.0,
        "latitude_of_projection_origin": 77.5,
        "earth_radius": 6371000.0,
    }
)

# Transformer to project from EPSG:4326 (WGS 84) to our lambert_conformal_conic.
# always_xy=True means the transformer takes (lon, lat) in that order.
proj = pyproj.Transformer.from_crs(4326, crs_AA, always_xy=True)

# Compute projected coordinates of lat/lon point
lat = latitude
lon = longitude
X, Y = proj.transform(lon, lat)

# Read the grid axes; the with-block closes the remote dataset even if
# reading fails.
with nc.Dataset(filename) as ncfile:
    x = ncfile.variables["x"][:]
    y = ncfile.variables["y"][:]

# Find nearest neighbour along each axis
Ix = np.argmin(np.abs(x - X))
Iy = np.argmin(np.abs(y - Y))

# A point outside the grid would otherwise be mapped silently onto the
# nearest edge cell.
if not (x.min() <= X <= x.max() and y.min() <= Y <= y.max()):
    print(f"Warning: longitude = {lon}, latitude = {lat} is outside the model grid; "
          "the nearest edge cell is used.")


## Collecting the data 
In this section you can choose "start date" and "end date" for the timeperiod of data to be extracted. 

Each file contains a forecast of 66 hours. Due to spin-up error, this script retrieves data starting 6 hours into each file,
extracting 3 hours from the file before jumping to the next file. The files contain several variables and this script acts
as an example; other variables can be added or removed by altering the lists and variables. Variables will be extracted from the chosen height (except variables at a given height, e.g. surface_air_pressure)


* Change start_date and end_date
* Choose hybrid level (height)
* Change empty lists and extracted variables

- PS: x_wind and y_wind are relative to the model grid, not rotated to cardinal directions. 
- (Rotation to cardinal direction is done by using the alpha.nc file)

time to collect ?? months; approx ?? min. 

In [None]:

####################

start_date = date(2021, 12, 31) # choose start date (year, month, day)
end_date = date(2022, 12, 31) # choose end date (year, month, day)

delta = timedelta(days=1) # delta (timestep), default set to 1 day


"""
Choose a hybrid level [64 to 0]:
64 = 0 masl
63 = 24 masl
62 = 48 masl
61 = 73 masl
60 = 99 masl
59 = 127 masl
58 = 156 masl
57 = 187 masl
56 = 221 masl
55 = 259 masl
"""

hybrid_lvl = 62   # 64 surface, 0 ToA 

######################


# Empty lists, one per variable to be extracted
time = []
air_temp = []
x_wind = []
y_wind = []
surface_air_pressure = []


# Start hours of the forecast files requested for each day
hours = ["00", "03", "06", "09", "12", "15", "18", "21"]

# Time steps read from every file: skip the first 6 steps (spin-up) and
# take the following 3.
lead_steps = slice(6, 9)

# Iterate on a copy so start_date keeps the value chosen above, and use
# names that do not overwrite the grid axis `y` from the projection cell.
current_date = start_date
while current_date <= end_date:
    year = current_date.strftime("%Y")
    month = current_date.strftime("%m")
    day = current_date.strftime("%d")

    for hour in hours:
        opendap_url = f"https://thredds.met.no/thredds/dodsC/aromearcticarchive/{year}/{month}/{day}/arome_arctic_full_2_5km_{year}{month}{day}T{hour}Z.nc"

        try:
            # The with-block closes the remote dataset even when one of the
            # reads below fails.
            with nc.Dataset(opendap_url) as ncfile:
                variables = ncfile.variables
                times = variables["time"][lead_steps]
                airtemp = variables["air_temperature_ml"][lead_steps, hybrid_lvl, Iy, Ix]
                xwind = variables["x_wind_ml"][lead_steps, hybrid_lvl, Iy, Ix]
                ywind = variables["y_wind_ml"][lead_steps, hybrid_lvl, Iy, Ix]
                sur_p = variables["surface_air_pressure"][lead_steps, 0, Iy, Ix]
        except Exception as e:
            # Deliberately broad: sometimes files for specific hours of a day
            # are missing; any failure for one file skips to the next file.
            print(f"Skipping hour {hour} of date {year}-{month}-{day} due to exception: {e}")
            continue

        # Only reached when every variable was read, so the lists stay aligned
        time.extend(times)
        air_temp.extend(airtemp)
        x_wind.extend(xwind)
        y_wind.extend(ywind)
        surface_air_pressure.extend(sur_p)

    current_date += delta


### Rotating wind with Alpha
This section uses the alpha.nc file with the rotated local grid at every location. alpha.nc is created by running a separate script; this needs to be done before running the following.
- This section rotates wind into cardinal directions before saving the file. This can also be skipped in this step and be done when processing the final ncfile. 

In [None]:
# Convert x-y wind components to cardinal direction and speed


# Open alpha.nc with rotated local grids and extract alpha at (Iy, Ix).
# The with-block makes sure the file is closed again.
with nc.Dataset("alpha.nc") as ncfile:
    alpha = float(ncfile.variables["alpha"][Iy, Ix])


# Wind direction relative to Earth (wdir) may be calculated as follows:
#   wdir = alpha + 90-atan2(v,u)
# where u and v are model wind relative to model grid.
# The constant 90 implies degrees, while np.arctan2 returns radians, so the
# angle is converted before it is combined with alpha.
# NOTE(review): assumes alpha is stored in degrees, and the formula is applied
# exactly as stated - confirm against the alpha.nc documentation whether the
# result is the direction the wind blows from or towards.

# Missing (masked) samples become NaN so they propagate through the maths
u = np.ma.array(x_wind, dtype=float).filled(np.nan)
v = np.ma.array(y_wind, dtype=float).filled(np.nan)

# Direction in degrees, wrapped into [0, 360)
wdir = np.mod(alpha + (90.0 - np.degrees(np.arctan2(v, u))), 360.0).tolist()

# Speed is the length of the (u, v) vector
ws = np.hypot(u, v).tolist()


## Dictionary
The output from collecting data is stored in lists. This section creates a dictionary and uses pandas to store it in an easy-reference system
- Changes can be made in the dictionary "d_data" if variables are ignored or added

In [None]:
# Map each output column name to the list collected for it
d_data = {
    "time": time,
    f"air_temperature_Hyb:{hybrid_lvl}": air_temp,
    "surface_pressure": surface_air_pressure,
    f"x_wind_Hyb:{hybrid_lvl}": x_wind,
    f"y_wind_Hyb:{hybrid_lvl}": y_wind,
    "wind_speed": ws,
    "wind_direction": wdir,
}


# Build the pandas dataframe (one column per key) and index it by time
weather_data = pd.DataFrame(d_data).set_index("time")

display(weather_data)

## Save as NC-file
Define a function that saves the "weather_data" as an nc-file. 
- Change the output_filename to desired new saved filename

Filesize for time, wind speed and wind direction for ?? months: ?? KB

In [None]:
"""
This code is an easy option saving the new file directly in the same directory as the working directory
"""

def save_dataframe_to_netcdf(dataframe, output_file):
    """Write the columns of *dataframe* to a netCDF file.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose columns become the variables of the dataset.
    output_file : str
        Name (or path) of the file to write.
    """
    columns_as_series = dataframe.to_dict('series')
    xr.Dataset(data_vars=columns_as_series).to_netcdf(output_file)

    print(f"Data saved successfully to {output_file}.")


# Choose filename; default: "AROME_weather_data.nc"
output_filename = 'AROME_dir13_weather_data.nc'

save_dataframe_to_netcdf(weather_data, output_filename)

In [None]:
"""
This code makes a new optional directory at a desired location and saves the new file in this new directory.
If the directory already exist it will just save the new file in that directory. This way different ncfiles can be saved
in the same directory by keeping the same directory name and changing only the output_filename
"""

import os

# New directory 
directory = "weather_data_NORA3"

# Parent Directory path 
parent_dir = "C:/Users/Ida/"

# Filename new ncfile
# {site_name} or {station_id}
output_filename = 'AROME2_weather_data.nc' # choose filename

# Path of the output directory
path = os.path.join(parent_dir, directory)

# Create the directory (and any missing parents); nothing happens if it
# already exists.
os.makedirs(path, exist_ok=True)


def save_dataframe_to_netcdf(dataframe, output_file, output_dir=None):
    """Write the columns of *dataframe* to a netCDF file inside a directory.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose columns become the variables of the dataset.
    output_file : str
        File name of the new nc-file.
    output_dir : str, optional
        Directory to write into. Defaults to the module-level ``path``,
        i.e. the directory created above.
    """
    if output_dir is None:
        output_dir = path

    # Join onto the same path the directory was created from, so the file
    # lands in that directory whether or not parent_dir ends with a slash.
    output_path = os.path.join(output_dir, output_file)

    dataset = xr.Dataset(data_vars=dataframe.to_dict('series'))
    dataset.to_netcdf(output_path)

    print(f"Data saved successfully to {output_path}.")


save_dataframe_to_netcdf(weather_data, output_filename)