In [27]:
import os
from datetime import datetime, timezone
from glob import glob
from pathlib import Path

import pandas as pd
from frictionless import Package

In [30]:
# Get definitions by executing the shared definitions notebook.
# NOTE(review): START, END and AREAS, which are used in the metadata cells
# further down, are not defined anywhere in this notebook -- presumably they
# are provided by 00-definitions.ipynb; confirm.
%run "00-definitions.ipynb"

## Merge CSV files into one Excel workbook

In [4]:
# Write every processed CSV file into its own sheet of a single Excel workbook.
with pd.ExcelWriter('../data/processed/Task25data.xlsx') as excel:
    # glob() returns paths in arbitrary order; sort them so that the sheets
    # appear in the same order on every run and on every platform.
    for csvfile in sorted(glob('../data/processed/*.csv')):
        # The sheet is named after the file name, including its ".csv" suffix.
        varname = os.path.basename(csvfile)
        # First column is the (date-parsed) index; make it time zone-naive
        # before export and write missing values as empty cells.
        (pd.read_csv(csvfile, index_col=0, parse_dates=True)
           .tz_localize(None)
           .to_excel(excel, sheet_name=varname, na_rep=''))

## Create a data package

First, infer the format of data files on disk.

In [6]:
# Create a package rooted at the processed-data directory, then infer
# metadata for the files matching the CSV and Excel patterns.
# NOTE(review): the patterns are presumably resolved relative to basepath -- confirm.
package = Package(basepath='../data/processed')
package.infer(['*.csv', '*.xlsx'])

Change every field whose type was inferred as `'any'` to the `'number'` data type.

In [7]:
# Walk through every field of every resource and declare the ones typed
# 'any' as 'number'; all other field types are left untouched.
for resource in package.resources:
    for field in resource.schema.fields:
        if field.type != 'any':
            continue
        field.type = 'number'

Remove the unnecessary `control` and `dialect` keys from every resource:

In [22]:
# Delete the 'control' and 'dialect' entries from each resource, skipping
# resources that do not carry them.
for res in package.resources:
    for key in ('control', 'dialect'):
        if key not in res.keys():
            continue
        del res[key]

### Add package metadata

In [54]:
# Basic identification metadata: machine-readable name, human-readable title,
# version label and project home page.
package['name'] = "windtask25-timeseries-database"
package['title'] = "Time series database of IEA Wind Task 25"
package['version'] = "2018"
package['homepage'] = "https://community.ieawind.org/task25/home"

In [63]:
# Free-text description of the contents, the units used and the time zone.
# The adjacent string literals are concatenated into a single string.
package['description'] = (
    "Realised and forecast values for wind and solar photovoltaic generation and "
    "electrical load. All CF values are in p.u. and load is share of maximum load during year. "
    "All times in UTC."
)

In [56]:
# Data sources. Each entry has a 'title'; a 'path' (URL) is given only for
# the sources where one is listed.
package['sources'] = [
    {'title': 'ENTSO-E Transparency Platform',
     'path': "https://transparency.entsoe.eu/"},
    {'title': 'WindEurope'},
    {'title': "Eurostat (2020), 'Electricity production capacities by main fuel groups and operator'",
     'path': "https://ec.europa.eu/eurostat/data"},
    {'title': "REN"},
    {'title': "Netztransparenz.de",
     'path': "https://www.netztransparenz.de/EEG/Marktpraemie/Online-Hochrechnung-Wind-Onshore"},
    {'title': "Hydro Quebec"},
    {'title': "ERCOT"},
    {'title': "Svenska kraftnät"}
]

In [57]:
# Contributors of the package: a single entry with the role "author".
package['contributors'] = [{
    "title": "Erkka Rinne",
    "email": "erkka.rinne@vtt.fi",
    "role": "author"
}]

In [58]:
# Record the creation time as a UTC timestamp in ISO 8601 format with a
# trailing "Z". datetime.utcnow() is deprecated since Python 3.12, so an aware
# UTC time is taken instead and its "+00:00" offset is replaced by "Z", which
# yields the same string format as before.
package['created'] = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')

In [59]:
# Keywords describing the subject of the package.
package['keywords'] = [
        "energy",
        "wind power",
        "solar photovoltaics",
        "IEA Wind Task 25"
]

In [60]:
# Temporal coverage of the data: start, end and a free-text resolution note.
# NOTE(review): START and END are not defined in this notebook -- presumably
# they come from 00-definitions.ipynb; confirm their type and format.
package['temporal'] = {
    "start": START,
    "end": END,
    "resolution": "15, 30 or 60 min depending on area"
}

In [61]:
# Spatial coverage: the keys of AREAS joined into one comma-separated string.
# NOTE(review): AREAS is not defined in this notebook -- presumably a mapping
# with string keys provided by 00-definitions.ipynb; confirm.
package['spatial'] = {
    "name": "IEA Wind Task 25 members",
    "location": ", ".join(AREAS.keys())
}

In [64]:
# Write the package descriptor to datapackage.json inside the package's base path.
package.to_json(os.path.join(package.basepath, 'datapackage.json'))