In [1]:
import os
from datetime import datetime, timezone
from glob import glob
from pathlib import Path

import pandas as pd
from frictionless import Package

In [2]:
# Execute the companion definitions notebook.
# NOTE(review): START, END and AREAS used in later cells are not defined in
# this notebook's visible cells; presumably they come from here. Confirm.
%run "00-definitions.ipynb"

Merge CSV files into one Excel workbook

In [3]:
# Collect every processed CSV file into a single Excel workbook, one sheet per file.
# The file list is sorted because glob() returns paths in arbitrary,
# filesystem-dependent order; sorting keeps the sheet order reproducible.
with pd.ExcelWriter('../data/processed/Task25data.xlsx') as excel:
    for csvfile in sorted(glob('../data/processed/*.csv')):
        # The sheet is named after the file's base name (extension included).
        varname = os.path.split(csvfile)[1]
        # First column is the datetime index; any timezone is stripped from the
        # index before writing (pandas refuses to write tz-aware datetimes to Excel).
        (pd.read_csv(csvfile, index_col=0, parse_dates=True)
           .tz_localize(None)
           .to_excel(excel, sheet_name=varname, na_rep=''))

## Create a data package

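First, create the package with the processed data directory as its base path. The data files on disk are inferred further below, after the package metadata has been added.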

In [4]:
# Create the package object with the processed-data directory as its base path.
package = Package(basepath='../data/processed')

### Add package metadata

In [5]:
# Core descriptor properties of the data package.
package['name'] = "windtask25-timeseries-database"
package['title'] = "Time series database of IEA Wind Task 25"
package['version'] = "2018.0"
package['homepage'] = "https://community.ieawind.org/task25/home"
# Creation timestamp in UTC, ISO 8601 with a trailing "Z".
# datetime.utcnow() is deprecated since Python 3.12, so take a timezone-aware
# UTC "now" and drop the tzinfo to keep the exact "<naive isoformat>Z" text.
package['created'] = f"{datetime.now(timezone.utc).replace(tzinfo=None).isoformat()}Z"

In [6]:
# Human-readable summary of the package: contents, units and time convention.
# The adjacent string literals are joined into one description string.
package['description'] = (
    "Realised and forecast values for wind and solar photovoltaic generation and "
    "electrical load. All CF values are in p.u. and load is share of maximum load during year. "
    "All times in UTC."
)

In [7]:
# Upstream data sources listed in the descriptor.
# Each entry has a 'title' plus a 'path' (URL) and/or a contact 'email';
# some entries (REN, Hydro Quebec, ERCOT) carry only an email.
package['sources'] = [
    {'title': 'ENTSO-E Transparency Platform',
     'path': "https://transparency.entsoe.eu/"},
    {'title': "Fraile, Daniel, and Ariola Mbistrova. 2018. 'Wind in Power 2017. "
              "Annual Combined Onshore and Offshore Wind Energy Statistics.' "
              "Edited by Iván Pineda and Pierre Tardieu. WindEurope.",
     'path': "https://windeurope.org/wp-content/uploads/files/about-wind/statistics/WindEurope-Annual-Statistics-2017.pdf"},
     {'title': "Komusanac, Ivan, Daniel Fraile, and Guy Brindley. 2019. "
               "'Wind Energy in Europe in 2018. Trends and Statistics.' "
               "Edited by Colin Walsh and Ivan Pineda. WindEurope. ",
     'path': "https://windeurope.org/wp-content/uploads/files/about-wind/statistics/WindEurope-Annual-Statistics-2018.pdf"},
    {'title': "Eurostat (2020), 'Electricity production capacities by main fuel groups and operator'",
     'path': "https://ec.europa.eu/eurostat/data"},
    {'title': "REN",
     'email': "antonio.couto@lneg.pt"},
    {'title': "Netztransparenz.de",
     'path': "https://www.netztransparenz.de/EEG/Marktpraemie/Online-Hochrechnung-Wind-Onshore",
     'email': "jan.dobschinski@iee.fraunhofer.de"},
    {'title': "Hydro Quebec",
     'email': "Menemenlis.Nickie@ireq.ca"},
    {'title': "ERCOT",
     'email': "kate.doubleday@nrel.gov"},
    {'title': "Svenska kraftnät. 2020. 'Statistik per elområde och timme 2018'",
     'path': "https://www.svk.se/om-kraftsystemet/kraftsystemdata/elstatistik/"},
    {'title': "Energiateollisuus. 2019. 'Sähkön tuntidata 2018'",
     'path': "https://energia.fi/uutishuone/materiaalipankki/sahkon_tuntidata.html"},
    {'title': "Noregs vassdrags- og energidirektorat. Vindkraftdata",
     'path': "https://www.nve.no/energiforsyning/kraftproduksjon/vindkraft/vindkraftdata/"}
]

In [8]:
# People credited in the descriptor: one author and one contributor.
package['contributors'] = [
    {
        "title": "Erkka Rinne",
        "email": "erkka.rinne@vtt.fi",
        "organization": "VTT, Finland",
        "role": "author",
    },
    {
        "title": "Hannele Holttinen",
        "email": "hannele.holttinen@recognis.fi",
        "role": "contributor",
    },
]

In [9]:
# Free-text keywords describing the package.
# Every entry carries its own trailing comma: adjacent string literals without
# a comma are silently concatenated by Python into a single keyword
# (e.g. "electricity" "wind power" becomes "electricitywind power").
package['keywords'] = [
    "energy",
    "electricity",
    "wind power",
    "solar photovoltaics",
    "actual generation",
    "forecast",
    "IEA Wind Task 25",
]

In [10]:
# Time coverage of the data set.
# NOTE(review): START and END are not defined in the visible cells; presumably
# they come from 00-definitions.ipynb. Confirm their type serialises to JSON.
package['temporal'] = {
    "start": START,
    "end": END,
    "resolution": "15, 30 or 60 min depending on area"
}

In [11]:
# Spatial coverage: the keys of AREAS joined into one comma-separated string.
# NOTE(review): AREAS is not defined in the visible cells; presumably a mapping
# keyed by area name/code from 00-definitions.ipynb. Confirm.
package['spatial'] = {
    "name": "IEA Wind Task 25 member countries",
    "location": ", ".join(AREAS.keys())
}

### Infer package resources

In [12]:
# Infer the package resources from the files matching the CSV and XLSX patterns.
# NOTE(review): the patterns are presumably resolved relative to the package
# basepath. Confirm against the frictionless version in use.
package.infer(['*.csv', '*.xlsx'])

Change all `'any'` fields to numeric data type.

In [13]:
# Declare every schema field that is still typed 'any' as numeric.
for resource in package.resources:
    for field in resource.schema.fields:
        # Fields that already have a concrete type are left untouched.
        if field.type != 'any':
            continue
        field.type = 'number'

Remove some unnecessary keys:

In [14]:
# Drop resource keys considered unnecessary for the descriptor.
# NOTE(review): 'schema' is in this list, so the 'any' -> 'number' field type
# changes made in the preceding cell are removed together with the schema.
# Confirm this is intended.
for res in package.resources:
    for key in ('control', 'dialect', 'compression', 'compressionPath',
                'query', 'scheme', 'stats', 'hashing', 'schema'):
        # Only delete keys that are actually present on this resource.
        if key not in res:
            continue
        del res[key]

In [15]:
# Give each resource a title: the Excel workbook gets a fixed label, every
# other resource is titled by its path up to the first dot.
for res in package.resources:
    if res.format == 'xlsx':
        res.title = "Combined data"
    else:
        res.title = res.path.split('.')[0]

### Write data package descriptor

In [16]:
# Write the package descriptor as datapackage.json into the package's base directory.
package.to_json(os.path.join(package.basepath, 'datapackage.json'))