In [30]:
from zipfile import ZipFile
from tempfile import TemporaryDirectory
import re
import os
from multiprocessing.dummy import Pool

import pandas as pd

In [31]:
# Get definitions: execute the companion notebook "00-definitions.ipynb" before anything else.
# NOTE(review): the cells in this section only use names imported above, so it is
# unclear from here what the definitions notebook contributes — confirm it is needed.
%run "00-definitions.ipynb"

## Other data sources

### Using data from Energiateollisuus for Finnish wind power generation

In [32]:
# Hourly Finnish wind generation: take the wind-power column of the
# Energiateollisuus workbook as a Series.
ts = pd.read_excel('../data/external/Energiateollisuus_tuntidata_2018.xlsx')['TUULIVOIMA (MWh)\nWind Power']

# Replace the positional index with an hourly Europe/Helsinki index covering 2018.
# 'h' is used instead of the upper-case '1H' alias, which newer pandas deprecates.
# NOTE(review): assumes the sheet has exactly one row per elapsed hour of 2018 in
# Finnish local time — confirm against the workbook.
ts.index = pd.date_range('2018-01-01 00:00', '2018-12-31 23:00', freq='h', tz='Europe/Helsinki')

# Store as a single-column table labelled 'FI', with timestamps in UTC.
df = pd.DataFrame({'FI': ts.tz_convert('UTC')})
df.to_csv('../data/raw/others/Energiateollisuus_generation_Wind Onshore.csv', header=True)

### Using data from NVE for Norway

In [33]:
# Wind production from NVE: semicolon-separated, latin1-encoded, decimal comma,
# three leading lines skipped. Only the timestamp column (0) and the first
# value column (1) are read.
#
# `read_csv(..., squeeze=True)` no longer exists in pandas >= 2.0, so the
# one-column frame is turned into a Series with `.squeeze('columns')` instead.
# `infer_datetime_format` is omitted: it is deprecated and has no effect in
# pandas >= 2.0, and was only a parsing speed-up before that.
ts = pd.read_csv('../data/external/vindproduksjon_2002-2019_faktisk_produksjon.csv',
                 engine='python', encoding='latin1',
                 sep=';', skiprows=3,
                 index_col=0,
                 parse_dates=True,
                 decimal=',',
                 usecols=[0, 1],
                ).squeeze('columns').tz_localize('Etc/GMT-1')

# Store as a single-column table labelled 'NO', with timestamps in UTC.
df = pd.DataFrame({'NO': ts.tz_convert('UTC')})
df.to_csv('../data/raw/others/NVE_generation_Wind Onshore.csv', header=True)

### Using ERCOT data for Texas

In [34]:
# Get csv archives from the master archive and read one row from each of them.
master_archive_path = '../data/external/IEA data/WPP_Hrly_Avg_Actual_and_Forecasted_Values_by_Geo_Region_14787.zip'

# Names of the nested archives to use: reports stamped with any 2018 date or
# with 20190102. Written as a raw string so that `\.` and `\d` reach the regex
# engine instead of being read as (invalid) string escapes; the dot after
# "cdr" is escaped too so it matches a literal dot only.
r = re.compile(r'^cdr\.00014787\.0{16}\.(2018\d{4}|20190102)\.\d{9}\.WPPHRLYAVGACTGEONP4742_csv\.zip$')

with ZipFile(master_archive_path) as master_archive, \
     TemporaryDirectory() as tmpdir:
    sub_archives = master_archive.namelist()
    csv_zip_files = [member for member in sub_archives if r.match(member)]
    print(f"Extracting files to {tmpdir}. . .")
    # The nested archives are extracted to a temporary directory that is
    # removed again when the `with` block exits.
    extracted = [master_archive.extract(member, path=tmpdir) for member in csv_zip_files]
    if not extracted:
        raise RuntimeError("No files extracted")
    print("Reading files. . .")
    # Only the first three columns and the first data row of every report are kept.
    ercot_data = [pd.read_csv(filepath, usecols=[0, 1, 2], nrows=1) for filepath in extracted]
    print("Done!")

Extracting files to C:\Users\ERERKKA\AppData\Local\Temp\tmplxb6_a47. . .
Reading files. . .
Done!


In [35]:
# Stack the per-report rows into one table and derive a timestamp for each row:
# delivery date plus (hour ending - 1) hours, i.e. the start of the hour.
df = (
    pd.concat(ercot_data, ignore_index=True)
    .assign(
        date=lambda rows: pd.to_datetime(rows['DELIVERY_DATE']),
        time=lambda rows: pd.to_timedelta(rows['HOUR_ENDING'] - 1, unit='hour'),
        datetime=lambda rows: rows['date'] + rows['time'],
    )
)

# Index by that timestamp, declared as the fixed-offset zone 'Etc/GMT+6', in time order.
# NOTE(review): a fixed offset ignores daylight saving — confirm this matches the source data.
ercot_indexed = df.set_index('datetime')
ercot_df = ercot_indexed.tz_localize('Etc/GMT+6').sort_index()

In [36]:
# System-wide actual values with timestamps in UTC; when a timestamp occurs
# more than once only its first occurrence is written out.
ts = ercot_df['ACTUAL_SYSTEM_WIDE'].tz_convert('UTC')
is_repeated = ts.index.duplicated(keep='first')
df = pd.DataFrame({'US-TX': ts.loc[~is_repeated]})
df.to_csv('../data/raw/others/ERCOT_generation_Wind Onshore.csv', header=True)

### SVK data for Sweden

In [37]:
# Read the SVK sheet as-is: no row is interpreted as a header (the multi-row
# header is assembled from the raw rows in a later cell) and the sheet row
# with index 4 is skipped.
svk_excel_raw = pd.read_excel(
    '../data/external/timvarden-2018-01-12.xls',
    header=None,
    skiprows=[4],
)
svk_excel_raw.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,40
0,,Timmätt förbr,Timmätt förbr,Timmätt förbr,Timmätt förbr,Avkopplingsb.,Avkopplingsb.,Avkopplingsb.,Avkopplingsb.,Ospec.,...,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,,,
1,,exkl. avk.last,exkl. avk.last,exkl. avk.last,exkl. avk.last,last,last,last,last,produktion,...,förbrukning,förbrukning,förbrukning,förluster,förluster,förluster,förluster,,,
2,2018,SE1,SE2,SE3,SE4,SE1,SE2,SE3,SE4,SE1,...,SE2,SE3,SE4,SE1,SE2,SE3,SE4,,,
3,,MWh,MWh,MWh,MWh,MWh,MWh,MWh,MWh,MWh,...,MWh,MWh,MWh,MWh,MWh,MWh,MWh,,,
4,1.1.2018 0:00,-879.546,-1324.91,-5082.33,-1032.21,-15.1393,-4.28215,-35.3795,-1.82,0,...,-677.513,-4146.39,-1315.83,-28.0754,-52.2888,-312.81,-94.713,,,


In [38]:
# Split the raw sheet: rows 0-3 are the four header levels, rows from 4 onwards
# are data; column 0 holds the day-first timestamps, the other columns the values.
svk_header_rows = svk_excel_raw.loc[:3, 1:]
svk_value_rows = svk_excel_raw.loc[4:, 1:]

# `infer_datetime_format` is omitted: it is deprecated and has no effect in
# pandas >= 2.0, and was only a parsing speed-up before that.
svk_timestamps = pd.to_datetime(svk_excel_raw.loc[4:, 0], dayfirst=True)

# Rebuild the table with a four-level column index and a time index declared
# as the fixed-offset zone 'Etc/GMT-1', sorted by time.
svk_data = pd.DataFrame(
    data=svk_value_rows.values,
    index=svk_timestamps,
    columns=pd.MultiIndex.from_arrays(svk_header_rows.values),
).tz_localize('Etc/GMT-1').sort_index()
svk_data.index.name = None
svk_data.head()

Unnamed: 0_level_0,Timmätt förbr,Timmätt förbr,Timmätt förbr,Timmätt förbr,Avkopplingsb.,Avkopplingsb.,Avkopplingsb.,Avkopplingsb.,Ospec.,Ospec.,...,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,Schablonleverans,NaN,NaN,NaN
Unnamed: 0_level_1,exkl. avk.last,exkl. avk.last,exkl. avk.last,exkl. avk.last,last,last,last,last,produktion,produktion,...,förbrukning,förbrukning,förbrukning,förluster,förluster,förluster,förluster,NaN,NaN,NaN
Unnamed: 0_level_2,SE1,SE2,SE3,SE4,SE1,SE2,SE3,SE4,SE1,SE2,...,SE2,SE3,SE4,SE1,SE2,SE3,SE4,NaN,NaN,NaN
Unnamed: 0_level_3,MWh,MWh,MWh,MWh,MWh,MWh,MWh,MWh,MWh,MWh,...,MWh,MWh,MWh,MWh,MWh,MWh,MWh,NaN,NaN.1,NaN.2
2018-01-01 00:00:00+01:00,-879.546,-1324.91,-5082.33,-1032.21,-15.1393,-4.28215,-35.3795,-1.82,0.0,0.01062,...,-677.513,-4146.39,-1315.83,-28.0754,-52.2888,-312.81,-94.713,,,
2018-01-01 01:00:00+01:00,-854.413,-1301.17,-5068.95,-1021.14,-8.58808,-4.34295,-33.0901,-2.034,0.554,0.00833,...,-655.94,-3985.22,-1266.76,-27.4643,-50.5742,-299.41,-91.1751,,,
2018-01-01 02:00:00+01:00,-849.282,-1263.35,-5045.62,-1002.61,-15.3682,-4.2976,-32.9179,-2.21,0.771,0.01621,...,-635.339,-3840.44,-1214.78,-26.685,-48.9437,-288.457,-87.4107,,,
2018-01-01 03:00:00+01:00,-855.437,-1261.09,-5018.43,-1009.78,-10.7719,-4.3247,-32.6601,-1.98,1.0132,0.0207,...,-619.271,-3693.67,-1162.92,-25.9914,-47.6821,-277.179,-83.6394,,,
2018-01-01 04:00:00+01:00,-864.75,-1257.59,-5010.12,-1034.71,-15.1831,-4.49015,-32.1975,-2.072,0.765,0.00064,...,-613.083,-3609.15,-1136.42,-25.75,-47.1892,-270.631,-81.6979,,,


In [39]:
# Swedish wind: row-wise total of every column under the 'Vindkraft' top-level
# header, with timestamps converted to UTC.
wind_columns = svk_data['Vindkraft']
df = pd.DataFrame({'SE': wind_columns.sum(axis=1).tz_convert('UTC')})
df.to_csv('../data/raw/others/SVK_generation_Wind Onshore.csv')

In [40]:
# Swedish solar: row-wise total of every column under the 'Solkraft' top-level
# header, with timestamps converted to UTC.
solar_columns = svk_data['Solkraft']
df = pd.DataFrame({'SE': solar_columns.sum(axis=1).tz_convert('UTC')})
df.to_csv('../data/raw/others/SVK_generation_Solar.csv')

### REN data for Portugal

Data was delivered by Antonio Couto <antonio.couto@lneg.pt>

In [41]:
# Portuguese wind and solar PV workbook: the first sheet column is parsed as
# the datetime index, which is then declared to be in the 'Etc/GMT+0' zone.
ren_excel_path = '../data/external/Wind&SolarPV_AveragePower_2018Portugal_15min.xlsx'
ren_data = pd.read_excel(ren_excel_path, index_col=0, parse_dates=True)
ren_data = ren_data.tz_localize('Etc/GMT+0')
ren_data.head()

Unnamed: 0_level_0,Wind,Solar
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01 00:00:00+00:00,1570.3,0.0
2018-01-01 00:15:00+00:00,1577.5,0.0
2018-01-01 00:29:59.990000+00:00,1619.8,0.0
2018-01-01 00:44:59.985000+00:00,1627.1,0.0
2018-01-01 00:59:59.980000+00:00,1642.3,0.0


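The timestamps read from the workbook drift by fractions of a second (e.g. 00:29:59.990 instead of 00:30:00), so let’s round them to the closest minute.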

In [42]:
# Snap every timestamp to the nearest whole minute (re-running this cell is harmless).
ren_data.index = ren_data.index.round(freq='min')

In [43]:
# Portuguese wind only, timestamps converted to UTC, stored under the 'PT' label.
pt_wind_utc = ren_data['Wind'].tz_convert('UTC')
df = pd.DataFrame({'PT': pt_wind_utc})
df.to_csv('../data/raw/others/REN_generation_Wind Onshore.csv')