In [50]:
import os
import re
from pathlib import Path
import datetime
import pytz

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from scipy.signal import find_peaks

In [51]:
# Notebook-wide matplotlib defaults: 10x6 figures with the axes grid turned off.
mpl.rcParams.update({
    'figure.figsize': (10, 6),
    'axes.grid': False,
})

def is_gzip_file(filepath):
    """Report whether the file at `filepath` begins with the gzip signature.

    Only the first two bytes are read and compared against 0x1f 0x8b; the
    rest of the file is never touched. A file shorter than two bytes
    therefore yields False.
    """
    gzip_signature = b'\x1f\x8b'
    with open(filepath, mode='rb') as stream:
        leading_bytes = stream.read(len(gzip_signature))
    return leading_bytes == gzip_signature

In [52]:
# Fetch the merged 15-minute dataset through the Keras download helper and
# keep the local path it returns (echoed as the cell output).
DATASET_URL = 'https://itsci.mju.ac.th/downloads/watcharin/datasets/pv/merge_15min_filled.csv.zip'
csv_path = tf.keras.utils.get_file(origin=DATASET_URL)
csv_path

'/Users/watcharinsarachai/.keras/datasets/merge_15min_filled.csv.zip'

In [53]:
# A gzip payload is declared explicitly. For anything else pandas keeps its
# default compression='infer', i.e. it decides from the file extension
# (so this branch is not necessarily "uncompressed").
compression_mode = 'gzip' if is_gzip_file(csv_path) else 'infer'
df = pd.read_csv(csv_path, compression=compression_mode)

row_count = len(df)
print(f"Total rows: {row_count}")
df.head(5)

Total rows: 138238


Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,current_value_of_consumption,external_energy_supply,grid_feed_in,internal_power_supply,self_consumption,temperature_measurement,total_irradiation,...,sp,sshf,ssr,ssrd,str,strd,t2m,tp,u10,v10
0,2021-11-06 11:15:00,30.288889,46519.355556,37280.422222,0.0,9238.933333,37280.422222,37280.422222,58.022222,798.444444,...,96116.322417,-1159087.0,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,0.695138,0.362603
1,2021-11-06 11:30:00,30.076923,47600.714286,39472.626374,0.0,9397.756098,39132.406593,39132.406593,58.263736,812.571429,...,96116.322417,-1159087.0,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,0.695138,0.362603
2,2021-11-06 11:45:00,30.629213,47056.988764,32669.393258,0.0,14387.595506,32669.393258,32669.393258,57.235955,807.224719,...,96116.322417,-1159087.0,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,0.695138,0.362603
3,2021-11-06 12:00:00,31.021978,48415.835165,36126.516484,0.0,12289.318681,36126.516484,36126.516484,56.846154,817.527473,...,96169.262638,-1129156.0,15779500.0,18901260.0,-4231913.0,15486640.0,297.380576,4.261732e-07,0.378476,0.374936
4,2021-11-06 12:15:00,31.831461,49345.404494,36879.966292,0.0,12465.438202,36879.966292,36879.966292,55.842697,816.651685,...,96169.262638,-1129156.0,15779500.0,18901260.0,-4231913.0,15486640.0,297.380576,4.261732e-07,0.378476,0.374936


In [54]:
# Remove these five columns from `df` in place. `pop` raises KeyError when a
# column is absent, which is what happens if this cell is executed twice.
for unwanted_column in (
    'current_value_of_consumption',
    'external_energy_supply',
    'grid_feed_in',
    'internal_power_supply',
    'self_consumption',
):
    df.pop(unwanted_column)
df.head(5)

Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,sp,sshf,ssr,ssrd,str,strd,t2m,tp,u10,v10
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,-1159087.0,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,0.695138,0.362603
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,-1159087.0,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,0.695138,0.362603
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,-1159087.0,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,0.695138,0.362603
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,96169.262638,-1129156.0,15779500.0,18901260.0,-4231913.0,15486640.0,297.380576,4.261732e-07,0.378476,0.374936
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,96169.262638,-1129156.0,15779500.0,18901260.0,-4231913.0,15486640.0,297.380576,4.261732e-07,0.378476,0.374936


In [55]:
# 1. Wind Speed and Direction
def _add_wind_features(frame, u_col, v_col, suffix=''):
    """Replace a pair of wind-component columns with speed and direction.

    When both `u_col` and `v_col` exist in `frame`, they are removed and two
    columns are appended in their place:

    * ``wind_speed<suffix>``     - magnitude sqrt(u**2 + v**2)
    * ``wind_direction<suffix>`` - (270 - atan2(v, u) in degrees) mod 360,
      always in [0, 360)

    Assuming u is the eastward and v the northward component (TODO confirm
    against the data source), that direction is the bearing the wind blows
    *from*, measured clockwise from north.

    The frame is modified in place. Nothing happens (and nothing is printed)
    when either component column is missing.
    """
    if u_col not in frame.columns or v_col not in frame.columns:
        return
    # Pop first, append afterwards: the resulting column order is the same as
    # appending the new columns and then deleting the components.
    u = frame.pop(u_col)
    v = frame.pop(v_col)
    speed_col = f'wind_speed{suffix}'
    frame[speed_col] = np.sqrt(u**2 + v**2)
    frame[f'wind_direction{suffix}'] = (270 - np.arctan2(v, u) * 180 / np.pi) % 360
    # Report the column that was actually created (the 10 m variant used to be
    # announced as plain 'wind_speed').
    print(f"Added '{speed_col}' from '{u_col}' and '{v_col}'")


_add_wind_features(df, 'u', 'v')
_add_wind_features(df, 'u10', 'v10', suffix='10')

df.head(5)

Added 'wind_speed' from 'u' and 'v'
Added 'wind_speed' from 'u10' and 'v10'


Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,ssr,ssrd,str,strd,t2m,tp,wind_speed,wind_direction,wind_speed10,wind_direction10
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,15779500.0,18901260.0,-3907932.0,14236820.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,15779500.0,18901260.0,-4231913.0,15486640.0,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,15779500.0,18901260.0,-4231913.0,15486640.0,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191


In [56]:
# 2. Net Radiation (short-wave + long-wave)
# Both inputs are consumed: 'ssr' and 'str' leave the frame and their sum is
# appended as 'net_radiation'. Skipped silently if either column is missing.
if {'ssr', 'str'}.issubset(df.columns):
    shortwave_net = df.pop('ssr')
    longwave_net = df.pop('str')
    df['net_radiation'] = shortwave_net + longwave_net
    print("✓ Added 'net_radiation' (ssr + str)")

df.head(5)

✓ Added 'net_radiation' (ssr + str)


Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,sshf,ssrd,strd,t2m,tp,wind_speed,wind_direction,wind_speed10,wind_direction10,net_radiation
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,-1159087.0,18901260.0,14236820.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,-1159087.0,18901260.0,14236820.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,-1159087.0,18901260.0,14236820.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,-1129156.0,18901260.0,15486640.0,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,-1129156.0,18901260.0,15486640.0,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0


In [57]:
# 3. Total Downward Radiation
# 'ssrd' and 'strd' are removed from the frame and replaced by their sum,
# appended as 'total_downward_radiation'. No-op when either one is absent.
if {'ssrd', 'strd'}.issubset(df.columns):
    shortwave_down = df.pop('ssrd')
    longwave_down = df.pop('strd')
    df['total_downward_radiation'] = shortwave_down + longwave_down
    print("✓ Added 'total_downward_radiation' (ssrd + strd)")

df.head(5)

✓ Added 'total_downward_radiation' (ssrd + strd)


Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,sp,sshf,t2m,tp,wind_speed,wind_direction,wind_speed10,wind_direction10,net_radiation,total_downward_radiation
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,-1159087.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,-1159087.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,-1159087.0,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,96169.262638,-1129156.0,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,96169.262638,-1129156.0,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0


In [58]:
# 4. Net Heat Flux
# 'slhf' and 'sshf' are taken out of the frame and their sum is appended as
# 'net_heat_flux'. Nothing happens unless both columns are present.
if {'slhf', 'sshf'}.issubset(df.columns):
    latent_flux = df.pop('slhf')
    sensible_flux = df.pop('sshf')
    df['net_heat_flux'] = latent_flux + sensible_flux
    print("✓ Added 'net_heat_flux' (slhf + sshf)")

df.head(5)

✓ Added 'net_heat_flux' (slhf + sshf)


Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,sp,t2m,tp,wind_speed,wind_direction,wind_speed10,wind_direction10,net_radiation,total_downward_radiation,net_heat_flux
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,96116.322417,298.020567,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,96169.262638,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,96169.262638,297.380576,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0


In [59]:
# 5. Dewpoint Temperature (from temperature and relative humidity)
def _dewpoint_kelvin(temp_kelvin, rel_humidity_pct, min_rh_pct=0.01):
    """Approximate the dewpoint in Kelvin with a Magnus-type formula.

    Parameters
    ----------
    temp_kelvin : array-like
        Air temperature in Kelvin (converted to Celsius internally by
        subtracting 273.15).
    rel_humidity_pct : array-like
        Relative humidity in percent (divided by 100 before the logarithm).
    min_rh_pct : float, default 0.01
        Floor applied to the humidity before taking the logarithm. Zero or
        negative humidity values would otherwise make `np.log` return
        -inf/NaN, emit a RuntimeWarning and leave NaN in the result.
        The default is a pragmatic choice - TODO confirm it suits the data.

    Returns
    -------
    Same shape as the inputs: dewpoint in Kelvin. NaN inputs stay NaN.
    """
    a, b = 17.27, 237.7  # Magnus coefficients (b in degrees Celsius)
    temp_celsius = temp_kelvin - 273.15
    # np.maximum propagates NaN, so missing humidity is not silently filled.
    humidity_fraction = np.maximum(rel_humidity_pct, min_rh_pct) / 100
    alpha = (a * temp_celsius) / (b + temp_celsius) + np.log(humidity_fraction)
    return (b * alpha) / (a - alpha) + 273.15


# NOTE(review): both dewpoints use the same 'r' column; whether that humidity
# is representative for the 2 m temperature should be confirmed.
for temp_col, dewpoint_col in (('t', 'dewpoint'), ('t2m', 'dewpoint2m')):
    if temp_col in df.columns and 'r' in df.columns:
        df[dewpoint_col] = _dewpoint_kelvin(df[temp_col], df['r'])
        # Name the column that was really added ('dewpoint2m' used to be
        # reported as 'dewpoint').
        print(f"✓ Added '{dewpoint_col}' from '{temp_col}' and 'r'")

df.head(5)

✓ Added 'dewpoint' from 't' and 'r'
✓ Added 'dewpoint' from 't2m' and 'r'


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0.1,Unnamed: 0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,tp,wind_speed,wind_direction,wind_speed10,wind_direction10,net_radiation,total_downward_radiation,net_heat_flux,dewpoint,dewpoint2m
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0,286.57127,290.419699
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0,286.57127,290.419699


In [60]:
# Relabel whatever column currently sits first as 'datetime'. This is purely
# positional, so it relies on the timestamp column being the leading one.
leading_label = df.columns[0]
df = df.rename({leading_label: 'datetime'}, axis='columns')
df.head(5)

Unnamed: 0,datetime,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,...,tp,wind_speed,wind_direction,wind_speed10,wind_direction10,net_radiation,total_downward_radiation,net_heat_flux,dewpoint,dewpoint2m
0,2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
1,2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
2,2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
3,2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,...,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0,286.57127,290.419699
4,2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,...,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0,286.57127,290.419699


In [61]:
# Move 'datetime' out of the columns and into the index, then order the rows
# by that index. The values are used exactly as they are stored in the
# column - no datetime parsing happens in this cell.
timestamps = df.pop('datetime')
df.index = timestamps
df = df.sort_index()
print(df.shape)
df.head(5)

(138238, 22)


Unnamed: 0_level_0,ambient_temperature,current_power,temperature_measurement,total_irradiation,utci_mean,cc,q,r,t,fal,...,tp,wind_speed,wind_direction,wind_speed10,wind_direction10,net_radiation,total_downward_radiation,net_heat_flux,dewpoint,dewpoint2m
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-11-06 11:15:00,30.288889,46519.355556,58.022222,798.444444,302.213731,0.0,0.011205,63.667006,293.487773,0.16517,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
2021-11-06 11:30:00,30.076923,47600.714286,58.263736,812.571429,302.213731,0.0,0.011205,63.667006,293.487773,0.16517,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
2021-11-06 11:45:00,30.629213,47056.988764,57.235955,807.224719,302.213731,0.0,0.011205,63.667006,293.487773,0.16517,...,4.261732e-07,1.293701,334.553506,0.784027,242.452253,11871570.0,33138070.0,-9581235.0,286.366585,290.650567
2021-11-06 12:00:00,31.021978,48415.835165,56.846154,817.527473,299.223424,0.0,0.011366,65.189689,293.320821,0.16517,...,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0,286.57127,290.419699
2021-11-06 12:15:00,31.831461,49345.404494,55.842697,816.651685,299.223424,0.0,0.011366,65.189689,293.320821,0.16517,...,4.261732e-07,0.795499,321.318021,0.532749,225.269191,11547590.0,34387900.0,-9562669.0,286.57127,290.419699


In [62]:
# Stem of the downloaded file: everything before the first '.' in its name
# (e.g. 'x.csv.zip' -> 'x'). A plain str.split replaces re.split here, whose
# positional `maxsplit` argument is deprecated in recent Python versions and
# whose regex was only matching a literal dot anyway.
filename = Path(csv_path).name
base = filename.split('.', 1)[0]
print(base)

merge_15min_filled


In [63]:
# Persist the enriched frame (index included) as '<base>_added.csv', resolved
# against the current working directory. With a plain '.csv' name and pandas'
# default compression='infer', the file is written uncompressed.
output_csv = f"{base}_added.csv"
df.to_csv(output_csv)