In [7]:
import sqlite3
import pandas as pd

# Load every row of the raw_NED_obs table into a DataFrame.
# try/finally guarantees the connection is closed even when the query raises;
# note that `with conn:` on a sqlite3 connection only manages the transaction
# and does NOT close the connection, so an explicit close() is still required.
conn = sqlite3.connect('../Data/WARP.db')
try:
    df = pd.read_sql_query("SELECT * FROM raw_NED_obs", conn)
finally:
    conn.close()

# Drop the metadata/auxiliary columns; whatever is not listed here is kept
# (the recorded output of this cell shows type, volume and validto remaining).
df_NED_obs_processed = df.drop(columns=[
    '@id', '@type', 'id', 'point', 'granularity', 'lastupdate',
    'granularitytimezone', 'activity', 'classification', 'capacity',
    'percentage', 'emission', 'emissionfactor', 'validfrom',
])

# Rewrite the '/v1/types/' prefix in 'type' to 'Obs_Type_'.
# regex=False makes this a literal substring replacement regardless of the
# installed pandas version (the default for `regex` changed in pandas 2.0).
df_NED_obs_processed['type'] = df_NED_obs_processed['type'].str.replace(
    '/v1/types/', 'Obs_Type_', regex=False
)
# NOTE(review): the recorded output of a later cell lists 'volume' with dtype
# object after the round trip through SQLite — presumably it is stored as text;
# confirm whether a pd.to_numeric conversion belongs here.

# Sanity checks: null counts per column, the last rows, and the distinct types.
missing_values = df_NED_obs_processed.isnull().sum()
print("Missing values per column:\n", missing_values)
print(df_NED_obs_processed.tail())

print("Unique values in 'type':", df_NED_obs_processed['type'].unique())



Missing values per column:
 type       0
volume     0
validto    0
dtype: int64
             type   volume                    validto
29250  Obs_Type_2  2423299  2025-05-03T18:00:00+00:00
29251  Obs_Type_2   973805  2025-05-03T19:00:00+00:00
29252  Obs_Type_2    38333  2025-05-03T20:00:00+00:00
29253  Obs_Type_2        0  2025-05-03T21:00:00+00:00
29254  Obs_Type_2        0  2025-05-03T22:00:00+00:00
Unique values in 'type': ['Obs_Type_2']


In [None]:
# Reshape from long to wide: one row per 'validto' value and one column per
# distinct 'type', filled with 'volume'. When several rows share the same
# (validto, type) pair, aggfunc='first' keeps the first value encountered.
df_NED_obs_pivoted = df_NED_obs_processed.pivot_table(
    values='volume',
    index=['validto'],
    columns='type',
    aggfunc='first',
)

# Preview the result while 'validto' is still the index.
print("Pivoted DataFrame:\n", df_NED_obs_pivoted.head())

# Move 'validto' out of the index so it is an ordinary column again.
df_NED_obs_pivoted = df_NED_obs_pivoted.reset_index()


In [None]:
# Save the processed DataFrame to the 'transform_ned_obs_2' table, replacing
# the table if it already exists. The DataFrame index is not written.
# try/finally ensures the connection is released even if the write fails.
conn = sqlite3.connect('../Data/WARP.db')
try:
    df_NED_obs_processed.to_sql('transform_ned_obs_2', conn, if_exists='replace', index=False)
finally:
    conn.close()

In [11]:
# Reopen the database and load the saved table back into a temporary DataFrame
# to verify what was actually written. try/finally closes the connection even
# if the query raises.
conn = sqlite3.connect('../Data/WARP.db')
try:
    df_temp = pd.read_sql_query("SELECT * FROM transform_ned_obs_2", conn)
finally:
    conn.close()

# Print feature overview.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df_temp.info()
print(df_temp.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29255 entries, 0 to 29254
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   type    29255 non-null  object
 1   volume  29255 non-null  object
 2   date    29255 non-null  object
dtypes: object(3)
memory usage: 685.8+ KB
None
         type volume                       date
0  Obs_Type_2      0  2022-01-01T00:00:00+00:00
1  Obs_Type_2      0  2022-01-01T01:00:00+00:00
2  Obs_Type_2      0  2022-01-01T02:00:00+00:00
3  Obs_Type_2      0  2022-01-01T03:00:00+00:00
4  Obs_Type_2      0  2022-01-01T04:00:00+00:00
