In [1]:
import sqlite3
import pandas as pd

# Load the complete raw_NED_obs_2 table from the SQLite database at
# ../Data/WARP.db into a DataFrame.
conn = sqlite3.connect('../Data/WARP.db')
try:
    df = pd.read_sql_query("SELECT * FROM raw_NED_obs_2", conn)
finally:
    # Release the database handle even when the query fails (e.g. missing table),
    # so a failed run does not leave an open connection behind in the kernel.
    conn.close()

In [2]:
# Quick look at the raw table: schema, first rows, and the distinct 'type' codes.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.head())
print("Unique values in 'type':", df['type'].unique())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12764 entries, 0 to 12763
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   @id                  12764 non-null  object
 1   @type                12764 non-null  object
 2   id                   12764 non-null  object
 3   point                12764 non-null  object
 4   type                 12764 non-null  object
 5   granularity          12764 non-null  object
 6   granularitytimezone  12764 non-null  object
 7   activity             12764 non-null  object
 8   classification       12764 non-null  object
 9   capacity             12764 non-null  object
 10  volume               12764 non-null  object
 11  percentage           12764 non-null  object
 12  emission             12764 non-null  object
 13  emissionfactor       12764 non-null  object
 14  validfrom            12764 non-null  object
 15  validto              12764 non-null  object
 16  last

In [3]:
# Tally how many observations each 'type' code has, then show the result.
type_counts = df.loc[:, 'type'].value_counts()
print(type_counts)

type
/v1/types/1     3191
/v1/types/2     3191
/v1/types/17    3191
/v1/types/20    3191
Name: count, dtype: int64


In [4]:

# Columns of the raw table that are discarded before reshaping.
columns_to_drop = [
    '@id', '@type', 'id', 'point', 'granularity', 'lastupdate',
    'granularitytimezone', 'activity', 'classification', 'capacity',
    'percentage', 'emission', 'emissionfactor', 'validfrom',
]

# Drop the unused columns and shorten the type identifiers
# ('/v1/types/1' -> 'Type_1') in one chained expression; `df` itself is untouched.
df_NED_obs_processed = df.drop(columns=columns_to_drop).assign(
    type=lambda frame: frame['type'].str.replace('/v1/types/', 'Type_')
)

# Sanity checks: null count per remaining column, a preview, and the renamed type labels.
missing_values = df_NED_obs_processed.isna().sum()
print("Missing values per column:\n", missing_values)
print(df_NED_obs_processed.head())

print("Unique values in 'type':", df_NED_obs_processed['type'].unique())



Missing values per column:
 type       0
volume     0
validto    0
dtype: int64
     type   volume                    validto
0  Type_1  6519520  2025-01-01T00:00:00+00:00
1  Type_1  5917659  2025-01-01T01:00:00+00:00
2  Type_1  4994553  2025-01-01T02:00:00+00:00
3  Type_1  4990896  2025-01-01T03:00:00+00:00
4  Type_1  4988425  2025-01-01T04:00:00+00:00
Unique values in 'type': ['Type_1' 'Type_2' 'Type_17' 'Type_20']


In [8]:
# Reshape from long to wide: one row per 'validto' timestamp, one column per 'type'.
# aggfunc='first' keeps the first volume found for each (validto, type) pair, so
# duplicate observations would be silently ignored rather than combined.
df_type_vol = df_NED_obs_processed.pivot_table(
    index='validto',
    columns='type',
    values='volume',
    aggfunc='first'
)

# Append the '_Vol' suffix to every column label (e.g. 'Type_1' -> 'Type_1_Vol').
# Assigning a plain list also removes the 'type' name from the column axis.
df_type_vol.columns = [f"{col}_Vol" for col in df_type_vol.columns]

# 'volume' has object dtype in the raw table, so convert every volume column to a
# numeric dtype. 'validto' is the index at this point, not a column, so no column
# needs to be skipped. errors='coerce' turns unparseable values into NaN instead
# of raising.
df_type_vol = df_type_vol.apply(pd.to_numeric, errors='coerce')

# Move 'validto' out of the index so it becomes an ordinary column.
df_type_vol = df_type_vol.reset_index()

print(df_type_vol.head())

                     validto  Type_1_Vol  Type_17_Vol  Type_2_Vol  Type_20_Vol
0  2025-01-01T00:00:00+00:00     6519520      4158000           0       486250
1  2025-01-01T01:00:00+00:00     5917659      4158000           0       487000
2  2025-01-01T02:00:00+00:00     4994553      4158000           0       487000
3  2025-01-01T03:00:00+00:00     4990896      3930300           0       487000
4  2025-01-01T04:00:00+00:00     4988425      3474900           0       487000


In [9]:
# Persist the wide volume table to the SQLite database.
# if_exists='replace' drops any existing 'transform_ned_obs_2' table before
# inserting; index=False keeps the DataFrame index out of the table.
conn = sqlite3.connect('../Data/WARP.db')
try:
    df_type_vol.to_sql('transform_ned_obs_2', conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write fails, so the handle is not leaked.
    conn.close()

In [10]:
# Reopen the database and read the newly written table back as a sanity check.
conn = sqlite3.connect('../Data/WARP.db')
try:
    df_temp = pd.read_sql_query("SELECT * FROM transform_ned_obs_2", conn)
finally:
    # Always release the database handle, even if the read fails.
    conn.close()

# Feature overview. info() prints its own report and returns None, so it is not
# wrapped in print() (that would add a stray "None" line to the output).
df_temp.info()
print(df_temp.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3191 entries, 0 to 3190
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   validto      3191 non-null   object
 1   Type_1_Vol   3191 non-null   int64 
 2   Type_17_Vol  3191 non-null   int64 
 3   Type_2_Vol   3191 non-null   int64 
 4   Type_20_Vol  3191 non-null   int64 
dtypes: int64(4), object(1)
memory usage: 124.8+ KB
None
                     validto  Type_1_Vol  Type_17_Vol  Type_2_Vol  Type_20_Vol
0  2025-01-01T00:00:00+00:00     6519520      4158000           0       486250
1  2025-01-01T01:00:00+00:00     5917659      4158000           0       487000
2  2025-01-01T02:00:00+00:00     4994553      4158000           0       487000
3  2025-01-01T03:00:00+00:00     4990896      3930300           0       487000
4  2025-01-01T04:00:00+00:00     4988425      3474900           0       487000
