In [1]:
import os
import shutil
import random
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import HeteroData

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


#### Set random seed

In [2]:
# One shared seed for every random number generator the notebook touches:
# Python's `random`, NumPy's legacy global RNG, and PyTorch (CPU + CUDA).
seed = 10
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(seed)

#### Load graph structure

In [3]:
# Load graph_structure data: node features, edge attributes and edge indices.
#
# The dataset root defaults to the original absolute location; set the
# LYLASCASE_DIR environment variable to run the notebook on another machine
# without editing every path.
graph_structure_dir = os.path.join(
    os.environ.get('LYLASCASE_DIR',
                   '/Users/lylakiani/Desktop/Research/power_system_project/Lylascase'),
    'graph_structure',
)


def _read_graph_csv(filename):
    """Read one CSV from graph_structure_dir (first column is the index, first row the header)."""
    return pd.read_csv(os.path.join(graph_structure_dir, filename), index_col=0, header=0)


def _check_edge_tables(edge_type, index_tensor, attr_array):
    """Raise ValueError if an edge index is not (2, E) or its attribute table does not have E rows."""
    if index_tensor.dim() != 2 or index_tensor.shape[0] != 2:
        raise ValueError(f"{edge_type}_index must have shape (2, num_edges), "
                         f"got {tuple(index_tensor.shape)}")
    if attr_array.shape[0] != index_tensor.shape[1]:
        raise ValueError(f"{edge_type}: {attr_array.shape[0]} attribute rows "
                         f"for {index_tensor.shape[1]} edges")


# Node feature table: force a float dtype and replace missing entries with 0.
graph_structure = _read_graph_csv('graph_structure.csv').astype(float).fillna(value=0)

# Per-edge attribute tables, kept as NumPy arrays (converted to tensors when samples are built).
branch_attr_normalized = _read_graph_csv('branch_attr_normalized.csv').to_numpy()
trafo_attr_normalized = _read_graph_csv('trafo_attr_normalized.csv').to_numpy()

## Convert edge index to torch tensors
# The tables are transposed so that each column describes one edge; the names
# are bound once (no ndarray -> tensor rebinding), so re-running the cell is safe.
branch_index = torch.tensor(_read_graph_csv('branch_index.csv').to_numpy().T, dtype=torch.long)
trafo_index = torch.tensor(_read_graph_csv('trafo_index.csv').to_numpy().T, dtype=torch.long)

# Fail here, with a readable message, rather than deep inside the model code.
_check_edge_tables('branch', branch_index, branch_attr_normalized)
_check_edge_tables('trafo', trafo_index, trafo_attr_normalized)

#### Load normalized demand and generation data

In [4]:
# Load the normalized training series: demand, generation and maximum generation.
#
# The dataset root defaults to the original absolute location; set the
# LYLASCASE_DIR environment variable to run the notebook on another machine.
normalized_data_dir = os.path.join(
    os.environ.get('LYLASCASE_DIR',
                   '/Users/lylakiani/Desktop/Research/power_system_project/Lylascase'),
    'normalized_data',
)


def _read_normalized_csv(filename):
    """Read one CSV from normalized_data_dir as a NumPy array (first column index, first row header)."""
    return pd.read_csv(os.path.join(normalized_data_dir, filename),
                       index_col=0, header=0).to_numpy()


# Read load data
load_train_normalized = _read_normalized_csv('load_train_normalized.csv')

# Load generation data
gen_train_normalized = _read_normalized_csv('gen_train_normalized.csv')

# Load max generation data
max_gen_train_normalized = _read_normalized_csv('max_gen_train_normalized.csv')

# Later cells take column i of all three arrays as sample i and index the two
# generation arrays with the same row mask, so these dimensions must agree.
if not (load_train_normalized.shape[1]
        == gen_train_normalized.shape[1]
        == max_gen_train_normalized.shape[1]):
    raise ValueError("load / gen / max_gen arrays disagree on the number of samples (columns)")
if gen_train_normalized.shape[0] != max_gen_train_normalized.shape[0]:
    raise ValueError("gen and max_gen arrays disagree on the number of generators (rows)")

load_train_normalized.shape, gen_train_normalized.shape, max_gen_train_normalized.shape

((621, 700), (259, 700), (259, 700))

#### Read gen and load buses

In [5]:
# Load the bus index vectors (wind, generator and load buses).
#
# The dataset root defaults to the original absolute location; set the
# LYLASCASE_DIR environment variable to run the notebook on another machine.
graph_structure_dir = os.path.join(
    os.environ.get('LYLASCASE_DIR',
                   '/Users/lylakiani/Desktop/Research/power_system_project/Lylascase'),
    'graph_structure',
)


def _read_bus_vector(filename, as_bool=True):
    """Read a header-less CSV from graph_structure_dir and return it as a flat array.

    With as_bool=True (the default) the values are cast to bool before flattening.
    """
    values = pd.read_csv(os.path.join(graph_structure_dir, filename),
                         index_col=None, header=None).to_numpy()
    if as_bool:
        values = values.astype(bool)
    return values.flatten()


# Load wind bus bidx
wind_bus = _read_bus_vector('_wind_bus.csv', as_bool=False)  # kept in its raw dtype
wind_bus_bidx = _read_bus_vector('_wind_bus_bidx.csv')
wind_bus_in_all_bidx = _read_bus_vector('_wind_bus_in_all_bidx.csv')

# Load gen and load bus bidx
gen_bus_bidx = _read_bus_vector('_gen_bus_bidx.csv')
load_bus_bidx = _read_bus_vector('_load_bus_bidx.csv')

# NOTE(review): in the recorded run wind_bus_bidx has length 294 and gen_bus_bidx
# length 880, while the generation arrays have 259 rows. These files look like
# they belong to a different case than the training data -- verify before
# trusting them (a later cell currently overwrites them with random masks).
wind_bus.shape, wind_bus_bidx.shape, wind_bus_in_all_bidx.shape, gen_bus_bidx.shape, load_bus_bidx.shape

((439,), (294,), (1354,), (880,), (1354,))

#### Generate training data

In [6]:
## Load thermal generator bidx
# The dataset root defaults to the original absolute location; set the
# LYLASCASE_DIR environment variable to run the notebook on another machine.
graph_structure_dir = os.path.join(
    os.environ.get('LYLASCASE_DIR',
                   '/Users/lylakiani/Desktop/Research/power_system_project/Lylascase'),
    'graph_structure',
)
thermal_bus_bidx = pd.read_csv(os.path.join(graph_structure_dir, '_thermal_bus_bidx.csv'),
                               index_col=None,
                               header=None).to_numpy().astype(bool).flatten()

## Create training mask
# Boolean tensor copy of thermal_bus_bidx; it is attached to every sample as `data.mask`.
mask = torch.tensor(thermal_bus_bidx, dtype=torch.bool)

In [17]:
# Define train_size based on the number of columns in load_train_normalized
train_size = load_train_normalized.shape[1]

# ---------------------------------------------------------------------------
# WARNING(review): PLACEHOLDER BUS MASKS.
# The masks loaded from disk do not fit this case (their lengths differ from
# the number of generator rows), so this cell fabricates random masks of the
# right size. Generation values are therefore written to randomly chosen
# buses; replace these masks with the real bus indices before drawing any
# conclusion from a model trained on `data_list`.
# ---------------------------------------------------------------------------
num_nodes = graph_structure.shape[0]
num_gens = max_gen_train_normalized.shape[0]  # one True per generator row (259 in the recorded run)
num_wind_gens = 32                            # placeholder count; TODO confirm the real number

# A private generator seeded from the notebook seed makes the cell idempotent:
# re-running it reproduces the same masks instead of advancing the global RNG.
mask_rng = np.random.default_rng(seed)

# Placeholder generator-bus mask over all nodes.
correct_gen_bus_bidx = np.zeros(num_nodes, dtype=bool)
correct_gen_bus_bidx[mask_rng.choice(num_nodes, size=num_gens, replace=False)] = True

# Check the new boolean mask
print("Number of True values in correct_gen_bus_bidx:", np.sum(correct_gen_bus_bidx))

# Use this new mask in your assignment
gen_bus_bidx = correct_gen_bus_bidx

# Placeholder wind mask over the generator rows (first dimension of max_gen_train_normalized).
wind_bus_bidx_corrected = np.zeros(num_gens, dtype=bool)
wind_bus_bidx_corrected[mask_rng.choice(num_gens, size=num_wind_gens, replace=False)] = True

# Wind mask over all nodes, DERIVED from the two masks above instead of being
# shuffled independently. The assignments below map generator row k to the
# k-th True entry of gen_bus_bidx, so the bus of a wind generator is the
# generator bus at that same row position. This keeps wind buses a subset of
# generator buses and preserves the row order used by the `.loc` assignment.
gen_bus_positions = np.flatnonzero(correct_gen_bus_bidx)
wind_bus_in_all_bidx_corrected = np.zeros(num_nodes, dtype=bool)
wind_bus_in_all_bidx_corrected[gen_bus_positions[wind_bus_bidx_corrected]] = True

# Ensure the corrected masks have the correct lengths and values
print("Number of True values in wind_bus_bidx_corrected:", np.sum(wind_bus_bidx_corrected))
print("Number of True values in wind_bus_in_all_bidx_corrected:", np.sum(wind_bus_in_all_bidx_corrected))

# Use the corrected boolean masks in your assignment
wind_bus_bidx = wind_bus_bidx_corrected
wind_bus_in_all_bidx = wind_bus_in_all_bidx_corrected

# The check is loop-invariant, so do it once and stop on a mismatch rather than
# printing a message per sample and silently keeping stale feature values.
if int(np.sum(wind_bus_bidx)) != int(np.sum(wind_bus_in_all_bidx)):
    raise ValueError(
        f"Mismatch in assignment lengths: {int(np.sum(wind_bus_bidx))} vs {int(np.sum(wind_bus_in_all_bidx))}")

# Edge attributes do not change between samples: convert them once and share
# the tensors, exactly as the edge indices are already shared.
branch_attr = torch.from_numpy(branch_attr_normalized).float()
trafo_attr = torch.from_numpy(trafo_attr_normalized).float()

# Work on a copy so the loaded node table is not mutated; otherwise values
# written at a previous run's mask positions would leak into later samples.
node_features = graph_structure.copy()

# Initialize the data_list
data_list = []

for i in range(train_size):
    # Column i of each series is sample i; write it into the per-node feature columns.
    node_features.loc[load_bus_bidx, 'load_p_mw'] = load_train_normalized[:, i]
    node_features.loc[gen_bus_bidx, 'max_gen_p_mw'] = max_gen_train_normalized[:, i]
    node_features.loc[wind_bus_in_all_bidx, 'min_gen_p_mw'] = max_gen_train_normalized[wind_bus_bidx, i]

    # torch.tensor always copies, so each sample owns its own feature snapshot.
    X = torch.tensor(node_features.to_numpy(), dtype=torch.float)

    # Targets: generation of every generator row that is not flagged as wind.
    y = torch.from_numpy(gen_train_normalized[~wind_bus_bidx, i].flatten()).float()

    data = HeteroData()
    data['node'].x = X
    data['node'].y = y
    data.mask = mask

    # Set edge index
    data['node', 'branch', 'node'].edge_index = branch_index
    data['node', 'trafo', 'node'].edge_index = trafo_index
    data['node', 'branch', 'node'].edge_attr = branch_attr
    data['node', 'trafo', 'node'].edge_attr = trafo_attr

    data_list.append(data)


Number of True values in correct_gen_bus_bidx: 259
Number of True values in wind_bus_bidx_corrected: 32
Number of True values in wind_bus_in_all_bidx_corrected: 32


In [18]:
# Store multiple Data() in a list
# NOTE(review): this cell rebuilds `data_list` from scratch and repeats the loop
# of the preceding cell. It only works with gen_bus_bidx / wind_bus_bidx /
# wind_bus_in_all_bidx as reassigned there; the masks loaded from disk have
# lengths that do not fit these arrays.
data_list = []

train_size = load_train_normalized.shape[1]


def _require_selected(mask_name, bus_mask, expected):
    """Raise ValueError unless bus_mask selects exactly `expected` rows."""
    selected = int(np.sum(bus_mask))
    if selected != expected:
        raise ValueError(f"{mask_name} selects {selected} rows but {expected} values are assigned to them")


# Validate the mask/value pairing once, with a message that names the culprit,
# instead of failing inside pandas on the first iteration.
_require_selected('load_bus_bidx', load_bus_bidx, load_train_normalized.shape[0])
_require_selected('gen_bus_bidx', gen_bus_bidx, max_gen_train_normalized.shape[0])
_require_selected('wind_bus_in_all_bidx', wind_bus_in_all_bidx, int(np.sum(wind_bus_bidx)))

# Edge attributes are identical for every sample: convert them once and share
# the tensors, as is already done for the edge indices.
branch_attr = torch.from_numpy(branch_attr_normalized).float()
trafo_attr = torch.from_numpy(trafo_attr_normalized).float()

# Fill a working copy so the loaded node table is left untouched and the cell
# gives the same result however often it is re-run.
node_features = graph_structure.copy()

for i in range(train_size):
    # Column i of each series is sample i; write it into the per-node feature columns.
    node_features.loc[load_bus_bidx, 'load_p_mw'] = load_train_normalized[:, i]
    node_features.loc[gen_bus_bidx, 'max_gen_p_mw'] = max_gen_train_normalized[:, i]
    node_features.loc[wind_bus_in_all_bidx, 'min_gen_p_mw'] = max_gen_train_normalized[wind_bus_bidx, i]

    # torch.tensor always copies, so each sample owns its own feature snapshot.
    X = torch.tensor(node_features.to_numpy(), dtype=torch.float)

    # Targets: generation of every generator row that is not flagged as wind.
    y = torch.from_numpy(gen_train_normalized[~wind_bus_bidx, i].flatten()).float()

    data = HeteroData()
    data['node'].x = X
    data['node'].y = y
    data.mask = mask

    # Set edge index
    data['node', 'branch', 'node'].edge_index = branch_index
    data['node', 'trafo', 'node'].edge_index = trafo_index
    data['node', 'branch', 'node'].edge_attr = branch_attr
    data['node', 'trafo', 'node'].edge_attr = trafo_attr

    data_list.append(data)

#### Generate train dataset

In [21]:
# Delete a previously generated dataset directory, if there is one
# (presumably so the dataset is regenerated from the current data_list -- confirm
# against CustomDataset's caching behaviour).
# NOTE: `dir` shadows the built-in of the same name, but the next cell passes it
# as `root=dir`, so the name is kept.
dir = 'train_dataset'
if os.path.isdir(dir):
    shutil.rmtree(dir)
    print('The old dataset has been deleted!')
else:
    print('There is no such directory!')

There is no such firectory!


In [22]:
# Instantiate the project's CustomDataset with `dir` as its root and the samples
# collected in `data_list` (presumably this materializes the dataset under
# `dir` -- confirm in CustomDataset).
# NOTE(review): CustomDataset is a project-local module that is not defined in
# this notebook. The recorded run failed with ModuleNotFoundError, so
# CustomDataset.py must be importable (e.g. placed next to the notebook or added
# to sys.path) before this cell can run.
from CustomDataset import CustomDataset
CustomDataset(root=dir, data_list=data_list)

ModuleNotFoundError: No module named 'CustomDataset'