In [1]:
import os
import shutil
import random
import numpy as np
import pandas as pd

import torch
from torch_geometric.data import Data

#### Read files

###### Power grid info

In [2]:
# Bus labels 1..num_buses
num_buses = 118
buses = np.arange(num_buses) + 1


def _read_bus_list(csv_path):
    """Load a header-less, index-less CSV of bus numbers into a DataFrame."""
    return pd.read_csv(csv_path, header=None, index_col=None)


# Buses hosting any generator
gen_buses = _read_bus_list('../../data/IEEE_Case118/zones/gen_bus.csv')

# Buses hosting thermal generators
thermal_buses = _read_bus_list('../../data/IEEE_Case118/zones/thermal_bus.csv')

# Buses hosting wind generators
wind_buses = _read_bus_list('../../data/IEEE_Case118/zones/wind_bus.csv')

# Buses hosting loads
load_buses = _read_bus_list('../../data/IEEE_Case118/zones/load_bus.csv')

#### Create node features

###### Create Boolean idx for thermals, winds, and loads

In [3]:
# Boolean membership masks over `buses`: entry k is True when buses[k] is
# listed in the corresponding bus table (thermal, wind, load, in that order).
thermal_bidx, wind_bidx, load_bidx = (
    np.isin(buses, bus_group)
    for bus_group in (thermal_buses, wind_buses, load_buses)
)

###### Build the thermal generator feature matrix

In [4]:
# Thermal generator features (the final feature set is still to be determined).
# Every parameter table is a header-less CSV read with the same options.
csv_opts = dict(header=None, index_col=None)

Pmax = pd.read_csv('../../data/IEEE_Case118/gen_params/Pmax.csv', **csv_opts)
Pmin = pd.read_csv('../../data/IEEE_Case118/gen_params/Pmin.csv', **csv_opts)
ramp_30 = pd.read_csv('../../data/IEEE_Case118/gen_params/ramp_30.csv', **csv_opts)
startup_cost = pd.read_csv('../../data/IEEE_Case118/gen_params/startup_cost.csv', **csv_opts)
shutdown_cost = pd.read_csv('../../data/IEEE_Case118/gen_params/shutdown_cost.csv', **csv_opts)
gencost_params = pd.read_csv('../../data/IEEE_Case118/gen_params/gencost_params.csv', **csv_opts)
reserve_qty = pd.read_csv('../../data/IEEE_Case118/gen_params/reserve_qty.csv', **csv_opts)
reserve_cost = pd.read_csv('../../data/IEEE_Case118/gen_params/reserve_cost.csv', **csv_opts)

# Flat Boolean mask over the generator list: True where the generator's bus
# is a thermal bus
thermal_in_gen = np.isin(gen_buses, thermal_buses).ravel()

# Side-by-side parameter columns, one row per generator
all_gen_params = np.concatenate(
    [Pmax, Pmin, ramp_30, startup_cost, shutdown_cost, gencost_params],
    axis=1,
)

# Keep the thermal rows only, then append the reserve columns.
# NOTE(review): reserve_qty / reserve_cost are not filtered by the mask, so
# they presumably already hold one row per thermal unit, in the same order;
# confirm against the data files.
thermal_gen_features = np.concatenate(
    [all_gen_params[thermal_in_gen], reserve_qty, reserve_cost],
    axis=1,
)

###### Assign thermal, wind, and load features to the node feature matrix

In [5]:
# Number of time steps per sample
nt = 12

# Nominal sample count (not referenced by the loop below)
num_samples = 1000

# One node feature matrix per successfully loaded sample
x = []

# Width of each feature block: thermal parameters, then wind, then load
num_thermal_features = thermal_gen_features.shape[1]
num_wind_features = nt
num_load_features = nt

# Column ranges occupied by the wind block and the trailing load block
wind_cols = slice(num_thermal_features, num_thermal_features + num_wind_features)
load_cols = slice(-num_load_features, None)

# Template matrix: one row per bus, all-zero except the static thermal block
feature_width = num_thermal_features + num_wind_features + num_load_features
node_features = np.zeros((buses.shape[0], feature_width))
node_features[thermal_bidx, :num_thermal_features] = thermal_gen_features

# Fill the wind and load blocks sample by sample
max_num = 1181  # This number is manually determined
for i in range(max_num):
    wind_path = f'../../data/IEEE_Case118/inputs/wind/sample_{i+1}.csv'
    load_path = f'../../data/IEEE_Case118/inputs/load/sample_{i+1}.csv'

    try:
        wind = pd.read_csv(wind_path, header=None, index_col=None)
        load = pd.read_csv(load_path, header=None, index_col=None)
    except FileNotFoundError:
        # A sample is skipped when either of its input files is missing
        print(f'The file sample_{i+1} is not found')
    else:
        # Overwrite the per-sample blocks in the shared template and store a
        # snapshot, so later iterations do not alter earlier entries of x
        node_features[wind_bidx, wind_cols] = wind
        node_features[load_bidx, load_cols] = load
        x.append(node_features.copy())

The file sample_8 is not found
The file sample_11 is not found
The file sample_22 is not found
The file sample_31 is not found
The file sample_41 is not found
The file sample_44 is not found
The file sample_45 is not found
The file sample_51 is not found
The file sample_60 is not found
The file sample_64 is not found
The file sample_76 is not found
The file sample_84 is not found
The file sample_86 is not found
The file sample_90 is not found
The file sample_93 is not found
The file sample_94 is not found
The file sample_101 is not found
The file sample_109 is not found
The file sample_110 is not found
The file sample_111 is not found
The file sample_117 is not found
The file sample_125 is not found
The file sample_130 is not found
The file sample_131 is not found
The file sample_135 is not found
The file sample_137 is not found
The file sample_153 is not found
The file sample_158 is not found
The file sample_164 is not found
The file sample_169 is not found
The file sample_183 is not 

#### Create training labels (UC)

In [6]:
# One label matrix per successfully loaded MATPOWER UC solution
y = []

# Template of shape [num_buses, nt]; rows of non-thermal buses stay zero
all_UC = np.zeros((num_buses, nt))

for i in range(max_num):
    UC_path = f'../../data/IEEE_Case118/outputs/UC/sample_{i+1}.csv'
    try:
        UC = pd.read_csv(UC_path, header=None, index_col=None).to_numpy()
    except FileNotFoundError:
        # Samples without a stored UC solution are skipped
        print(f'The file sample_{i+1} is not found')
    else:
        # Write the solution into the thermal rows and store a snapshot, so
        # the shared template can be reused by the next sample
        all_UC[thermal_bidx] = UC
        y.append(all_UC.copy())

The file sample_8 is not found
The file sample_11 is not found
The file sample_22 is not found
The file sample_31 is not found
The file sample_41 is not found
The file sample_44 is not found
The file sample_45 is not found
The file sample_51 is not found
The file sample_60 is not found
The file sample_64 is not found
The file sample_76 is not found
The file sample_84 is not found
The file sample_86 is not found
The file sample_90 is not found
The file sample_93 is not found
The file sample_94 is not found
The file sample_101 is not found
The file sample_109 is not found
The file sample_110 is not found
The file sample_111 is not found
The file sample_117 is not found
The file sample_125 is not found
The file sample_130 is not found
The file sample_131 is not found
The file sample_135 is not found
The file sample_137 is not found
The file sample_153 is not found
The file sample_158 is not found
The file sample_164 is not found
The file sample_169 is not found
The file sample_183 is not 

#### Create edge index and attributes

In [7]:
def _load_branch_mask(csv_path):
    """Read a header-less CSV and return its values as a flat Boolean mask."""
    raw = pd.read_csv(csv_path, header=None, index_col=None)
    return raw.to_numpy().astype(bool).flatten()


# Branch parameter table as a NumPy array (one row per branch)
branch = pd.read_csv('../../data/IEEE_Case118/branch_params/branch_params.csv',
                     header=None,
                     index_col=None).to_numpy()

# Column of `branch` that holds RATE_A, overwritten below with PF_max values
RATE_A = 5

########################## These numbers are determined separately ##############################
# Row masks assigning branches to one of three PF_max categories
PF_max_category1 = _load_branch_mask('../../data/IEEE_Case118/branch_params/PF_max_category1.csv')
PF_max_category2 = _load_branch_mask('../../data/IEEE_Case118/branch_params/PF_max_category2.csv')
PF_max_category3 = _load_branch_mask('../../data/IEEE_Case118/branch_params/PF_max_category3.csv')

# PF_max value applied to each category
PF_max1 = 1200
PF_max2 = 500
PF_max3 = 300

# Applied in category order 1, 2, 3, so a later category wins on any overlap
for category_mask, pf_limit in ((PF_max_category1, PF_max1),
                                (PF_max_category2, PF_max2),
                                (PF_max_category3, PF_max3)):
    branch[category_mask, RATE_A] = pf_limit

# The first two columns are the branch end buses, shifted down by one
# (presumably from 1-based bus numbers to 0-based node indices) and transposed
# to shape [2, num_branches]
edge_index = torch.tensor((branch[:, :2] - 1).T, dtype=torch.long)

# All remaining columns become the edge attributes
edge_attr = torch.from_numpy(branch[:, 2:]).float()

#### Create train, test and validation dataset

In [8]:
# x (node features) and y (UC labels) are filled by two separate loops, each
# of which skips missing files on its own. Pairing them by position is only
# meaningful if both loops kept the same samples, so fail loudly on a count
# mismatch instead of silently dropping labels or hitting an IndexError.
# NOTE(review): equal lengths do not prove that the same sample ids were
# skipped in both loops; confirm the missing-file lists match.
if len(x) != len(y):
    raise ValueError(
        f'Feature/label count mismatch: {len(x)} feature matrices '
        f'but {len(y)} label matrices'
    )

# One graph per sample; edge_index and edge_attr are shared by all graphs
data_list = []

for features, labels in zip(x, y):
    X = torch.from_numpy(features).float()
    Y = torch.from_numpy(labels).float()
    graph = Data(x=X, y=Y, edge_index=edge_index, edge_attr=edge_attr)
    data_list.append(graph)

In [9]:
from MyDataset import MyDataset

# Dataset root, and the folder under it that holds the previously saved dataset
root = '../../train_val_test_dataset/IEEE_Case118/UC'
processed_dir = f'{root}/processed'

# Remove any previously saved dataset first: per the original author's note,
# this cell won't work if a saved dataset already exists.
# (`processed_dir` replaces the former variable name `dir`, which shadowed the
# Python builtin for the rest of the kernel session.)
if os.path.exists(processed_dir):
    shutil.rmtree(processed_dir)
    print('The old dataset has been deleted!')
else:
    print('There is no dataset found!')

# Save train, val and test data
MyDataset(root=root, data_list=data_list)

The old dataset has been deleted!


Processing...
Done!


MyDataset(1000)