In [2]:
import os
import random

import joblib   # To save and load scaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler

#### Set random seed

In [3]:
# Use one seed value for both Python's and NumPy's global random generators
seed = 10
for seed_rng in (random.seed, np.random.seed):
    seed_rng(seed)

#### Normalize load data

In [4]:
## Load data
# The first CSV column is used as the index and the first row as the header.
# After the transposes below, every CSV column is one sample (data point) and
# every CSV row is one feature seen by the scaler.
file_path = 'synthetic_data_40percent/p_load_data40.csv'
p_load_data = pd.read_csv(file_path, index_col=0, header=0).to_numpy()

## Split: the first 70% of the columns are training data, the rest testing data
num_load_data = p_load_data.shape[1] # Get number of data points
load_train_size = int(0.7*num_load_data)

## Normalize data using training data
# The scaler statistics come from the training columns only, so the testing
# columns do not influence the normalization.
load_scaler = StandardScaler()
load_train_normalized = load_scaler.fit_transform(p_load_data[:, :load_train_size].T).T

## Normalize testing data
# Reuse the statistics fitted on the training columns (transform, not fit_transform)
load_test_normalized = load_scaler.transform(p_load_data[:, load_train_size:].T).T

## Make sure the output directory exists
# to_csv and joblib.dump do not create missing parent directories, so without
# this a fresh checkout fails on the first write.
os.makedirs('normalized_data', exist_ok=True)

## Save normalized data
pd.DataFrame(load_train_normalized).to_csv('normalized_data/load_train_normalized.csv')
pd.DataFrame(load_test_normalized).to_csv('normalized_data/load_test_normalized.csv')

## Save scaler
joblib.dump(load_scaler, 'normalized_data/load_scaler.pkl')

['normalized_data/load_scaler.pkl']

#### Normalize generation data

In [5]:
## Set scaler number of input features
num_features = 369      # Same as the number of generator buses

## Set min generator capacity
min_gen = np.zeros(num_features)

## Set max generator capacity
max_gen = np.full(num_features, 1300.0)

## Set scaler
# Fit on the two boundary rows instead of assigning the fitted attributes by hand.
# With the default feature_range of (0, 1) this gives the same data_min_,
# data_max_, scale_ (= 1 / (max_gen - min_gen)) and min_ (= 0) as before, and it
# also sets data_range_, n_features_in_ and n_samples_seen_, so the pickled
# object is a completely fitted estimator that checks the feature count of its input.
gen_scaler = MinMaxScaler()
gen_scaler.fit(np.vstack([min_gen, max_gen]))

## Save scaler
joblib.dump(gen_scaler, 'normalized_data/gen_scaler.pkl')

['normalized_data/gen_scaler.pkl']

In [6]:
## Load data
# First CSV column is the index, first row the header; the values become a 2-D array
file_path = 'synthetic_data_40percent/p_gen_data40.csv'
p_gen_data = pd.read_csv(file_path, index_col=0, header=0).to_numpy()

## Get number of training data (first 70% of the columns)
num_gen_data = p_gen_data.shape[1]
gen_train_size = int(0.7 * num_gen_data)

## Split the columns into a training part and a testing part
gen_train_raw, gen_test_raw = np.split(p_gen_data, [gen_train_size], axis=1)

## Normalize both parts with the fixed-capacity generator scaler
# The scaler works on rows as samples, hence the transpose before and after.
gen_train_normalized = gen_scaler.transform(gen_train_raw.T).T
gen_test_normalized = gen_scaler.transform(gen_test_raw.T).T

## Save normalized data
gen_outputs = (
    ('normalized_data/gen_train_normalized.csv', gen_train_normalized),
    ('normalized_data/gen_test_normalized.csv', gen_test_normalized),
)
for gen_csv_path, gen_values in gen_outputs:
    pd.DataFrame(gen_values).to_csv(gen_csv_path)

In [7]:
## Load data
# First CSV column is the index, first row the header; the values become a 2-D array
file_path = 'synthetic_data_40percent/p_max_gen_data40.csv'
p_max_gen_data = pd.read_csv(file_path, index_col=0, header=0).to_numpy()

## Get number of training data (first 70% of the columns)
num_max_gen_data = p_max_gen_data.shape[1]
max_gen_train_size = int(0.7 * num_max_gen_data)

## Split the columns into a training part and a testing part
max_gen_train_raw, max_gen_test_raw = np.split(p_max_gen_data, [max_gen_train_size], axis=1)

## Normalize both parts with the fixed-capacity generator scaler
# The scaler works on rows as samples, hence the transpose before and after.
max_gen_train_normalized = gen_scaler.transform(max_gen_train_raw.T).T
max_gen_test_normalized = gen_scaler.transform(max_gen_test_raw.T).T

## Save normalized data
max_gen_outputs = (
    ('normalized_data/max_gen_train_normalized.csv', max_gen_train_normalized),
    ('normalized_data/max_gen_test_normalized.csv', max_gen_test_normalized),
)
for max_gen_csv_path, max_gen_values in max_gen_outputs:
    pd.DataFrame(max_gen_values).to_csv(max_gen_csv_path)