In [3]:
!pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0


In [1]:
!pip install pandapower

Collecting pandapower
  Downloading pandapower-2.14.11.zip (13.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deepdiff (from pandapower)
  Downloading deepdiff-8.1.1-py3-none-any.whl.metadata (9.5 kB)
Collecting orderly-set<6,>=5.2.3 (from deepdiff->pandapower)
  Downloading orderly_set-5.2.3-py3-none-any.whl.metadata (6.0 kB)
Downloading deepdiff-8.1.1-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.7/84.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading orderly_set-5.2.3-py3-none-any.whl (12 kB)
Building wheels for collected packages: pandapower
  Building wheel for pandapower (setup.py) ... [?25l[?25hdone
  Created wheel for pandapower: filename=pandapower-2.14.11-py3-none-any.whl size=13131028 sha256=432ef270996defc9c1123b09c780d1a79710228ddea38b5570f9a1b35aa264e6
  Stored in 

In [9]:
!pip install stable-baselines3

Collecting stable-baselines3
  Downloading stable_baselines3-2.4.1-py3-none-any.whl.metadata (4.5 kB)
Downloading stable_baselines3-2.4.1-py3-none-any.whl (183 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.0/184.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable-baselines3
Successfully installed stable-baselines3-2.4.1


In [20]:
import pandapower as pp
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
import pickle
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

In [21]:
class SimpleTwoBus:

    def __init__(self, V_ext, P, Q, G, B, V_init, theta_init):
        '''This class creates a simple 2-bus network.'''

        self.V_ext = V_ext
        self.P = P
        self.Q = Q
        self.G = G
        self.B = B
        self.V_init = V_init
        self.theta_init = theta_init
        self.net = pp.create_empty_network()
        self.create_two_bus_grid()


    def create_two_bus_grid(self):

        # Create two buses with initialized voltage and angle
        bus1 = pp.create_bus(self.net, vn_kv=20.0, name="Bus 1")
        bus2 = pp.create_bus(self.net, vn_kv=0.4, name="Bus 2")

        # Initialize voltage and angle for buses
        self.net.bus.loc[bus1, 'vm_pu'] = self.V_init[0]
        self.net.bus.loc[bus1, 'va_degree'] = self.theta_init[0]
        self.net.bus.loc[bus2, 'vm_pu'] = self.V_init[1]
        self.net.bus.loc[bus2, 'va_degree'] = self.theta_init[1]

        # create a line between the two buses
        pp.create_line_from_parameters(
            self.net,
            from_bus=0,
            to_bus=1,
            length_km=1.0,
            r_ohm_per_km=1/self.G,
            x_ohm_per_km=1/self.B,
            c_nf_per_km=0.0,
            g_us_per_km=0.0,
            max_i_ka=100.0,
        )

        # Create a transformer between the two buses
        # pp.create_transformer(self.net, bus1, bus2, std_type="0.25 MVA 20/0.4 kV")

        # Create a load at bus 2 with specified P and Q
        pp.create_load(self.net, bus2, p_mw=self.P, q_mvar=self.Q, name="Load")

        # Create an external grid connection at bus 1 with specified G and B
        pp.create_ext_grid(self.net, bus1, vm_pu=self.V_ext, name="Grid Connection")


In [25]:
class GridEnv(gym.Env):
    def __init__(self,V_ext = 1.02, G = 100, B = 0.1, k_limit = 5, termination_counter=100):

        self.observation_space = spaces.Box(low = np.array([-1e10,-1e10]), high = np.array([1e10, 1e10]), dtype=np.float32)
        self.action_space = spaces.Box(low=np.array([-0.05, -30]), high=np.array([0.05, 30]), dtype=np.float32)

        self.k_limit = k_limit
        self.termination_counter = termination_counter
        self.counter = 0
        self.done = False
        self.terminated = False

        self.G = G
        self.B = B
        self.V_ext = V_ext

        #initialize network
        self.reset()


    # def reset(self, seed=0):

    #     self.P = np.random.uniform(low= 0, high=0.2)
    #     self.Q = np.random.uniform(low = 0, high = 0.1)
    #     self.V_init = np.random.uniform(low = 0.9, high = 1.1, size=2)
    #     self.theta_init = np.random.uniform(low = -20, high = 20, size=2)

    #     Net = SimpleTwoBus(self.V_ext,self.P,self.Q,self.G,self.B,self.V_init,self.theta_init)
    #     self.net = Net.net

    #     initial_guesses = np.array([self.V_init, self.theta_init])

    #     self.state = self.calculate_residual(initial_guesses)

    def reset(self, seed=0):
        self.P = np.random.uniform(low=0, high=0.2)
        self.Q = np.random.uniform(low=0, high=0.1)
        self.V_init = np.random.uniform(low=0.9, high=1.1, size=2)
        self.theta_init = np.random.uniform(low=-20, high=20, size=2)

        Net = SimpleTwoBus(self.V_ext, self.P, self.Q, self.G, self.B, self.V_init, self.theta_init)
        self.net = Net.net

        initial_guesses = np.array([self.V_init, self.theta_init])
        self.state = self.calculate_residual(initial_guesses)

        self.counter = 0
        self.done = False
        self.terminated = False

        return self.state




    def calculate_residual(self, action):

        net = self.net.deepcopy()  # Keep the network unchanged

        pp.runpp(net, max_iteration = 1, tolerance_mva = np.inf)
        err = net._ppc['et']

        residual = np.zeros(2)
        residual[:] = err

        #needs a function!

        return residual


    def perform_NR_step(self):

        net = self.net.deepcopy()  # Keep the network unchanged

        pp.runpp(net, max_iteration = 50, tolerance_mva = 1e-5)

        iterations = net._ppc["iterations"]

        return iterations



    def calculate_reward(self):

        iterations = self.perform_NR_step()

        reward = - iterations

        return reward


    def step(self, action):



        # action = [delta_V, delta_theta]

        # perform action
        residual = self.calculate_residual(action)


        # calcualate reward
        reward = self.calculate_reward()


        #update state:
        self.state = residual

        if reward == -self.k_limit:
            self.done = True

        self.counter += 1

        if self.counter == self.termination_counter:
            self.terminated = True

        return self.state, reward, self.done, self.terminated




    def render(self):
        pass

In [26]:
# Test run


env = GridEnv()


state = env.reset()
print("Initial State:")
# env.render()

# Define a sample action within the specified ranges
action = np.array([0.03, 15.0], dtype=np.float32)

# Take a step in the environment using the sample action
next_state, reward, done, info = env.step(action)

# Print the results
print("\nAction Taken:", action)
print("Next State:", next_state)
# env.render()
print("Reward:", reward)
print("Done:", done)

Initial State:

Action Taken: [ 0.03 15.  ]
Next State: [0.00519081 0.00519081]
Reward: -2
Done: False


  gym.logger.warn(
  gym.logger.warn(


In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal

class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(ActorCritic, self).__init__()

        self.actor = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim),
        )

        self.critic = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, state):
        action_mean = self.actor(state)
        value = self.critic(state)
        return action_mean, value

  gym.logger.warn(
  gym.logger.warn(


In [None]:
state_dim = 2
action_dim = 2
lr = 3e-4
num_episodes = 10
max_steps = 2
gamma = 0.99
entropy_coefficient = 0.01

In [None]:
env = GridEnv()
model = ActorCritic(state_dim, action_dim)
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
for episode in range(num_episodes):
    state = env.reset()
    #

    for step in range(max_steps):
        action = #
        next_state, reward, done, _ = env.step(action)
        values.append(value)
        rewards.append(reward)

        state = next_state

        if done:
            break