# Reinforcement Learning control strategies for Electric Vehicles fleet Virtual Power Plants
Thesis based on the development of an RL agent that manages a VPP through EV charging stations in a household environment. The main optimization objectives of the VPP are valley filling, peak shaving and zero resulting load over time. The main actions performed to reach these objectives are storing energy from renewable resources and pushing power into the grid at high-demand times. The development of the Virtual Power Plant environment is based on the ELVIS (Electric Vehicles Infrastructure Simulator) open library from DAI-Labor: https://github.com/dailab/elvis The thesis code is currently available at: (https://github.com/francescomaldonato/RL_VPP_Thesis)

Author: Francesco Maldonato

## VPP simulator Notebook based on EVs arrival, with StableBaselines3 trained model
(Recurrent PPO) [default: 25 EVs per week simulation]

Installing required packages and dependencies

In [None]:
# %%capture
# !pip install py-elvis==0.2.1
# !pip install pyyaml==5.4
# !pip install plotly==5.9.0
# !pip install -U kaleido==0.2.1

# !pip install stable-baselines3[extra]==1.6.1
# !pip install stable-baselines==1.6.1
# !pip install sb3-contrib==1.6.1
# !pip install gym==0.20.0
# !pip install -q wandb==0.13.4

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# %cd /content/drive/MyDrive/DAI-Labor/RL_VPP_Thesis/
# %ls

In [None]:
# #Cloning repository and changing directory
# !git clone https://github.com/francescomaldonato/RL_VPP_Thesis.git
# %cd RL_VPP_Thesis/
# %ls

In [None]:
import yaml
import numpy as np
from gym import Env
from VPP_environment import VPPEnv, VPP_Scenario_config
from elvis.config import ScenarioConfig
import os
import torch
import random
import wandb
from sb3_contrib import RecurrentPPO #The available algorithms in sb3-contrib for the custom environment with MultiInputPolicy
from sb3_contrib.common.maskable.utils import get_action_masks
import stable_baselines3 as sb3
from stable_baselines3.common.env_checker import check_env

import plotly.io as pio  # Added locally: plots had stopped rendering for an unknown reason
pio.renderers.default = "notebook_connected"  # Added locally: force the "notebook_connected" renderer as a workaround


#Check if cuda device is available for training
print("Torch-Cuda available device:", torch.cuda.is_available())
print(sb3.get_system_info())
#!wandb --version

In [None]:
# Ensure deterministic, reproducible behavior by seeding every global RNG in use.
torch.backends.cudnn.deterministic = True
# cuDNN autotuning may pick different kernels from run to run, so switch it off too.
torch.backends.cudnn.benchmark = False
random.seed(0)
# NumPy's global RNG is independent of Python's `random` and of torch; seed it as well.
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

## Load ELVIS YAML config file
Section where the EVs arrival simulation parameters are loaded through the Yaml config file from the 'data/config_builder/' folder.

In [None]:
#Loading paths for input data
current_folder = ''  # When run as a plain .py script, set this to the absolute repository path, e.g. '/home/ef/Documents/code/ubuntu/Car/RL_VPP_Thesis-main/'
VPP_training_data_input_path = current_folder + 'data/data_training/environment_table/' + 'Environment_data_2019.csv'
VPP_testing_data_input_path = current_folder + 'data/data_testing/environment_table/' + 'Environment_data_2020.csv'
VPP_validating_data_input_path = current_folder + 'data/data_validating/environment_table/' + 'Environment_data_2018.csv'
elvis_input_folder = current_folder + 'data/config_builder/'

#case = 'wohnblock_household_simulation_adaptive.yaml' #(loaded by default, 20 EVs arrivals per week with 50% average battery)

#Try different simulation parameters, uncomment below
#case = 'wohnblock_household_simulation_adaptive_10.yaml' #(10 EVs arrivals per week with 50% average battery) 
#case = 'wohnblock_household_simulation_adaptive_15.yaml' #(15 EVs arrivals per week with 50% average battery)
case = 'wohnblock_household_simulation_adaptive_25.yaml' #(25 EVs arrivals per week with 50% average battery) 
#case = 'wohnblock_household_simulation_adaptive_30.yaml' #(30 EVs arrivals per week with 50% average battery) 
#case = 'wohnblock_household_simulation_adaptive_35.yaml' #(35 EVs arrivals per week with 50% average battery) 

# Open with an explicit encoding so parsing does not depend on the platform's default locale encoding.
with open(elvis_input_folder + case, 'r', encoding='utf-8') as file:
    # NOTE(review): full_load accepts the full YAML language; consider yaml.safe_load
    # if the config files only use plain tags — confirm before switching.
    # Despite its name, yaml_str holds the parsed YAML document, not the raw text.
    yaml_str = yaml.full_load(file)

elvis_config_file = ScenarioConfig.from_yaml(yaml_str)  # Scenario information to generate (EV side)
VPP_config_file = VPP_Scenario_config(yaml_str)  # Original note: "VPP" presumably refers to the whole optimization scenario

# print(elvis_config_file)  # slow to print
# print(VPP_config_file)

In [None]:
#TESTING Environment initialization
env = VPPEnv(VPP_testing_data_input_path, elvis_config_file, VPP_config_file)  # Built on the 2020 testing dataset; elvis_config_file supplies the EV parameters (original note: Elvis is called to return the EV distribution)


In [None]:
env.plot_Dataset_autarky()  # Presumably plots the autarky of the input dataset (inferred from the method name) — confirm in VPP_environment

In [None]:
# env.plot_VPP_input_data()

In [None]:
# env.plot_ELVIS_data()

In [None]:
#Function to check custom environment and output additional warnings if needed
# check_env(env)  # Provided by stable-baselines: checks whether the environment follows the Gym API
# env.plot_reward_functions()

### VPP Simulation test with random actions [no model loaded]

In [None]:
# Roll out one full episode with uniformly random actions as an untrained baseline.
# A gym action space draws from its own RNG, which the global seeds do not reach,
# so seed it explicitly to make the sampled action sequence repeatable across runs.
env.action_space.seed(0)
episodes = 1
for episode in range(1, episodes+1):
    state = env.reset()
    done = False  # loop flag returned by step(); not the environment's own self.done
    score = 0
    while not done:  # original note: executes 35041 times
        # Boolean mask, original note: [idle*4, charge*4, discharge*4]; computed but not applied to the sampled action.
        action_masks = get_action_masks(env)
        action = env.action_space.sample()  # original note: [3,3,3,3]

        n_state, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))

VPP_table = env.VPP_table
# env.plot_VPP_energies()  # EV battery variation curves

In [None]:
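# env.plot_Elvis_results()  # Curves of purchased-electricity cost, EV power, remaining household load and total load, for comparison with the plot below. Household + EV load = total load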

In [None]:
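# env.plot_VPP_results()  # VPP simulation results with charging actions controlled by the RL agent (recorded after the 35041 steps finish; input dataset overlaid with houseRW_load, ev_power and overcost). The actions are rather chaotic here; they are not when controlled by the RL model.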

In [None]:
env.plot_VPP_autarky()  # Self-consumption vs. self-sufficiency comparison: Elvis uncontrolled-charging simulation vs. VPP simulation with controlled charging actions (random actions here).

In [None]:
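# env.plot_VPP_supply_demand()  # Time-series data. Positive: household load sources (?2house), EV charging sources (?2EV), consuming entities (CO_?). Negative: power drawn from renewables (RE2?), power from EV discharging (EV2?), produced energy (PRO_?)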

In [None]:
env.plot_VPP_Elvis_comparison()  # Elvis vs. VPP comparison: average EV energy at departure, purchased energy, renewable energy left unused by the EVs, purchased-electricity cost

In [None]:
# env.plot_rewards_results()  # In the VPP simulation, how the rewards (per-step total load, EV energy remaining at departure, number of available EVs) evolve over time.

In [None]:
env.plot_rewards_stats()  # Plot reward statistics (see the environment code for details): cumulative = final_reward + step_total, step_total = step_EV_en + step_load

In [None]:
env.plot_EVs_kpi()  # Count plot of EV energy ranges at departure in the VPP simulation.

In [None]:
env.plot_actions_kpi()  # Heatmap of whether the actions taken by the agent were valid in the VPP simulation. action_truth_table[T,T,T,F]

In [None]:
env.plot_load_kpi()  # Bar-chart analysis of load data over one week (2022-01-01~2022-01-08), one month (2022-06-01~2022-07-01) and one year, comparing ELVIS and VPP

In [None]:
env.plot_yearly_load_log()  # Counts steps in load-power bins 0.2 kW wide (yearly, overlaid) and draws a log-scale histogram comparing ELVIS and VPP; computes the percentage of steps with load in the -0.1~0.1 range

In [None]:
## Wandb login to load models
#In Colab, uncomment below:
# %env "WANDB_DISABLE_CODE" True
# %env "WANDB_NOTEBOOK_NAME" "VPP_simulator.ipynb"
# os.environ['WANDB_NOTEBOOK_NAME'] = 'VPP_simulator.ipynb'
#wandb.login(relogin=True)

#In local notebook:
# The API key is read from the WANDB_API_KEY environment variable so that no
# credential is stored in the notebook or committed to version control.
# NOTE(review): a literal key used to be hardcoded on this line; if it was a
# real key it should be revoked.
your_wandb_login_code = os.environ.get("WANDB_API_KEY")
# Uses the Python API instead of the IPython-only `!wandb login` shell escape, so
# the cell also runs as a plain script. With key=None wandb falls back to
# previously stored credentials or an interactive prompt.
wandb.login(key=your_wandb_login_code)

In [None]:
#Loading training model, from local directory or from wandb previous trainings
# Resolved against current_folder like the data paths, so loading also works when
# the working directory is not the repository root.
RecurrentPPO_path = current_folder + "trained_models/RecurrentPPO_models/model_RecurrentPPO_"

#model_id = "s37o8q0n"
model_id = "333ckz0i"
model = RecurrentPPO.load(RecurrentPPO_path + model_id, env=env)  # Original note: env is wrapped with the Monitor wrapper and with DummyVecEnv

# run_id_restore = "2y2dqvyn"
# model = wandb.restore(f'model_{run_id_restore}.zip', run_path=f"francesco_maldonato/RL_VPP_Thesis/{run_id_restore}")

## Testing dataset VPP Simulation using the loaded trained model
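VPP simulation on the testing dataset using the loaded trained model. The trained model is now used with deterministic prediction [should improve performance].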

In [None]:
#TEST Model
# Run the loaded recurrent policy for one full episode on the testing environment.
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    # cell and hidden state of the LSTM
    lstm_states = None
    num_envs = 1
    # Episode start signals are used to reset the lstm states
    episode_starts = np.ones((num_envs,), dtype=bool)
    while not done:
        #env.render()
        action_masks = get_action_masks(env)  # computed but not passed to predict()
        # Now using our trained model with deterministic prediction [should improve performances]
        action, lstm_states = model.predict(obs, state=lstm_states, episode_start=episode_starts, deterministic=True)
        env.lstm_state = lstm_states
        obs, reward, done, info = env.step(action)
        # Keep the start flags a (num_envs,) bool array, matching their initial
        # value, instead of degrading to a bare scalar after the first step.
        episode_starts = np.full((num_envs,), done, dtype=bool)
        score += reward
    print(f'Episode:{episode} Score:{score}')

#Save the VPP table
# Resolved against current_folder like the input paths, so saving also works when
# the working directory is not the repository root.
VPP_table = env.save_VPP_table(save_path=current_folder + 'data/environment_optimized_output/VPP_table.csv')

In [None]:
env.plot_VPP_energies()  # EV battery variation curves

In [None]:
VPP_table.head(15000)  # Show the first 15000 rows of the saved VPP table (presumably a pandas DataFrame — confirm)

In [None]:
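# env.plot_Elvis_results()  # Curves of purchased-electricity cost, EV power, remaining household load and total load, for comparison with the plot below. Household + EV load = total load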

In [None]:
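# env.plot_VPP_results()  # Purchased-electricity cost, EV power, remaining household load and total load (recorded after the 35041 steps finish; input dataset overlaid with houseRW_load, ev_power and overcost). Random, uncontrolled actions look rather chaotic; actions controlled by the RL model do not.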

In [None]:
env.plot_VPP_autarky()  # Comparison of renewable (wind/solar) self-consumption rate and self-sufficiency rate (no electricity purchase needed): Elvis uncontrolled-charging simulation vs. VPP simulation with controlled charging actions (RL-controlled actions here).

In [None]:
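# env.plot_VPP_supply_demand()  # Very many series. Time-series data. Positive: household load sources (?2house), EV charging sources (?2EV), consuming entities (CO_?). Negative: power drawn from renewables (RE2?), power from EV discharging (EV2?), produced energy (PRO_?)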

In [None]:
env.plot_VPP_Elvis_comparison()  # Elvis vs. VPP comparison: average EV energy at departure, purchased energy, renewable energy left unused by the EVs, purchased-electricity cost

In [None]:
env.plot_rewards_results()  # In the VPP simulation, how the rewards (per-step total load, EV energy remaining at departure, number of available EVs) evolve over time.

In [None]:
env.plot_rewards_stats()  # Plot reward statistics (see the environment code for details): cumulative = final_reward + step_total, step_total = step_EV_en + step_load

In [None]:
env.plot_EVs_kpi()  # Count plot of EV energy ranges at departure in the VPP simulation.

In [None]:
env.plot_load_kpi()  # Bar-chart analysis of load data over one week (2022-01-01~2022-01-08), one month (2022-06-01~2022-07-01) and one year, comparing ELVIS and VPP

In [None]:
env.plot_yearly_load_log()  # Counts steps in load-power bins 0.2 kW wide (yearly, overlaid) and draws a log-scale histogram comparing ELVIS and VPP; computes the percentage of steps with load in the -0.1~0.1 range

## Validating dataset VPP Simulation using the loaded trained model
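VPP simulation on the validating dataset using the loaded trained model (the previous group of cells was the testing run). The trained model is now used with deterministic prediction [should improve performance].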

In [None]:
#VALIDATING Environment initialization
env = VPPEnv(VPP_validating_data_input_path, elvis_config_file, VPP_config_file)  # Rebinds env to a new environment built on the 2018 validating dataset

In [None]:
#Function to check custom environment and output additional warnings if needed
check_env(env)
# The plotting method returns a figure object here, which is then displayed explicitly.
plot_VPP_input_data = env.plot_VPP_input_data()
plot_VPP_input_data.show()

In [None]:
#model = PPO.load(PPO_path + model_run_ID, env = env)
# Reload the same saved model, now passing the validating environment as env.
model = RecurrentPPO.load(RecurrentPPO_path + model_id, env=env)

In [None]:
#TEST Model
# Run the loaded recurrent policy for one full episode on the validating environment.
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    # cell and hidden state of the LSTM
    lstm_states = None
    num_envs = 1
    # Episode start signals are used to reset the lstm states
    episode_starts = np.ones((num_envs,), dtype=bool)
    while not done:
        #env.render()
        action_masks = get_action_masks(env)  # computed but not passed to predict()
        # Now using our trained model with deterministic prediction [should improve performances]
        action, lstm_states = model.predict(obs, state=lstm_states, episode_start=episode_starts, deterministic=True)
        env.lstm_state = lstm_states
        obs, reward, done, info = env.step(action)
        # Keep the start flags a (num_envs,) bool array, matching their initial
        # value, instead of degrading to a bare scalar after the first step.
        episode_starts = np.full((num_envs,), done, dtype=bool)
        score += reward
    print(f'Episode:{episode} Score:{score}')

VPP_table = env.VPP_table
#print(env.lstm_states_list)

In [None]:
env.plot_VPP_energies()  # EV battery variation curves

In [None]:
VPP_table.head(15000)  # Show the first 15000 rows of the VPP table (presumably a pandas DataFrame — confirm)

In [None]:
env.plot_VPP_results()  # VPP simulation results: purchased-electricity cost, EV power, remaining household load and total load curves


In [None]:
env.plot_VPP_autarky()  # Self-consumption vs. self-sufficiency comparison: Elvis uncontrolled-charging simulation vs. VPP simulation with controlled charging actions

In [None]:
env.plot_VPP_supply_demand()  # Time-series supply/demand data. Positive: household load sources (?2house), EV charging sources (?2EV), consuming entities (CO_?). Negative: power drawn from renewables (RE2?), power from EV discharging (EV2?), produced energy (PRO_?)

In [None]:
env.plot_VPP_Elvis_comparison()  # Elvis vs. VPP comparison: average EV energy at departure, purchased energy, renewable energy left unused by the EVs, purchased-electricity cost

In [None]:
env.plot_rewards_results()  # How the rewards (per-step total load, EV energy remaining at departure, number of available EVs) evolve over time

In [None]:
env.plot_rewards_stats()  # Reward statistics: cumulative = final_reward + step_total, step_total = step_EV_en + step_load

In [None]:
env.plot_EVs_kpi()  # Count plot of EV energy ranges at departure

In [None]:
env.plot_load_kpi()  # Bar-chart analysis of weekly, monthly and yearly load data, comparing ELVIS and VPP

In [None]:
env.plot_yearly_load_log()  # Log-scale histogram of steps per 0.2 kW load-power bin over the year, comparing ELVIS and VPP

## Training dataset VPP Simulation using the loaded trained model
VPP simulation on the training dataset using the loaded trained model.

In [None]:
#TRAINING Environment initialization
env = VPPEnv(VPP_training_data_input_path, elvis_config_file, VPP_config_file)  # Rebinds env to a new environment built on the 2019 training dataset

In [None]:
#Function to check custom environment and output additional warnings if needed
check_env(env)
# The plotting method returns a figure object here, which is then displayed explicitly.
plot_VPP_input_data = env.plot_VPP_input_data()
plot_VPP_input_data.show()

In [None]:
#model = PPO.load(PPO_path + model_run_ID, env = env)
# Reload the same saved model, now passing the training environment as env.
model = RecurrentPPO.load(RecurrentPPO_path + model_id, env=env)

In [None]:
#TEST Model
# Run the loaded recurrent policy for one full episode on the training environment.
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    # cell and hidden state of the LSTM
    lstm_states = None
    num_envs = 1
    # Episode start signals are used to reset the lstm states
    episode_starts = np.ones((num_envs,), dtype=bool)
    while not done:
        #env.render()
        action_masks = get_action_masks(env)  # computed but not passed to predict()
        # Now using our trained model with deterministic prediction [should improve performances]
        action, lstm_states = model.predict(obs, state=lstm_states, episode_start=episode_starts, deterministic=True)
        env.lstm_state = lstm_states
        obs, reward, done, info = env.step(action)
        # Keep the start flags a (num_envs,) bool array, matching their initial
        # value, instead of degrading to a bare scalar after the first step.
        episode_starts = np.full((num_envs,), done, dtype=bool)
        score += reward
    print(f'Episode:{episode} Score:{score}')

VPP_table = env.VPP_table
#print(env.lstm_states_list)

In [None]:
env.plot_VPP_energies()  # EV battery variation curves

In [None]:
VPP_table.head(14995)  # Show the first 14995 rows of the VPP table (presumably a pandas DataFrame — confirm)

In [None]:
#env.plot_VPP_results()

In [None]:
env.plot_VPP_autarky()  # Self-consumption vs. self-sufficiency comparison: Elvis uncontrolled-charging simulation vs. VPP simulation with controlled charging actions

In [None]:
env.plot_VPP_supply_demand()  # Time-series supply/demand data. Positive: household load sources (?2house), EV charging sources (?2EV), consuming entities (CO_?). Negative: power drawn from renewables (RE2?), power from EV discharging (EV2?), produced energy (PRO_?)

In [None]:
env.plot_VPP_Elvis_comparison()  # Elvis vs. VPP comparison: average EV energy at departure, purchased energy, renewable energy left unused by the EVs, purchased-electricity cost

In [None]:
env.plot_rewards_results()  # How the rewards (per-step total load, EV energy remaining at departure, number of available EVs) evolve over time

In [None]:
env.plot_rewards_stats()  # Reward statistics: cumulative = final_reward + step_total, step_total = step_EV_en + step_load

In [None]:
env.plot_EVs_kpi()  # Count plot of EV energy ranges at departure

In [None]:
env.plot_actions_kpi()  # Heatmap of whether the actions taken by the agent were valid

In [None]:
env.plot_load_kpi()  # Bar-chart analysis of weekly, monthly and yearly load data, comparing ELVIS and VPP

In [None]:
env.plot_yearly_load_log()  # Log-scale histogram of steps per 0.2 kW load-power bin over the year, comparing ELVIS and VPP

In [None]:
env.close()  # Close the last (training-dataset) environment now that all simulations and plots are done