# Basic example

Sinergym follows the standard Gymnasium API (the maintained successor of OpenAI Gym). Let's see how to create a basic loop.

First, we need to import Sinergym and create an environment; in our case, we use 'Eplus-demo-v1'.

In [2]:
import gymnasium as gym
import numpy as np

import sinergym

In [None]:
# Build the demo environment; the 'Eplus-demo-v1' id is used here after
# `import sinergym` has run in the imports cell.
env = gym.make('Eplus-demo-v1')

At first glance, it may appear that Sinergym is imported but never used. However, importing Sinergym is what registers all of its [Environments](https://ugr-sail.github.io/sinergym/compilation/html/pages/environments.html)
so that they can be used; in this case, 'Eplus-demo-v1', with all the information contained in the idf file and the config file.

After this simple definition, we are ready to loop over the episodes. For this simple example, we are going to consider only 1 episode. In summary, the code we need is something like this:

In [4]:
# Run a single episode with a random agent and report the accumulated reward
# every time the simulated month changes.
for i in range(1):
    obs, info = env.reset()
    rewards = []
    observations = []
    terminated = truncated = False
    current_month = 0
    # env.step() returns both a `terminated` and a `truncated` flag; the
    # episode is over as soon as either one is set, so both must be checked
    # (checking only `terminated` would loop past a truncated episode).
    while not (terminated or truncated):
        a = env.action_space.sample()  # random action
        obs, reward, terminated, truncated, info = env.step(a)
        rewards.append(reward)
        observations.append(obs)
        if info['month'] != current_month:  # display results every month
            current_month = info['month']
            print('Reward: ', sum(rewards), info)

[2023-05-17 05:43:41,335] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Creating new EnergyPlus simulation episode...
[2023-05-17 05:43:41,519] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /root/my-project/examples/Eplus-env-demo-v1-res8/Eplus-env-sub_run1


  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


Reward:  -0.18054534463652905 {'timestep': 1, 'time_elapsed': 900, 'year': 1991, 'month': 1, 'day': 1, 'hour': 0, 'action': [21, 21], 'reward': -0.18054534463652905, 'reward_energy': -1.8054534463652903, 'reward_comfort': -0.0, 'total_energy': 18054.5344636529, 'abs_comfort': 0.0, 'temperatures': [20.99998833869494, 20.81866975215882, 20.9999880037787, 20.71504075275832, 20.99998305378782]}
Reward:  -5763.117736881167 {'timestep': 2976, 'time_elapsed': 2678400, 'year': 1991, 'month': 2, 'day': 1, 'hour': 0, 'action': [22, 23], 'reward': -0.2266049065564041, 'reward_energy': -2.266049065564041, 'reward_comfort': -0.0, 'total_energy': 22660.49065564041, 'abs_comfort': 0.0, 'temperatures': [21.64848453584105, 20.27223518090898, 21.59761757334702, 20.19741394829678, 20.66473502531415]}
Reward:  -12028.195440598773 {'timestep': 5664, 'time_elapsed': 5097600, 'year': 1991, 'month': 3, 'day': 1, 'hour': 0, 'action': [20, 25], 'reward': -0.32230673238890206, 'reward_energy': -0.974010773646954

Now we can see the final rewards:

In [5]:
# Summary of the episode: average reward per step and total accumulated reward.
print('Mean reward: ', np.mean(rewards), 'Cumulative reward: ', sum(rewards))

Mean reward:  -3.389259588758715 Cumulative reward:  -118759.65599010859


The [list of environments](https://github.com/ugr-sail/sinergym/blob/main/sinergym/__init__.py) registered in Sinergym is extensive, and the same buildings are offered with varying particularities: for example, continuous or discrete action spaces, noise over weather, run period, timesteps, reward function, etc. We will see them in the following notebooks.

In [6]:
# Names of the observation variables exposed by the environment.
env.variables['observation']

['year',
 'month',
 'day',
 'hour',
 'Site Outdoor Air Drybulb Temperature(Environment)',
 'Site Outdoor Air Relative Humidity(Environment)',
 'Site Wind Speed(Environment)',
 'Site Wind Direction(Environment)',
 'Site Diffuse Solar Radiation Rate per Area(Environment)',
 'Site Direct Solar Radiation Rate per Area(Environment)',
 'Zone Thermostat Heating Setpoint Temperature(SPACE1-1)',
 'Zone Thermostat Cooling Setpoint Temperature(SPACE1-1)',
 'Zone Air Temperature(SPACE1-1)',
 'Zone Air Temperature(SPACE2-1)',
 'Zone Air Temperature(SPACE3-1)',
 'Zone Air Temperature(SPACE4-1)',
 'Zone Air Temperature(SPACE5-1)',
 'Zone Thermal Comfort Mean Radiant Temperature(SPACE1-1 PEOPLE 1)',
 'Zone Air Relative Humidity(SPACE1-1)',
 'Zone Thermal Comfort Clothing Value(SPACE1-1 PEOPLE 1)',
 'Zone Thermal Comfort Fanger Model PPD(SPACE1-1 PEOPLE 1)',
 'Zone People Occupant Count(SPACE1-1)',
 'People Air Temperature(SPACE1-1 PEOPLE 1)',
 'Facility Total HVAC Electricity Demand Rate(Whole Buildin

In [7]:
# Observation returned by the first env.step() call of the episode loop.
observations[0]

array([1.9910000e+03, 1.0000000e+00, 1.0000000e+00, 0.0000000e+00,
       1.8000000e+00, 9.5250000e+01, 4.0999999e+00, 2.6500000e+02,
       0.0000000e+00, 0.0000000e+00, 2.1000000e+01, 2.1000000e+01,
       2.0999989e+01, 2.0818670e+01, 2.0999989e+01, 2.0715040e+01,
       2.0999983e+01, 1.9362980e+01, 3.9579979e+01, 7.5000000e-01,
       2.9739861e+01, 0.0000000e+00, 2.0999989e+01, 1.8054535e+04],
      dtype=float32)

And as always don't forget to close the environment:

In [None]:
# Shut the environment down once the episode loop is finished.
env.close()

# Custom Stuff

In [1]:
import gymnasium as gym
import numpy as np

import sinergym
from sinergym.utils.rewards import LinearReward, BaseReward
from sinergym.utils import env_checker
from datetime import datetime
from math import exp
from typing import Any, Dict, List, Tuple, Union

In [2]:
# For checking custom env
# env_checker.check_env(env)

In [3]:
class CustomReward(BaseReward):
    """Linear reward combining an energy term and a temperature-comfort term."""

    def __init__(
        self,
        temperature_variable: Union[str, list],
        occupancy_variable: Union[str, list],
        energy_variable: str,
        range_comfort_winter: Tuple[float, float],
        range_comfort_summer: Tuple[float, float],
        summer_start: Tuple[int, int] = (6, 1),
        summer_final: Tuple[int, int] = (9, 30),
        energy_weight: float = 0.5,
        lambda_energy: float = 1e-4,
        lambda_temperature: float = 1.0
    ):
        """
        Linear reward function.

        It considers the energy consumption and, for every temperature
        variable, the absolute distance to the comfort range of the current
        season (zero while the temperature is inside the range).

        .. math::
            R = - W * lambda_E * power - (1 - W) * lambda_T * sum_T(max(T_{low} - T, 0) + max(T - T_{up}, 0))

        Args:
            temperature_variable (Union[str, list]): Name(s) of the temperature variable(s).
            occupancy_variable (Union[str, list]): Name(s) of the occupancy variable(s). It is validated
                against temperature_variable and stored, but it does not take part in the reward yet.
            energy_variable (str): Name of the energy/power variable.
            range_comfort_winter (Tuple[float,float]): Temperature comfort range (low, up) for cold season. Depends on environment you are using.
            range_comfort_summer (Tuple[float,float]): Temperature comfort range (low, up) for hot season. Depends on environment you are using.
            summer_start (Tuple[int,int]): Summer season start as (month, day). Defaults to (6,1).
            summer_final (Tuple[int,int]): Summer season end as (month, day), inclusive. Defaults to (9,30).
            energy_weight (float, optional): Weight given to the energy term. Defaults to 0.5.
            lambda_energy (float, optional): Constant for removing dimensions from power(1/W). Defaults to 1e-4.
            lambda_temperature (float, optional): Constant for removing dimensions from temperature(1/C). Defaults to 1.0.

        Raises:
            TypeError: If temperature_variable and occupancy_variable are not of the same type.
            ValueError: If both are lists but their lengths differ.
        """

        super().__init__()

        # occupancy_variable must mirror temperature_variable: same type
        # (str or list) and, for lists, the same number of entries.
        if type(temperature_variable) is not type(occupancy_variable):
            raise TypeError(
                "temperature_variable must be of same type as occupancy_variable")
        if isinstance(temperature_variable, list) and len(
                temperature_variable) != len(occupancy_variable):
            raise ValueError(
                "temperature_variable should have the same length as occupancy_variable")

        # Name of the variables
        self.temp_name = temperature_variable
        self.occ_name = occupancy_variable
        self.energy_name = energy_variable

        # Reward parameters
        self.range_comfort_winter = range_comfort_winter
        self.range_comfort_summer = range_comfort_summer
        self.W_energy = energy_weight
        self.lambda_energy = lambda_energy
        self.lambda_temp = lambda_temperature

        # Summer period
        self.summer_start = summer_start  # (month,day)
        self.summer_final = summer_final  # (month,day)

    def __call__(self, obs_dict: Dict[str, Any]
                 ) -> Tuple[float, Dict[str, Any]]:
        """Calculate the reward function.

        Args:
            obs_dict (Dict[str, Any]): Dict with observation variable name (key) and observation variable value (value)

        Returns:
            Tuple[float, Dict[str, Any]]: Reward value and dictionary with their individual components.
        """

        # Energy term
        reward_energy = - self.lambda_energy * obs_dict[self.energy_name]

        # Comfort
        comfort, temps = self._get_comfort(obs_dict)
        reward_comfort = - self.lambda_temp * comfort

        # Weighted sum of both terms
        reward = self.W_energy * reward_energy + \
            (1.0 - self.W_energy) * reward_comfort

        reward_terms = {
            'reward_energy': reward_energy,
            'reward_comfort': reward_comfort,
            'total_energy': obs_dict[self.energy_name],
            'abs_comfort': comfort,
            'temperatures': temps
        }

        return reward, reward_terms

    def _get_comfort(self,
                     obs_dict: Dict[str, Any]
                     ) -> Tuple[float, List[float]]:
        """Calculate the comfort term of the reward.

        Args:
            obs_dict (Dict[str, Any]): Observation dict; must contain 'year', 'month' and 'day'
                besides the configured temperature variable(s).

        Returns:
            Tuple[float, List[float]]: comfort penalty and List with temperatures used.
        """

        year = int(obs_dict['year'])
        current_dt = datetime(
            year, int(obs_dict['month']), int(obs_dict['day']))

        # Summer period boundaries for the current simulation year
        summer_start_date = datetime(
            year, self.summer_start[0], self.summer_start[1])
        summer_final_date = datetime(
            year, self.summer_final[0], self.summer_final[1])

        if summer_start_date <= current_dt <= summer_final_date:
            temp_range = self.range_comfort_summer
        else:
            temp_range = self.range_comfort_winter

        # A single name is wrapped in a list so that the membership test below
        # is an exact name match; `key in "<some str>"` would be a substring
        # test and could pick up unrelated observation keys.
        if isinstance(self.temp_name, str):
            temp_names = [self.temp_name]
        else:
            temp_names = self.temp_name

        temps = [value for key, value in obs_dict.items()
                 if key in temp_names]

        # Accumulate the distance to the nearest comfort bound for every
        # temperature that lies outside the comfort range.
        comfort = 0.0
        for T in temps:
            if T < temp_range[0] or T > temp_range[1]:
                comfort += min(abs(temp_range[0] - T), abs(T - temp_range[1]))

        return comfort, temps

In [4]:
# Simulation variables requested as observations for the customised environment.
OBSERVATION_VARIABLES = [
    'Site Outdoor Air Drybulb Temperature(Environment)',
    'Site Outdoor Air Relative Humidity(Environment)',
    'Site Wind Speed(Environment)',
    'Site Wind Direction(Environment)',
    'Site Diffuse Solar Radiation Rate per Area(Environment)',
    'Site Direct Solar Radiation Rate per Area(Environment)',
    'Zone Thermostat Heating Setpoint Temperature(SPACE1-1)',
    'Zone Thermostat Cooling Setpoint Temperature(SPACE1-1)',
    'Zone Air Temperature(SPACE1-1)',
    'Zone Air Temperature(SPACE2-1)',
    'Zone Air Temperature(SPACE3-1)',
    'Zone Air Temperature(SPACE4-1)',
    'Zone Air Temperature(SPACE5-1)',
    'Zone Thermal Comfort Mean Radiant Temperature(SPACE1-1 PEOPLE 1)',
    'Zone Air Relative Humidity(SPACE1-1)',
    'Zone Thermal Comfort Clothing Value(SPACE1-1 PEOPLE 1)',
    'Zone Thermal Comfort Fanger Model PPD(SPACE1-1 PEOPLE 1)',
    'Zone People Occupant Count(SPACE1-1)',
    'People Air Temperature(SPACE1-1 PEOPLE 1)',
    'Facility Total HVAC Electricity Demand Rate(Whole Building)',
]

# Same demo environment id, but with the observation definition and the reward
# function overridden through keyword arguments.
env = gym.make(
    'Eplus-demo-v1',
    observation_variables=OBSERVATION_VARIABLES,
    # The 4 extra slots are presumably the year/month/day/hour fields that
    # precede the variables in each observation -- TODO confirm.
    observation_space=gym.spaces.Box(
        low=-5e6,
        high=5e6,
        shape=(4 + len(OBSERVATION_VARIABLES),),
        dtype=np.float32,
    ),
    reward=CustomReward,
    reward_kwargs=dict(
        # One temperature / occupancy variable name per zone SPACE1-1 .. SPACE5-1
        temperature_variable=[
            f'Zone Air Temperature(SPACE{zone}-1)' for zone in range(1, 6)
        ],
        occupancy_variable=[
            f'Zone People Occupant Count(SPACE{zone}-1)' for zone in range(1, 6)
        ],
        energy_variable='Facility Total HVAC Electricity Demand Rate(Whole Building)',
        range_comfort_winter=(20.0, 23.5),
        range_comfort_summer=(23.0, 26.0),
        energy_weight=0.1,
    ),
)

[2023-05-17 06:42:07,068] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Updating idf ExternalInterface object if it is not present...
[2023-05-17 06:42:07,070] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Updating idf Site:Location and SizingPeriod:DesignDay(s) to weather and ddy file...
[2023-05-17 06:42:07,072] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Updating idf OutPut:Variable and variables XML tree model for BVCTB connection.
[2023-05-17 06:42:07,074] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Setting up extra configuration in building model if exists...
[2023-05-17 06:42:07,075] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Setting up action definition in building model if exists...


In [5]:
# Run a single episode with a random agent on the customised environment and
# report the accumulated reward every time the simulated month changes.
for i in range(1):
    obs, info = env.reset()
    rewards = []
    observations = []
    terminated = truncated = False
    current_month = 0
    # env.step() returns both a `terminated` and a `truncated` flag; the
    # episode is over as soon as either one is set, so both must be checked
    # (checking only `terminated` would loop past a truncated episode).
    while not (terminated or truncated):
        a = env.action_space.sample()  # random action
        obs, reward, terminated, truncated, info = env.step(a)
        rewards.append(reward)
        observations.append(obs)
        if info['month'] != current_month:  # display results every month
            current_month = info['month']
            print('Reward: ', sum(rewards), info)

[2023-05-17 06:42:11,220] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:Creating new EnergyPlus simulation episode...
[2023-05-17 06:42:11,404] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /root/my-project/examples/Eplus-env-demo-v1-res13/Eplus-env-sub_run1


  epw_content = self._headers_to_epw(use_datetimes=use_datetimes) + df.to_csv(


Reward:  -3.349656611852669 {'timestep': 1, 'time_elapsed': 900, 'year': 1991, 'month': 1, 'day': 1, 'hour': 0, 'action': [18, 27], 'reward': -3.349656611852669, 'reward_energy': -0.3780170717786078, 'reward_comfort': -3.679838782972009, 'total_energy': 3780.170717786078, 'abs_comfort': 3.679838782972009, 'temperatures': [19.23928542997237, 18.94104676407819, 19.25996050906064, 18.87986851391679, 20.09605568964284]}
Reward:  -6263.744721629314 {'timestep': 2976, 'time_elapsed': 2678400, 'year': 1991, 'month': 2, 'day': 1, 'hour': 0, 'action': [19, 26], 'reward': -5.034556175562543, 'reward_energy': -1.258423096310944, 'reward_comfort': -5.45412651770161, 'total_energy': 12584.23096310944, 'abs_comfort': 5.45412651770161, 'temperatures': [18.944821747237, 18.80525802437179, 18.9468120809365, 18.79149975135721, 19.05748187839589]}
Reward:  -11901.467417660475 {'timestep': 5664, 'time_elapsed': 5097600, 'year': 1991, 'month': 3, 'day': 1, 'hour': 0, 'action': [16, 29], 'reward': -2.527091

In [6]:
# Summary of the episode: average reward per step and total accumulated reward.
print('Mean reward: ', np.mean(rewards), 'Cumulative reward: ', sum(rewards))

Mean reward:  -3.4070482597321616 Cumulative reward:  -119382.97102101895


In [7]:
# Shut the environment down once the episode loop is finished.
env.close()

[2023-05-17 06:02:37,810] EPLUS_ENV_demo-v1_MainThread_ROOT INFO:EnergyPlus simulation closed successfully. 


In [12]:
# Scratch check of the type/length validation logic used for the reward's
# temperature/occupancy arguments. Note: despite its name, `test_str` is a
# one-element list here, so the "same type" branch is the one exercised.
test_str = ["asd"]
test_list = ["asdkj", "askdj"]

if type(test_str) != type(test_list):
    # Mismatching types are reported and nothing else is checked.
    print('not same type')
else:
    print('OKAY')
    # Lengths are only compared when both values are plain lists.
    lengths_differ = len(test_str) != len(test_list)
    if type(test_str) == list and lengths_differ:
        print("Lists length don't match")
    del lengths_differ

OKAY
Lists length don't match
