In [10]:
import numpy as np
from time import sleep

# Define the environment class
class RoomEnvironment:
    def __init__(self):
        self.light_intensity = 0  # Initial light intensity
        self.outside_temperature = 25  # Initial outside temperature
        self.room_temperature = 25  # Initial room temperature
        self.fan_speed = 0  # Initial fan speed
        self.ac_temperature = 25  # Initial air conditioner temperature
        self.is_person_present = False  # Whether someone is in the room or not

    def update_state(self, light_intensity, outside_temperature, room_temperature, fan_speed, ac_temperature, is_person_present):
        self.light_intensity = light_intensity
        self.outside_temperature = outside_temperature
        self.room_temperature = room_temperature
        self.fan_speed = fan_speed
        self.ac_temperature = ac_temperature
        self.is_person_present = is_person_present

    def get_state(self):
        return (self.light_intensity, self.outside_temperature, self.room_temperature, self.fan_speed, self.ac_temperature, self.is_person_present)

    def take_action(self, action):
        # Apply the action and update the environment state
        if action == 0:  # Decrease AC temperature
            self.ac_temperature -= 1
        elif action == 1:  # Increase AC temperature
            self.ac_temperature += 1
        elif action == 2:  # Decrease fan speed
            self.fan_speed -= 1
        elif action == 3:  # Increase fan speed
            self.fan_speed += 1

        # Update the room temperature based on the fan speed and AC temperature
        if self.is_person_present:
            self.room_temperature += (self.fan_speed - 1) * 0.5 + (self.ac_temperature - 25) * 0.1
        else:
            self.room_temperature += (self.ac_temperature - 25) * 0.1

        # Calculate the reward
        reward = -abs(self.room_temperature - 22)  # Penalty for deviation from the desired room temperature

        return reward

# Define the Q-learning agent class
class QLearningAgent:
    def __init__(self, state_space, action_space, learning_rate=0.9, discount_factor=0.9, exploration_rate=0.9):
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.state_space = state_space
        self.action_space = action_space
        self.q_table = np.zeros((len(state_space), len(action_space)))

    def get_action(self, state):
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(self.action_space)
        else:
            state_index = self.state_space.index(state)
            q_values = self.q_table[state_index]
            max_q_value = np.max(q_values)
            max_action_indices = np.where(q_values == max_q_value)[0]
            return np.random.choice(max_action_indices)

    def update_q_table(self, state, action, reward, next_state):
        state_index = self.state_space.index(state)
        try:
            next_state_index = self.state_space.index(next_state)
        except ValueError:
            # Add new state to state_space and expand q_table
            self.state_space.append(next_state)
            self.q_table = np.vstack([self.q_table, np.zeros(len(self.action_space))])
            next_state_index = len(self.state_space) - 1

        next_max_q_value = np.max(self.q_table[next_state_index])
        q_value = (1 - self.learning_rate) * self.q_table[state_index, action] + self.learning_rate * (reward + self.discount_factor * next_max_q_value)
        self.q_table[state_index, action] = q_value

# Define the state space, action space, and initialize the environment and the agent
state_space = [
    (
        light_intensity,
        outside_temperature,
        room_temperature,
        fan_speed,
        ac_temperature,
        is_person_present,
    )
    for light_intensity in range(11)
    for outside_temperature in range(15, 36)
    for room_temperature in range(15, 36)
    for fan_speed in range(6)
    for ac_temperature in range(15, 36)
    for is_person_present in [False, True]]

action_space = [0, 1, 2, 3]  # Decrease AC temperature, increase AC temperature, decrease fan speed, increase fan speed
environment = RoomEnvironment()
agent = QLearningAgent(state_space, action_space)

# Training loop
for episode in range(10):  # 100 episodes
    state = environment.get_state()
    for step in range(100):  # 100 steps per episode
        action = agent.get_action(state)
        reward = environment.take_action(action)
        next_state = environment.get_state()
        agent.update_q_table(state, action, reward, next_state)
        state = next_state
        sleep(0.0001)
        print('Episode:', episode, 'Step:', step, 'Action:', action, 'Reward:', reward, 'Next state:', next_state, 'Q-table size:', agent.q_table.shape, 'Exploration rate:', agent.exploration_rate,end='        \r')

# Evaluation loop
for episode in range(3):  # 10 episodes
    state = environment.get_state()
    total_reward = 0
    for step in range(100):  # 100 steps per episode
        action = agent.get_action(state)
        reward = environment.take_action(action)
        next_state = environment.get_state()
        agent.update_q_table(state, action, reward, next_state)
        state = next_state
        total_reward += reward
        
    print('Episode:', episode, 'Total reward:', total_reward)



Episode: 0 Total reward: -89157.899999999956999999999995 Next state: (0, 25, -868.6999999999995, 39, 22, False) Q-table size: (1223450, 4) Exploration rate: 0.9          
Episode: 1 Total reward: -88391.79999999983
Episode: 2 Total reward: -85648.49999999985
