In [1]:
import numpy as np
import random

In [2]:
# Define State and Actions

states = np.arange(0, 101, 10)
# 0, 10, 20, 30, 40, ... 100

actions = ['FILL', 'STOP']  # Fill or Stop

print('States:', states)
print('Actions:', actions)


States: [  0  10  20  30  40  50  60  70  80  90 100]
Actions: ['FILL', 'STOP']


In [3]:
# Create Q-table and set alpha, gamma, epsilon, episodes

Q = np.zeros((len(states), len(actions)))

alpha = 0.1   # Learning rate
gamma = 0.9   # Discount factor
epsilon = 0.2 # Exploration rate
episodes = 300 # Number of training runs

print('Q-table shape:', Q.shape)


Q-table shape: (11, 2)


In [5]:
def get_reward(level, action):
    if 40 < level <= 70:
        reward = 10  # ideal range
    else:
        reward = -10  # too low or too high

    if action == 'FILL' and level >= 90:
        reward -= 10  # overflow risk

    if action == 'STOP' and level <= 10:
        reward -= 10  # empty risk

    return reward


print("Reward Example (level=60, FILL):", get_reward(60, 'FILL'))


Reward Example (level=60, FILL): 10


In [6]:
def next_level(level, action):
    if action == 'FILL':
        level += random.choice([5, 10, 15])
    else:
        level -= random.choice([5, 10, 15])
    
    # Keep level within 0–100 range
    return int(np.clip(level, 0, 100))

print("Next Level Example:", next_level(50, 'FILL'))

Next Level Example: 55


In [7]:
for ep in range(episodes):
    level = random.choice(states)

    for _ in range(15):  # steps per episode
        # Choose action (epsilon-greedy)
        if random.uniform(0, 1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[level // 10])]  # use integer index for level

        # Get next state and reward
        next_state = next_level(level, action)
        reward = get_reward(next_state, action)

        # Q-learning update
        a = actions.index(action)
        best_next = np.max(Q[next_state // 10])  # best action for next state
        Q[level // 10, a] += alpha * (reward + gamma * best_next - Q[level // 10, a])

        # Move to next state
        level = next_state

print('Training Done')


Training Done


In [8]:
try:
    level = int(input('Enter starting water level (0-100): '))
    if level < 0 or level > 100:
        raise ValueError('Water level out of range!')
except ValueError as e:
    print(e)
    level = 50
    print('Defaulting to 50%.')

print(f'\nStarting with level: {level}%')
print("Simulating for 10 steps:\n")

for step in range(10):
    action = actions[np.argmax(Q[level // 10])]
    print(f"Step {step + 1}: Level {level}% → Action: {action}")
    level = next_level(level, action)

print("\nSimulation complete. Water tank control finished.")


Enter starting water level (0-100):  50



Starting with level: 50%
Simulating for 10 steps:

Step 1: Level 50% → Action: FILL
Step 2: Level 55% → Action: FILL
Step 3: Level 65% → Action: STOP
Step 4: Level 50% → Action: FILL
Step 5: Level 60% → Action: STOP
Step 6: Level 45% → Action: FILL
Step 7: Level 55% → Action: FILL
Step 8: Level 65% → Action: STOP
Step 9: Level 50% → Action: FILL
Step 10: Level 55% → Action: FILL

Simulation complete. Water tank control finished.


## Exercise: Smart Traffic Light Controller using Q-learning

    ## Objective: In this exercise, you'll design a Smart Traffic Light System that learns when to switch lights (Green/Red) based on real-time traffic consitions using Q-Learning. Your AI Agent will balance reducing waiting time for vehicles, saving energy, and maintaining safety.

In [12]:
#Step 1 
import numpy as np
import random
print("Required libraries (NumPy, random) imported successfully.")

Required libraries (NumPy, random) imported successfully.


In [13]:
# Define State and Actions
# States: Traffic levels (0 = Empty, 4 = Very Heavy)
states = {
    0: "Empty Road",
    1: "Light Traffic",
    2: "Moderate Traffic",
    3: "Heavy Traffic",
    4: "Very Heavy Traffic"
}
num_states = len(states)

# Actions: Decisions for the traffic light
# These indices correspond to the columns in the Q-table.
actions = {
    0: "GREEN (Keep Green)",
    1: "RED (Turn Red)"
}
action_names = list(actions.values())
num_actions = len(actions)

print("\nDefined Traffic States (0-4):")
for k, v in states.items():
    print(f"  {k}: {v}")

print("\nDefined Actions (0-1):")
for k, v in actions.items():
    print(f"  {k}: {v}")


Defined Traffic States (0-4):
  0: Empty Road
  1: Light Traffic
  2: Moderate Traffic
  3: Heavy Traffic
  4: Very Heavy Traffic

Defined Actions (0-1):
  0: GREEN (Keep Green)
  1: RED (Turn Red)


In [16]:
# Step 3: Initialize the Q-Table and Hyperparameters
Q_table = np.zeros((num_states, num_actions))

# Hyperparameters
alpha = 0.1  
gamma = 0.9  
epsilon = 0.2  
episodes = 300 

print('Q-Table shape:', Q_table.shape)


Q-Table shape: (5, 2)


In [18]:
# Step 4: Design the Reward Function 
def get_reward(traffic_level, action):
    GREEN = 0
    RED = 1

    # Heavy/Very Heavy Traffic (3, 4)
    if traffic_level >= 3:
        if action == GREEN:
            return 10  # Good: clears congestion
        else:
            return -10 # Bad: creates jams

    # Empty Road (0)
    elif traffic_level == 0:
        if action == RED:
            return 5   # Good: Saves energy
        else:
            return -5  # Bad: Wastes power

    # Light/Moderate Traffic (1, 2)
    else:
        return 1
print("Reward Example (level=0, RED):", get_reward(0, 'RED'))


Reward Example (level=0, RED): -5


In [19]:
# Step 5: Define Environment Dynamics
def next_traffic(current_traffic):
   
    change = random.choice([-1, 0, 1])
    new_traffic = current_traffic + change
    # Use np.clip to ensure the traffic level stays between 0 and 4
    return np.clip(new_traffic, 0, 4)

def choose_action(state, Q_table, epsilon):
   
    if random.random() < epsilon:
        return random.choice(list(actions.keys()))
    else:
        return np.argmax(Q_table[state, :])


In [23]:
#  Step 6: Train the Agent 
def train_agent(Q_table, episodes, alpha, gamma, epsilon):
    print(" Starting Q-Learning Training ")
    for episode in range(episodes):
       
        current_state = random.randint(0, num_states - 1)
        action = choose_action(current_state, Q_table, epsilon)
        
        reward = get_reward(current_state, action)
        next_state = next_traffic(current_state)

        max_future_q = np.max(Q_table[next_state, :])
        Q_table[current_state, action] += alpha * reward + gamma * max_future_q - Q_table[current_state, action]

print('Training Done')

Training Done


In [38]:
# Step 7: Test traffic Controller

try:
    level = int(input('Enter traffic level (0-4): '))
    if level < 0 or level > 4:
        raise ValueError('Traffic level out of range!')
except ValueError as e:
    print(e)
    level = 4
    print('Defaulting to 4.')

print(f'\nStarting with level: {level}')
print("Simulating traffic control for 10 steps:\n")

for step in range(10):
   
    action = actions[np.argmax(Q[level])]
    print(f"Step {step + 1}: Level {level} → Action: {action}")
    
    level = next_traffic(level)

print("\nSimulation complete. Traffic control finished.")


Enter traffic level (0-4):  1



Starting with level: 1
Simulating traffic control for 10 steps:

Step 1: Level 1 → Action: GREEN (Keep Green)
Step 2: Level 1 → Action: GREEN (Keep Green)
Step 3: Level 2 → Action: GREEN (Keep Green)
Step 4: Level 1 → Action: GREEN (Keep Green)
Step 5: Level 0 → Action: GREEN (Keep Green)
Step 6: Level 0 → Action: GREEN (Keep Green)
Step 7: Level 1 → Action: GREEN (Keep Green)
Step 8: Level 2 → Action: GREEN (Keep Green)
Step 9: Level 1 → Action: GREEN (Keep Green)
Step 10: Level 1 → Action: GREEN (Keep Green)

Simulation complete. Traffic control finished.
