In [360]:
import numpy as np
import random
import matplotlib.pyplot as plt
from collections import defaultdict

# --- Simulation Constants ---
# Seats available at the start of every simulated booking window.
TOTAL_CAPACITY = 90
# Reference fare; each daily price is BASE_FARE * (1 + chosen action).
BASE_FARE = 1100.0
BOOKING_DAYS = 60 # Length of the booking window: the simulation steps from day 60 down to day 1

# --- Baseline Scenario (No Dynamic Pricing) ---
# Fixed-price reference case used for comparison in the report:
# 75 seats sold at the base fare of 1100.
BASELINE_SEATS_SOLD = 75
BASELINE_REVENUE = BASELINE_SEATS_SOLD * BASE_FARE

# --- Q-Learning Agent Hyperparameters ---
LEARNING_RATE = 0.1 # Alpha: step size of each Q-value update
DISCOUNT_FACTOR = 1 # Gamma: 1 means future rewards are not discounted
EPSILON = 0.1 # Exploration rate: probability of picking a random action

# --- Reward Function Hyperparameter ---
BETA = 10.0 # Weight on |price - previous price| in the RVP reward (price-volatility penalty)

In [361]:
class RailwayEnvironment:
    """
    Simulates customer demand for the railway booking environment.

    The mean demand at a given price follows one of two price-elasticity
    models, and every call to `get_demand` draws a Poisson-distributed
    sample around that mean.

    Supported models:
        'linear': D(p) = a - b * p      with a = 137,   b = 0.114
        'power' : D(p) = a * p ** (-b)  with a = 90000, b = 1.2

    NOTE(review): with these parameters the mean demand at a price of 1100
    is about 11.6 (linear) and about 20.2 (power), not the "approx 75" that
    earlier comments claimed. The parameters are left unchanged so existing
    results are preserved -- confirm the intended calibration.
    """
    def __init__(self, elasticity_model='power'):
        """
        Args:
            elasticity_model (str): 'linear' or 'power'.

        Raises:
            ValueError: If `elasticity_model` is not a supported model name.
        """
        self.elasticity_model = elasticity_model

        # Parameters for the elasticity models
        if self.elasticity_model == 'linear':
            # D(p) = a - b*p
            self.a = 137    # Mean demand at price 0
            self.b = 0.114  # Demand lost per unit of price
        elif self.elasticity_model == 'power':
            # D(p) = a * p^(-b)
            self.a = 90000  # Scaling factor
            self.b = 1.2    # Price elasticity of demand
        else:
            raise ValueError("Invalid elasticity model. Choose 'linear' or 'power'.")

    def get_demand(self, price):
        """
        Draws a stochastic demand for the given price.

        The mean demand comes from the configured elasticity model, is
        clamped at zero, and is then used as the rate of a Poisson draw from
        NumPy's global random state.

        Args:
            price (float): Ticket price. Must be strictly positive for the
                'power' model, where p ** (-b) is undefined at or below zero.

        Returns:
            int: Poisson-distributed demand sample (0 when the mean is 0).

        Raises:
            ValueError: If the 'power' model is used with a price <= 0.
        """
        if self.elasticity_model == 'linear':
            # D(p) = a - b*p
            mean_demand = self.a - self.b * price
        else: # Power model
            # Without this guard a zero price raises ZeroDivisionError and a
            # negative price yields a complex number that breaks max() below.
            if price <= 0:
                raise ValueError("Price must be positive for the 'power' elasticity model.")
            # D(p) = a * p^(-b)
            mean_demand = self.a * (price ** -self.b)

        # Ensure demand is not negative (the linear model goes below zero at high prices)
        mean_demand = max(0, mean_demand)

        # Return a random demand based on the mean (Poisson distribution)
        return np.random.poisson(mean_demand)

In [362]:
class QLearningAgent:
    """
    Tabular Q-learning agent that selects a daily fare adjustment.

    Q-values live in a dictionary keyed by a discretized state tuple; each
    value is an array holding one estimate per available action.
    """
    def __init__(self):
        # Available moves, expressed as fractional changes relative to the base fare
        self.actions = [-0.15, -0.10, -0.05, 0.0, 0.05, 0.10, 0.20, 0.30]
        # Sparse Q-table: a state gets an all-zero row the first time it is looked up
        self.q_table = defaultdict(lambda: np.zeros(len(self.actions)))

    def get_state(self, days_left, capacity_rem, velocity):
        """
        Maps the raw observation onto a coarse, hashable Q-table key.

        Returns:
            tuple: (days_bin, capacity_bin, velocity_bin) where
                days_bin     = days_left in whole blocks of 10 days,
                capacity_bin = remaining share of TOTAL_CAPACITY in tenths,
                velocity_bin = 0 (below 5), 1 (below 15) or 2 (otherwise).
        """
        # Booking velocity falls into three bands: low / medium / high
        velocity_bin = 0 if velocity < 5 else (1 if velocity < 15 else 2)

        # Remaining capacity as a fraction of the total, truncated to tenths
        capacity_bin = int((capacity_rem / TOTAL_CAPACITY) * 10)

        # Time to departure in 10-day blocks
        days_bin = days_left // 10

        return (days_bin, capacity_bin, velocity_bin)

    def choose_action(self, state):
        """
        Picks an action with an epsilon-greedy rule.

        A uniform draw below EPSILON returns a uniformly random action;
        otherwise the action with the highest Q-value for `state` is returned
        (the first such action when several are tied).
        """
        exploring = random.uniform(0, 1) < EPSILON
        if exploring:
            return random.choice(self.actions)

        best_idx = np.argmax(self.q_table[state])
        return self.actions[best_idx]

    def update_q_table(self, state, action, reward, next_state):
        """
        Applies one Q-learning update to the (state, action) entry.

        The entry moves a LEARNING_RATE-sized step towards
        reward + DISCOUNT_FACTOR * max(Q[next_state]).
        """
        col = self.actions.index(action)
        row = self.q_table[state]

        current = row[col]
        best_future = np.max(self.q_table[next_state])

        # Temporal-difference target and the step towards it
        td_target = reward + DISCOUNT_FACTOR * best_future
        row[col] = current + LEARNING_RATE * (td_target - current)

In [363]:
# Q-table key that stands for "the episode is over". The agent's get_state()
# only produces 3-tuples of bin indices, so this key is never used as the
# current state, never updated, and its Q-values therefore stay at zero.
_TERMINAL_STATE = ("terminal",)


def _run_episode(env, agent, reward_function_type, learn):
    """
    Simulates one booking window, from BOOKING_DAYS days out down to day 1.

    Args:
        env: Demand environment exposing `get_demand(price)`.
        agent: Agent exposing `get_state`, `choose_action`, `update_q_table`,
            `actions` and `q_table`.
        reward_function_type (str): 'SRM' (reward = daily revenue) or 'RVP'
            (daily revenue minus BETA * |price - previous price|). Only used
            when `learn` is True.
        learn (bool): True for a training episode (epsilon-greedy actions and
            Q-table updates); False for a purely greedy evaluation episode
            without updates.

    Returns:
        tuple: (total_revenue, capacity_rem, prices), with `prices` listed in
        simulation order (first booking day first).

    NOTE(review): the daily demand is the sampled demand divided by the
    number of days left, so seat counts are fractional -- confirm this is
    the intended demand model.
    """
    capacity_rem = TOTAL_CAPACITY
    last_24h_bookings = 0
    prev_price = BASE_FARE
    total_revenue = 0
    prices = []

    for day in range(BOOKING_DAYS, 0, -1):
        # 1. Observe the state (booking velocity = seats sold on the previous step)
        state = agent.get_state(day, capacity_rem, last_24h_bookings)

        # 2. Pick the action; the first day is always priced at the base fare
        if day == BOOKING_DAYS:
            action = 0.0
        elif learn:
            action = agent.choose_action(state)
        else:
            action = agent.actions[np.argmax(agent.q_table[state])]

        price = BASE_FARE * (1 + action)

        # 3. Environment responds; sales are capped by the remaining seats
        daily_demand = env.get_demand(price) / day
        tickets_sold = min(daily_demand, capacity_rem)
        revenue = tickets_sold * price

        total_revenue += revenue
        capacity_rem -= tickets_sold
        last_24h_bookings = tickets_sold
        prices.append(price)

        # The episode ends on the last booking day or once the train is sold out
        episode_over = capacity_rem <= 0 or day == 1

        if learn:
            # 4. Reward
            if reward_function_type == 'SRM':
                reward = revenue
            else: # RVP
                reward = revenue - (BETA * abs(price - prev_price))

            # 5. Q-update. A terminal transition must not bootstrap from a
            # live state: get_state(day - 1, ...) would alias states that
            # share the same bins (e.g. days 1-9 all map to days_bin 0), so
            # the zero-valued terminal key is used instead.
            if episode_over:
                next_state = _TERMINAL_STATE
            else:
                next_state = agent.get_state(day - 1, capacity_rem, last_24h_bookings)
            agent.update_q_table(state, action, reward, next_state)

        prev_price = price

        if episode_over:
            break

    return total_revenue, capacity_rem, prices


def run_simulation(reward_function_type, elasticity_model_type, num_episodes=3000, seed=None):
    """
    Runs the full Q-learning simulation with a fixed starting price on the first day.

    The agent is trained for `num_episodes` booking windows and then evaluated
    on one additional greedy (exploitation-only) booking window, whose
    outcome is reported.

    Args:
        reward_function_type (str): 'SRM' or 'RVP'.
        elasticity_model_type (str): 'linear' or 'power'.
        num_episodes (int): Number of training iterations.
        seed (int | None): When given, seeds both the `random` module and
            NumPy's global random state so the run is reproducible. The
            default (None) leaves the global random state untouched.

    Returns:
        dict: Final evaluation results with the keys "Total Revenue",
        "Occupancy (%)", "Seats Sold", "Average Price", "Price Volatility"
        (standard deviation of the daily prices) and "Prices". "Prices" is
        in reverse simulation order: index 0 is the last simulated day.

    Raises:
        ValueError: If `reward_function_type` or `elasticity_model_type` is
            not one of the supported names.
    """
    if reward_function_type not in ('SRM', 'RVP'):
        raise ValueError("Invalid reward function. Choose 'SRM' or 'RVP'.")

    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    env = RailwayEnvironment(elasticity_model=elasticity_model_type)
    agent = QLearningAgent()

    print(f"Reward Function: {reward_function_type} | Elasticity Model: {elasticity_model_type}")

    # --- Training Loop ---
    for _ in range(num_episodes):
        _run_episode(env, agent, reward_function_type, learn=True)

    # --- Evaluation Run (exploitation only, no Q-table updates) ---
    total_revenue, capacity_rem, prices = _run_episode(
        env, agent, reward_function_type, learn=False
    )

    # --- Compile and Return Results ---
    prices.reverse()
    total_seats_sold = TOTAL_CAPACITY - capacity_rem
    occupancy = (total_seats_sold / TOTAL_CAPACITY) * 100
    avg_price = total_revenue / total_seats_sold if total_seats_sold > 0 else 0
    price_volatility = np.std(prices) if prices else 0

    results = {
        "Total Revenue": total_revenue,
        "Occupancy (%)": occupancy,
        "Seats Sold": total_seats_sold,
        "Average Price": avg_price,
        "Price Volatility": price_volatility,
        "Prices": prices
    }

    return results

In [364]:

def run(REWARD_MODEL, ELASTICITY_MODEL):
    """
    Runs one simulation and prints a revenue report for it.

    Args:
        REWARD_MODEL (str): Reward function name passed to `run_simulation`.
        ELASTICITY_MODEL (str): Elasticity model name passed to `run_simulation`.

    The report compares the fixed-price baseline revenue with the result of
    the simulated dynamic-pricing run and is followed by two blank lines.
    """
    final_results = run_simulation(REWARD_MODEL, ELASTICITY_MODEL)

    def report_lines():
        # Lines are produced lazily, so each one is formatted right before it is printed.
        yield f"Reward Model: {REWARD_MODEL} | Elasticity Model: {ELASTICITY_MODEL}"
        yield f"Baseline Revenue (Fixed Price): ₹{BASELINE_REVENUE:,.2f}"
        yield f"Dynamic Pricing Revenue:        ₹{final_results['Total Revenue']:,.2f}"
        yield f"Seats Sold:                     {final_results['Seats Sold']:.0f} / {TOTAL_CAPACITY}"
        yield f"Occupancy:                      {final_results['Occupancy (%)']:.2f}%"
        yield f"Average Ticket Price:           ₹{final_results['Average Price']:,.2f}"
        yield f"Price Volatility (Std Dev):     ₹{final_results['Price Volatility']:,.2f}"
        yield "\n"

    for line in report_lines():
        print(line)
# Report every reward/elasticity combination: both reward models for the
# linear demand curve first, then both for the power demand curve.
for elasticity_model in ("linear", "power"):
    for reward_model in ("SRM", "RVP"):
        run(reward_model, elasticity_model)



Reward Function: SRM | Elasticity Model: linear
Reward Model: SRM | Elasticity Model: linear
Baseline Revenue (Fixed Price): ₹82,500.00
Dynamic Pricing Revenue:        ₹84,177.50
Seats Sold:                     90 / 90
Occupancy:                      100.00%
Average Ticket Price:           ₹935.31
Price Volatility (Std Dev):     ₹21.48


Reward Function: RVP | Elasticity Model: linear
Reward Model: RVP | Elasticity Model: linear
Baseline Revenue (Fixed Price): ₹82,500.00
Dynamic Pricing Revenue:        ₹85,766.48
Seats Sold:                     90 / 90
Occupancy:                      100.00%
Average Ticket Price:           ₹952.96
Price Volatility (Std Dev):     ₹40.61


Reward Function: SRM | Elasticity Model: power
Reward Model: SRM | Elasticity Model: power
Baseline Revenue (Fixed Price): ₹82,500.00
Dynamic Pricing Revenue:        ₹94,108.22
Seats Sold:                     73 / 90
Occupancy:                      80.74%
Average Ticket Price:           ₹1,295.10
Price Volatility (Std 

In [365]:
# def plot(REWARD_MODEL,ELASTICITY_MODEL):
#     final_results = run_simulation(REWARD_MODEL, ELASTICITY_MODEL)
#     # --- Plotting the Price Fluctuation ---
#     plt.figure(figsize=(14, 7))
#     plt.plot(range(len(final_results['Prices'])), final_results['Prices'], marker='o', linestyle='-', color='b')
#     plt.title(f'Dynamic Price Over Booking Window ({REWARD_MODEL} | {ELASTICITY_MODEL})', fontsize=16)
#     plt.xlabel('Days Before Departure', fontsize=12)
#     plt.ylabel('Ticket Price (₹)', fontsize=12)
#     plt.axhline(y=BASE_FARE, color='r', linestyle='--', label=f'Base Fare (₹{BASE_FARE})')
#     plt.grid(True)
#     plt.legend()
#     plt.gca().invert_xaxis() # Show day 60 on the left and day 0 on the right
#     plt.show()
# plot("SRM","linear" )
# plot("RVP", "linear")
# plot("SRM", "power")
# plot("RVP", "power")