# In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from scipy.optimize import minimize

class RailwayDynamicPricingSystem:
    """
    Waitlist-driven dynamic pricing for one train class.

    A gradient-boosting model predicts the average waitlist for a query,
    the waitlist is converted into a reference demand, and a bounded
    optimisation picks the fare multiplier that maximises expected revenue
    under a constant-elasticity demand curve.
    """

    def __init__(self, base_capacity=72):
        """
        Initialize the dynamic pricing system

        Parameters:
        -----------
        base_capacity : int
            Standard capacity of the train class (e.g., 72 for 2A)
        """
        self.base_capacity = base_capacity
        self.demand_model = None
        self.price_elasticity_model = None
        self.fitted = False
        # Feature columns seen while fitting; query features are aligned to
        # this list so one-hot/optional columns always match the fitted models.
        self._feature_columns = None

    def fit_demand_model(self, historical_data):
        """
        Fit demand model using historical waitlist data

        Parameters:
        -----------
        historical_data : pandas DataFrame
            Must contain 'days_remaining', 'day_of_week' and 'avg_waitlist'.
            'train_id', 'class', 'is_holiday_period', 'is_weekend' and 'date'
            are used as features when present. 'price_multiplier' and
            'booking_rate', when both present, enable the learned
            elasticity model.

        Returns:
        --------
        self : RailwayDynamicPricingSystem
            The fitted instance, to allow call chaining.
        """
        # Create advanced features
        X = self._create_features(historical_data)
        y = historical_data['avg_waitlist']

        # Remember the training layout so later queries can be aligned to it
        self._feature_columns = list(X.columns)

        # Build a demand prediction model using Gradient Boosting
        self.demand_model = Pipeline([
            ('scaler', StandardScaler()),
            ('gb', GradientBoostingRegressor(
                n_estimators=200,
                learning_rate=0.05,
                max_depth=4,
                subsample=0.8,
                random_state=42
            ))
        ])

        # Fit the model
        self.demand_model.fit(X, y)

        # Train price elasticity model separately
        self._train_price_elasticity_model(historical_data)

        self.fitted = True
        return self

    def _train_price_elasticity_model(self, historical_data):
        """
        Train a model to estimate price elasticity based on various factors

        The model is only trained when both 'price_multiplier' and
        'booking_rate' columns are present; otherwise
        ``price_elasticity_model`` is set to None and
        ``estimate_price_elasticity`` falls back to fixed rules.
        """
        required = {'price_multiplier', 'booking_rate'}
        if not required.issubset(historical_data.columns):
            # If we don't have elasticity data, use a rule-based approach
            self.price_elasticity_model = None
            return

        features = self._align_features(self._create_features(historical_data))

        # Target is the observed elasticity (change in demand / change in price)
        # Simplified calculation, would be more complex in production
        elasticity = -historical_data['booking_rate'] / historical_data['price_multiplier']

        # Constrain elasticity to reasonable values
        elasticity = np.clip(elasticity, -3.0, -0.1)

        self.price_elasticity_model = Pipeline([
            ('scaler', StandardScaler()),
            # Seeded like the demand model so repeated fits are reproducible
            ('gb', GradientBoostingRegressor(
                n_estimators=100,
                max_depth=3,
                random_state=42
            ))
        ])

        self.price_elasticity_model.fit(features, elasticity)

    def _create_features(self, data):
        """
        Create rich feature set for modeling

        Parameters:
        -----------
        data : pandas DataFrame
            Must contain 'days_remaining' and 'day_of_week'; the optional
            columns 'is_holiday_period', 'is_weekend', 'train_id', 'class'
            and 'date' add further features when present.

        Returns:
        --------
        X : pandas DataFrame
            Feature matrix indexed like ``data``. The input frame is not
            modified.
        """
        X = pd.DataFrame(index=data.index)

        # Lead time features
        X['days_remaining'] = data['days_remaining']
        X['days_remaining_log'] = np.log1p(data['days_remaining'])
        X['days_remaining_squared'] = data['days_remaining'] ** 2

        # Time-based features
        # Cyclical encoding of day of week (1-7)
        X['day_sin'] = np.sin(2 * np.pi * data['day_of_week'] / 7)
        X['day_cos'] = np.cos(2 * np.pi * data['day_of_week'] / 7)

        # Special periods
        if 'is_holiday_period' in data.columns:
            X['is_holiday_period'] = data['is_holiday_period']

        if 'is_weekend' in data.columns:
            X['is_weekend'] = data['is_weekend']
        else:
            # NOTE(review): with a 1-7 numbering where 6 is Saturday this
            # also flags day 5 as weekend -- confirm that is intended.
            X['is_weekend'] = (data['day_of_week'] >= 5).astype(int)

        # Train-specific features
        if 'train_id' in data.columns:
            train_dummies = pd.get_dummies(data['train_id'], prefix='train')
            X = pd.concat([X, train_dummies], axis=1)

        if 'class' in data.columns:
            class_dummies = pd.get_dummies(data['class'], prefix='class')
            X = pd.concat([X, class_dummies], axis=1)

        # Seasonality features if date is available
        if 'date' in data.columns:
            # Convert into a local Series so the caller's frame is untouched
            dates = pd.to_datetime(data['date'])
            X['month'] = dates.dt.month
            X['month_sin'] = np.sin(2 * np.pi * X['month'] / 12)
            X['month_cos'] = np.cos(2 * np.pi * X['month'] / 12)

        return X

    def _align_features(self, X):
        """
        Reorder/complete a feature frame to match the columns seen at fit time

        Columns missing from ``X`` (e.g. one-hot columns for trains absent
        from the query) are added as zeros and columns unknown to the fitted
        models are dropped. Returns ``X`` unchanged before any fit.
        """
        columns = getattr(self, '_feature_columns', None)
        if columns is None:
            return X
        return X.reindex(columns=columns, fill_value=0)

    def predict_waitlist(self, query_data):
        """
        Predict waitlist based on query parameters

        Returns one prediction per row of ``query_data``.
        Raises ValueError if the model has not been fitted.
        """
        if not self.fitted:
            raise ValueError("Model hasn't been fitted yet")

        X = self._align_features(self._create_features(query_data))
        return self.demand_model.predict(X)

    def estimate_price_elasticity(self, query_data):
        """
        Estimate price elasticity for the given scenario

        Returns:
        --------
        elasticity : numpy array
            One (negative) elasticity per row of ``query_data``, from the
            learned model when available and from lead-time rules otherwise.
        """
        if self.price_elasticity_model is not None:
            X = self._align_features(self._create_features(query_data))
            return self.price_elasticity_model.predict(X)

        # Rule-based fallback: demand gets less price-sensitive as departure
        # approaches. Returned as an array so both branches share one shape
        # and callers can index the result.
        days_remaining = np.asarray(query_data['days_remaining'], dtype=float)
        return np.select(
            [days_remaining <= 7, days_remaining <= 15, days_remaining <= 30],
            [
                -0.3,  # Very inelastic (last minute)
                -0.7,  # Somewhat inelastic
                -1.2,  # Moderately elastic
            ],
            default=-1.8,  # More elastic (advance planning)
        )

    def _demand_curve(self, price_multiplier, base_price, elasticity, reference_demand):
        """
        Calculate expected demand at a given price using constant elasticity model

        Q2 = Q1 * (P2/P1)^e where e is the price elasticity

        ``base_price`` is accepted for signature symmetry with
        ``_revenue_function`` but does not affect the result.
        """
        return reference_demand * (price_multiplier ** elasticity)

    def _revenue_function(self, price_multiplier, base_price, elasticity, reference_demand, capacity):
        """
        Calculate expected revenue at a given price point
        Revenue = Price * Min(Demand, Capacity)

        Returns the negated revenue so it can be fed to a minimiser.
        """
        expected_demand = self._demand_curve(price_multiplier, base_price, elasticity, reference_demand)
        actual_demand = min(expected_demand, capacity)
        revenue = base_price * price_multiplier * actual_demand
        return -revenue  # Negative because we're minimizing

    def calculate_optimal_price(self, query_data, base_fare):
        """
        Calculate optimal price based on demand-supply dynamics

        Only the first row of ``query_data`` is priced.

        Parameters:
        -----------
        query_data : pandas DataFrame
            Contains 'train_id', 'class', 'days_remaining', 'day_of_week';
            an optional 'competitor_price' column triggers the competitor
            adjustment.
        base_fare : float
            Base fare for the journey

        Returns:
        --------
        optimal_price : float
            The optimal price after applying the multiplier and the
            competitor / tatkal business rules
        diagnostics : dict
            Diagnostic information about the calculation. Note that
            'optimal_multiplier', 'expected_demand' and 'expected_revenue'
            describe the optimiser's result before the business rules.

        Raises:
        -------
        ValueError
            If the model has not been fitted.
        """
        if not self.fitted:
            raise ValueError("Model hasn't been fitted yet")

        # Predict waitlist (excess demand)
        predicted_waitlist = self.predict_waitlist(query_data)[0]

        # Convert waitlist to absolute demand
        # If waitlist is positive, demand = capacity + waitlist
        # If waitlist is negative, demand = capacity + waitlist (effectively capacity - |waitlist|)
        # Clamped at zero: a negative demand would flip the sign of revenue.
        reference_demand = max(self.base_capacity + predicted_waitlist, 0.0)

        # Estimate price elasticity
        elasticity = self.estimate_price_elasticity(query_data)[0]

        # Define constraint: price must be between 0.7x and 3.0x base fare
        bounds = [(0.7, 3.0)]

        # Find price multiplier that maximizes revenue
        result = minimize(
            lambda x: self._revenue_function(
                x[0], base_fare, elasticity, reference_demand, self.base_capacity
            ),
            x0=[1.0],  # Start with base fare
            bounds=bounds,
            method='L-BFGS-B'
        )

        optimal_multiplier = result.x[0]
        optimal_price = base_fare * optimal_multiplier

        # Calculate expected demand and revenue at optimal price
        expected_demand = self._demand_curve(optimal_multiplier, base_fare, elasticity, reference_demand)
        expected_revenue = -self._revenue_function(
            optimal_multiplier, base_fare, elasticity, reference_demand, self.base_capacity
        )

        # Include competitor pricing if available
        if 'competitor_price' in query_data.columns:
            competitor_price = query_data['competitor_price'].values[0]
            # Adjust price based on competitor's price
            if competitor_price < optimal_price * 0.9:
                # If competitor is significantly cheaper, adjust down
                optimal_price = max(optimal_price * 0.9, competitor_price * 1.05)
            elif competitor_price > optimal_price * 1.1:
                # If competitor is significantly more expensive, potentially increase
                optimal_price = min(optimal_price * 1.05, competitor_price * 0.95)

        # Additional business rules
        # Implement tatkal pricing rules if close to departure
        days_remaining = query_data['days_remaining'].values[0]
        if days_remaining <= 1:
            # Tatkal rules (premium pricing for last-minute bookings)
            optimal_price = max(optimal_price, base_fare * 1.5)

        # Diagnostic information
        diagnostics = {
            'predicted_waitlist': predicted_waitlist,
            'reference_demand': reference_demand,
            'price_elasticity': elasticity,
            'optimal_multiplier': optimal_multiplier,
            'expected_demand': expected_demand,
            'expected_revenue': expected_revenue,
            'optimization_success': result.success,
            'days_remaining': days_remaining
        }

        return optimal_price, diagnostics

    def run_pricing_simulation(self, train_id, train_class, base_fare, days_range=60):
        """
        Run a simulation of pricing over time for a specific train

        Every lead time from ``days_range`` down to 1 is priced for each
        day of week 1-7.

        Parameters:
        -----------
        train_id : str
            Train identifier
        train_class : str
            Class identifier (e.g., '2A', '3A')
        base_fare : float
            Base fare for the journey
        days_range : int
            Number of days before departure to simulate

        Returns:
        --------
        simulation_results : pandas DataFrame
            One row per (days_remaining, day_of_week) pair with the
            simulated price and diagnostics
        """
        results = []

        for days_remaining in range(days_range, 0, -1):
            for day_of_week in range(1, 8):
                query_data = pd.DataFrame({
                    'train_id': [train_id],
                    'class': [train_class],
                    'days_remaining': [days_remaining],
                    'day_of_week': [day_of_week],
                    'is_weekend': [1 if day_of_week >= 5 else 0]
                })

                optimal_price, diagnostics = self.calculate_optimal_price(query_data, base_fare)

                results.append({
                    'train_id': train_id,
                    'class': train_class,
                    'days_remaining': days_remaining,
                    'day_of_week': day_of_week,
                    'predicted_waitlist': diagnostics['predicted_waitlist'],
                    'price_elasticity': diagnostics['price_elasticity'],
                    'optimal_price': optimal_price,
                    'price_multiplier': diagnostics['optimal_multiplier'],
                    'expected_demand': diagnostics['expected_demand'],
                    'expected_revenue': diagnostics['expected_revenue']
                })

        return pd.DataFrame(results)


# Example usage
def demonstrate_model():
    """
    Fit the pricing system on synthetic data and simulate fares.

    Builds a 60-day synthetic waitlist history for train '1027', class '2A',
    fits a RailwayDynamicPricingSystem on it and runs the pricing simulation
    at a base fare of 2000.

    Returns:
    --------
    historical_data : pandas DataFrame
        The synthetic training data
    saturday_results : pandas DataFrame
        Simulation rows where day_of_week == 6
    """
    base_fare = 2000  # Base fare in INR
    lead_days = list(range(60, 0, -1))
    n_rows = len(lead_days)

    def synthetic_waitlist(day):
        # Low demand far out: noisy, slightly negative waitlist
        if day >= 30:
            return -4 + np.random.normal(0, 1)
        # Medium demand: waitlist decays linearly with lead time
        if day >= 10:
            return 5 - (day - 10) * 0.2
        # High demand close to departure, peaking at day 8
        return 20 - abs(day - 8) * 2.5

    waitlist_values = [synthetic_waitlist(day) for day in lead_days]

    historical_data = pd.DataFrame({
        'train_id': ['1027'] * n_rows,
        'class': ['2A'] * n_rows,
        'days_remaining': lead_days,
        # Rotate through day-of-week values 1..7
        'day_of_week': [position % 7 + 1 for position in range(n_rows)],
        'avg_waitlist': waitlist_values,
        'base_fare': [base_fare] * n_rows,
    })

    # Fit the model and simulate prices over the default horizon
    pricing_system = RailwayDynamicPricingSystem(base_capacity=72)
    pricing_system.fit_demand_model(historical_data)
    simulation = pricing_system.run_pricing_simulation('1027', '2A', base_fare)

    # Keep one day of week only (6, e.g. Saturday) as a sample
    is_saturday = simulation['day_of_week'] == 6
    saturday_results = simulation[is_saturday]

    return historical_data, saturday_results

# Run the demonstration and preview the first ten rows of the Saturday sample
historical_data, price_simulation = demonstrate_model()
print("Sample price simulation for Train 1027, Class 2A (Saturdays only):")
print(
    price_simulation.loc[
        :, ['days_remaining', 'predicted_waitlist', 'price_multiplier', 'optimal_price']
    ].head(10)
)