<a href="https://colab.research.google.com/github/vishmitha-07/Summer-Analytics-Final/blob/main/Summer_Analytics_Final_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pathway bokeh --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.6/77.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.6/777.6 kB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m65.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
!pip install haversine

Collecting haversine
  Downloading haversine-2.9.0-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading haversine-2.9.0-py2.py3-none-any.whl (7.7 kB)
Installing collected packages: haversine
Successfully installed haversine-2.9.0


# Import required libraries

In [51]:
import pathway as pw
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
import math
from math import tanh
from pathway import if_else

# Initialize Bokeh: configure it to render plots inline in notebook cells,
# so later show() calls display their output below the calling cell.
output_notebook()

# Data Loading and Preprocessing

In [9]:
# Read the raw parking records from disk
df = pd.read_csv('dataset.csv')

# Combine the separate date and time strings into one datetime column
# (the source uses day-month-year dates)
df['Timestamp'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    format='%d-%m-%Y %H:%M:%S',
)

# Order chronologically, then keep the first record per (ID, Timestamp)
# and discard every row that still has a missing value
df = (
    df.sort_values('Timestamp')
    .reset_index(drop=True)
    .drop_duplicates(subset=['ID', 'Timestamp'], keep='first')
    .dropna()
)

# Simulated competitor prices for Model 3: uniform draws in [8, 15), seeded
# so the column is reproducible
np.random.seed(42)
competitor_prices = np.random.uniform(8, 15, size=len(df))
df['CompetitorPrice'] = competitor_prices

# Custom pricing factor per vehicle type; types missing from the mapping
# become NaN in the new column
vehicle_weights = dict(car=1.0, bike=0.7, truck=1.5, cycle=0.5)
df['VehicleTypeWeight'] = df['VehicleType'].map(vehicle_weights)

# Pricing factor per nearby traffic condition; unmapped conditions become NaN
traffic_weights = dict(low=0.8, average=1.0, high=1.3)
df['TrafficWeight'] = df['TrafficConditionNearby'].map(traffic_weights)

# Model 1: Baseline Linear Model

In [42]:
class LinearPricingModel:
    """Baseline model: nudge a price upward in proportion to the occupancy rate.

    The result is clamped to the band [0.5 * base_price, 2 * base_price].
    """

    def __init__(self, alpha=0.1, base_price=10):
        """Store the occupancy sensitivity ``alpha`` and the reference ``base_price``."""
        self.base_price = base_price
        self.alpha = alpha

    def calculate_price(self, current_price, occupancy, capacity):
        """Return ``current_price + alpha * occupancy / capacity``, clamped.

        The clamp is expressed with ``pw.if_else`` so the arguments may be
        Pathway column expressions as well as plain numbers.
        """
        ceiling = self.base_price * 2
        floor = self.base_price * 0.5
        candidate = current_price + self.alpha * (occupancy / capacity)

        # Lower clamp: anything under the floor is raised to the floor.
        floored = pw.if_else(candidate < floor, floor, candidate)
        # Upper clamp is evaluated first, so the ceiling wins over the floor.
        return pw.if_else(candidate > ceiling, ceiling, floored)

# Model 2: Demand-Based Price Function

In [93]:
class DemandBasedPricingModel:
    """Price from a weighted demand score squashed into the open range (-1, 1).

    The final price is ``base_price * (1 + 0.5 * demand)``, so it stays
    strictly between 0.5x and 1.5x the base price.
    """

    def __init__(self, base_price=10):
        """Store the base price and the fixed demand-factor weights."""
        self.base_price = base_price
        # Weights for each demand factor
        self.alpha, self.beta, self.gamma = 1.2, 0.3, 0.5  # occupancy, queue length, traffic
        self.delta, self.epsilon = 0.8, 0.4                # special day, vehicle type

    def _approx_tanh(self, x):
        """Cheap tanh stand-in: ``x / (1 + |x|)`` built from basic arithmetic only."""
        # |x| is written as sqrt(x^2) so only power operators are applied to x.
        magnitude = (x ** 2) ** 0.5
        return x / (1 + magnitude)

    def calculate_demand(self, occupancy, capacity, queue_length,
                        traffic_weight, is_special_day, vehicle_weight):
        """Return the normalised demand score for one observation.

        Occupancy rate, queue length, special-day flag and vehicle weight
        raise the score; the traffic weight lowers it.
        """
        score = self.alpha * (occupancy / capacity)
        score = score + self.beta * queue_length
        score = score - self.gamma * traffic_weight
        score = score + self.delta * is_special_day
        score = score + self.epsilon * vehicle_weight
        return self._approx_tanh(score)

    def calculate_price(self, occupancy, capacity, queue_length,
                       traffic_weight, is_special_day, vehicle_weight):
        """Return the base price scaled by ``1 + 0.5 * demand``."""
        demand = self.calculate_demand(occupancy, capacity, queue_length,
                                       traffic_weight, is_special_day, vehicle_weight)
        return self.base_price * (1 + 0.5 * demand)

# Model 3: Competitive Pricing Model

In [92]:
class CompetitivePricingModel(DemandBasedPricingModel):
    """Demand-based price that is pulled toward a nearby competitor's price.

    When the competitor lies within ``proximity_threshold_km`` the demand
    price is moved ``competition_factor`` of the way toward the competitor
    price; otherwise the plain demand price is returned.
    """

    # Approximate length of one degree of latitude in kilometres. The same
    # factor is applied to longitude, which overstates east-west distances
    # away from the equator; this keeps the computation to plain arithmetic
    # so it also works on Pathway column expressions.
    KM_PER_DEGREE = 111.0

    def __init__(self, base_price=10, proximity_threshold_km=1):
        """Store the base price and the proximity radius (kilometres)."""
        super().__init__(base_price)
        self.proximity_threshold = proximity_threshold_km
        self.competition_factor = 0.3  # How much competitor prices affect ours

    def calculate_price(self, occupancy, capacity, queue_length,
                       traffic_weight, is_special_day, vehicle_weight,
                       competitor_price, current_lat, current_lon,
                       competitor_lat, competitor_lon):
        """Return the competition-aware price for one observation.

        Coordinates are in degrees. The first six arguments are forwarded
        unchanged to ``DemandBasedPricingModel.calculate_price``.
        """
        # First calculate base demand price
        base_price = super().calculate_price(
            occupancy, capacity, queue_length,
            traffic_weight, is_special_day, vehicle_weight
        )

        # Convert the coordinate differences from degrees to kilometres so
        # they are in the same unit as the threshold. Previously raw degree
        # differences were compared against a kilometre threshold.
        lat_km = (current_lat - competitor_lat) * self.KM_PER_DEGREE
        lon_km = (current_lon - competitor_lon) * self.KM_PER_DEGREE
        # Compare squared distances to avoid taking a square root
        distance_squared = lat_km ** 2 + lon_km ** 2

        # Create condition for proximity
        is_near = distance_squared < (self.proximity_threshold ** 2)

        # Move the demand price part of the way toward the competitor price
        price_diff = competitor_price - base_price
        adjustment = self.competition_factor * price_diff
        adjusted_price = base_price + adjustment

        # Implement min function using if_else
        min_price = pw.if_else(adjusted_price < competitor_price,
                              adjusted_price,
                              competitor_price)

        # Final price logic:
        #   near and full and undercut by competitor -> min(adjusted, competitor)
        #   near otherwise                            -> adjusted price
        #   not near                                  -> plain demand price
        final_price = pw.if_else(
            is_near,
            pw.if_else(
                (occupancy >= capacity) & (competitor_price < adjusted_price),
                min_price,
                adjusted_price
            ),
            base_price
        )

        return final_price

# Pathway Integration for Real-Time Processing

In [95]:
# Make sure the engineered columns exist before building the Pathway table.
# Each guard only fills in a column that is missing from df, using the same
# rule as the preprocessing step.
if 'CompetitorPrice' not in df.columns:
    np.random.seed(42)
    df['CompetitorPrice'] = np.random.uniform(8, 15, size=len(df))

if 'VehicleTypeWeight' not in df.columns:
    vehicle_weights = {
        'car': 1.0,
        'bike': 0.7,
        'truck': 1.5,
        'cycle': 0.5
    }
    df['VehicleTypeWeight'] = df['VehicleType'].map(vehicle_weights)

if 'TrafficWeight' not in df.columns:
    traffic_weights = {
        'low': 0.8,
        'average': 1.0,
        'high': 1.3
    }
    df['TrafficWeight'] = df['TrafficConditionNearby'].map(traffic_weights)

# Convert date and time to datetime if not already done. The explicit
# day-month-year format matches the preprocessing step; without it pandas may
# read an ambiguous date such as 04-10-2016 as month-first.
if 'Timestamp' not in df.columns:
    df['Timestamp'] = pd.to_datetime(
        df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
        format='%d-%m-%Y %H:%M:%S',
    )

# Now define the schema matching exactly the columns we'll use
class ParkingDataSchema(pw.Schema):
    """Column names and types of the parking records handed to Pathway.

    Mirrors the columns selected into ``df_for_pathway``.
    NOTE(review): the table below is built with ``pw.debug.table_from_pandas``
    without passing this schema, so it currently serves as documentation only.
    """
    ID: int
    SystemCodeNumber: str
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: int
    # The Timestamp column is parsed without timezone information, so it is
    # declared as a naive datetime rather than a UTC-aware one.
    Timestamp: pw.DateTimeNaive
    CompetitorPrice: float
    VehicleTypeWeight: float
    TrafficWeight: float

# Select only the columns that match our schema
df_for_pathway = df.loc[:, [
    'ID',
    'SystemCodeNumber',
    'Capacity',
    'Latitude',
    'Longitude',
    'Occupancy',
    'VehicleType',
    'TrafficConditionNearby',
    'QueueLength',
    'IsSpecialDay',
    'Timestamp',
    'CompetitorPrice',
    'VehicleTypeWeight',
    'TrafficWeight',
]].copy()

# Static Pathway table built from the in-memory frame (debug helper)
table = pw.debug.table_from_pandas(df_for_pathway)

# One instance of each pricing model, all with default parameters
linear_model, demand_model, competitive_model = (
    LinearPricingModel(),
    DemandBasedPricingModel(),
    CompetitivePricingModel(),
)

# Function to process each row with all models
def process_row(row, competitor_lat=26.1445, competitor_lon=91.7361, starting_price=10):
    """Run all three pricing models on one row and collect the output columns.

    Args:
        row: object exposing the parking columns as attributes (Occupancy,
            Capacity, QueueLength, TrafficWeight, IsSpecialDay,
            VehicleTypeWeight, CompetitorPrice, Latitude, Longitude,
            Timestamp). It is called below with ``pw.this``.
        competitor_lat: latitude of the single competitor, in degrees.
        competitor_lon: longitude of the single competitor, in degrees.
        starting_price: price fed to the linear model as ``current_price``.

    Returns:
        dict mapping output column names to the computed values, suitable
        for unpacking into ``table.select(**...)``.
    """
    # Model 1: every row starts from the same starting price
    linear_price = linear_model.calculate_price(
        current_price=starting_price,
        occupancy=row.Occupancy,
        capacity=row.Capacity
    )

    # Model 2
    demand_price = demand_model.calculate_price(
        occupancy=row.Occupancy,
        capacity=row.Capacity,
        queue_length=row.QueueLength,
        traffic_weight=row.TrafficWeight,
        is_special_day=row.IsSpecialDay,
        vehicle_weight=row.VehicleTypeWeight
    )

    # Model 3: one competitor at a fixed location is assumed for every row
    competitive_price = competitive_model.calculate_price(
        occupancy=row.Occupancy,
        capacity=row.Capacity,
        queue_length=row.QueueLength,
        traffic_weight=row.TrafficWeight,
        is_special_day=row.IsSpecialDay,
        vehicle_weight=row.VehicleTypeWeight,
        competitor_price=row.CompetitorPrice,
        current_lat=row.Latitude,
        current_lon=row.Longitude,
        competitor_lat=competitor_lat,
        competitor_lon=competitor_lon
    )

    return {
        "timestamp": row.Timestamp,
        "occupancy": row.Occupancy,
        "capacity": row.Capacity,
        "linear_price": linear_price,
        "demand_price": demand_price,
        "competitive_price": competitive_price,
        "competitor_price": row.CompetitorPrice
    }

# Build the priced table: one output column per key returned by process_row
result_table = table.select(**process_row(pw.this))

# Materialise as a pandas frame ordered by time for visualization
# (in production, would stream this)
result_df = pw.debug.table_to_pandas(result_table).sort_values('timestamp')

# Visualization with Bokeh (simulated sample data)

In [106]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from datetime import datetime, timedelta
import numpy as np

# Generate sample data (replace with your actual result_df)
def generate_sample_data():
    """Build a synthetic one-day pricing frame sampled every 15 minutes.

    The global NumPy RNG is seeded with 42, so the numeric columns are the
    same on every call; only the timestamps depend on the current time.
    All four price columns are clipped to the range [8, 15].

    Returns:
        pandas.DataFrame with columns timestamp, occupancy, capacity,
        linear_price, demand_price, competitive_price, competitor_price.
    """
    np.random.seed(42)
    timestamps = pd.date_range(start=datetime.now() - timedelta(days=1),
                               end=datetime.now(),
                               freq='15min')
    n_points = len(timestamps)

    def random_walk(sigma):
        # Cumulative sum of zero-mean Gaussian steps
        return np.cumsum(np.random.normal(0, sigma, n_points))

    def wave(half_cycles):
        # Sine sampled over [0, half_cycles * pi]
        return np.sin(np.linspace(0, half_cycles * np.pi, n_points))

    # The draws below happen in a fixed order so the seeded values are stable.
    occupancy = np.random.randint(20, 95, size=n_points)
    linear = 10 + random_walk(0.2)
    demand = 10 + random_walk(0.3) + wave(4)
    competitive = 10 + random_walk(0.25) + 0.5 * wave(2)
    competitor = 9.5 + random_walk(0.15) + 0.3 * wave(3)

    return pd.DataFrame({
        'timestamp': timestamps,
        'occupancy': occupancy,
        'capacity': 100,
        # Ensure prices stay within bounds
        'linear_price': np.clip(linear, 8, 15),
        'demand_price': np.clip(demand, 8, 15),
        'competitive_price': np.clip(competitive, 8, 15),
        'competitor_price': np.clip(competitor, 8, 15),
    })

# Create the visualization
def create_pricing_visualization(result_df):
    """Build a two-panel Bokeh layout: model prices on top, occupancy rate below.

    Args:
        result_df: DataFrame with columns timestamp, occupancy, capacity,
            linear_price, demand_price, competitive_price, competitor_price.
            It is not modified.

    Returns:
        A Bokeh column layout holding both figures; the lower figure shares
        the x-range of the upper one.
    """
    # The lower chart is labelled in percent and fixed to 0-100, so plot the
    # occupancy rate rather than the raw vehicle count. assign() returns a new
    # frame, leaving the caller's DataFrame untouched.
    plot_df = result_df.assign(
        occupancy_pct=100 * result_df['occupancy'] / result_df['capacity']
    )
    source = ColumnDataSource(plot_df)

    # Each figure gets its own HoverTool instance rather than sharing one
    # object between two plots.
    price_hover = HoverTool(
        tooltips=[
            ("Time", "@timestamp{%F %T}"),
            ("Occupancy", "@occupancy/@capacity"),
            ("Linear Price", "$@linear_price{0.2f}"),
            ("Demand Price", "$@demand_price{0.2f}"),
            ("Competitive Price", "$@competitive_price{0.2f}"),
            ("Competitor Price", "$@competitor_price{0.2f}")
        ],
        formatters={
            '@timestamp': 'datetime'
        }
    )
    occupancy_hover = HoverTool(
        tooltips=[
            ("Time", "@timestamp{%F %T}"),
            ("Occupancy", "@occupancy/@capacity"),
            ("Occupancy Rate", "@occupancy_pct{0.0}%")
        ],
        formatters={
            '@timestamp': 'datetime'
        }
    )

    # Upper figure: the three model prices and the competitor price
    p1 = figure(title="Dynamic Parking Pricing Models Comparison",
               x_axis_type="datetime", width=900, height=400,
               tools=[price_hover, 'pan', 'wheel_zoom', 'reset', 'save'],
               toolbar_location="above")

    p1.line('timestamp', 'linear_price', source=source,
           line_width=2, color="blue", legend_label="Linear Model (Basic)")
    p1.line('timestamp', 'demand_price', source=source,
           line_width=2, color="green", legend_label="Demand Model (Advanced)")
    p1.line('timestamp', 'competitive_price', source=source,
           line_width=2, color="red", legend_label="Competitive Model (Premium)")
    p1.line('timestamp', 'competitor_price', source=source,
           line_width=2, color="orange", legend_label="Competitor Price", line_dash="dashed")

    p1.yaxis.axis_label = "Price ($)"
    p1.xaxis.axis_label = "Time"
    p1.legend.location = "top_left"
    p1.legend.click_policy = "hide"  # Allow clicking to hide/show lines
    p1.title.text_font_size = '14pt'

    # Lower figure: occupancy rate, panning/zooming together with the upper one
    p2 = figure(title="Parking Occupancy Rate",
               x_axis_type="datetime", width=900, height=300,
               x_range=p1.x_range,
               tools=[occupancy_hover, 'pan', 'wheel_zoom', 'reset'])

    p2.line('timestamp', 'occupancy_pct', source=source,
           line_width=2, color="purple")
    p2.yaxis.axis_label = "Occupancy (%)"
    p2.xaxis.axis_label = "Time"

    # Fix the percentage axis to 0-100
    p2.y_range.start = 0
    p2.y_range.end = 100

    # Stack the two figures vertically
    final_plot = column(p1, p2)

    return final_plot

# Main execution
if __name__ == "__main__":
    # NOTE(review): the assignment below rebinds result_df to synthetic data,
    # so a result_df already present in the notebook namespace is not what
    # gets plotted. To plot real results, load or assign them here instead, e.g.
    # result_df = pd.read_csv('pricing_results.csv')
    result_df = generate_sample_data()  # Using sample data for demonstration

    # Build the two-panel layout and render it
    plot = create_pricing_visualization(result_df)
    show(plot)

# New Section