In [1]:
import pandas as pd
import numpy as np

### Step 1: Simulate the Dataset
We will create a synthetic dataset for ride-sharing based on features like distance, time of day, demand, and weather conditions.

In [2]:
# Simulation settings: the number of synthetic rides to generate, and a fixed
# seed for NumPy's global random generator so every run draws the same values.
n_samples = 1_000
np.random.seed(42)

In [3]:
# Raw feature columns of the synthetic rides. The random draws happen in the
# order the keys are written, so the column order also fixes the random stream.
data = dict(
    Distance=np.random.uniform(low=1, high=20, size=n_samples),  # Distance in km
    Time_of_Day=np.random.choice(["Morning", "Afternoon", "Evening", "Night"], size=n_samples),
    Demand=np.random.uniform(low=1, high=10, size=n_samples),  # Simulated demand index
    Weather=np.random.choice(["Clear", "Rainy", "Snowy"], size=n_samples),
)

In [4]:
# Pricing lookup table shared by both categorical features:
#   - time-of-day labels map to the base price used for "Base_Price"
#   - weather labels map to the factor used for "Weather_Multiplier"
base_prices = dict(
    Morning=10,
    Afternoon=8,
    Evening=12,
    Night=15,
    Clear=1.0,
    Rainy=1.5,
    Snowy=2.0,
)

In [5]:
# Derive the price of every simulated ride.
# Each ride gets a base price looked up by its time of day and a multiplier
# looked up by its weather; both lookups go through base_prices.
data["Base_Price"] = list(map(base_prices.__getitem__, data["Time_of_Day"]))
data["Weather_Multiplier"] = list(map(base_prices.__getitem__, data["Weather"]))
# Price = distance * base price * weather multiplier * (1 + demand / 10)
data["Price"] = (
    data["Distance"]
    * data["Base_Price"]
    * data["Weather_Multiplier"]
    * (1 + data["Demand"] / 10)
)

In [6]:
# Turn the simulated columns into a DataFrame, rounding prices to two decimals
df = pd.DataFrame(data).assign(Price=lambda frame: frame["Price"].round(2))

In [7]:
# Write the dataset to CSV in the working directory (row index left out),
# then print a confirmation message
df.to_csv(path_or_buf="ride_sharing_data.csv", index=False)
print("Dataset created and saved as 'ride_sharing_data.csv'")

Dataset created and saved as 'ride_sharing_data.csv'


### Step 2: Build the Predictive Model
We use this dataset to train a predictive model.

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

In [9]:
# Read the saved dataset back from disk; the bare expression on the last line
# makes the notebook display the frame
df = pd.read_csv(filepath_or_buffer="ride_sharing_data.csv")
df

Unnamed: 0,Distance,Time_of_Day,Demand,Weather,Base_Price,Weather_Multiplier,Price
0,8.116262,Evening,5.671736,Snowy,12,2.0,305.27
1,19.063572,Night,5.312637,Snowy,15,2.0,875.74
2,14.907885,Afternoon,1.230779,Snowy,8,2.0,267.88
3,12.374511,Night,4.071230,Snowy,15,2.0,522.37
4,3.964354,Night,4.421761,Snowy,15,2.0,171.52
...,...,...,...,...,...,...,...
995,2.740059,Morning,5.430929,Rainy,10,1.5,63.42
996,18.428958,Evening,6.195511,Clear,12,1.0,358.16
997,3.599554,Morning,8.790194,Clear,10,1.0,67.64
998,19.054510,Afternoon,9.826654,Rainy,8,1.5,453.34


In [10]:
# One-hot encode the two categorical columns.
# sparse_output=False asks the encoder for a dense array rather than a sparse matrix.
encoder = OneHotEncoder(sparse_output=False)
encoded_features = encoder.fit_transform(df.loc[:, ["Time_of_Day", "Weather"]])

In [11]:
# Combine encoded features with numeric features.
# The one-hot columns are labelled with the encoder's own output names
# (category-derived strings) instead of anonymous positional integers, and they
# reuse df's index so pd.concat lines the two frames up row for row.
X = pd.concat(
    [
        df[["Distance", "Demand"]],
        pd.DataFrame(
            encoded_features,
            columns=encoder.get_feature_names_out(),
            index=df.index,
        ),
    ],
    axis=1,
)
# Target: the simulated ride price
y = df["Price"]

In [12]:
# Cast every column label to str so the feature frame has uniformly typed labels
X.columns = X.columns.map(str)

In [13]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Candidate regressors keyed by the name used when reporting results.
# Every estimator that takes a random_state gets the same one.
models = dict(
    [
        ("Random Forest", RandomForestRegressor(random_state=42)),
        ("Linear Regression", LinearRegression()),
        ("Gradient Boosting", GradientBoostingRegressor(random_state=42)),
        ("XGBoost", XGBRegressor(random_state=42)),
    ]
)

In [15]:
# Train and evaluate every candidate, keeping the one with the lowest test MAE.
#
# Selection uses a single primary metric (MAE) with R² only as a tie-breaker.
# Requiring BOTH metrics to improve at once makes the winner depend on the
# order the models are visited in, and can skip the model with the lowest MAE
# just because its R² is marginally lower than a previously seen one.
best_model = None
best_mae = float("inf")
best_r2 = -float("inf")

for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Predict and evaluate on the held-out split
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = model.score(X_test, y_test)  # regressor .score() reports R²

    print(f"{model_name} MAE: {mae}")
    print(f"{model_name} R²: {r2}")
    print("*********************")

    # Lower MAE wins; on an exact MAE tie the higher R² wins
    if (mae, -r2) < (best_mae, -best_r2):
        best_mae = mae
        best_r2 = r2
        best_model = model

print(f"Best Model: {best_model}")

Random Forest MAE: 18.129480499999982
Random Forest R²: 0.9635900143030814
*********************
Linear Regression MAE: 40.28387981680518
Linear Regression R²: 0.8869118970081303
*********************
Gradient Boosting MAE: 14.55613833928193
Gradient Boosting R²: 0.9802096459659877
*********************
XGBoost MAE: 14.678567012405399
XGBoost R²: 0.9819041016382857
*********************
Best Model: GradientBoostingRegressor(random_state=42)


### Step 3: Create the Dashboard
Build a dashboard using Dash to interact with the model.

In [16]:
pip install dash

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_compo

In [17]:
import dash
from dash import html, dcc, Input, Output, State

# Initialize the Dash app
app = dash.Dash(__name__)

# Define the layout of the app: a title, the four model inputs plus a submit
# button, and an initially empty container that the prediction callback fills.
# Keys inside `style` dicts are camelCased (textAlign, fontSize, marginTop),
# which is the form Dash documents for inline styles.
app.layout = html.Div([
    html.H1("Dynamic Pricing for Ride-Sharing Services", style={'textAlign': 'center'}),

    html.Div([
        # Numeric inputs
        dcc.Input(id='distance', type='number', placeholder='Distance (km)', style={'margin': '10px'}),
        dcc.Input(id='demand', type='number', placeholder='Demand Index (1-10)', style={'margin': '10px'}),
        # Categorical inputs; the option values are the labels the encoder was fitted on
        dcc.Dropdown(
            id='time_of_day',
            options=[{'label': t, 'value': t} for t in ["Morning", "Afternoon", "Evening", "Night"]],
            placeholder='Time of Day',
            style={'margin': '10px'}
        ),
        dcc.Dropdown(
            id='weather',
            options=[{'label': w, 'value': w} for w in ["Clear", "Rainy", "Snowy"]],
            placeholder='Weather',
            style={'margin': '10px'}
        ),
        # Clicking the button is the only thing that triggers a prediction
        html.Button('Predict Price', id='predict_button', n_clicks=0, style={'margin': '10px'}),
    ], style={'textAlign': 'center'}),

    # Target of the prediction callback
    html.Div(id='prediction_output', style={'textAlign': 'center', 'fontSize': '20px', 'marginTop': '20px'})
])

# Define callback to predict price.
# Only the button click is an Input (trigger); the form fields are State, so
# editing them does not fire the callback by itself.
@app.callback(
    Output('prediction_output', 'children'),
    [Input('predict_button', 'n_clicks')],
    [State('distance', 'value'),
     State('demand', 'value'),
     State('time_of_day', 'value'),
     State('weather', 'value')]
)
def predict_price(n_clicks, distance, demand, time_of_day, weather):
    """Return the text shown in the prediction area of the dashboard.

    Args:
        n_clicks: Click count of the predict button (0 or None before any click).
        distance: Value of the distance input, or None when empty.
        demand: Value of the demand input, or None when empty.
        time_of_day: Selected time-of-day option, or None when unset.
        weather: Selected weather option, or None when unset.

    Returns:
        An empty string before the first click, a prompt when any field is
        missing, otherwise the formatted price predicted by ``best_model``.
    """
    # Nothing to show until the button has been clicked (also covers None)
    if not n_clicks:
        return ""
    if any(v is None for v in (distance, demand, time_of_day, weather)):
        return "Please fill in all the inputs to get a prediction."

    # Encode the categoricals through a frame labelled like the one the
    # encoder was fitted on, so it does not receive unnamed input.
    categorical = pd.DataFrame(
        [[time_of_day, weather]],
        columns=getattr(encoder, "feature_names_in_", None),
    )
    encoded = encoder.transform(categorical)[0]

    # Same column order as in training: numeric features first, then the
    # one-hot block. Reuse the model's fitted feature names when it has them
    # so the prediction frame matches the training frame.
    input_data = pd.DataFrame(
        [[distance, demand, *encoded]],
        columns=getattr(best_model, "feature_names_in_", None),
    )
    prediction = best_model.predict(input_data)[0]
    return f"Predicted Ride Price: ₹{prediction:.2f}"

# Run the app.
# app.run is the supported entry point; run_server is its legacy name and is
# not available in newer Dash releases.
if __name__ == '__main__':
    app.run(debug=True)


<IPython.core.display.Javascript object>