# Optimization

With a trained and refined model, I want to use it to optimize pricing strategies for different types of cars and locations, etc. For example, I can use the model to identify optimal rental prices for different times of the year or different locations.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly_express as px
import plotly.graph_objects as go
sns.set()
from pandas.io.json import json_normalize 

In [7]:
# import all what you need for machine learning
import sklearn
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import RobustScaler
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [8]:
import pickle

# Load the saved model from a file
filename = 'xgboost_model.pkl'
xgb_model = pickle.load(open(filename, 'rb'))

In [9]:
cars = pd.read_csv('./DataFeatures.csv')

In [10]:
from sklearn.preprocessing import StandardScaler

#Let's drop the irrelevant columns
cars2 = cars.drop(["city","state",'reviewCount','id',
                   'population','automaticTransmission','newListing','Unnamed: 0'], axis=1).copy() 
# Separating target variable and its features
y2 = cars2['averageDailyPrice']
X2 = cars2.drop('averageDailyPrice',axis=1)

# define standard scaler
scaler = StandardScaler()
# transform data
cars_main2 = scaler.fit_transform(X2)

from sklearn.model_selection import train_test_split
X_train2, X_test2, y_train2, y_test2 = train_test_split(cars_main2, y2, test_size=0.2, random_state=None)
print("x train: ",X_train2.shape)
print("x test: ",X_test2.shape)
print("y train: ",y_train2.shape)
print("y test: ",y_test2.shape)

x train:  (29023, 16)
x test:  (7256, 16)
y train:  (29023,)
y test:  (7256,)


In [11]:
# Define the input features for the model
selected_features = ['longitude', 'latitude','scalar','model', 'make', 'year', 'weekday','month','rating','type']
XX = X2[selected_features]

dset = xgb.DMatrix(data=XX,label=y2)
params = {"objective":"reg:linear",'colsample_bytree': 0.4,'learning_rate': 0.3,
                'max_depth': 10, 'alpha': 10, 'gamma':5}
xg_reg_opt = xgb.train(params=params, dtrain=dset, num_boost_round=10)

XX['predicted_price'] = xg_reg_opt.predict(dset, output_margin=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  XX['predicted_price'] = xg_reg_opt.predict(dset, output_margin=True)


In [12]:
# Group the data by MODEL and month, and compute the mean predicted price
grouped_data = XX.groupby(['month','model','type'])['predicted_price'].mean()
# Print the optimized prices for each location and month
grouped_data.head(35)

month  model  type
1      1      1        55.491905
       4      1        64.450043
       5      4        91.797653
       6      1       118.146225
       7      1        52.618988
       8      1       102.810669
       9      1        61.694462
       12     1       144.071259
       16     1        40.410622
       17     1        77.137711
       18     1        97.181183
       19     1        98.967751
       20     1        93.402977
       27     1        72.443359
       31     1        94.457817
       32     1        90.560684
       34     1       121.713257
       35     1       108.354111
       36     1       134.108643
       41     1       683.640991
       46     3        86.469803
       47     2        34.919464
       48     1        99.636932
       49     1       109.524994
       50     1        48.720589
       51     1       103.226540
       52     3        68.243935
       53     1        45.475372
       55     1        31.233871
       56     1       11

I can easily modify this code to optimize prices for different types of cars by adding a new feature to the data that represents the car type or model. For example, you could add a feature that indicates whether the car is a sedan, SUV, or sports car, and use this feature to group the data and compute the optimized prices.

I can use the trained XGBoost model to simulate the impact of different pricing strategies and adjust prices accordingly

In [13]:
# Define the pricing strategy to simulate
pricing_strategy = 0.8  # 80% of the predicted rental price

# Predict the rental price with the trained model
XX['predicted_price'] = xg_reg_opt.predict(xgb.DMatrix(XX.drop(['predicted_price'], axis=1)))

# Simulate the rental price based on the pricing strategy
XX['simulated_price'] = XX['predicted_price'] * pricing_strategy

# Calculate the revenue for the current pricing strategy
revenue_current = (y2).sum()

# Calculate the revenue for the simulated pricing strategy
revenue_simulated = (XX['simulated_price']).sum()

# Print the revenue for both strategies
print('Current revenue: ${:.2f}'.format(revenue_current))
print('Simulated revenue: ${:.2f}'.format(revenue_simulated))

# If the simulated revenue is higher, adjust the pricing strategy accordingly
if revenue_simulated > revenue_current:
    optimal_pricing_strategy = pricing_strategy
else:
    optimal_pricing_strategy = pricing_strategy + 0.05  # increase the pricing strategy by 5%

# Simulate the rental price based on the optimal pricing strategy
XX['optimal_price'] = XX['predicted_price'] * optimal_pricing_strategy

# Calculate the revenue for the optimal pricing strategy
revenue_optimal = (XX['optimal_price']).sum()

# Print the revenue for the optimal pricing strategy
print('Optimal revenue: ${:.2f}'.format(revenue_optimal))

Current revenue: $3575893.00
Simulated revenue: $2769274.00
Optimal revenue: $2942354.00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  XX['predicted_price'] = xg_reg_opt.predict(xgb.DMatrix(XX.drop(['predicted_price'], axis=1)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  XX['simulated_price'] = XX['predicted_price'] * pricing_strategy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  XX['optimal_price'] = XX['predicted_price'] * 

he reason for increasing the prices by 5% in the above code is to simulate a scenario where the company decides to adjust its pricing strategy.<br>
<br>
If the simulated revenue is less than the current revenue, it suggests that the current pricing strategy is generating more revenue than the simulated one. However, the company might still want to explore the possibility of increasing its prices to see if it can generate even more revenue.<br>
<br>
So, by increasing the prices by 5%, I am simulating a scenario where the company is testing a new pricing strategy that is slightly more aggressive than the current one. The company can then compare the simulated revenue with the current revenue to determine if the new pricing strategy is generating more revenue.<br>
<br>
Of course, in a real-world scenario, the company would need to carefully consider the impact of any pricing adjustments on customer demand, competition, and other factors before making any changes to its pricing strategy.

# Risk assessment

To minimize the risk and optimize the pricing strategy, I incorporate some logic into the code that automatically adjusts the prices based on the demand for cars. Here's one approach:<br>
<br>
*Identify slow periods*: We can first identify the slow periods by analyzing historical rental data and looking for patterns in demand. For example, if demand is consistently lower during certain times of the year or days of the week, we can label those periods as slow periods.<br>
<br>
*Apply discounts during slow periods*: During slow periods, we can offer discounts to incentivize customers to rent our cars. We can use the following code to apply a discount of 10% during the slow periods.<br>
<br>
*Increase prices for high-demand cars*: Similarly, if certain types of cars or locations are in high demand, we can increase the rental price to maximize revenue. We can use the following code to increase the rental price by 5% for high-demand cars:

By incorporating above logic into our pricing strategy, we can dynamically adjust our prices to maximize revenue and minimize risk.

In [14]:

# Identify slow periods
slow_periods = [2, 3, 4, 10, 11]  # February-April, October-November

# Identify high-demand cars
high_demand_cars = [3, 5]  # SUV and van cars #{'type': {'car': 1, 'minivan': 2, 'suv': 3, 'truck': 4, 'van': 5}}

In [15]:
# Train model
dtrain = xgb.DMatrix(X2, label=y2)
params = {'objective': 'reg:squarederror'}
xg_model = xgb.train(params, dtrain)

In [16]:
rental_price = xg_model.predict(xgb.DMatrix(X2.loc[(X2['type'] == 1) & (X2['month'] == 1)]))

Sample of demand curve

In [18]:
from scipy.optimize import curve_fit

# define the true objective function
def objective(x, a, b, c):
 return a * x** + b*x + c

# choose the input and output variables
# curve fit
popt, _ = curve_fit(objective, y2, X2["rating"])
# summarize the parameter values
a, b, c = popt
print('y = %.5f * x^2 + %.5f *x + %.5f' % (a, b, c))

y = 35.32540 * x^2 + -0.99954 *x + -30.52206


In [19]:
def demand_curve(price):
    demand = objective(price, a, b, c)
    demand[demand < 0] = 0
    return demand

Simulate pricing strategies

In [20]:
for car_type in X2['type'].unique():
    for month in X2['month'].unique():
        # Set rental price
        rental_price = xg_model.predict(xgb.DMatrix(X2.loc[(X2['type'] == car_type) & (X2['month'] == month)]))
        
        # Adjust prices for slow periods
        if month in slow_periods:
            # Apply discount
            rental_price *= 0.9

        # Adjust prices for high-demand cars
        if car_type in high_demand_cars:
            # Increase price
            rental_price *= 1.05

        # Simulate rental
        demand = demand_curve(rental_price)

        # Calculate revenue
        revenue_current = 1 * rental_price
        revenue_simulated = demand * rental_price
        
        for i in range(len(rental_price)):
            if revenue_simulated[i] < revenue_current[i]:
                # Apply 5% increase
                rental_price[i] *= 1.05

        # Print results
        print(f"{car_type} in month {month}:")
        #print(f"Current rental price: {rental_price:.2f}")
        #print(f"Current demand: {demand}")
        #print(f"Current revenue: {revenue_current:.2f}")
        #print(f"Simulated revenue: {revenue_simulated:.2f}\n")

1 in month 7:
1 in month 4:
1 in month 9:
1 in month 11:
1 in month 5:
1 in month 6:
1 in month 2:
1 in month 12:
1 in month 3:
1 in month 1:
1 in month 8:
1 in month 10:
3 in month 7:
3 in month 4:
3 in month 9:
3 in month 11:
3 in month 5:
3 in month 6:
3 in month 2:
3 in month 12:
3 in month 3:
3 in month 1:
3 in month 8:
3 in month 10:
4 in month 7:
4 in month 4:
4 in month 9:
4 in month 11:
4 in month 5:
4 in month 6:
4 in month 2:
4 in month 12:
4 in month 3:
4 in month 1:
4 in month 8:
4 in month 10:
2 in month 7:
2 in month 4:
2 in month 9:
2 in month 11:
2 in month 5:
2 in month 6:
2 in month 2:
2 in month 12:
2 in month 3:
2 in month 1:
2 in month 8:
2 in month 10:
5 in month 7:
5 in month 4:
5 in month 9:
5 in month 11:
5 in month 5:
5 in month 6:
5 in month 2:
5 in month 12:
5 in month 3:
5 in month 1:
5 in month 8:
5 in month 10:


Following has not been completed yet!

In [None]:
def optimize_prices(model, demand_curve, rental_data):
    """
    Optimize rental prices for different types of cars and locations based on the given model
    and demand curve.
    
    Parameters:
        model (xgboost.Booster): Trained XGBoost model for predicting rental prices.
        demand_curve (list): List of demand curve coefficients.
        rental_data (pandas.DataFrame): DataFrame containing rental data.
        
    Returns:
        optimized_prices (pandas.DataFrame): DataFrame containing optimized rental prices.
    """
    # Make a copy of the rental data to avoid modifying the original DataFrame
    rental_data_copy = rental_data.copy()
    
    # Calculate current revenue and demand for each rental listing
    rental_data_copy['revenue_current'] = rental_data_copy['averageDailyPrice'] * rental_data_copy['renterTripsTaken']
    rental_data_copy['demand_current'] = rental_data_copy['averageDailyPrice'].apply(lambda x: simulate_demand(x, demand_curve))
    
    # Loop over each rental listing and simulate the impact of a 5% price increase
    for i in range(len(rental_data_copy)):
        # Get the current rental price and demand
        price_current = rental_data_copy.loc[i, 'averageDailyPrice']
        demand_current = rental_data_copy.loc[i, 'demand_current']
        
        # Simulate demand for a 5% increase in rental price
        price_increased = price_current * 1.05
        demand_simulated = simulate_demand(price_increased, demand_curve)
        
        # Calculate the potential revenue for the increased price
        revenue_simulated = demand_simulated * price_increased
        
        # If the potential revenue is higher than the current revenue, increase the rental price by 5%
        if revenue_simulated > rental_data_copy.loc[i, 'revenue_current']:
            rental_data_copy.loc[i, 'averageDailyPrice'] = price_increased
    
    # Return the optimized rental prices
    optimized_prices = rental_data_copy[['id', 'make', 'model', 'year', 'type', 'averageDailyPrice']]
    
    return optimized_prices