# RL CIA-1 21011101136
# Contextual Bandit-Based Vehicle Recommendation System

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
import random

## Initialize Vehicle Options and User Profiles (assuming a small dataset of vehicle options and mock user data)

In [2]:
# Catalogue of vehicle types that can be recommended.
vehicles = [
    'Sedan',
    'SUV',
    'Truck',
    'Convertible',
    'Hatchback',
]

# Size of the catalogue, derived from the list so the two cannot drift apart.
num_vehicles = len(vehicles)

In [3]:
# Encoder that maps vehicle names to integer codes. fit() returns the encoder
# itself, so it can be chained directly onto construction. Note that the codes
# follow the encoder's sorted class order, not the order of `vehicles`.
label_encoder = LabelEncoder().fit(vehicles)

# Bare expression so the cell still displays the fitted encoder.
label_encoder

LabelEncoder()

In [4]:
# Mock user profiles. 'location' and 'preference' are plain strings at this
# point; they are turned into integer codes in a later cell.
users = [
    dict(user_id=1, age=25, location='urban', preference='SUV'),
    dict(user_id=2, age=35, location='suburban', preference='Sedan'),
    dict(user_id=3, age=45, location='rural', preference='Truck'),
]

In [5]:
# Replace each user's preferred vehicle name with its integer code from the
# shared vehicle encoder. The isinstance guard keeps this cell re-runnable:
# a value that has already been encoded is left alone instead of being fed
# back into transform(), which only knows the original string labels.
for user in users:
    if isinstance(user['preference'], str):
        user['preference'] = label_encoder.transform([user['preference']])[0]

# Fit the location encoder ONCE on the locations of all users. Refitting it
# inside the loop on a single-element list would give every location the code
# 0 and erase the difference between the users' locations.
location_encoder = LabelEncoder()
location_codes = location_encoder.fit_transform([user['location'] for user in users])
for user, code in zip(users, location_codes):
    user['location'] = code

## Contextual Bandit Setup

In [6]:
# Running statistics, keyed by vehicle name: total reward collected and number
# of times the vehicle was recommended. defaultdict(int) makes every vehicle
# that has not been seen yet read as zero.
vehicle_rewards, vehicle_counts = defaultdict(int), defaultdict(int)

# Exploration rate: probability of recommending a randomly chosen vehicle.
epsilon = 0.1

## Recommendation Function with Epsilon-Greedy Strategy

In [7]:
def recommend_vehicle(user_context):
    """
    Picks a vehicle to recommend using an epsilon-greedy policy.

    With probability `epsilon` a vehicle is drawn at random from `vehicles`
    (exploration). Otherwise the vehicle with the highest observed mean reward
    is returned (exploitation); ties go to the vehicle listed first.

    Parameters:
    user_context (dict): The user being served. NOTE(review): currently unused;
        the reward statistics are pooled over all users, so the choice does
        not depend on the context.

    Returns:
    str: The name of the chosen vehicle.
    """
    if random.random() < epsilon:
        return random.choice(vehicles)

    def mean_reward(vehicle):
        # True sample mean. Dividing by (trials + 1) would underestimate every
        # vehicle and penalise rarely tried ones the most. A vehicle that has
        # never been tried scores 0.0, which also avoids dividing by zero.
        trials = vehicle_counts.get(vehicle, 0)
        return vehicle_rewards.get(vehicle, 0) / trials if trials else 0.0

    avg_rewards = {vehicle: mean_reward(vehicle) for vehicle in vehicles}
    return max(avg_rewards, key=avg_rewards.get)

## Collect Feedback and Update Rewards

In [8]:
def update_rewards(vehicle, reward):
    """
    Records the feedback received for one recommendation.

    Parameters:
    vehicle (str): The vehicle that was recommended.
    reward (int): The reward observed for that recommendation.
    """
    # The two counters are independent, so the order of the updates is free.
    vehicle_counts[vehicle] = vehicle_counts[vehicle] + 1
    vehicle_rewards[vehicle] = vehicle_rewards[vehicle] + reward

## Simulate User Interactions

In [9]:
num_iterations = 100

# Seed the RNG so that Restart & Run All replays the same interaction sequence,
# and start from empty statistics so that re-running this cell does not stack
# another batch of rounds on top of the previous run.
random.seed(42)
vehicle_rewards.clear()
vehicle_counts.clear()

# Encode every vehicle name once up front instead of calling transform() in
# each round; the mapping does not change during the simulation.
vehicle_codes = dict(zip(vehicles, label_encoder.transform(vehicles)))

for i in range(num_iterations):
    # A randomly chosen user arrives and receives a recommendation.
    user = random.choice(users)
    recommended_vehicle = recommend_vehicle(user)

    # Binary feedback: 1 when the recommendation matches the user's (encoded)
    # preferred vehicle, 0 otherwise.
    reward = 1 if user['preference'] == vehicle_codes[recommended_vehicle] else 0

    update_rewards(recommended_vehicle, reward)

## Display Results

In [10]:
# Report, for every vehicle, the total reward collected and the number of
# times it was recommended.
print("Final cumulative rewards for each vehicle:")
for vehicle in vehicles:
    total_reward, trials = vehicle_rewards[vehicle], vehicle_counts[vehicle]
    print(f"{vehicle}: {total_reward} rewards, {trials} trials")

Final cumulative rewards for each vehicle:
Sedan: 17 rewards, 82 trials
SUV: 6 rewards, 11 trials
Truck: 0 rewards, 2 trials
Convertible: 0 rewards, 4 trials
Hatchback: 0 rewards, 1 trials


## Recommendation for the User

In [11]:
def suggest_best_vehicle(user):
    """
    Suggests a vehicle for a user from their stated preference and the feedback
    collected so far.

    The user's preferred vehicle is returned when its observed mean reward is
    at least as high as that of every other vehicle (so it also wins ties and
    the no-feedback case). Otherwise the vehicle with the highest mean reward
    is returned.

    Parameters:
    user (dict): User information; only the encoded 'preference' entry is read.

    Returns:
    str: The recommended vehicle model.
    """
    def mean_reward(vehicle):
        # True sample mean. Dividing by (trials + 1) would underestimate every
        # vehicle and penalise rarely tried ones the most. A vehicle that has
        # never been tried scores 0.0, which also avoids dividing by zero.
        trials = vehicle_counts.get(vehicle, 0)
        return vehicle_rewards.get(vehicle, 0) / trials if trials else 0.0

    avg_rewards = {vehicle: mean_reward(vehicle) for vehicle in vehicles}

    # Turn the encoded preference back into a vehicle name.
    preferred_vehicle = label_encoder.inverse_transform([user['preference']])[0]

    best_vehicle = max(avg_rewards, key=avg_rewards.get)
    if avg_rewards[preferred_vehicle] >= avg_rewards[best_vehicle]:
        return preferred_vehicle
    return best_vehicle

# Example: ask for a suggestion for a user whose preferred vehicle is an SUV.
# The preference has to be passed in its encoded form.
suv_code = label_encoder.transform(['SUV'])[0]
user = dict(user_id=1, age=25, location='urban', preference=suv_code)
recommended_vehicle = suggest_best_vehicle(user)
print(f"Recommended vehicle for User {user['user_id']}: {recommended_vehicle}")

Recommended vehicle for User 1: SUV
