In [4]:
import random
import math

class User:
    """A user together with the feedback accumulated for each content item.

    ``preferences`` maps a content id to a ``(total_reward, impressions)``
    tuple: the sum of all rewards recorded for that content and the number
    of times a reward was recorded.
    """

    def __init__(self, user_id):
        """Create a user identified by ``user_id`` with no recorded feedback."""
        self.preferences = {}
        self.user_id = user_id

    def update_preference(self, content_id, reward):
        """Add ``reward`` to the running total for ``content_id``.

        Also increments the impression count for that content by one.
        """
        # setdefault registers a zeroed entry for unseen content before the
        # totals are read, so the entry exists even if the addition fails.
        running_total, shown = self.preferences.setdefault(content_id, (0, 0))
        self.preferences[content_id] = (running_total + reward, shown + 1)

    def get_preference(self, content_id):
        """Return ``(total_reward, impressions)`` for ``content_id``.

        Content with no recorded feedback yields ``(0, 0)``; the lookup does
        not add an entry to ``preferences``.
        """
        if content_id in self.preferences:
            return self.preferences[content_id]
        return (0, 0)


class Content:
    """A recommendable item: an identifier plus the category it belongs to."""

    def __init__(self, content_id, category):
        """Store the item's ``content_id`` and ``category`` as attributes."""
        self.category = category
        self.content_id = content_id

# UCB1 Bandit algorithm for recommendation
class UCB1Bandit:
    """Pick content for a user with an upper-confidence-bound (UCB1) rule.

    Each candidate is scored as its mean observed reward plus an exploration
    bonus that shrinks as the candidate accumulates impressions.
    """

    def __init__(self, exploration_constant=1.0):
        """Store the multiplier applied to the exploration bonus."""
        self.exploration_constant = exploration_constant

    def choose_content(self, user, content_options):
        """Return the candidate with the highest UCB score for ``user``.

        Args:
            user: Object exposing ``get_preference(content_id)`` that returns
                a ``(total_reward, impressions)`` pair.
            content_options: Iterable of candidates, each exposing a
                ``content_id`` attribute. Any iterable works, including
                single-pass iterators such as generators.

        Returns:
            The best-scoring candidate. Candidates with zero impressions
            score infinity, so they are tried before any scored candidate.
            Ties go to the candidate that appears first.

        Raises:
            ValueError: If ``content_options`` yields no candidates.
        """
        # Materialize once: the options are traversed twice (totals, then
        # scores), which would silently exhaust a generator on the first pass.
        options = list(content_options)
        if not options:
            raise ValueError("content_options must contain at least one item")

        stats = [user.get_preference(option.content_id) for option in options]
        total_impressions = sum(impressions for _, impressions in stats)
        # The +1 keeps the logarithm defined and non-negative for any total.
        log_total = math.log(total_impressions + 1)

        def score(stat):
            reward, impressions = stat
            if impressions == 0:
                return float('inf')
            return (reward / impressions) + self.exploration_constant * \
                math.sqrt(log_total / impressions)

        # Select by position so candidates do not have to be hashable;
        # max() keeps the first of several equal scores.
        best = max(range(len(options)), key=lambda index: score(stats[index]))
        return options[best]

# Example usage
users = {uid: User(uid) for uid in (1, 2)}

contents = {
    cid: Content(cid, category)
    for cid, category in (("A", "News"), ("B", "Sports"), ("C", "Tech"))
}
bandit = UCB1Bandit()

# Run ten recommendation rounds per user, feeding each simulated reward back
# into that user's preference totals.
for user_id, user in users.items():
    print(f"Simulating recommendations for User {user_id}")
    for _ in range(10):
        available_contents = list(contents.values())
        chosen_content = bandit.choose_content(user, available_contents)

        # Stand-in for real feedback: a uniform random draw between 0 and 1.
        reward = random.uniform(0, 1)
        print(f"User {user_id} recommended: {chosen_content.content_id} (Reward: {reward:.2f})")
        user.update_preference(chosen_content.content_id, reward)

# Summarize what was recorded for every user.
for user_id, user in users.items():
    print(f"\nUser {user_id} Preferences:")
    for content_id, (total_reward, impressions) in user.preferences.items():
        print(f"Content {content_id}: Total Reward = {total_reward:.2f}, Impressions = {impressions}")


Simulating recommendations for User 1
User 1 recommended: A (Reward: 0.32)
User 1 recommended: B (Reward: 0.29)
User 1 recommended: C (Reward: 0.48)
User 1 recommended: C (Reward: 0.43)
User 1 recommended: A (Reward: 0.61)
User 1 recommended: B (Reward: 0.22)
User 1 recommended: A (Reward: 0.81)
User 1 recommended: C (Reward: 0.17)
User 1 recommended: A (Reward: 0.25)
User 1 recommended: B (Reward: 0.18)
Simulating recommendations for User 2
User 2 recommended: A (Reward: 0.33)
User 2 recommended: B (Reward: 0.28)
User 2 recommended: C (Reward: 0.96)
User 2 recommended: C (Reward: 0.51)
User 2 recommended: C (Reward: 0.21)
User 2 recommended: A (Reward: 0.70)
User 2 recommended: B (Reward: 0.28)
User 2 recommended: A (Reward: 0.42)
User 2 recommended: C (Reward: 0.17)
User 2 recommended: A (Reward: 0.52)

User 1 Preferences:
Content A: Total Reward = 1.99, Impressions = 4
Content B: Total Reward = 0.69, Impressions = 3
Content C: Total Reward = 1.08, Impressions = 3

User 2 Preferences