In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random

In [None]:
np.random.seed(42)
torch.manual_seed(42)
random.seed(42)

In [None]:
class MarketingEnv:
    def __init__(self, betas, budget=100e7):
        self.betas = torch.tensor(betas, dtype=torch.float32)  # Revenue coefficients
        self.budget = budget  # Total marketing budget
        self.num_channels = len(betas)
        self.reset()

    def reset(self):
        """Initialize with a random budget allocation."""
        self.state = np.random.dirichlet(np.ones(self.num_channels)) * self.budget
        return torch.tensor(self.state, dtype=torch.float32)

    def step(self, action_changes):
        """Apply action changes and return new state, reward, and done flag."""
        self.state += action_changes
        self.state = np.maximum(self.state, 0)  # Ensure no negative budget

        # Normalize to keep the total budget constant
        self.state = (self.state / np.sum(self.state)) * self.budget

        # Compute revenue (reward)
        revenue = np.dot(self.betas.numpy(), self.state)

        return torch.tensor(self.state, dtype=torch.float32), revenue, False

In [None]:
class ActorNetwork(nn.Module):
    """Policy (Actor) Network - Outputs budget allocation probabilities."""
    def __init__(self, state_dim):
        super(ActorNetwork, self).__init__()
        self.fc1 = nn.Linear(state_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, state_dim)

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        x = torch.relu(self.fc2(x))
        return torch.softmax(self.fc3(x), dim=-1)  # Ensure sum = 1

In [None]:
class CriticNetwork(nn.Module):
    """Value (Critic) Network - Estimates expected revenue."""
    def __init__(self, state_dim):
        super(CriticNetwork, self).__init__()
        self.fc1 = nn.Linear(state_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, state):
        x = torch.relu(self.fc1(state))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)  # Single value output

In [None]:
# === PPO AGENT ===
class PPOAgent:
    def __init__(self, state_dim, lr=0.002, gamma=0.99, epsilon=0.2):
        self.actor = ActorNetwork(state_dim)
        self.critic = CriticNetwork(state_dim)
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr)
        self.gamma = gamma
        self.epsilon = epsilon

    def compute_advantage(self, rewards, values):
        """Calculate advantage function (TD error)."""
        advantages = []
        returns = []
        G = 0
        for r, v in zip(reversed(rewards), reversed(values)):
            G = r + self.gamma * G
            returns.insert(0, G)
            advantages.insert(0, G - v)
        return torch.tensor(advantages, dtype=torch.float32), torch.tensor(returns, dtype=torch.float32)

    def update(self, states, actions, rewards, old_probs):
        """Perform PPO update."""
        values = self.critic(states).squeeze()
        advantages, returns = self.compute_advantage(rewards, values)

        # Compute new probabilities
        new_probs = self.actor(states).gather(1, actions.unsqueeze(1)).squeeze()
        ratio = new_probs / old_probs

        # PPO clipped objective
        surrogate1 = ratio * advantages
        surrogate2 = torch.clamp(ratio, 1 - self.epsilon, 1 + self.epsilon) * advantages
        actor_loss = -torch.min(surrogate1, surrogate2).mean()

        # Value loss (Critic update)
        critic_loss = (returns - values).pow(2).mean()

        # Backpropagation
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

In [None]:
# === TRAINING PPO AGENT ===
betas = [2, 3, 1.5, 2.5, 3.5, 2, 1, 1.8, 2.2]

env = MarketingEnv(betas)
ppo_agent = PPOAgent(state_dim=len(betas))

num_episodes = 2000
for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    states, actions, rewards, old_probs = [], [], [], []

    for t in range(10):
        state_tensor = state.clone().detach() if isinstance(state, torch.Tensor) else torch.tensor(state, dtype=torch.float32)
        action_probs = ppo_agent.actor(state_tensor)
        action = torch.multinomial(action_probs, 1).item()

        action_changes = np.zeros(len(betas))
        action_changes[action] = np.random.uniform(-5, 5)

        next_state, reward, done = env.step(action_changes)

        states.append(state.numpy())  # Convert tensor to numpy before storing
        actions.append(action)
        rewards.append(reward)
        old_probs.append(action_probs[action].item())

        state = next_state
        total_reward += reward

    # Convert lists to tensors for training
    states_tensor = torch.tensor(np.array(states), dtype=torch.float32)
    actions_tensor = torch.tensor(actions, dtype=torch.long)
    rewards_tensor = torch.tensor(rewards, dtype=torch.float32)
    old_probs_tensor = torch.tensor(old_probs, dtype=torch.float32)

    ppo_agent.update(states_tensor, actions_tensor, rewards_tensor, old_probs_tensor)

    if episode % 100 == 0:
        print(f"Episode {episode}, Total Revenue: {total_reward:.2f}")

Episode 0, Total Revenue: 21389237089.75
Episode 100, Total Revenue: 20282050517.59
Episode 200, Total Revenue: 20533749784.23
Episode 300, Total Revenue: 21161017971.12
Episode 400, Total Revenue: 20238844801.21
Episode 500, Total Revenue: 22486703062.88
Episode 600, Total Revenue: 20672598555.15
Episode 700, Total Revenue: 20214069393.61
Episode 800, Total Revenue: 19242210640.08
Episode 900, Total Revenue: 21429024595.86
Episode 1000, Total Revenue: 20167532759.17
Episode 1100, Total Revenue: 17527061239.01
Episode 1200, Total Revenue: 22339779943.21
Episode 1300, Total Revenue: 23384486569.84
Episode 1400, Total Revenue: 18280377603.63
Episode 1500, Total Revenue: 21631241027.26
Episode 1600, Total Revenue: 16807202393.97
Episode 1700, Total Revenue: 19808364055.26
Episode 1800, Total Revenue: 23557193894.79
Episode 1900, Total Revenue: 19424840827.00


In [None]:
# === DISPLAY FINAL OPTIMIZED BUDGET ===
final_budget = env.state
print("\nOptimized Marketing Budget Allocation:")
for i, category in enumerate(["TV", "Digital", "Sponsorship", "Content", "Online", "Affiliates", "SEM", "Radio", "Others"]):
    print(f"{category}: ${final_budget[i]:.2f}")



Optimized Marketing Budget Allocation:
TV: $20643418.09
Digital: $32660597.96
Sponsorship: $33849653.40
Content: $108285599.16
Online: $313263997.77
Affiliates: $6193711.69
SEM: $122030557.95
Radio: $64716400.71
Others: $298356063.26


In [None]:
import numpy as np

# Given betas (fixed revenue multipliers)
betas = np.array([2, 3, 1.5, 2.5, 3.5, 2, 1, 1.8, 2.2])

# Initial budget allocation (before training, assume uniform allocation)
initial_budget = np.full(len(betas), 100e7 / len(betas))  # Evenly distribute total budget 100

# Optimized budget allocation (after PPO training)
optimized_budget = env.state # PPO gives final budget allocation

# Compute revenues
initial_revenue = np.dot(betas, initial_budget)
optimized_revenue = np.dot(betas, optimized_budget)

# Total budget (set it based on your requirement, e.g., 100 or another value)
total_budget = np.sum(initial_budget)  # Ensuring budget consistency

# Compute ROI
initial_ROI = ((initial_revenue - total_budget) / total_budget) * 100
optimized_ROI = ((optimized_revenue - total_budget) / total_budget) * 100

# ROI Increase
roi_increase = optimized_ROI - initial_ROI

# Print Results
print(f"Initial ROI: {initial_ROI:.2f}%")
print(f"Optimized ROI: {optimized_ROI:.2f}%")
print(f"Increase in ROI: {roi_increase:.2f}%")

Initial ROI: 116.67%
Optimized ROI: 146.45%
Increase in ROI: 29.78%
