# AI-Based Demand-Supply Matching Engine
This notebook simulates a supply-chain allocation engine and compares the following approaches:
- Linear Programming (LP)
- Q-Learning (tabular RL)
- PPO (Stable-Baselines3) — setup notes only; see the final section


In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import linprog

def generate_weekly_supply_demand(n_regions=3, n_weeks=10, seed=42):
    """Build a synthetic demand table with one row per (week, region) pair.

    NumPy's global random generator is seeded with ``seed``. Then, week by
    week and region by region, each row draws an integer demand in
    [50, 150) followed by a float priority in [0.5, 1.5).

    Args:
        n_regions: Number of regions generated for every week.
        n_weeks: Number of weeks to generate.
        seed: Value passed to ``np.random.seed``.

    Returns:
        A DataFrame with the columns ``Week``, ``Region``, ``Demand`` and
        ``Priority`` (regions are labelled ``Region_1``, ``Region_2``, ...).
    """
    # This reseeds the *global* NumPy generator, so any later np.random call
    # in the same process continues from this seeded state.
    np.random.seed(seed)

    # Dict values are evaluated top to bottom, so each row draws its demand
    # before its priority -- the draw order determines the generated values.
    rows = [
        {
            "Week": week_idx,
            "Region": f"Region_{region_idx + 1}",
            "Demand": np.random.randint(50, 150),
            "Priority": np.random.uniform(0.5, 1.5),
        }
        for week_idx in range(n_weeks)
        for region_idx in range(n_regions)
    ]
    return pd.DataFrame(rows)

# Build the default synthetic dataset (3 regions x 10 weeks, seed 42) that the
# later cells read through the module-level name `df`.
df = generate_weekly_supply_demand()
# Notebook preview of the first rows; outside a notebook cell this expression
# has no visible effect.
df.head()


In [None]:

def linear_programming_allocation(df_week: pd.DataFrame, total_supply: float) -> pd.DataFrame:
    """Allocate one week's supply across regions by solving a linear program.

    The LP minimises ``sum(allocation_i / priority_i)`` subject to
    ``0 <= allocation_i <= demand_i`` and
    ``sum(allocation) == min(total_supply, sum(demand))``. For positive
    priorities this serves the highest-priority regions first.

    Args:
        df_week: Rows to allocate for, with ``Demand`` and ``Priority``
            columns. The frame is modified in place (three columns are
            added) and also returned.
        total_supply: Number of units available to distribute.

    Returns:
        ``df_week`` with the added columns ``Allocated_LP`` (integer units),
        ``Unmet_LP`` (demand minus allocation) and ``FillRate_LP``
        (allocation divided by demand; NaN where demand is 0). If the solver
        reports failure, every allocation is 0.
    """
    demand = df_week["Demand"].to_numpy()

    if demand.size == 0:
        # Nothing to allocate: skip the solver and just add empty columns so
        # the returned frame has the same schema as the normal path.
        df_week["Allocated_LP"] = np.zeros(0, dtype=int)
        df_week["Unmet_LP"] = np.zeros(0, dtype=int)
        df_week["FillRate_LP"] = np.zeros(0, dtype=float)
        return df_week

    # Lower cost for higher priority, so the minimiser fills those first.
    cost = 1 / df_week["Priority"].to_numpy()
    bounds = [(0, d) for d in demand]
    result = linprog(
        c=cost,
        A_eq=np.ones((1, demand.size)),
        # Never ask the solver to hand out more than is actually demanded.
        b_eq=[min(total_supply, demand.sum())],
        bounds=bounds,
        method="highs",
    )

    if result.success:
        # Rounding a fractional solution (e.g. 11.5 -> 12) must not push an
        # integer allocation above the region's demand or below zero.
        allocation = np.clip(np.round(result.x), 0, np.floor(demand)).astype(int)
    else:
        # Best-effort fallback kept from the original: allocate nothing.
        allocation = np.zeros(demand.size, dtype=int)

    df_week["Allocated_LP"] = allocation
    df_week["Unmet_LP"] = df_week["Demand"] - allocation
    df_week["FillRate_LP"] = allocation / df_week["Demand"]
    return df_week


In [None]:

class QLearningAgentLocal:
    """Tabular Q-learning agent that picks one discrete supply level per region.

    Q-values are stored in ``q_table``, a dict mapping a state key (a tuple)
    to an array of shape ``(n_supply_levels,) * n_regions`` -- one entry per
    joint action. Tables are created lazily the first time a state is seen.
    All randomness comes from NumPy's global generator.
    """

    def __init__(self, n_regions, n_supply_levels, alpha=0.1, gamma=0.95, epsilon=0.1):
        """Store the hyper-parameters and start with an empty Q-table.

        Args:
            n_regions: Number of regions; one action component per region.
            n_supply_levels: Number of discrete levels per region
                (action components are in ``0 .. n_supply_levels - 1``).
            alpha: Learning rate used in ``update``.
            gamma: Discount factor applied to the best next-state value.
            epsilon: Probability of choosing a uniformly random action.
        """
        self.n_regions = n_regions
        self.n_supply_levels = n_supply_levels
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = {}

    def _q_values(self, key, *, random_init):
        """Return the Q-array for ``key``, creating it on first access."""
        if key not in self.q_table:
            shape = (self.n_supply_levels,) * self.n_regions
            # Random values when acting (breaks argmax ties between untried
            # actions); zeros when learning. Both match the original code.
            self.q_table[key] = np.random.rand(*shape) if random_init else np.zeros(shape)
        return self.q_table[key]

    def get_state_key(self, state):
        """Convert a 1-D state (array, list or tuple) into a hashable tuple."""
        # np.asarray lets plain sequences work too, not only ndarrays.
        return tuple(np.asarray(state).tolist())

    def choose_action(self, state):
        """Pick a joint action for ``state`` with an epsilon-greedy policy.

        Returns:
            A 1-D integer ``np.ndarray`` of length ``n_regions``. Both the
            exploring and the greedy branch return an array, so callers get
            one consistent type.
        """
        q_values = self._q_values(self.get_state_key(state), random_init=True)
        if np.random.rand() < self.epsilon:
            return np.random.randint(0, self.n_supply_levels, size=self.n_regions)
        # argmax works on the flattened table; unravel_index maps the flat
        # position back to one level index per region.
        return np.array(np.unravel_index(np.argmax(q_values), q_values.shape))

    def update(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition.

        ``Q[s][a] += alpha * (reward + gamma * max(Q[s']) - Q[s][a])``

        Args:
            state: State in which ``action`` was taken.
            action: Sequence of ``n_regions`` level indices.
            reward: Scalar reward observed for the transition.
            next_state: State reached afterwards.
        """
        q_values = self._q_values(self.get_state_key(state), random_init=False)
        next_q_values = self._q_values(self.get_state_key(next_state), random_init=False)
        idx = tuple(action)
        # Computed before the write so it is unaffected when state == next_state.
        td_target = reward + self.gamma * np.max(next_q_values)
        q_values[idx] += self.alpha * (td_target - q_values[idx])


In [None]:

# Weekly simulation: run the LP baseline and the Q-learning agent on the same
# demand rows and collect both allocations side by side.
df_results = []
total_supply = 300
# Size the agent from the data instead of repeating the region count here.
agent = QLearningAgentLocal(n_regions=df["Region"].nunique(), n_supply_levels=5)
# Units represented by one supply level, derived from the agent's own level count.
units_per_level = total_supply // agent.n_supply_levels

for week in sorted(df["Week"].unique()):
    df_week = df[df["Week"] == week].copy().reset_index(drop=True)
    df_week = linear_programming_allocation(df_week, total_supply)
    demand = df_week["Demand"].values

    # Coarse state: each region's demand bucketed into tens.
    state = (demand // 10).astype(int)
    action = np.array(agent.choose_action(state))

    # A level maps to a number of units, never more than the region demands.
    allocation = np.minimum(action * units_per_level, demand)
    # The per-region levels are chosen independently, so together they can ask
    # for more than exists. Scale down proportionally (rounding down) so the
    # Q-learning allocation respects the same supply cap as the LP baseline.
    requested = allocation.sum()
    if requested > total_supply:
        allocation = np.floor(allocation * total_supply / requested).astype(int)

    df_week["Allocated_Q"] = allocation
    df_week["Unmet_Q"] = df_week["Demand"] - allocation
    df_week["FillRate_Q"] = df_week["Allocated_Q"] / df_week["Demand"]

    # Reward is the average fill rate across regions for this week.
    reward = df_week["FillRate_Q"].mean()
    # NOTE(review): the current state is passed as next_state, so the
    # bootstrap term uses this week's own Q-values rather than next week's.
    # Kept as in the original -- confirm whether that is intended.
    agent.update(state, action, reward, state)
    df_results.append(df_week)

df_final = pd.concat(df_results)
df_final.head()


In [None]:

# Weekly KPIs: average fill rate and total unmet demand for each method.
kpi_aggregations = {
    "FillRate_LP": "mean",
    "FillRate_Q": "mean",
    "Unmet_LP": "sum",
    "Unmet_Q": "sum",
}
df_kpis = df_final.groupby("Week").agg(kpi_aggregations).reset_index()

# One entry per figure: (title, y-axis label, [(column, legend label, marker), ...]).
chart_specs = [
    (
        "Fill Rate: LP vs Q-Learning",
        "Avg Fill Rate",
        [("FillRate_LP", "LP Fill Rate", "o"), ("FillRate_Q", "Q-Learning Fill Rate", "x")],
    ),
    (
        "Unmet Demand: LP vs Q-Learning",
        "Unmet Demand",
        [("Unmet_LP", "LP Unmet", "o"), ("Unmet_Q", "Q Unmet", "x")],
    ),
]

for chart_title, y_label, series_specs in chart_specs:
    plt.figure(figsize=(10, 4))
    for column, legend_label, marker_style in series_specs:
        plt.plot(df_kpis["Week"], df_kpis[column], label=legend_label, marker=marker_style)
    plt.title(chart_title)
    plt.xlabel("Week")
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)
    plt.show()


### PPO Agent via Stable-Baselines3
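To run PPO training, first install the dependencies:
```
pip install "stable-baselines3[extra]" gym
```
(Stable-Baselines3 2.0 and later use `gymnasium` in place of `gym`.)
Then define a Gym-compatible `SupplyMatchingEnv` environment (it is not defined in the cells above) and train it with PPO.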