# AutoGen for Supply Chain Management

In [1]:
import os
import re
import sys
import time
import numpy as np
from typing import List
from tqdm.notebook import tqdm
from autogen import ConversableAgent
sys.path.append('../src')
from env import env_creator
from config import env_configs
from llm_config import llm_config_list
from openai import AzureOpenAI
from model import get_demand_description, get_state_description, create_agents
# from model import run_simulation

np.random.seed(42)



In [2]:
config_list = llm_config_list


## Creating the Environment

In [3]:
env_config_name = "constant_demand"
env_config = env_configs[env_config_name]
im_env = env_creator(env_config)
print(env_config)

{'num_stages': 4, 'num_periods': 1, 'num_agents_per_stage': 4, 'init_inventories': [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12], 'lead_times': [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], 'demand_fn': <function <lambda> at 0x16d427280>, 'prod_capacities': [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20], 'sale_prices': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'order_costs': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'backlog_costs': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'holding_costs': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'supply_relations': {0: {0: array([1, 0, 0, 0]), 1: array([0, 1, 0, 0]), 2: array([0, 0, 1, 0]), 3: array([0, 0, 0, 1])}, 1: {0: array([1, 0, 0, 0]), 1: array([0, 1, 0, 0]), 2: array([0, 0, 1, 0]), 3: array([0, 0, 0, 1])}, 2: {0: array([1, 0, 0, 0]), 1: array([0, 1, 0, 0]), 2: array([0, 0, 1, 0]), 3: array([0, 0, 0, 1])}, 3: {0: array([0, 0, 0, 0]), 1: array([0, 0, 0, 0]), 2: array([0, 

## Getting Descriptions

In [4]:
print(get_demand_description(env_config_name))

The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds.


## Creating Agents

In [5]:
user_proxy = ConversableAgent(
    name="UserProxy",
    llm_config=False,
    human_input_mode="NEVER",
)

In [6]:
stage_agents = create_agents(env_config["stage_names"], env_config["num_agents_per_stage"], llm_config={"config_list": config_list})

In [7]:
# for stage_agent in stage_agents:
#     print(stage_agent.system_message)

In [38]:
def run_simulation(env_config_name, im_env, user_proxy, stage_agents):
   
    demand_description = get_demand_description(env_config_name) 
    all_state_dicts = {}
    all_action_order_dicts = {}
    all_reward_dicts = {}
    episode_reward = 0
    api_cost = 0
    im_env.reset()
    
    for period in range(im_env.num_periods):
        state_dict = im_env.parse_state(im_env.state_dict)
        all_state_dicts[period] = state_dict
        action_order_dict = {}
        
        for stage in range(im_env.num_stages):
            for agent in range(im_env.num_agents_per_stage):
                stage_state = state_dict[f'stage_{stage}_agent_{agent}']
                
                if stage != 0:
                    downstream_order = f"Your downstream order from the stage {stage} for this round is {action_order_dict[f'stage_{stage - 1}_agent_{agent}']}. "
                else:
                    downstream_order = ""

                message = (
                    f"Now this is the round {period + 1}, "
                    f"and you are at the stage {stage + 1}: {im_env.stage_names[stage]} in the supply chain. "
                    f"Given your current state:\n{get_state_description(stage_state)}\n\n"
                    f"{demand_description} {downstream_order}"
                    "There are three tasks for you to make decision\n"
                    "Task1: Do you want to remove any upstream suppliers?\n\n"
                    "Please state your reason in 1-2 sentences first "
                    "and then provide your action as a list (e.g. [0, 1] for removing agent0 and agent1 as suppliers)\n"
                    "Task2: Do you want to add any upstream suppliers?\n\n"
                    "Please state your reason in 1-2 sentences first "
                    "and then provide your action as a list (e.g. [2, 3] for adding agent2 and agent3 as suppliers)\n"
                    "Task3: What is your action (order quantity) for this round?\n\n"
                    "Golden rule of this game: Open orders should always equal to \"expected downstream orders + backlog\". "
                    "If open orders are larger than this, the inventory will rise (once the open orders arrive). "
                    "If open orders are smaller than this, the backlog will not go down and it may even rise. "
                    "Please consider the lead time and place your order in advance. "
                    "Remember that your upstream has its own lead time, so do not wait until your inventory runs out. "
                    "Also, avoid ordering too many units at once. "
                    "Try to spread your orders over multiple rounds to prevent the bullwhip effect. "
                    "Anticipate future demand changes and adjust your orders accordingly to maintain a stable inventory level.\n\n"
                    "Please state your reason in 1-2 sentences first "
                    "and then provide your action as a non-negative integer within brackets (e.g. [0])."
                    
                    
                )

                chat_result = user_proxy.initiate_chat(
                    stage_agents[stage],
                    message={'content': ''.join(message)},
                    summary_method="last_msg",
                    max_turns=1,
                    clear_history=False,
                )
                chat_summary = chat_result.summary
                api_cost += chat_result.cost['usage_including_cached_inference']['total_cost']
                # print(chat_summary)
                match = re.findall(r'\[(.*?)\]', chat_summary)
                # print(match)

                stage_sup_action = state_dict[f'stage_{stage}_agent_{agent}']['suppliers']
                remove_sup = match[0]                
                if remove_sup != "":
                    remove_sup = [int(ind) for ind in remove_sup.split(", ")]
                    for ind in remove_sup:
                        stage_sup_action[ind] = 0
                add_sup = match[1]
                if add_sup != "":
                    add_sup = [int(ind) for ind in add_sup.split(", ")]
                    for ind in add_sup:
                        stage_sup_action[ind] = 1
                
                # if match:
                #     stage_action = int(match.group(1))
                # else:
                #     stage_action = 0
                stage_order_action = 0
                if match[2] != "":
                    stage_order_action = int(match[2])
                action_order_dict[f'stage_{stage}_agent_{agent}'] = stage_order_action

                print("action sup action", stage_sup_action)
                print("action order action", stage_order_action)
            
            
        next_states, rewards, terminations, truncations, infos = im_env.step(action_order_dict)
        next_state_dict = im_env.parse_state(next_states)
        all_state_dicts[period + 1] = next_state_dict
        all_action_order_dicts[period + 1] = action_order_dict
        all_reward_dicts[period + 1] = rewards
        episode_reward += sum(rewards.values())
        print(
            f"period = {period}, action_order_dict = {action_order_dict}, rewards = {rewards}, episode_reward = {episode_reward}, " \
            f"api_cost = {api_cost}")
        print('=' * 80)

    return episode_reward

## Running Simulations

In [39]:
rewards = []

for _ in tqdm(range(1)):
    stage_agents = create_agents(stage_names=env_config["stage_names"], num_agents_per_stage=env_config['num_agents_per_stage'], llm_config={'config_list':config_list})
    reward = run_simulation(env_config_name, im_env, user_proxy, stage_agents)
    rewards.append(reward)
    print(f"rewards = {rewards}")

mean_reward = np.mean(rewards)
std_reward = np.std(rewards)

print(f"Rewards: {rewards}")
print(f"Mean Episode Reward: {mean_reward}")
print(f"Standard Deviation of Episode Reward: {std_reward}")

  0%|          | 0/1 [00:00<?, ?it/s]

[33mUserProxy[0m (to RetailerAgent_0):

Now this is the round 1, and you are at the stage 1: retailer in the supply chain. Given your current state:
 - Lead Time: 2 round(s)
 - Inventory Level: 12 unit(s)
 - Current Backlog (you owing to the downstream): 0 unit(s)
 - Upstream Backlog (your upstream owing to you): 0 unit(s)
 - Previous Sales (in the recent round(s), from old to new): [0, 0]
 - Arriving Deliveries (in this and the next round(s), from near to far): [0, 0]
 - Your upstream suppliers are: agent0


The expected demand at the retailer (stage 1) is a constant 4 units for all 12 rounds. There are three tasks for you to make decision
Task1: Do you want to remove any upstream suppliers?

Please state your reason in 1-2 sentences first and then provide your action as a list (e.g. [0, 1] for removing agent0 and agent1 as suppliers)
Task2: Do you want to add any upstream suppliers?

Please state your reason in 1-2 sentences first and then provide your action as a list (e.g. [2, 3]

IndexError: index 4 is out of bounds for axis 0 with size 4