In [1]:
!pip install --quiet --upgrade pyautogen~=0.2.0b4 torch git+https://github.com/huggingface/transformers sentencepiece

In [2]:
!pip install --quiet --upgrade accelerate bitsandbytes

In [3]:
import autogen
from autogen import AssistantAgent, UserProxyAgent
from transformers import AutoTokenizer, GenerationConfig, AutoModelForCausalLM, BitsAndBytesConfig
from types import SimpleNamespace
import torch

2024-03-15 01:24:29,041	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-03-15 01:24:30,548	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [4]:
import os
os.environ['OAI_CONFIG_LIST'] ='[{"model": "google/gemma-7b-it","model_client_cls": "CustomModelClient","device": "cuda","n": 1,"params": {"max_length": 500}}]'

In [5]:

def replace_system_role_with_user(structure):
    # Iterate through each element in the structure
    updated_structure = []
    previous_role = None
    for item in structure:
        updated_structure.append(item)
        # Check if the role is 'system' and replace it with 'user'
        if item.get('role') == 'system':
            item['role'] = 'user'
            dummy_response = {'content': 'ok', 'role': 'assistant'}
            updated_structure.append(dummy_response)
            #item['role']. = 'assistant'
        elif item.get('role') == 'user':
            updated_structure.append({'content': 'ok', 'role': 'assistant'})
        elif item.get('role') == 'assistant':
            if updated_structure[-2]['role'] == 'assistant':
                updated_structure.pop()

        #previous_role = item['role']

    if updated_structure and updated_structure[-1]['role'] == 'assistant':
        updated_structure.pop()
    # Return the modified structure
    return updated_structure

# custom client with custom model loader
class CustomModelClient:
    def __init__(self, config, **kwargs):
        print(f"CustomModelClient config: {config}")
        self.device = config.get("device", "cpu")
        quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

        self.model = AutoModelForCausalLM.from_pretrained(config["model"],
                                                          quantization_config=quantization_config,#.to(self.device)
                                                          low_cpu_mem_usage=True,
                                                          device_map='auto',
                                                          torch_dtype=torch.float16)
        #self.model = AutoModelForCausalLM.from_pretrained(config["model"], device_map='auto', torch_dtype=torch.bfloat16).to(self.device)
        self.model_name = config["model"]
        self.tokenizer = AutoTokenizer.from_pretrained(config["model"], use_fast=False)
        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

        # params are set by the user and consumed by the user since they are providing a custom model
        # so anything can be done here
        gen_config_params = config.get("params", {})
        self.max_length = gen_config_params.get("max_length", 256)

        print(f"Loaded model {config['model']} to {self.device}")

    def create(self, params):
        if params.get("stream", False) and "messages" in params:
            raise NotImplementedError("Local models do not support streaming.")
        else:
            num_of_responses = params.get("n", 1)

            # can create my own data response class
            # here using SimpleNamespace for simplicity
            # as long as it adheres to the ClientResponseProtocol

            response = SimpleNamespace()

            chat_template = replace_system_role_with_user(params["messages"])
            #print("=======", chat_template)
            #print("-------", params["messages"])
            #chat_template = params['messages']
            inputs = self.tokenizer.apply_chat_template(
                chat_template, return_tensors="pt", add_generation_prompt=True
            ).to(self.device)
            inputs_length = inputs.shape[-1]

            # add inputs_length to max_length
            max_length = self.max_length + inputs_length
            generation_config = GenerationConfig(
                max_length=max_length,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
            )

            response.choices = []
            response.model = self.model_name

            for _ in range(num_of_responses):
                outputs = self.model.generate(inputs, generation_config=generation_config)
                # Decode only the newly generated text, excluding the prompt
                text = self.tokenizer.decode(outputs[0, inputs_length:])
                choice = SimpleNamespace()
                choice.message = SimpleNamespace()
                choice.message.content = text
                choice.message.function_call = None
                response.choices.append(choice)

            return response

    def message_retrieval(self, response):
        """Retrieve the messages from the response."""
        choices = response.choices
        return [choice.message.content for choice in choices]

    def cost(self, response) -> float:
        """Calculate the cost of the response."""
        response.cost = 0
        return 0

    @staticmethod
    def get_usage(response):
        # returns a dict of prompt_tokens, completion_tokens, total_tokens, cost, model
        # if usage needs to be tracked, else None
        return {}

In [6]:
config_list_custom = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
)
llm_config={"config_list": config_list_custom, "cache_seed": 49, "temperature": 0.3}

In [7]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Group Orchestration

In [8]:
user_proxy = autogen.UserProxyAgent(
    name="User_proxy",
    system_message="A human admin.",
    code_execution_config=False,
    human_input_mode="TERMINATE",
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
)
writer = autogen.AssistantAgent(
    name="Writer",
    system_message="""You are a blog post writer who is capable of writing a travel blog.
                      You generate one iteration of an article once at a time.
                      You never provide review comments.
                      You are open to comments and willing to make changes in your article based on these comments.
                    """,
    llm_config=llm_config,
    description="""This is a blog post writer who is capable of writing travel blogs.
                   The writer is open to any comments and recommendations for improving the article.
                   Ask writer to iterate article every time when there is a new change recommendation from editor.
                """
)
editor = autogen.AssistantAgent(
    name="Editor",
    system_message="""You review blog posts and give change recommendations to make the article more viral on social media.
                      You never write or revise blogs by yourself.
                       """,
    llm_config=llm_config,
    description="""This is an editor who reviews the blogs of writers and provides change ideas.
                    The editor should be called every time the writer provides a version of a blog post.
                """
)
groupchat = autogen.GroupChat(agents=[user_proxy, writer, editor], messages=[], max_round=6)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)



[autogen.oai.client: 03-15 01:46:08] {417} INFO - Detected custom model client in config: CustomModelClient, model client can not be used until register_model_client is called.
[autogen.oai.client: 03-15 01:46:08] {417} INFO - Detected custom model client in config: CustomModelClient, model client can not be used until register_model_client is called.
[autogen.oai.client: 03-15 01:46:08] {417} INFO - Detected custom model client in config: CustomModelClient, model client can not be used until register_model_client is called.


In [9]:
writer.register_model_client(model_client_cls=CustomModelClient)
editor.register_model_client(model_client_cls=CustomModelClient)
manager.register_model_client(model_client_cls=CustomModelClient)

CustomModelClient config: {'model': 'google/gemma-7b-it', 'model_client_cls': 'CustomModelClient', 'device': 'cuda', 'n': 1, 'params': {'max_length': 500}}


config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/888 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

Loaded model google/gemma-7b-it to cuda
CustomModelClient config: {'model': 'google/gemma-7b-it', 'model_client_cls': 'CustomModelClient', 'device': 'cuda', 'n': 1, 'params': {'max_length': 500}}


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loaded model google/gemma-7b-it to cuda
CustomModelClient config: {'model': 'google/gemma-7b-it', 'model_client_cls': 'CustomModelClient', 'device': 'cuda', 'n': 1, 'params': {'max_length': 500}}


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loaded model google/gemma-7b-it to cuda


In [10]:
user_proxy.initiate_chat(
    manager, message="""Generate a 120-word of blog post about traveling in Bohol Island.
                      """
)

[33mUser_proxy[0m (to chat_manager):

Generate a 120-word of blog post about traveling in Bohol Island.
                      

--------------------------------------------------------------------------------


2024-03-15 01:52:41.688061: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-15 01:52:41.688199: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-15 01:52:41.813542: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[33mWriter[0m (to chat_manager):

## Island Dreams: Exploring the Jewel of Bohol

Bohol Island, a gem nestled in the heart of the Philippines, is a paradise teeming with natural beauty and cultural treasures. From the breathtaking Chocolate Hills to the serene Loboc River, Bohol offers a captivating experience for travelers of all kinds.

Upon arrival, the vibrant capital of Tagbilaran welcomes you with its friendly atmosphere and charming heritage. Immerse yourself in the history of the island at the Baclayon Church, a UNESCO World Heritage Site, or climb the iconic Chocolate Hills for a panoramic view that will take your breath away.

For a glimpse into the local culture, head to the Loboc River, where you can enjoy a leisurely boat ride while savoring the fresh and flavorful cuisine. Witness the vibrant dance of the Tarsier, the world's smallest primate, and marvel at the intricate patterns of the colorful butterfly sanctuary.

Whether you're seeking adventure, relaxation, or cult

ChatResult(chat_id=None, chat_history=[{'content': 'Generate a 120-word of blog post about traveling in Bohol Island.\n                      ', 'role': 'assistant'}, {'content': "## Island Dreams: Exploring the Jewel of Bohol\n\nBohol Island, a gem nestled in the heart of the Philippines, is a paradise teeming with natural beauty and cultural treasures. From the breathtaking Chocolate Hills to the serene Loboc River, Bohol offers a captivating experience for travelers of all kinds.\n\nUpon arrival, the vibrant capital of Tagbilaran welcomes you with its friendly atmosphere and charming heritage. Immerse yourself in the history of the island at the Baclayon Church, a UNESCO World Heritage Site, or climb the iconic Chocolate Hills for a panoramic view that will take your breath away.\n\nFor a glimpse into the local culture, head to the Loboc River, where you can enjoy a leisurely boat ride while savoring the fresh and flavorful cuisine. Witness the vibrant dance of the Tarsier, the world

In [11]:
!nvidia-smi

Fri Mar 15 02:04:39 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   71C    P0              31W /  70W |  13453MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla T4                       Off | 00000000:00:05.0 Off |  