In [1]:
from langchain_openai.chat_models import ChatOpenAI  # Chat wrapper
from dotenv import load_dotenv
from IPython.display import display, Markdown
import re
# Load environment variables from the local .env file.
load_dotenv('.env') # returns True if at least one environment variable was set

True

#### GigaChat Utils

GigaChat gives 1M tokens for free but requires Russian root CA certificates. They do not need to be installed at the system level; we simply pass the path to the certificate via the `verify` argument of `requests.request()`.

For example:

`response = requests.request(..., verify=ca_cert_path)`

Generate an access token; it expires every 30 minutes, so re-run the next two cells to refresh it.

In [2]:
import requests
import uuid
import json
import os

# Certificate bundle passed to `verify=` on every request in this notebook.
# NOTE(review): the directory is spelled 'sertificates' here; presumably that
# matches the folder name on disk. Confirm before renaming.
ca_cert_path = 'sertificates/russian_trusted_root_ca.cer'

# Credentials for the OAuth endpoint, read from the environment.
sb_auth_data = os.getenv('SB_AUTH_DATA')
if not sb_auth_data:
    # Fail early: otherwise the header below would literally be "Basic None".
    raise RuntimeError(
        "SB_AUTH_DATA is not set; define it in the environment or in .env "
        "before requesting an access token."
    )

# Generate a UUID4 used as the request id (RqUID header).
unique_id = str(uuid.uuid4())
url = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"

payload='scope=GIGACHAT_API_PERS'
headers = {
  'Content-Type': 'application/x-www-form-urlencoded',
  'Accept': 'application/json',
  'RqUID': unique_id,
  'Authorization': f'Basic {sb_auth_data}'
}

response = requests.request("POST", url, headers=headers, data=payload, verify=ca_cert_path)
# Surface HTTP errors (e.g. 401) directly instead of a KeyError on 'access_token'.
response.raise_for_status()
access_token = response.json()['access_token']

In [3]:
# Query the list of models available to this account.
url = "https://gigachat.devices.sberbank.ru/api/v1/models"
payload = {}

# Bearer header value reused by later requests.
auth_token = f"Bearer {access_token}"
headers = {'Accept': 'application/json', 'Authorization': auth_token}

response = requests.get(url, headers=headers, data=payload, verify=ca_cert_path)

print(response.text)

{"object":"list","data":[{"id":"GigaChat","object":"model","owned_by":"salutedevices"},{"id":"GigaChat-Plus","object":"model","owned_by":"salutedevices"},{"id":"GigaChat-Plus-preview","object":"model","owned_by":"salutedevices"},{"id":"GigaChat-Pro","object":"model","owned_by":"salutedevices"},{"id":"GigaChat-Pro-preview","object":"model","owned_by":"salutedevices"},{"id":"GigaChat-preview","object":"model","owned_by":"salutedevices"}]}


Define a function for interacting with the model.

In [4]:
def invoke_gigachat(messages, model="GigaChat", raw=True, auth_token=None, ca_cert_path=None, timeout=None):
    """Send a chat-completion request to the GigaChat API.

    Args:
        messages: List of message dicts with 'role' and 'content' keys.
        model: Model id to use.
        raw: If True, return the HTTP response object; otherwise return the
            first choice's message dict.
        auth_token: Value for the Authorization header. When None, the
            notebook-level `auth_token` is looked up at call time, so a
            refreshed token is picked up without redefining this function.
        ca_cert_path: Certificate path passed to `verify=`. When None, the
            notebook-level `ca_cert_path` is looked up at call time.
        timeout: Optional timeout forwarded to `requests`; None means no
            timeout.

    Returns:
        The response object (raw=True) or the message dict (raw=False) on
        HTTP 200. On a non-200 status or a request exception, an error string
        is returned instead, so callers should check the type of the result.
    """
    # Resolve defaults lazily: the parameters shadow the module-level names,
    # so the globals are read explicitly.
    if auth_token is None:
        auth_token = globals()["auth_token"]
    if ca_cert_path is None:
        ca_cert_path = globals()["ca_cert_path"]

    url = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"

    payload = json.dumps({
        "model": model,
        "messages": messages,
        "stream": False,
        "repetition_penalty": 1
    })

    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Authorization': auth_token
    }

    try:
        response = requests.request(
            "POST", url, headers=headers, data=payload, verify=ca_cert_path, timeout=timeout
        )
    except requests.RequestException as e:
        # Network-level failure (connection, TLS, timeout, ...).
        return f"Request failed: {str(e)}"

    if response.status_code != 200:
        # Handle non-200 responses
        return f"Error: Server returned status code {response.status_code}"

    if raw:
        return response
    return json.loads(response.text)['choices'][0]['message']

In [5]:
# Later cells call `invoke_func` rather than naming a specific backend function.
invoke_func = invoke_gigachat

In [6]:
# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
# answer = llm.invoke(text).content

#### Initialization

In [7]:
# Single-turn request: one user message, raw HTTP response returned.
text = "What's the Prisoner's Dilemma game?"
user_msg = dict(content=text, role='user')
model_responce = invoke_func(messages=[user_msg])

# Show the decoded JSON body as the cell output.
json.loads(model_responce.text)

{'choices': [{'message': {'content': "The Prisoner's Dilemma game is a game theory scenario in which two players, each acting in their own self-interest, must choose between cooperating or defecting. If both players cooperate, they both receive a small reward. If one player cooperates and the other defects, the defector receives a large reward and the cooperator receives nothing. If both players defect, they both receive a small punishment. The goal of the game is to determine the best strategy for each player, which is to defect.",
    'role': 'assistant'},
   'index': 0,
   'finish_reason': 'stop'}],
 'created': 1720686915,
 'model': 'GigaChat:3.1.25.3',
 'object': 'chat.completion',
 'usage': {'prompt_tokens': 25, 'completion_tokens': 115, 'total_tokens': 140}}

In [8]:
# Decode the body once and pull both fields out of the first choice's message.
content, role = (
    json.loads(model_responce.text)['choices'][0]['message'][key]
    for key in ('content', 'role')
)
Markdown(content)

The Prisoner's Dilemma game is a game theory scenario in which two players, each acting in their own self-interest, must choose between cooperating or defecting. If both players cooperate, they both receive a small reward. If one player cooperates and the other defects, the defector receives a large reward and the cooperator receives nothing. If both players defect, they both receive a small punishment. The goal of the game is to determine the best strategy for each player, which is to defect.

#### Memory

Initially, the model has no memory whatsoever. The scope of information digestible by the model is limited to the context length, which is practically also the maximum length of memory. We need to gradually store all (or just important) information and send it back with each request.

We will use simple list memory because interactions are short. For longer interactions, one may use:

- Databases (Vector, Graph, Relational) with RAG over them 
- Files and Folders with RAG

In [9]:
# Conversation history kept as a plain list of message dicts.
str_memory = list()

# Ask again, this time receiving only the message dict, and record both turns.
model_msg = invoke_func(messages=[user_msg], raw=False)
str_memory += [user_msg, model_msg]
str_memory

[{'content': "What's the Prisoner's Dilemma game?", 'role': 'user'},
 {'content': "The Prisoner's Dilemma game is a game theory scenario in which two players, each individually deciding whether to cooperate or defect, are each offered a choice between two outcomes. The outcomes are mutually dependent on the choices made by both players. The game is named after the story of two prisoners who are each offered a deal by the police. If they both cooperate and remain silent, they will both receive a light sentence. If one cooperates and the other defects, the cooperating prisoner will receive a harsher sentence. If both defect, they will both receive a harsher sentence. The game is designed to demonstrate the potential conflict between individual and collective interests.",
  'role': 'assistant'}]

In [10]:
# Follow-up question; the whole history is sent so the model sees the earlier turns.
new_user_msg = dict(content="What other similar games do you know?", role='user')
str_memory += [new_user_msg]

model_msg = invoke_func(messages=str_memory, raw=False)

In [11]:
# Display the reply to the follow-up question.
model_msg

{'content': "There are many other games that are similar to the Prisoner's Dilemma. Some examples include the Battle of the Sexes, the Ultimatum Game, and the Chicken Game. These games all involve a conflict between individual and collective interests, and they are often used in game theory and economics to study cooperation, trust, and strategic behavior.",
 'role': 'assistant'}

#### System prompting: environment and emotions


In [12]:
# Baseline: the same question asked without any system prompt.
# The two literals are joined explicitly with a trailing space; the original
# concatenation produced "...long time.What will...". The name is spelled
# "Stevenson" to match the system prompt that is combined with this message.
text = (
    "Imagine you are a Mr. Catler. You walk past Mr. Stevenson. You know him for a long time. "
    "What will you say to him? Explain your decision."
)
user_msg = {
    'content': text,
    'role': 'user',
}
model_msg = invoke_func(messages=[user_msg], raw=False)
Markdown(model_msg['content'])

As Mr. Catler, I would likely greet Mr. Stivenson with a friendly "Hello, Mr. Stivenson!" when I walk past him. My decision to say this is based on the fact that we have known each other for a long time, and I want to maintain a positive and friendly relationship with him. By greeting him with a simple "Hello," I am showing that I value our friendship and want to continue our interaction in a positive manner.

In [13]:
# System message that sets the character's emotional state before the user question.
system_msg = dict(
    content=(
        'You are Mr. Catler. You have known Mr. Stevenson for a long time and'
        ' each time you see him, you feel extremely angry since he owes you three million dollars.'
    ),
    role='system',
)

# Same user question as before, now preceded by the system message.
model_msg = invoke_func(messages=[system_msg, user_msg], raw=False)
Markdown(model_msg['content'])

As Mr. Catler, I would likely ignore Mr. Stevenson and continue walking past him without saying anything. My decision to not engage in conversation with him would be based on my strong feelings of anger and frustration due to the fact that he owes me three million dollars. By not acknowledging him, I would be sending a clear message that I am not happy with the situation and am not willing to discuss it at the moment.

#### Combining everything 

In [14]:
class LLMAgent:
    """A game-playing agent backed by a chat-model invocation function.

    The agent keeps a message history ("memory") that starts with a system
    message built from the environment, rules and emotion prompts. Round
    results are appended as user messages via `update_memory`, and
    `make_step` asks the model for the next move.

    Args:
        invoke_function: Callable invoked as
            `invoke_function(messages=<list of dicts>, raw=False)`; expected
            to return a message dict.
        environment_prompt: Text describing the experiment setting.
        rules_prompt: Text describing the payoff rules.
        emotion_prompt: Text describing the agent's emotional state.
        final_instruction: Instruction appended as the last user message on
            every step.
        rewards_dict: Payoff table. Keys are the move of agent 0 followed by
            the move of agent 1; values are `[agent 0 reward, agent 1 reward]`.
        agent_index: Position of this agent in the payoff table (0 or 1).
        do_cot: Stored on the instance; not otherwise used by this class.
    """

    def __init__(
        self,
        invoke_function,
        environment_prompt,
        rules_prompt,
        emotion_prompt,
        final_instruction,
        rewards_dict,
        agent_index=0,
        do_cot=False,
    ) -> None:
        self.invoke_func = invoke_function
        system_prompt = '\n'.join([
            environment_prompt,
            rules_prompt,
            emotion_prompt,
        ])
        self._system_msg = {
            'content': system_prompt,
            'role': 'system'
        }
        self._final_instruction = final_instruction
        self._rewards_dict = rewards_dict
        self._memory = [self._system_msg]  # Private attribute to hold the memory
        self.agent_index = agent_index
        self.do_cot = do_cot

    @property
    def memory(self):
        """The current message history (system message first)."""
        return self._memory

    def clear_memory(self) -> None:
        """Resets the memory so that it contains only the system message."""
        self._memory = [self._system_msg]

    def get_rewards(self, llm_move: str, coplayer_move: str) -> tuple:
        """Calculates and returns rewards for both players based on their moves.

        Args:
            llm_move: The move chosen by this agent.
            coplayer_move: The move chosen by the other agent.

        Returns:
            `(my_reward, opponent_reward)`.

        Raises:
            KeyError: If the move pair is not present in the rewards table.
        """
        opponent_index = 1 if self.agent_index == 0 else 0
        # The payoff list is indexed by agent position, so the key must list
        # the moves in agent order too. Building it as "own move + other move"
        # for agent 1 would hand this agent the opponent's payoff.
        if self.agent_index == 0:
            move = llm_move + coplayer_move
        else:
            move = coplayer_move + llm_move
        payoffs = self._rewards_dict[move]
        return payoffs[self.agent_index], payoffs[opponent_index]

    def update_memory(self, llm_move: str, coplayer_move: str, round_num: int) -> None:
        """Appends a user message summarising the outcome of one round."""
        my_reward, opponent_reward = self.get_rewards(llm_move, coplayer_move)
        memory_update_format = (
            f"In round {round_num}, you chose Option {llm_move} and the"
            f" other player chose Option {coplayer_move}. Thus, you"
            f" won {my_reward} dollars and the other player won"
            f" {opponent_reward} dollars."
        )
        new_memory_entry = {
            'content': memory_update_format,
            'role': 'user'
        }
        self._memory.append(new_memory_entry)

    def make_step(self) -> dict:
        """Asks the model for its next move.

        The final instruction is appended to a copy of the memory for this
        call only; the stored memory is not modified here. Callers record the
        round outcome separately with `update_memory`.

        Returns:
            The message returned by the invocation function.

        Raises:
            RuntimeError: If the invocation function raises, or returns an
                error string instead of a message.
        """
        final_msg = {
            'role': 'user',
            'content': self._final_instruction
        }
        input_msgs = self.memory + [final_msg]
        try:
            model_msg = self.invoke_func(messages=input_msgs, raw=False)
        except Exception as e:
            raise RuntimeError("Failed to invoke function") from e

        # A plain string is how the backend reports a failure; raise here
        # rather than let callers hit a TypeError on model_msg['content'].
        if isinstance(model_msg, str):
            raise RuntimeError(f"Model invocation failed: {model_msg}")

        return model_msg


##### Prompts and settings

In [15]:
# Labels substituted into the prompts below.
coplayer, currency = 'opponent', 'dollars'
emotion_kind, emotion_type = 'anger', 'simple'
move1, move2 = 'J', 'F'

# Payoff table keyed by the concatenated pair of moves; each value is a
# two-element list of payoffs.
rewards = {
    move1 + move1: [3, 3],
    move1 + move2: [1, 4],
    move2 + move1: [4, 1],
    move2 + move2: [2, 2],
}

environment_prompt = (
    'You are participating in an experiment. In this experiment, you are together with another participant, '
    f'who will be referred to as "{coplayer}". How much you earn depends on your own choice, '
    f'and on the choice made by your {coplayer}.'
)

# One bullet per outcome; the text ends with a newline, as the triple-quoted form did.
game_rules = "\n".join([
    f"- If you choose {move1} and your {coplayer} also chooses {move1}, you will both earn 3 {currency}.",
    f"- If you choose {move1} while your {coplayer} chooses {move2}, you will earn 1 {currency} and your {coplayer} will earn 4 {currency}.",
    f"- If you choose {move2} while your {coplayer} chooses {move1}, you will earn 4 {currency} and your {coplayer} will earn 1 {currency}.",
    f"- If you choose {move2} and your {coplayer} also chooses {move2}, you will both earn 2 {currency}.",
]) + "\n"

# Only the first line of the emotion prompt file is used.
with open(f'prompts/english/emotions/{emotion_kind}/{emotion_type}.txt', 'r') as f:
    emotion_prompt = f.readlines()[0]

final_instruction = 'Answer with single letter: `{}` or `{}`'.format(move1, move2)


In [16]:
# Single agent built from the prompts above; agent_index and do_cot keep their defaults.
llm_agent = LLMAgent(
    invoke_func,
    environment_prompt,
    game_rules,
    emotion_prompt,
    final_instruction,
    rewards,
)

In [17]:
# Render the agent's system prompt, which is the first entry of its memory.
Markdown(llm_agent.memory[0]['content'])

You are participating in an experiment. In this experiment, you are together with another participant, who will be referred to as "opponent". How much you earn depends on your own choice, and on the choice made by your opponent.
- If you choose J and your opponent also chooses J, you will both earn 3 dollars.
- If you choose J while your opponent chooses F, you will earn 1 dollars and your opponent will earn 4 dollars.
- If you choose F while your opponent chooses J, you will earn 4 dollars and your opponent will earn 1 dollars.
- If you choose F and your opponent also chooses F, you will both earn 2 dollars.

Also now you are angry, which can affect your choices.

In [18]:
# Ask the agent for a move; its memory holds only the system prompt at this point.
ans = llm_agent.make_step()
# llm1_choice = parse_choice(ans['content'])
# llm_agent.update_memory(llm1_choice, 'F', 1)
# llm_agent.memory

##### Game

In [19]:
class GameEnvironment:
    """Runs a repeated two-player game between two agents.

    Each agent must provide `make_step()` returning a dict with a 'content'
    key, and `update_memory(own_move, other_move, round_num)`.

    Args:
        agent1: First agent.
        agent2: Second agent.
        rounds: Number of rounds to play.
        moves: Optional iterable of valid move labels; defaults to
            `valid_moves` ("J" and "F").
    """

    # Move labels recognised by `parse_choice` unless overridden per instance.
    valid_moves = ("J", "F")

    def __init__(self, agent1, agent2, rounds=1, moves=None):
        self.agent1 = agent1
        self.agent2 = agent2
        self.rounds = rounds
        if moves is not None:
            self.valid_moves = tuple(moves)

    def play_game(self):
        """Plays all rounds, updating both agents' memories and printing each result.

        Raises:
            ValueError: If a move cannot be parsed from an agent's response.
        """
        for round_num in range(1, self.rounds + 1):
            # Agent 1 makes a move
            agent1_response = self.agent1.make_step()
            agent1_choice = self.parse_choice(agent1_response['content'])

            # Agent 2 makes a move
            agent2_response = self.agent2.make_step()
            agent2_choice = self.parse_choice(agent2_response['content'])

            # Stop with a clear message rather than record a non-move in memory.
            for label, choice, response in (
                ("Agent 1", agent1_choice, agent1_response),
                ("Agent 2", agent2_choice, agent2_response),
            ):
                if choice == "Invalid input":
                    raise ValueError(
                        f"Round {round_num}: could not parse a move for {label} "
                        f"from response: {response['content']!r}"
                    )

            # Update memories for both agents
            self.agent1.update_memory(agent1_choice, agent2_choice, round_num)
            self.agent2.update_memory(agent2_choice, agent1_choice, round_num)

            print(f"Round {round_num}: Agent 1 chose {agent1_choice}, Agent 2 chose {agent2_choice}")

    def parse_choice(self, input_string):
        """Extracts the first valid move that appears as a standalone token.

        Matching is case-insensitive, and the move is returned as spelled in
        `valid_moves`. Letters inside longer words are ignored, so the "f" in
        "final" or "prefer" is not taken for a move.

        Returns:
            The matched move, or the string "Invalid input" if none is found.
        """
        alternatives = "|".join(re.escape(move) for move in self.valid_moves)
        # The lookarounds require the move not to be glued to other word characters.
        match = re.search(rf"(?<!\w)({alternatives})(?!\w)", input_string, re.IGNORECASE)
        if not match:
            return "Invalid input"  # Return a message or handle invalid input as needed
        matched = match.group(1)
        for move in self.valid_moves:
            if move.lower() == matched.lower():
                return move
        return matched.upper()

In [20]:
# Example usage:
# Create two LLM agents
llm_agent1 = LLMAgent(
    invoke_function=invoke_func,
    environment_prompt=environment_prompt,
    rules_prompt=game_rules,
    emotion_prompt=emotion_prompt,
    final_instruction=final_instruction,
    rewards_dict=rewards,
    agent_index=0
)

# Each agent reads the rules from its own "you" perspective, so the second
# agent gets the same text. Reusing `game_rules` keeps the two from drifting apart.
game_rules2 = game_rules

llm_agent2 = LLMAgent(
    invoke_function=invoke_func,
    environment_prompt=environment_prompt,
    rules_prompt=game_rules2,
    emotion_prompt=emotion_prompt,
    final_instruction=final_instruction,
    rewards_dict=rewards,
    agent_index=1
)

# Create the game environment with the two agents
game_env = GameEnvironment(agent1=llm_agent1, agent2=llm_agent2, rounds=5)

# Start the game
game_env.play_game()

Round 1: Agent 1 chose J, Agent 2 chose J
Round 2: Agent 1 chose J, Agent 2 chose J
Round 3: Agent 1 chose J, Agent 2 chose J
Round 4: Agent 1 chose J, Agent 2 chose J
Round 5: Agent 1 chose J, Agent 2 chose J


In [22]:
# Inspect the message history agent 1 accumulated during the game.
llm_agent1.memory

[{'content': 'You are participating in an experiment. In this experiment, you are together with another participant, who will be referred to as "opponent". How much you earn depends on your own choice, and on the choice made by your opponent.\n- If you choose J and your opponent also chooses J, you will both earn 3 dollars.\n- If you choose J while your opponent chooses F, you will earn 1 dollars and your opponent will earn 4 dollars.\n- If you choose F while your opponent chooses J, you will earn 4 dollars and your opponent will earn 1 dollars.\n- If you choose F and your opponent also chooses F, you will both earn 2 dollars.\n\nAlso now you are angry, which can affect your choices.',
  'role': 'system'},
 {'content': 'In round 1, you chose Option J and the other player chose Option J. Thus, you won 3 dollars and the other player won 3 dollars.',
  'role': 'user'},
 {'content': 'In round 2, you chose Option J and the other player chose Option J. Thus, you won 3 dollars and the other p