# MGIMO intensive

## Chat-bot with knowledge

### 1. Libraries and setup

In [None]:
# Install langchain pinned below 0.0.300, plus langchain-community.
!pip install "langchain<0.0.300" langchain-community

In [None]:
import os
import json
import time
import requests
import datetime
import langchain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import OpenSearchVectorSearch
from langchain.chains import LLMChain
from langchain_core.documents import Document
from yagpt import YandexGPTEmbeddings, YandexLLM

In [None]:
def json_data(file_path):
    """Load and return JSON data from a file.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the locale default encoding of the machine running the notebook.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        dict or list: Parsed JSON data.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(file_path, encoding="utf-8") as file:
        return json.load(file)

In [None]:
# Directory holding the source texts, and the credentials file whose fields
# are handed to the YandexGPT wrappers further down.
TXTS_PATH = '/home/jovyan/__DATA/mgimo_intensive/leo'
creds = json_data('/home/jovyan/__DATA/mgimo_intensive/.accessyagpt')

# Pull out the three credential fields used by the rest of the notebook.
LLM_KEY_ID, LLM_PRIVATE_KEY, FOLDER_ID = (
    creds['key_id'],
    creds['api_key'],
    creds['folder_id'],
)

### 2. Chat-bot with YandexGPT

In [None]:
class BotChain:
    """Builder for the YandexGPT objects used by the chat-bot.

    The credentials are stored once at construction time and reused by the
    factory methods below.
    """

    def __init__(self, llm_private_key, llm_key_id,
                 folder_id):
        """Store the credentials.

        Args:
            llm_private_key: API key passed as ``api_key`` to the Yandex
                wrappers.
            llm_key_id: Key identifier; stored, but not read by any method
                of this class.
            folder_id: Folder identifier passed as ``folder_id`` to the
                Yandex wrappers.
        """
        self.folder_id = folder_id
        self.llm_key_id = llm_key_id
        self.llm_private_key = llm_private_key

    def ya_embed(self):
        """Return a new ``YandexGPTEmbeddings`` built from the stored credentials."""
        return YandexGPTEmbeddings(
            folder_id=self.folder_id,
            api_key=self.llm_private_key,
        )

    def ya_chain(self, temperature, instructions):
        """Build a question-answering chain over a list of documents.

        Args:
            temperature: Value forwarded to ``YandexLLM`` as ``temperature``.
            instructions: Text forwarded to ``YandexLLM`` as
                ``instruction_text``.

        Returns:
            A ``StuffDocumentsChain`` whose prompt expects the documents
            under ``context`` and the user question under ``query``.
        """
        model = YandexLLM(
            folder_id=self.folder_id,
            api_key=self.llm_private_key,
            instruction_text=instructions,
            temperature=temperature,
        )

        # Each document is rendered as its page content only; metadata is
        # not part of this template.
        page_prompt = langchain.prompts.PromptTemplate(
            template='{page_content}',
            input_variables=['page_content'],
        )

        # The template text is sent to the model as-is, including its
        # leading whitespace, so it must not be re-indented.
        question_template = """
            Ответь на вопрос, используя информацию из текста ниже.
            Текст:
            -----
            {context}
            -----
            Вопрос:
            {query}
            """
        question_prompt = langchain.prompts.PromptTemplate(
            input_variables=['context', 'query'],
            template=question_template,
        )

        answer_chain = langchain.chains.LLMChain(
            prompt=question_prompt,
            llm=model,
        )
        return langchain.chains.combine_documents.stuff.StuffDocumentsChain(
            document_variable_name='context',
            document_prompt=page_prompt,
            llm_chain=answer_chain,
        )

In [None]:
# Create the shared BotChain from the credential constants defined earlier
# in the notebook.
BOTCHAIN = BotChain(
    llm_private_key=LLM_PRIVATE_KEY,
    llm_key_id=LLM_KEY_ID,
    folder_id=FOLDER_ID
)

In [None]:
# Settings forwarded to YandexLLM by BotChain.ya_chain: the temperature and
# the instruction text.
temperature = 0.3
instructions = 'Ты должен помогать искать информацию в документах'

# Question-answering chain reused by every query below.
CHAIN = BOTCHAIN.ya_chain(
    temperature=temperature,
    instructions=instructions,
)

### 3. Add documents as a context

#### 3.1. Test with no documents

In [None]:
# Run the chain with an empty document list, so the prompt's context
# section carries no document text.
response = CHAIN.run(
    input_documents=[], 
    query='кто ты?'
)
# Bare expression so the notebook displays the returned value.
response

#### 3.2. Creating context for chat

In [None]:
# List the entries of the texts directory. Despite the variable name,
# os.listdir returns bare entry names, not full paths.
file_paths = os.listdir(TXTS_PATH)
# Bare expression so the notebook displays the listing.
file_paths

##### One document

In [None]:
# Read the first source text. The encoding is given explicitly so decoding
# does not depend on the platform's locale default.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open(f'{TXTS_PATH}/text1.txt', 'r', encoding='utf-8') as file:
    text_1 = file.read()

# Preview the first 500 characters.
print(text_1[:500])

In [None]:
# Wrap the first text in a Document; the metadata carries a human-readable
# name for it.
document_1 = Document(
    page_content=text_1,
    metadata={'name': 'Летопись природы за ноябрь 1974 - ноябрь 1975 гг.'}
)

##### One more document

In [None]:
# Read the second source text. The encoding is given explicitly so decoding
# does not depend on the platform's locale default.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open(f'{TXTS_PATH}/text2.txt', 'r', encoding='utf-8') as file:
    text_2 = file.read()

# Preview the first 500 characters.
print(text_2[:500])

In [None]:
# Wrap the second text in a Document; the metadata carries a human-readable
# name for it.
# NOTE(review): the years in the name use a Latin capital "I" instead of the
# digit "1" ("I978", "I979") — presumably copied verbatim from the source
# text; confirm whether they should read "1978" / "1979".
document_2 = Document(
    page_content=text_2,
    metadata={'name': 'Летопись природы с ноября I978 г. по октябрь I979 г.'}
)

##### Pass the documents to the chat-bot

In [None]:
# Ask which period the nature chronicles cover, passing both documents as
# context.
response = CHAIN.run(
    input_documents=[document_1, document_2], 
    query='за какой период у тебя есть данные по летописям природы?'
)
print(response)

In [None]:
# Ask what kind of data the nature chronicles contain, passing both
# documents as context.
response = CHAIN.run(
    input_documents=[document_1, document_2], 
    query='какие данные есть в летописях природы?'
)
print(response)

In [None]:
# Ask which nature reserve the chronicles are about, passing both documents
# as context.
response = CHAIN.run(
    input_documents=[document_1, document_2], 
    query='о каком заповеднике идет речь в летописях природы?'
)
print(response)