In [1]:
import json
import os
from typing import Callable, Dict, List
import nltk
import pandas as pd
# Fetch the NLTK "punkt" and "averaged_perceptron_tagger" resources when this
# cell runs. Nothing in the visible cells calls nltk directly - presumably a
# document loader / text splitter used elsewhere needs them (TODO confirm).
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")

from pathlib import Path
from langchain.chains import RetrievalQA
from langchain.chat_models.gigachat import GigaChat
from langchain.docstore.document import Document
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
from langchain_community.embeddings.gigachat import GigaChatEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\valer\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\valer\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import os
# ROOT is the current working directory at the moment this cell runs; a later
# cell joins it with "chroma" to locate the persisted vector store.
ROOT = os.getcwd()

# NOTE(review): `sys` is not referenced in the visible cells.
import sys

# VectorDB

In [4]:
def load_chroma(persist_directory, embeddings):
    """
    Open an already-built Chroma vector store persisted on disk.

    Args:
        persist_directory: Path of the directory holding the persisted store.
        embeddings: Embedding object passed to Chroma as ``embedding_function``.

    Returns:
        A ``Chroma`` instance bound to ``persist_directory``.

    Raises:
        AssertionError: If ``persist_directory`` is not an existing directory,
            i.e. the store has not been created yet.
    """
    # Raise explicitly instead of using a bare ``assert``: assert statements
    # are stripped under ``python -O``, which would silently skip this check.
    # AssertionError (and the message) are kept so callers see no change.
    if not os.path.isdir(persist_directory):
        raise AssertionError("Firstly use create_vectordb func")

    return Chroma(
        persist_directory=persist_directory, embedding_function=embeddings
    )
   

In [5]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Keyword arguments forwarded to HuggingFaceEmbeddings: the model is placed on
# the CPU and normalize_embeddings=True is passed through encode_kwargs.
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

# Embedding object for the "intfloat/multilingual-e5-large" model; it is later
# handed to load_chroma as the store's embedding function.
embeddings_e5 = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name="intfloat/multilingual-e5-large",
)


In [7]:
# Open the persisted Chroma store located at ROOT/chroma, using the e5
# embedding object; load_chroma fails if that directory does not exist.
vectordb=load_chroma(os.path.join(ROOT, "chroma"), embeddings_e5)

# GET ANSWER

In [8]:
import yaml 

def load_yaml(path_to_config: str) -> dict:
    """
    Read a YAML file and return its parsed content.

    Args:
        path_to_config: Path to the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file (callers in this
        notebook index the result like a dict).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8. Without ``encoding`` open() falls back to the
    # platform locale codec (e.g. cp125x on Windows), which corrupts or rejects
    # non-ASCII text such as Cyrillic prompt templates.
    with open(path_to_config, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config

In [9]:
def create_qa_pipeline(llm, vectordb):
    """
    Build a RetrievalQA chain over ``vectordb`` driven by ``llm``.

    The chat prompt is read from ``config/system_prompt.yaml`` (keys
    ``system_template`` and ``user_template``). Documents are fetched with an
    MMR retriever and then passed through an ``LLMChainExtractor`` compressor
    (which itself uses ``llm``) before reaching the QA chain.

    Args:
        llm: LangChain-compatible language model, used both for compressing
            the retrieved documents and for producing the final answer.
        vectordb: Vector store exposing ``as_retriever``.

    Returns:
        A ``RetrievalQA`` chain configured with ``return_source_documents=True``.
    """
    system_prompt = load_yaml("config/system_prompt.yaml")
    messages = [
        ("system", system_prompt["system_template"]),
        ("human", system_prompt["user_template"]),
    ]
    qa_chain_prompt = ChatPromptTemplate.from_messages(messages)

    # The MMR diversity weight is named ``lambda_mult``. The previous key
    # "lambda" is not a recognised search argument, so the intended 0.6 value
    # never took effect and the library default was used instead.
    retriever = vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 4, "fetch_k": 20, "lambda_mult": 0.6},
    )

    # Wrap the base retriever so each retrieved document is compressed by the
    # LLM-based extractor before being handed to the QA chain.
    compressor = LLMChainExtractor.from_llm(llm)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=compression_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": qa_chain_prompt},
    )
    return qa_chain

In [10]:
def generate_answer(question, llm, vectordb):
    """
    Generate an answer to a single question (intended to be run over the
    test set).

    Args:
        question: The question, passed to the chain under the ``"query"`` key.
        llm: Language model forwarded to ``create_qa_pipeline``.
        vectordb: Vector store forwarded to ``create_qa_pipeline``.

    Returns:
        A tuple ``(answer, source_documents)`` taken from the chain output
        keys ``"result"`` and ``"source_documents"``.

    Note:
        A new QA chain is built (and the prompt YAML re-read) on every call.
    """
    qa_chain = create_qa_pipeline(llm, vectordb)

    # Use ``invoke`` rather than calling the chain object directly: the
    # ``Chain.__call__`` entry point is deprecated in LangChain.
    result = qa_chain.invoke({"query": question})

    return result["result"], result["source_documents"]

## Как запустить пайплайн

In [None]:
from tqdm import tqdm

# Replace `llm` with your own model, wrapped in the appropriate LangChain format
# (see the links below).
# q - the question
# vectordb is defined above
# The function returns the answer and the documents the model based its answer on.
# Save them somewhere, then use the functions in the prepocess_calculate folder to parse them as needed.
# https://python.langchain.com/docs/integrations/llms/
# https://python.langchain.com/docs/integrations/providers/huggingface/
# NOTE: `q` and `llm` are placeholders that are not defined in the visible
# cells; define both before running this cell.

answ, source = generate_answer(q, llm, vectordb)
     