In [3]:
# Установите зависимости:
# pip install langchain faiss-cpu sentence-transformers

from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub  # Или OpenAI

In [5]:
!pip install sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-3.4.1-py3-none-any.whl (275 kB)
Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl (11.1 MB)
Collecting torch>=1.11.0
  Using cached torch-2.6.0-cp310-none-macosx_11_0_arm64.whl (66.5 MB)
Collecting scipy
  Using cached scipy-1.15.2-cp310-cp310-macosx_14_0_arm64.whl (22.4 MB)
Collecting Pillow
  Using cached pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl (3.1 MB)
Collecting huggingface-hub>=0.20.0
  Using cached huggingface_hub-0.29.3-py3-none-any.whl (468 kB)
Collecting transformers<5.0.0,>=4.41.0
  Using cached transformers-4.49.0-py3-none-any.whl (10.0 MB)
Collecting filelock
  Using cached filelock-3.18.0-py3-none-any.whl (16 kB)
Collecting fsspec>=2023.5.0
  Using cached fsspec-2025.3.0-py3-none-any.whl (193 kB)
Collecting sympy==1.13.1
  Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)
Collecting net

In [6]:

# Tunable settings for this demo, gathered in one place.
CHUNK_SIZE = 100
CHUNK_OVERLAP = 0
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
LLM_REPO_ID = "google/flan-t5-small"  # example model
TOP_K = 2  # number of passages handed to the LLM per question

# Step 1 - corpus: three short weather statements used as the knowledge base.
documents = [
    "Погода в Москве сегодня: +25°C, солнечно.",
    "Вчера в Санкт-Петербурге был дождь и +18°C.",
    "Завтра в Сочи ожидается гроза и +28°C.",
]

# Split the raw strings into passages (chunks) wrapped as documents.
text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.create_documents(documents)

# Step 2 - embedding model used to vectorise the passages.
embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Step 3 - index the passages in a FAISS vector store.
vector_store = FAISS.from_documents(docs, embedder)

# Step 4 - assemble the RAG chain: retriever + LLM, "stuff" chain type.
# NOTE(review): HuggingFaceHub presumably needs a Hugging Face API token
# available in the environment - confirm before running.
# NOTE(review): verify that this model copes with Russian prompts/context.
llm = HuggingFaceHub(repo_id=LLM_REPO_ID)
retriever = vector_store.as_retriever(search_kwargs={"k": TOP_K})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
)

# Step 5 - ask a question and show the answer.
# NOTE(review): `run` is reported as deprecated in newer LangChain releases in
# favour of `invoke` - confirm against the installed version.
query = "Какая погода сегодня в Москве?"
response = qa_chain.run(query)
print(f"Ответ: {response}")

  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 