Код для генерации описаний для каждой категории

In [1]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate
)

from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers.json import SimpleJsonOutputParser

from typing import List, Tuple

import os
import json
import random

In [2]:
# Path to the category list, one category per line. Built with os.path.join so
# the separator matches the host OS (the value is unchanged on Windows).
CATEGORIES_PATH = os.path.join(os.curdir, "categories.txt")

with open(CATEGORIES_PATH, "r", encoding="UTF-8") as file:
    # Skip blank / whitespace-only lines so no empty category is sent to the model.
    categories_array = [line for line in file.read().splitlines() if line.strip()]

In [4]:
def create_model_prompts(system_prompt: str,
                         user_prompt: str) -> ChatPromptTemplate:
    """Build a two-message chat prompt from system and user template strings.

    Args:
        system_prompt: Template text for the system message.
        user_prompt: Template text for the human (user) message.

    Returns:
        A ChatPromptTemplate holding the system message followed by the
        human message.
    """
    messages = [
        SystemMessagePromptTemplate.from_template(system_prompt),
        HumanMessagePromptTemplate.from_template(user_prompt),
    ]
    return ChatPromptTemplate.from_messages(messages)

# Model

In [3]:
from langchain_openai import ChatOpenAI

# JSON file whose keys are passed as keyword arguments to ChatOpenAI.
# Built with os.path.join so the separator matches the host OS (the value is
# unchanged on Windows).
T_PRO_CREDS = os.path.join(os.pardir, os.pardir, "secrets", "t-pro.json")

# JSON is UTF-8 by specification; do not rely on the locale's default encoding.
with open(T_PRO_CREDS, encoding="utf-8") as file:
    model_params = json.load(file)

# temperature is pinned to 0 here.
# NOTE: if the JSON file also defines "temperature", this call raises TypeError
# (duplicate keyword argument).
llm = ChatOpenAI(**model_params, temperature=0)

In [5]:
# System instruction (Russian): "Write a one-sentence description for the
# given category."
SYSTEM_PROMPT = "Для данной категории напиши описание из одного предложения."

# User message template (Russian "Category: ..."); {problem_title} is filled
# in with the category name.
USER_PROMPT = "Категория: {problem_title}"

prompt = create_model_prompts(
    system_prompt=SYSTEM_PROMPT,
    user_prompt=USER_PROMPT,
)

In [6]:
# One input mapping per category; the key matches the {problem_title}
# placeholder of the user prompt template.
batch = [dict(problem_title=name) for name in categories_array]

In [7]:
from langchain_core.output_parsers.string import StrOutputParser

# Pipeline: render the prompt, call the model, parse the reply with
# StrOutputParser.
chain = (
    prompt
    | llm
    | StrOutputParser()
)

# Run the pipeline over every prepared input.
results = chain.batch(batch)

In [9]:
# Write one "<category>. <description>" record per line.
# Model output may contain line breaks or surrounding whitespace; collapse all
# whitespace runs to single spaces so each record stays on exactly one line.
description_lines = [
    f"{category}. {' '.join(description.split())}"
    for category, description in zip(categories_array, results)
]

with open("categories_descriptions.txt", "w", encoding="utf-8") as file:
    file.write("\n".join(description_lines))