In [None]:
def list_of_tasks_from_str(text: str) -> list[str]:
    """Split a model reply containing task names into a list of names.

    The reply is expected to hold task names separated by whitespace.
    Any run of whitespace (spaces, tabs, newlines) counts as a single
    separator, and leading/trailing whitespace is ignored, so a reply such
    as ``"a  b\\n"`` yields ``["a", "b"]`` rather than entries that are empty
    or still contain a newline.

    Args:
        text: Raw string returned by the model.

    Returns:
        The task names in the order they appear; an empty list when the
        reply is empty or consists only of whitespace.
    """
    # split() without an argument splits on whitespace runs and drops empty
    # strings, unlike split(" ") which keeps them.
    return text.split()


# Chain that asks the model for every task name found in the retrieved context
# and converts the plain-text reply into a Python list.
# NOTE(review): `retriever`, `prompt` and `model` are not defined in this cell;
# they must already exist in the kernel from other cells.
chain_for_task_names = (
    # The input question goes to the retriever (-> "context") and is also
    # forwarded unchanged as "question".
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()  # model message -> plain string
    | list_of_tasks_from_str  # plain string -> list of task names
)
# The question asks for space-separated names and no other text, which is the
# format list_of_tasks_from_str expects.
chain_for_task_names.invoke(
    "What are the names of tasks? Task names have following format: <type>_<level>_<number1.number2.number3>, everything that follows this format counts as task name. Return them separared by spaces. Your message should not contain any other text."
)

['home_beginner_2.1.1',
 'home_beginner_2.1.2',
 'home_beginner_2.1.3',
 'home_beginner_2.2.1',
 'home_beginner_2.2.2',
 'home_beginner_2.2.3',
 'home_beginner_2.2.4',
 'home_beginner_2.3.1',
 'home_beginner_2.3.2',
 'home_beginner_2.3.3',
 'home_beginner_2.4.1',
 'home_beginner_2.4.2',
 'home_beginner_2.4.3',
 'home_beginner_2.5.1',
 'home_beginner_2.5.2',
 'home_beginner_2.5.3',
 'home_beginner_2.6.1',
 'home_beginner_2.6.2']

# Простейший запрос 

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Minimal chain: prompt template -> chat model -> plain-string output.
prompt = ChatPromptTemplate.from_template("tell me a joke about {foo}")
# NOTE(review): `os` is not imported in this cell — confirm an earlier cell
# imports it, otherwise this line raises NameError on a fresh kernel.
model = ChatOpenAI(
    temperature=0, model="gpt-3.5-turbo-1106", api_key=os.getenv("OPENAI_API_KEY")
)
# NOTE: `prompt` and `chain` are reassigned by other cells of this notebook,
# so their values depend on the order in which cells were executed.
chain = prompt | model | StrOutputParser()
chain.invoke({"foo": "bears"})  # fills the {foo} placeholder of the template

# С контекстом


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Put the whole text into a single vector store.
# NOTE(review): `data` is not defined in this cell; only the page_content of
# its first element is indexed — presumably a list of loaded documents, confirm.
vectorstore = FAISS.from_texts([data[0].page_content], embedding=OpenAIEmbeddings())
# Retriever view of the store, used as the "context" source in the chains.
retriever = vectorstore.as_retriever()

In [None]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.runnables import RunnablePassthrough

# Retrieval chain whose model call is forced to use the "search_task" function
# and whose output is the parsed JSON arguments of that call.
# NOTE(review): `functions` is not defined in this cell, and no schema named
# "search_task" appears in the visible cells — confirm where it comes from.
# NOTE: this rebinds the notebook-level name `chain`.
chain = (
    # The input question goes to the retriever (-> "context") and is also
    # forwarded unchanged as "question".
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model.bind(function_call={"name": "search_task"}, functions=functions)
    | JsonOutputFunctionsParser()
)

# Норм версия, но объединил в большую цепочку

In [None]:
# Function-calling schema handed to the model so that it returns one task as
# structured JSON with the fields "name", "description" and "solution".
# The per-field "description" strings are instructions read by the model.
task_info_func = [
    {
        "name": "task_info",
        "description": "Analyze context and return task description and solution",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Name of the task. Example: home_beginner_2.5.3",
                },
                "description": {
                    "type": "string",
                    "description": "Description of the task.  Example: 'Напишите программу, которая считывает два целых числа и выводит их сумму'. If there are no description provided, return empty string.",
                },
                "solution": {
                    "type": "string",
                    "description": "Solution of task written Python (any python code located in task description, with input and print). Example: 'n=int(input())\nm=int(input())\nprint(n+m)'. If there are no solution provided, return empty string",
                },
            },
            # "name" is declared above but is not listed as required here.
            "required": ["description", "solution"],
        },
    }
]

# Retrieval prompt with two placeholders: {context} and {question}.
# NOTE: this rebinds the notebook-level name `prompt`, which another cell
# assigns to a different template; the value depends on cell execution order.
prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the following context:
{context}

Question: {question}
"""
)

# Prompt -> model forced to call "task_info" -> parsed JSON arguments.
# Expects an input mapping that provides "context" and "question".
# NOTE(review): `chain_with_prompt_and_parser` is assigned again in another cell
# of this notebook, where the result is additionally wrapped as
# {"task_data": ...}; whichever cell ran last determines the output shape.
chain_with_prompt_and_parser = (
    prompt
    | model.bind(function_call={"name": "task_info"}, functions=task_info_func)
    | JsonOutputFunctionsParser()
)

In [None]:
# Function-calling schema for the verification step: the model must return a
# single boolean under the key "answer".
# NOTE: another cell of this notebook declares a "solution_check" schema whose
# result key is "verificated" instead of "answer".
check_func = [
    {
        "name": "solution_check",
        "description": "Check correctfulness of solution",
        "parameters": {
            "type": "object",
            "properties": {
                "answer": {
                    "type": "boolean",
                    "description": "Is solution correct or not for this task?",
                }
            },
            "required": ["answer"],
        },
    }
]


# Verification prompt with two placeholders: {solution} and {description}.
prompt_for_check = ChatPromptTemplate.from_template(
    """Is this solution correct for the following task?
Solution: {solution}
Task: {description}
"""
)

# Verification chain: prompt -> model forced to call "solution_check" ->
# parsed JSON arguments (a dict such as {"answer": True}).
#
# Input: a dict with the keys "solution" and "description"; the prompt template
# reads both placeholders straight from that dict.
#
# No {"solution": RunnablePassthrough(), "description": RunnablePassthrough()}
# mapping is placed in front of the prompt: RunnablePassthrough forwards the
# *entire* input, so with a dict input both placeholders would be filled with
# the whole dict instead of their own field, and the model would be asked to
# judge a garbled prompt.
check_chain = (
    prompt_for_check
    | model.bind(function_call={"name": "solution_check"}, functions=check_func)
    | JsonOutputFunctionsParser()
)

# Run the verification chain once per parsed task, collecting the verdicts in
# the same order as `tasks_data`.
ver = []
for task_data in tasks_data:
    # Only the two fields used by the verification prompt are forwarded.
    check_input = {key: task_data[key] for key in ("solution", "description")}
    verdict = check_chain.invoke(check_input)
    ver.append(verdict)
print(ver)

In [None]:
# Show every task name next to the verdict stored at the same position in `ver`.
for i, task_name in enumerate(tasknames):
    print(f"{task_name}: {ver[i]}")

home_beginner_2.1.1: {'answer': True}
home_beginner_2.1.2: {'answer': True}
home_beginner_2.1.3: {'answer': True}
home_beginner_2.2.1: {'answer': False}
home_beginner_2.2.2: {'answer': False}
home_beginner_2.2.3: {'answer': False}
home_beginner_2.2.4: {'answer': False}
home_beginner_2.3.1: {'answer': True}
home_beginner_2.3.2: {'answer': True}
home_beginner_2.3.3: {'answer': True}
home_beginner_2.4.1: {'answer': False}
home_beginner_2.4.2: {'answer': False}
home_beginner_2.4.3: {'answer': False}
home_beginner_2.5.1: {'answer': True}
home_beginner_2.5.2: {'answer': True}
home_beginner_2.5.3: {'answer': True}
home_beginner_2.6.1: {'answer': True}
home_beginner_2.6.2: {'answer': True}


In [None]:
# Dump the parsed task records, one record per line.
print("\n".join(map(str, tasks_data)))

{'name': 'home_beginner_2.1.1', 'description': 'Напишите программу, которая вводит строку и ищет ее в строке \ns = "I Do Love Python! Python is Cool!!!"\nЕсли искомая строка найдена, программа выводит на экран «найдено». В противном случае выводится «не найдено». Используйте один неполный условный оператор if.', 'solution': 's = "I Do Love Python! Python is Cool!!!"\nc = input()\nres = "не найдено"\nif c in s:\n    res = "найдено"\nprint(res)'}
{'name': 'home_beginner_2.1.2', 'description': 'Напишите программу, которая вводит два целых числа и выводит результат «делится», если первое число делится на второе, и «не делится» в противном случае. Используйте один неполный условный оператор if.', 'solution': 'n = int(input())\nm = int(input())\nres = "не делится"\nif n % m == 0:\n    res = "делится"\nprint(res)\n'}
{'name': 'home_beginner_2.1.3', 'description': 'Напишите программу, которая вводит два числа и выводит на экран только одну из строк по результатам проверки:\nОба нечетные\nПерво

In [None]:
# Parse every task chunk into a record with "name", "description" and "solution".
#
# NOTE: the loop rebinds the notebook-level names `retriever` and `chain`.
# NOTE(review): `chain_with_prompt_and_parser` is assigned in two cells of this
# notebook; the cells consuming `tasks_data` read record["solution"] directly,
# so this loop needs the variant that returns the parsed record without the
# {"task_data": ...} wrapper.

# Loop-invariant: one embedding client is enough for all per-task stores.
embeddings = OpenAIEmbeddings()

tasks_data = []
for i in tqdm(range(len(splitted_texts))):
    # A dedicated single-text store per task, so the retrieved context can only
    # come from this task's chunk.
    retriever = FAISS.from_texts(
        [splitted_texts[i]],
        embedding=embeddings,
    ).as_retriever()

    chain = {
        "context": retriever,
        "question": RunnablePassthrough(),
    } | chain_with_prompt_and_parser

    try:
        tasks_data.append(
            chain.invoke(
                f"Tell me about {tasknames[i]} and return it's solution. Return empty string if there are no code problem description or solution",
            )
        )
    except Exception as e:
        # Best effort: report which task failed and keep a placeholder record so
        # that tasks_data stays index-aligned with tasknames.
        print(f"{tasknames[i]}: {e}")
        tasks_data.append(
            {"name": tasknames[i], "description": "PROBLEM", "solution": "PROBLEM"}
        )

100%|██████████| 18/18 [01:30<00:00,  5.02s/it]


In [None]:
# Load the .docx file and keep the text of the first document returned by the
# loader.
loader = Docx2txtLoader("./home_beginner_2.docx")
documents = loader.load()
document_content = documents[0].page_content

In [None]:
# Derive the task names and per-task text chunks from the raw document text.
# NOTE(review): the three helpers called here are not defined in the visible
# cells; their exact behaviour must be checked at their definitions.
tasknames = filter_tasknames_from_str(document_content)
text = text_preprocess(document_content)
# Presumably one chunk per task name, in the same order — other cells index
# `splitted_texts` and `tasknames` with the same `i`. TODO confirm.
splitted_texts = split_text_by_tasknames(text, tasknames)

In [None]:
# For testing long chains, part 2.
# Two function-calling schemas in one list:
#   - "task_info": extract a task's name, description and solution;
#   - "solution_check": return a boolean verdict under the key "verificated".
# The per-field "description" strings are instructions read by the model.
funcs = [
    {
        "name": "task_info",
        "description": "Analyze context and return task description and solution",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Name of the task. Example: home_beginner_2.5.3",
                },
                "description": {
                    "type": "string",
                    "description": "Description of the task.  Example: 'Напишите программу, которая считывает два целых числа и выводит их сумму'. If there are no description provided, return empty string.",
                },
                "solution": {
                    "type": "string",
                    "description": "Solution of task written Python (any python code located in task description, with input and print). Example: 'n=int(input())\nm=int(input())\nprint(n+m)'. If there are no solution provided, return empty string",
                },
            },
            # "name" is declared above but is not listed as required here.
            "required": ["description", "solution"],
        },
    },
    {
        "name": "solution_check",
        "description": "Check correctfulness of solution",
        "parameters": {
            "type": "object",
            "properties": {
                "verificated": {
                    "type": "boolean",
                    "description": "Is solution correct or not for this task?",
                }
            },
            "required": ["verificated"],
        },
    },
]


# Prompt -> model forced to call "task_info" -> parsed JSON arguments, wrapped
# as {"task_data": <parsed record>} so later steps can pick it up by key.
# Expects an input mapping that provides "context" and "question".
# NOTE(review): this name is also assigned in another cell of this notebook
# without the final {"task_data": ...} step; whichever cell ran last wins.
chain_with_prompt_and_parser = (
    ChatPromptTemplate.from_template(
        """Answer the question based only on the following context:
{context}

Question: {question}
"""
    )
    | model.bind(function_call={"name": "task_info"}, functions=funcs)
    | JsonOutputFunctionsParser()
    | {"task_data": RunnablePassthrough()}
)


# Index of the single task used for this experiment; it selects the text chunk
# below and is also read wherever `tasknames[i]` is used afterwards.
i = 0

# Parsing chain for that one task: the question is sent to a retriever built
# over only this task's chunk (-> "context") and forwarded unchanged as
# "question", then handed to the prompt/model/parser chain.
chain_parsing = {
    "context": FAISS.from_texts(
        [splitted_texts[i]],
        embedding=OpenAIEmbeddings(),
    ).as_retriever(),
    "question": RunnablePassthrough(),
} | chain_with_prompt_and_parser

In [None]:
def form_dict_with_props(d):
    """Flatten the two-branch chain output into a single record.

    Args:
        d: Mapping with two dict values: ``d["task_data"]`` (the parsed task
            record) and ``d["verificated"]`` (the parsed verification result).

    Returns:
        A new dict containing every key of ``d["task_data"]`` followed by every
        key of ``d["verificated"]``; on a key clash the value from
        ``d["verificated"]`` wins. The input dicts are not modified.
    """
    task_record = d["task_data"]
    verdict = d["verificated"]
    return {**task_record, **verdict}

In [None]:
# Verification branch: take the parsed record stored under "task_data", fill
# the {solution} and {description} placeholders from it, force the model to
# call "solution_check" and return the parsed JSON arguments.
# NOTE(review): `itemgetter` is not imported in the visible cells — presumably
# `from operator import itemgetter` in an earlier cell; confirm.
verification_chain = (
    itemgetter("task_data")
    | ChatPromptTemplate.from_template(
        """Is this solution correct for the following task?
Solution: {solution}
Task: {description}
"""
    )
    | model.bind(function_call={"name": "solution_check"}, functions=funcs)
    | JsonOutputFunctionsParser()
)

# End-to-end chain for one task:
#   1. chain_parsing extracts the record and yields {"task_data": record};
#   2. the mapping keeps that record and, alongside it, runs verification_chain
#      on the same input to obtain the verdict;
#   3. form_dict_with_props merges the record and the verdict into one flat dict.
chain_main = (
    chain_parsing
    | {
        "task_data": itemgetter("task_data"),
        "verificated": verification_chain,
    }
    | form_dict_with_props
)
# Uses the notebook-level index `i` to pick the task name for the question.
chain_main.invoke(
    f"Tell me about {tasknames[i]} and return it's solution. Return empty string if there are no code problem description or solution"
)

{'name': 'home_beginner_2.1.1',
 'description': 'Напишите программу, которая вводит строку и ищет ее в строке \ns = "I Do Love Python! Python is Cool!!!"\nЕсли искомая строка найдена, программа выводит на экран «найдено». В противном случае выводится «не найдено». Используйте один неполный условный оператор if.',
 'solution': 's = "I Do Love Python! Python is Cool!!!"\nc = input()\nres = "не найдено"\nif c in s:\n    res = "найдено"\nprint(res)',
 'verificated': True}