In [1]:
import requests
from langfuse import Langfuse
import os
import sys

sys.path.append(os.path.abspath("C:/vuzik/sii/bachelor-2025-team-losoci/ml"))

import prompt_templates
from prompt_templates import ProfileText
from preprocessing_pipeline import DataPreprocessor

from langfuse import Langfuse
from langchain_ollama import ChatOllama

import os
from dotenv import load_dotenv
load_dotenv()

True

### Langfuse

In [5]:
# Shared Langfuse client; each constructor option is read from its own
# environment variable (None is passed through when a variable is unset).
langfuse = Langfuse(
    **{
        option: os.getenv(env_name)
        for option, env_name in (
            ("secret_key", "LANGFUSE_SK"),
            ("public_key", "LANGFUSE_PK"),
            ("host", "LANGFUSE_HOST"),
        )
    }
)

#### Курс доллара


In [3]:
import requests

def get_currency_rate():
    """Fetch the latest USD-based exchange rates and return the RUB entry.

    Returns:
        The value stored under ``rates["RUB"]`` in the JSON payload, exactly
        as decoded.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: if the payload has no ``rates`` / ``RUB`` entry.
    """
    url = "https://api.exchangerate-api.com/v4/latest/USD"
    # A timeout keeps a stalled connection from hanging the kernel forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    response.raise_for_status()
    return response.json()['rates']['RUB']

### Запросы

#### По профилю

In [6]:
from langchain.agents import create_agent
from langchain_ollama import ChatOllama
from langgraph.checkpoint.memory import InMemorySaver  

In [16]:
def generate_response(prompt: str,
                      system: str,
                      model_name: str = None,
                      max_tokens: int = 500,
                      temperature: float = 0.7):
    """Generate a completion over HTTP and record it as a Langfuse generation.

    The request is POSTed to the URL in the ``BASE_URL`` environment variable.
    The response fields read here (``response``, ``prompt_eval_count``,
    ``eval_count``, ``total_duration``) match Ollama's generate endpoint, so
    presumably ``BASE_URL`` points there -- confirm for other backends.

    Args:
        prompt: User prompt sent to the model.
        system: System prompt sent alongside it.
        model_name: Model to use; falls back to the ``MODEL_NAME`` env var.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The generated text (``str``) on HTTP 200; otherwise a dict with the
        keys ``success`` (False), ``error`` and ``model``.
    """
    model = model_name or os.getenv('MODEL_NAME')

    # Pricing assumptions: fixed exchange rate (presumably RUB per USD -- the
    # live lookup is not used) and output tokens priced at 2.5x input tokens.
    currency_rate = 83
    input_cost_per_token = 0.00000244 * currency_rate
    output_cost_per_token = input_cost_per_token * 2.5

    # Ollama reads the generation length cap from `num_predict`; the previous
    # `max_tokens` key inside `options` is not an Ollama option name.
    options = {
        "temperature": temperature,
        "num_predict": max_tokens
    }

    try:
        with langfuse.start_as_current_generation(
            name="Dataset test: pfp_1",
            model=model,
            input={
                "system": system,
                "prompt": prompt},
            model_parameters={
                "temperature": temperature,
                "max_tokens": max_tokens
            }
        ) as generation:
            payload = {
                "model": model,
                "prompt": prompt,
                "system": system,
                "stream": False,
                "options": options
            }

            response = requests.post(os.getenv('BASE_URL'), json=payload)

            if response.status_code != 200:
                error_msg = f"Ошибка API: {response.status_code} - {response.text}"

                generation.update(
                    output={"error": error_msg},
                    level="ERROR"
                )

                return {
                    "success": False,
                    "error": error_msg,
                    "model": model
                }

            full_response = response.json()
            generated_text = full_response["response"]

            # Token usage as reported by the server (0 when a field is absent).
            prompt_tokens = full_response.get("prompt_eval_count", 0)
            completion_tokens = full_response.get("eval_count", 0)

            input_cost = prompt_tokens * input_cost_per_token
            output_cost = completion_tokens * output_cost_per_token

            generation.update(
                output=generated_text,
                usage_details={
                    "input": prompt_tokens,
                    "output": completion_tokens,
                },
                cost_details={
                    "input": input_cost,
                    "output": output_cost,
                    "total": input_cost + output_cost
                },
                metadata={
                    "response_time": full_response.get("total_duration", 0),
                    "model": model,
                    "system": system,
                    "prompt": prompt,
                    "stream": False,
                    "options": options
                }
            )

            return generated_text
    finally:
        # Both branches return from inside the `with`, so a flush placed after
        # it never ran; `finally` sends the trace on every exit path.
        langfuse.flush()

#### Для локальной Ollama

In [None]:
def _estimate_token_count(text: str) -> int:
    """Rough token estimate: whitespace-separated words divided by 0.75."""
    return int(len(text.split()) / 0.75)


def generate_response_local_ollama(prompt: str,
                                   system: str,
                                   model_name: str = None,
                                   max_tokens: int = 500,
                                   temperature: float = 0.7):
    """Generate a completion through ChatOllama and record it in Langfuse.

    Args:
        prompt: User prompt sent as the human message.
        system: System prompt sent as the system message.
        model_name: Model to use; falls back to the ``MODEL_NAME`` env var.
        max_tokens: Upper bound on generated tokens (passed as ``num_predict``).
        temperature: Sampling temperature.

    Returns:
        The generated text (``str``) on success; if anything raises while
        calling the model or recording the result, a dict with the keys
        ``success`` (False), ``error`` and ``model``.
    """
    model = model_name or os.getenv('MODEL_NAME')

    try:
        with langfuse.start_as_current_generation(
            name="Dataset test: pfp_1",
            model=model,
            input={
                "system": system,
                "prompt": prompt},
            model_parameters={
                "temperature": temperature,
                "max_tokens": max_tokens
            }
        ) as generation:

            try:
                llm = ChatOllama(
                    model=model,
                    temperature=temperature,
                    num_predict=max_tokens
                )

                response = llm.invoke([
                    ("system", system),
                    ("human", prompt)
                ])

                # The attribute name previously contained a Cyrillic letter,
                # so every call raised AttributeError and took the error path.
                generated_text = response.content

                # Prefer the token usage attached to the model response; fall
                # back to a word-count estimate only when it is missing.
                usage = getattr(response, "usage_metadata", None) or {}
                prompt_tokens = usage.get("input_tokens")
                if prompt_tokens is None:
                    prompt_tokens = _estimate_token_count(f"{system}\n\n{prompt}")
                completion_tokens = usage.get("output_tokens")
                if completion_tokens is None:
                    completion_tokens = _estimate_token_count(generated_text)

                # Pricing assumptions: fixed exchange rate (presumably RUB per
                # USD) and output tokens priced at 2.5x input tokens.
                currency_rate = 83
                input_cost_per_token = 0.00000244 * currency_rate
                output_cost_per_token = input_cost_per_token * 2.5

                input_cost = prompt_tokens * input_cost_per_token
                output_cost = completion_tokens * output_cost_per_token

                generation.update(
                    output=generated_text,
                    usage_details={
                        "input": prompt_tokens,
                        "output": completion_tokens,
                    },
                    cost_details={
                        "input": input_cost,
                        "output": output_cost,
                        "total": input_cost + output_cost
                    },
                    metadata={
                        "model": model,
                        "system": system,
                        "prompt": prompt,
                        "temperature": temperature,
                        "max_tokens": max_tokens
                    }
                )

                return generated_text

            except Exception as e:
                # Best-effort reporting: record the failure on the trace and
                # hand the caller an error dict instead of raising.
                error_msg = f"Ошибка ChatOllama: {str(e)}"

                generation.update(
                    output={"error": error_msg},
                    level="ERROR"
                )

                return {
                    "success": False,
                    "error": error_msg,
                    "model": model
                }
    finally:
        # Every path returns from inside the `with`, so a flush placed after
        # it never ran; `finally` sends the trace on every exit path.
        langfuse.flush()

### Chat

In [8]:
from langchain.agents import create_agent
from langchain_ollama import ChatOllama
from langgraph.checkpoint.memory import InMemorySaver  
from langfuse.langchain import CallbackHandler

In [19]:
# Chat model used by the conversational agent.
model = ChatOllama(
    model="llama3.1",
    validate_model_on_init=True,
    temperature=0.4,
    # ChatOllama limits the generation length through `num_predict` (the same
    # option the local-Ollama helper in this notebook uses); `max_tokens` is
    # not one of its options, so the intended 2048-token cap was not applied.
    num_predict=2048,
)

# In-memory conversation state: the history is kept per thread_id and is lost
# when the kernel restarts.
checkpointer = InMemorySaver()
# Callback handler passed to each agent invocation in the cells below.
langfuse_handler = CallbackHandler()
pt = ProfileText()

chat_agent = create_agent(
    model,
    system_prompt=pt.system_prompt,
    checkpointer=checkpointer,
)

In [None]:
def run_chat_assistant(prompt: str,
                       sender_id: str,
                       recipient_id: str,
                       system_prompt: str = pt.system_prompt,
                       temperature: float = 0.4,
                       num_predict: int = 2048):
    """Send one user message to the chat agent and return the reply text.

    Args:
        prompt: Message from the user.
        sender_id: Identifier of the sender; part of the conversation key.
        recipient_id: Identifier of the recipient; part of the conversation key.
        system_prompt: Accepted for interface compatibility; not used here
            (the agent receives its system prompt when ``chat_agent`` is built).
        temperature: Accepted for interface compatibility; not used here.
        num_predict: Accepted for interface compatibility; not used here.

    Returns:
        The ``content`` of the last message in the agent's result.
    """
    # Delimited string key. Concatenating the raw ids and casting to int made
    # ('1', '23') and ('12', '3') share one conversation thread and raised
    # ValueError for any non-numeric id.
    thread_id = f"{sender_id}:{recipient_id}"

    result = chat_agent.invoke(
        {"messages": [{"role": "human", "content": prompt}]},
        config={"configurable": {"thread_id": thread_id},
                "callbacks": [langfuse_handler]},
    )
    return result['messages'][-1].content
    

In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional
import uvicorn

# FastAPI application object; the /chat route below is registered on it.
app = FastAPI()

class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` endpoint.

    ``prompt``, ``sender_id`` and ``recipient_id`` are required. The remaining
    fields are optional; the endpoint handler in this cell forwards only the
    three required fields to ``run_chat_assistant``.
    """
    prompt: str
    sender_id: str
    recipient_id: str
    system_prompt: Optional[str] = None
    first_message: Optional[str] = "Помоги подобрать подарок"
    temperature: Optional[float] = 0.4
    num_predict: Optional[int] = 2048

class ChatResponse(BaseModel):
    """Response body returned by the ``/chat`` endpoint."""
    answer: str  # reply text produced by the chat assistant
    status: str = "success"  # defaults to "success" when not set explicitly

@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Handle ``POST /chat``: pass the message to the assistant, return its reply.

    Only ``prompt``, ``sender_id`` and ``recipient_id`` from the request are
    forwarded to ``run_chat_assistant``.

    Raises:
        HTTPException: status 500 with the error text if anything fails.
    """
    # Function-scope import keeps this cell self-contained.
    import asyncio

    try:
        # run_chat_assistant is synchronous and blocks while the model
        # generates; calling it directly inside an `async def` handler would
        # stall the event loop for every other request, so run it in a thread.
        answer = await asyncio.to_thread(
            run_chat_assistant,
            prompt=request.prompt,
            sender_id=request.sender_id,
            recipient_id=request.recipient_id,
        )

        return ChatResponse(answer=answer)

    except Exception as e:
        # Chain the original exception so the real traceback stays in the logs.
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
    
# Start the HTTP server for `app`.
# NOTE(review): inside an already running Jupyter event loop `uvicorn.run`
# may refuse to start -- confirm how this cell is meant to be executed.
uvicorn.run(
    app,
    # Binds to this one interface address only; use "0.0.0.0" to accept
    # connections on every interface.
    host="26.205.227.135",
    port=8000,
    # `reload=True` was removed: uvicorn supports auto-reload only when the
    # application is given as an import string ("module:app"); with an app
    # object it logs a warning and exits instead of serving.
)

In [37]:
# Ad-hoc check of the single-turn assistant; the positional arguments follow
# the (prompt, sender_id, recipient_id) signature defined above.
run_chat_assistant("как дела",'12','12')


In [None]:
def run_chat_assistant(sender_id: str,
                       recipient_id: str,
                       system_prompt: str = pt.system_prompt,
                       first_message: str = "Помоги подобрать подарок",
                       temperature: float = 0.4,
                       num_predict: int = 2048):
    """Run an interactive console chat with the agent.

    Sends ``first_message`` to the agent, prints the reply, then reads user
    input in a loop until "/exit", "exit" or "quit" is entered. Empty input
    is ignored.

    NOTE: this definition reuses the name of the single-turn
    ``run_chat_assistant(prompt, sender_id, recipient_id, ...)`` defined
    earlier in the notebook; once this cell runs, the earlier function is
    no longer reachable under that name.

    Args:
        sender_id: Identifier of the sender; part of the conversation key.
        recipient_id: Identifier of the recipient; part of the conversation key.
        system_prompt: Accepted for interface compatibility; not used here.
        first_message: Opening message sent on the user's behalf.
        temperature: Accepted for interface compatibility; not used here.
        num_predict: Accepted for interface compatibility; not used here.
    """
    # Delimited string key. Concatenating the raw ids and casting to int made
    # ('1', '23') and ('12', '3') share one conversation thread and raised
    # ValueError for any non-numeric id.
    thread_id = f"{sender_id}:{recipient_id}"

    def ask(text: str) -> str:
        """Send one human message on this thread and return the reply text."""
        result = chat_agent.invoke(
            {"messages": [{"role": "human", "content": text}]},
            config={"configurable": {"thread_id": thread_id},
                    "callbacks": [langfuse_handler]},
        )
        return result['messages'][-1].content

    try:
        print('Бот:', ask(first_message))

        while True:
            user_msg = input("Ты: ").strip()
            if not user_msg:
                continue
            if user_msg.lower() in ("/exit", "exit", "quit"):
                print("Выход...")
                break

            print("Бот:", ask(user_msg))
            langfuse.flush()
    finally:
        # Also send the opening exchange, and anything still pending if the
        # loop ends through an exception.
        langfuse.flush()


AttributeError: 'dict' object has no attribute 'text'

### Запрос модели

In [None]:
import pandas as pd

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('../../data/data.csv')

In [None]:
pt = ProfileText()
# Process at most the first two rows; the bound also keeps `iloc` in range
# when the dataset has fewer than two rows.
for row in range(min(2, len(data))):
    status = DataPreprocessor.test_single_row_pipeline(data.iloc[row])
    if status is False or status.empty:
        # Preprocessing produced nothing usable for this row. Skip it rather
        # than generating from the previous row's (or the default) profile.
        print(0)
        continue

    pt.info['sex'] = status['sex']
    pt.info['age'] = status['age']
    pt.info['info'] = status['info_clean']
    generate_response(pt.prompt_from_profile_1, prompt_templates.system_content_from_profile)


In [None]:
# Ad-hoc check: an off-topic question sent with a gift-assistant system prompt.
generate_response(
    prompt="привет какая погода в москве",
    system="ассистент по подбору подарков",
)