In [29]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from typing import List, Dict, Callable
import pprint

# Load variables from a local .env file into the process environment so the
# os.getenv lookup for the API key below can find them.
load_dotenv()

True

In [30]:
# Read the DeepSeek credential from the process environment.
api_key = os.getenv('DEEPSEEK_API_KEY')
# Display only a masked marker: cell outputs are stored inside the notebook
# file, so echoing the raw key would leak it to anyone the notebook reaches.
f"{api_key[:3]}<redacted>" if api_key else None

'sk-<redacted>'

In [51]:
# Prompt stages, applied in order. Each stage's reply becomes the input of
# the next one, so every stage's example format matches the previous stage's
# output format.

# Stage 1: pull every number out of the free text together with its metric.
_EXTRACT_STEP = """Extract only the numerical values and their associated metrics from the text.
    Format each as 'value: metric' on a new line.
    Notice if exist multiple metics in text, extract those metrics separately and give each value and appropriate metric name, don't miss any value or metric,e.g if text contains 'Sales increased from 8% to 92%' extract '8' and '92' and give them different metric names.
    Example format:
    92: customer satisfaction
    45%: revenue growth"""

# Stage 2: normalise the extracted values to percentages.
_NORMALIZE_STEP = """Convert all numerical values to percentages where possible.
    If not a percentage or points, convert to decimal (e.g., 92 points -> 92%).
    Keep one number per line.
    Example format:
    92%: customer satisfaction
    45%: revenue growth"""

# Stage 3: order the lines from largest to smallest value.
_SORT_STEP = """Sort all lines in descending order by numerical value.
    Keep the format 'value: metric' on each line.
    Example:
    92%: customer satisfaction
    87%: employee satisfaction"""

# Stage 4: render the sorted lines as a markdown table.
_TABLE_STEP = """Format the sorted data as a markdown table with columns:
    | Metric | Value |
    |:--|--:|
    | Customer Satisfaction | 92% |"""

data_processing_steps = [
    _EXTRACT_STEP,
    _NORMALIZE_STEP,
    _SORT_STEP,
    _TABLE_STEP,
]

# Sample free-text report used as the input of the prompt stages.
report = """
Q3 Performance Summary:
Our customer satisfaction score rose to 92 points this quarter.
Revenue grew by 45% compared to last year.
Market share is now at 23% in our primary market.
Customer churn decreased to 5% from 8%.
New user acquisition cost is $43 per user.
Product adoption rate increased to 78%.
Employee satisfaction is at 87 points.
Operating margin improved to 34%.
"""

In [32]:
def llm_call(prompt: str, system_prompt: str = "", model: str = "deepseek-chat"):
    """Send one chat-completion request to DeepSeek and return the reply message.

    Uses the module-level ``api_key`` for authentication.

    Args:
        prompt: Content of the user message.
        system_prompt: Content of the system message (sent even when empty).
        model: Model name forwarded to the API.

    Returns:
        The first choice's message object; the reply text is available on
        its ``.content`` attribute.
    """
    messages = [
        {'role': 'system', 'content': system_prompt},
        {"role": "user", "content": prompt},
    ]
    # DeepSeek is reached through the OpenAI client by overriding base_url.
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
    response = client.chat.completions.create(
        # Forward the caller's choice; the name used to be hard-coded here,
        # which silently ignored the ``model`` argument.
        model=model,
        messages=messages,
        temperature=0.1,
    )
    return response.choices[0].message

In [33]:
def chain(input: str, prompts: List[str]):
    """Chain multiple LLM calls sequentially, passing results between steps.

    Args:
        input: Text handed to the first prompt.
        prompts: Instructions applied in order. Each request is the prompt,
            a newline, and ``Input:`` followed by the previous step's text.

    Returns:
        Whatever ``llm_call`` returned for the last prompt, or ``input``
        unchanged when ``prompts`` is empty.
    """
    result = input
    text = input
    for i, prompt in enumerate(prompts, 1):
        print(f"\nStep {i}:")
        result = llm_call(f"{prompt}\nInput: {text}")
        print(result)
        # Feed only the reply text forward. Interpolating the message object
        # itself would put its repr (role, tool_calls, ...) into the next
        # prompt. Plain-string replies pass through unchanged.
        text = getattr(result, "content", result)
    return result

In [49]:
# Extraction instructions, sent below as the system prompt of a single call.
exact_prompt = """
Extract only the numerical values and their associated metrics from the text.
    Format each as 'value: metric' on a new line.
    Notice if exist multiple metics in text, extract those metrics separately and give each value and appropriate metric name, don't miss any value or metric,e.g if text contains 'Sales increased from 8% to 92%' extract '8' and '92' and give them different metric names.
    Example format:
    92: customer satisfaction
    45%: revenue growth
"""

# One-off call: the report is the user message, the instructions above are
# the system prompt. Only the reply text is pretty-printed.
response = llm_call(prompt=report, system_prompt=exact_prompt)
pprint.pprint(response.content)

('92: customer satisfaction score  \n'
 '45%: revenue growth  \n'
 '23%: market share  \n'
 '5%: customer churn  \n'
 '8%: previous customer churn  \n'
 '$43: new user acquisition cost  \n'
 '78%: product adoption rate  \n'
 '87: employee satisfaction  \n'
 '34%: operating margin')


In [52]:
# Run the report through every prompt stage in order, then show the text of
# the final reply.
formatted_result = chain(input=report, prompts=data_processing_steps)
pprint.pprint(formatted_result.content)


Step 1:
ChatCompletionMessage(content='92: customer satisfaction score  \n45%: revenue growth  \n23%: market share  \n5%: customer churn  \n8%: previous customer churn  \n$43: new user acquisition cost  \n78%: product adoption rate  \n87: employee satisfaction  \n34%: operating margin', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)

Step 2:
ChatCompletionMessage(content='92%: customer satisfaction score  \n45%: revenue growth  \n23%: market share  \n5%: customer churn  \n8%: previous customer churn  \n$43: new user acquisition cost  \n78%: product adoption rate  \n87%: employee satisfaction  \n34%: operating margin', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)

Step 3:
ChatCompletionMessage(content="Here is the input sorted in descending order by numerical value, maintaining the format 'value: metric':\n\n```\n92%: customer satisfaction score  \n87%: employee satisfaction  \n78%: product adoption rate  \n45%: reve