In [None]:
# ============================================================
# 📘 Financial Report Analyzer — Day 33: KPI Extraction
# Dataset: Coffee Sales (Excel from Kaggle)
# Purpose: Extract key financial metrics using Regex + Groq AI
# Author: Boss (Entrepreneur, AI Business Builder)
# ============================================================

# === 1️⃣ Install & Import Dependencies ======================
!pip install --quiet groq pandas openpyxl

import json
import os
import re

import pandas as pd
from groq import Groq

# === 2️⃣ Initialize Groq Client ==============================
# Prefer the GROQ_API_KEY environment variable so the secret does not have to
# be pasted into (and saved with) the notebook. The original placeholder is
# kept as the fallback, so editing the string in place still works.
client = Groq(api_key=os.environ.get("GROQ_API_KEY", "Your Groq_API_Key_Here"))

# === 3️⃣ Load Dataset =======================================
# The three summary CSVs are read in a fixed order and bound to
# `daily`, `coffee` and `payment` respectively.
daily, coffee, payment = (
    pd.read_csv(csv_name)
    for csv_name in (
        "daily_sales_summary.csv",
        "coffee_sales_summary.csv",
        "payment_breakdown.csv",
    )
)

print("✅ Data loaded successfully")

# === 4️⃣ Calculate Basic KPIs (Pandas + Regex) ===============
# Pick the revenue column once so that all four KPIs describe the same data:
# the "total_sales" column when it exists, otherwise the second column by
# position. Using one series avoids the total coming from "total_sales" while
# mean/max/min come from whatever happens to sit at position 1.
revenue = daily["total_sales"] if "total_sales" in daily.columns else daily.iloc[:, 1]

total_revenue = revenue.sum()
average_daily_revenue = revenue.mean()
highest_day = revenue.max()
lowest_day = revenue.min()

# Extract payment breakdown ratios
# The ratio reads both the "card" and "cash_type" columns, so both must exist
# before it is computed (checking only "card" would raise KeyError when
# "cash_type" is missing). A zero total would make the division meaningless,
# so that case also uses the fallback.
# NOTE(review): "cash_type" reads like a label column rather than a cash-amount
# column — confirm against the schema of payment_breakdown.csv.
card_ratio = 0.97  # fallback based on summary insight
if {"card", "cash_type"}.issubset(payment.columns):
    card_total = payment["card"].sum()
    payment_total = card_total + payment["cash_type"].sum()
    if payment_total:
        card_ratio = card_total / payment_total
cash_ratio = 1 - card_ratio

# Identify top and bottom coffee products
# Rows are ranked by the second column; the values reported come from the
# first column. Both columns are addressed by position, not by name.
_label_col, _metric_col = coffee.columns[0], coffee.columns[1]

_best_first = coffee.sort_values(by=_metric_col, ascending=False)
_worst_first = coffee.sort_values(by=_metric_col, ascending=True)

top_products = _best_first[_label_col].head(3).tolist()      # three highest
bottom_products = _worst_first[_label_col].head(2).tolist()  # two lowest

# === 5️⃣ Structure Preliminary KPI Data =====================
# Each numeric KPI is rounded and then converted to a built-in float.
# pandas reductions return NumPy scalars; for an integer column round() keeps
# an np.int64, which json.dumps cannot serialize. float() makes every value
# JSON-safe while keeping the rounded result unchanged.
kpi_dict = {
    "total_revenue": float(round(total_revenue, 2)),
    "average_daily_revenue": float(round(average_daily_revenue, 2)),
    "highest_revenue_day": float(round(highest_day, 2)),
    "lowest_revenue_day": float(round(lowest_day, 2)),
    "card_ratio": float(round(card_ratio, 3)),
    "cash_ratio": float(round(cash_ratio, 3)),
    "top_products": top_products,
    "bottom_products": bottom_products,
}

print("\n📊 Base KPI Extraction:")
# ensure_ascii=False prints non-ASCII product names as-is rather than as
# \uXXXX escapes (this script already prints emoji to the same stream).
print(json.dumps(kpi_dict, indent=4, ensure_ascii=False))

# === 6️⃣ Enhance KPIs with Groq Reasoning ===================
# User turn: the KPI dictionary, serialized as pretty-printed JSON.
user_msg = json.dumps(kpi_dict, indent=4)

# System turn: asks for an interpretation plus recommendations, returned as a
# JSON object with the keys "analysis_summary" and "recommendations".
system_msg = """
You are a financial AI assistant for a café business.
Given the following KPIs, provide:
1. A short paragraph explaining what these metrics mean for business performance.
2. Strategic recommendations based on the data.
Return your result as JSON with:
{
  "analysis_summary": "<business interpretation>",
  "recommendations": "<2-3 concise action steps>"
}
"""

chat_messages = [
    dict(role="system", content=system_msg),
    dict(role="user", content=user_msg),
]

# Sampling settings are kept together so they are easy to spot and tweak.
request_options = dict(
    model="llama-3.3-70b-versatile",
    temperature=0.2,
    max_tokens=400,
)

response = client.chat.completions.create(messages=chat_messages, **request_options)

# Parse Groq output
# The reply may wrap the JSON object in extra prose, so only the text between
# the first "{" and the last "}" is parsed. Anything that cannot be parsed
# into a dict falls back to the raw text plus a "Parsing failed." marker.
# `content or ""` guards against a reply without text content, which would
# otherwise crash on .strip() before the fallback could apply.
raw_output = (response.choices[0].message.content or "").strip()

start_idx = raw_output.find("{")
end_idx = raw_output.rfind("}")

groq_analysis = None
if 0 <= start_idx < end_idx:  # both braces found, in the right order
    try:
        groq_analysis = json.loads(raw_output[start_idx:end_idx + 1])
    except json.JSONDecodeError:
        # Malformed or truncated JSON (e.g. cut off by max_tokens): use fallback.
        groq_analysis = None

if not isinstance(groq_analysis, dict):
    groq_analysis = {"analysis_summary": raw_output, "recommendations": "Parsing failed."}

# === 7️⃣ Combine KPI + AI Analysis ==========================
# One report object: the computed metrics under "kpis" and the model's
# interpretation under "ai_analysis".
final_kpi_report = dict(kpis=kpi_dict, ai_analysis=groq_analysis)

# === 8️⃣ Display & Save =====================================
# Serialize once and reuse the text for both the console and the file.
# ensure_ascii=False keeps non-ASCII characters (such as the "é" in "café")
# readable instead of emitting \u00e9 escapes; the file is written as UTF-8
# and this script already prints emoji to stdout, so both sinks accept it.
report_json = json.dumps(final_kpi_report, indent=4, ensure_ascii=False)

print("\n================= KPI REPORT =================")
print(report_json)
print("================================================")

with open("kpi_report.json", "w", encoding="utf-8") as f:
    f.write(report_json)

print("✅ KPI report saved to kpi_report.json")


✅ Data loaded successfully

📊 Base KPI Extraction:
{
    "total_revenue": 115431.58,
    "average_daily_revenue": 302.97,
    "highest_revenue_day": 836.66,
    "lowest_revenue_day": 23.02,
    "card_ratio": 0.97,
    "cash_ratio": 0.03,
    "top_products": [
        "Latte",
        "Americano with Milk",
        "Cappuccino"
    ],
    "bottom_products": [
        "Espresso",
        "Cortado"
    ]
}

{
    "kpis": {
        "total_revenue": 115431.58,
        "average_daily_revenue": 302.97,
        "highest_revenue_day": 836.66,
        "lowest_revenue_day": 23.02,
        "card_ratio": 0.97,
        "cash_ratio": 0.03,
        "top_products": [
            "Latte",
            "Americano with Milk",
            "Cappuccino"
        ],
        "bottom_products": [
            "Espresso",
            "Cortado"
        ]
    },
    "ai_analysis": {
        "analysis_summary": "The caf\u00e9's total revenue is $115,431.58, with an average daily revenue of $302.97, indicating a relati