<a href="https://colab.research.google.com/github/tusharDeb888/FlowFi.ai-PrototypeV1.0/blob/main/FlowFi_ai_Prototype_V1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Install dependencies

In [None]:
!pip install -q gradio pandas plotly
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q langchain langchain_community faiss-cpu sentence-transformers transformers torch soundfile librosa accelerate

#Import libraries



In [None]:
import warnings
warnings.filterwarnings("ignore")
import os
import json
import re
import tempfile
import uuid
from datetime import datetime
from typing import Optional, List, Tuple, Dict
import numpy as np
import pandas as pd
import plotly.express as px
import gradio as gr
import soundfile as sf
import librosa
import whisper
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from transformers import pipeline as hf_pipeline

#Model & RAG Configuration

In [None]:
# Storage locations and model identifiers.
DATA_FILE = "tracker_data.json"  # JSON file read by load_data() and written by save_data()
VECTOR_DIR = "faiss_index"  # directory the FAISS index is saved to and loaded from
WHISPER_MODEL = "base"  # model name passed to whisper.load_model()
FLAN_MODEL = "google/flan-t5-base"  # text2text-generation model wrapped as the chat LLM

# RAG / audio config.
EMB_MODEL = "BAAI/bge-base-en-v1.5"  # embedding model used to build and query the index
TARGET_SR = 16000  # sample rate (Hz) recordings are resampled to before being written as WAV

# Silence HuggingFace parallelism warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

#Application's memory system

* DATA_FILE (tracker_data.json)
    * The on-disk JSON file where all expense and budget data is stored permanently, including while the app is not running.
* app_data:
    * The live, in-memory copy of that data which the application actively works with (reads from, adds to, deletes from).
* load_data():
    * Reads DATA_FILE from disk into memory when the app starts.

* save_data():
    * Writes the in-memory data back to DATA_FILE after changes are made.

In [None]:
def load_data() -> Dict:
    """Load the tracker state from ``DATA_FILE``.

    Returns:
        A dict that always contains an ``"expenses"`` list and a ``"budgets"``
        dict. When the file does not exist an empty state is returned. Every
        expense that lacks an ``"id"`` is given a fresh UUID string.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON. This is
            deliberately not swallowed, so a damaged file is never silently
            replaced by an empty state on the next save.
    """
    if not os.path.exists(DATA_FILE):
        return {"expenses": [], "budgets": {}}

    with open(DATA_FILE, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Callers index data["expenses"] directly, so guarantee both top-level
    # keys exist (and are not null) even for hand-edited or older files.
    if data.get("expenses") is None:
        data["expenses"] = []
    if data.get("budgets") is None:
        data["budgets"] = {}

    for expense in data["expenses"]:
        if "id" not in expense:
            expense["id"] = str(uuid.uuid4())
    return data

def save_data(data: Dict):
    """Persist ``data`` to ``DATA_FILE`` as pretty-printed UTF-8 JSON.

    The JSON is first written to a sibling temporary file and then moved over
    ``DATA_FILE`` with ``os.replace``. If serialisation or writing fails, the
    previously saved file is left untouched instead of being truncated.

    Args:
        data: The application state to serialise.

    Raises:
        TypeError, ValueError, OSError: propagated from ``json.dump`` or the
            file operations; the temporary file is removed first.
    """
    tmp_path = f"{DATA_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, DATA_FILE)
    except BaseException:
        # Never leave a half-written temp file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

# Live in-memory state: the functions in this file read and mutate this dict
# and persist it with save_data().
app_data = load_data()

#Load AI Models

In [None]:
print("Loading AI models... This might take a few minutes.")
try:
    # Imported here because this is the only place torch is used directly.
    import torch

    # Speech-to-text model used for voice expenses.
    asr_model = whisper.load_model(WHISPER_MODEL)
    print("Whisper model loaded Successfully.....")

    # Ask the runtime whether a GPU is usable instead of inferring it from the
    # CUDA_VISIBLE_DEVICES variable, which is normally unset on GPU machines
    # and can be set to an empty string on CPU-only ones.
    device = 0 if torch.cuda.is_available() else -1
    flan_pipe = hf_pipeline("text2text-generation", model=FLAN_MODEL, device=device, max_new_tokens=256)
    llm = HuggingFacePipeline(pipeline=flan_pipe)
    print("Flan-T5 LLM loaded Successfully.....")

    # Embedding model used to build and query the FAISS index.
    embeddings = HuggingFaceEmbeddings(model_name=EMB_MODEL)
    print("Embeddings model loaded Successfully.....")
except Exception as e:
    # Report which step failed, then stop: nothing below works without the models.
    print(f"Error loading models: {e}")
    raise

# Speech to Text Setup

In [None]:
def save_numpy_audio_to_wav(numpy_audio, target_sr=TARGET_SR) -> str:
    """Write a ``(sample_rate, samples)`` recording to a temporary WAV file.

    Integer samples are scaled to floats by the maximum of their dtype,
    multi-channel audio is averaged to mono, and the signal is resampled to
    ``target_sr`` when its sample rate differs.

    Args:
        numpy_audio: A ``(sample_rate, numpy_array)`` pair.
        target_sr: Sample rate of the written file.

    Returns:
        Path of the WAV file. It is created with ``delete=False``, so the
        caller is responsible for removing it.

    Raises:
        ValueError: if ``numpy_audio`` is None.
    """
    if numpy_audio is None:
        raise ValueError("No audio provided")
    sr, audio = numpy_audio
    if not np.issubdtype(audio.dtype, np.floating):
        audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

    # Reserve a unique path, then close our handle before soundfile opens it:
    # the original kept the handle open for the life of the process (a leak),
    # and a second open of the same file fails on some platforms.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        wav_path = tmp_wav.name
    sf.write(wav_path, audio, target_sr)
    return wav_path

def parse_amount(text: str) -> Optional[float]:
    """Extract the first amount mentioned in ``text``.

    Commas used as thousands separators (``1,200``, ``1,234.56``, or Indian
    grouping such as ``1,00,000``) are removed. A single comma that is not
    followed by exactly three digits is still read as a decimal comma
    (``12,50`` -> 12.5), as before.

    Args:
        text: Free text, e.g. a speech transcription.

    Returns:
        The amount as a float, or None when ``text`` contains no number.
    """
    match = re.search(
        # Digit groups separated by commas, where the last group has exactly 3 digits.
        r"(?P<grouped>\d{1,3}(?:,\d{2,3})*,\d{3}(?!\d)(?:\.\d+)?)"
        # Anything else: plain integer or a single '.'/',' decimal separator.
        r"|(?P<plain>\d+(?:[.,]\d+)?)",
        text,
    )
    if match is None:
        return None
    grouped = match.group("grouped")
    if grouped is not None:
        return float(grouped.replace(",", ""))
    return float(match.group("plain").replace(",", "."))

def parse_category(text: str) -> str:
    """Guess an expense category from keywords found in ``text``.

    Keywords are matched as whole words (optionally with a plural ``s``/``es``),
    so "different" no longer counts as "rent" and "business" no longer counts
    as "bus". Categories are checked in the order listed below and the first
    one with a matching keyword wins.

    Args:
        text: Free text, e.g. a speech transcription.

    Returns:
        The matching category name, or ``"Misc"`` when no keyword is found.
    """
    keywords = {
        "Food": ["food", "restaurant", "lunch", "dinner", "snack", "coffee"],
        "Groceries": ["groceries", "supermarket", "market"],
        "Transport": ["taxi", "uber", "bus", "train", "metro", "fuel"],
        "Utilities": ["bill", "electricity", "internet", "phone", "water"],
        "Rent": ["rent", "mortgage"],
        "Shopping": ["shopping", "clothes", "electronics", "store"],
        "Entertainment": ["movie", "concert", "game", "netflix"],
    }
    text_l = text.lower()
    for cat, kws in keywords.items():
        alternatives = "|".join(re.escape(kw) for kw in kws)
        if re.search(rf"\b(?:{alternatives})(?:es|s)?\b", text_l):
            return cat
    return "Misc"

# RAG Implementation



In [None]:
# Prompt for the question-answering chain: {context} and {question} are the two
# input variables declared on QA_PROMPT below.
PROMPT_TEMPLATE = """You are an expert financial assistant. Use the following expense records to answer the user's question concisely. Provide calculations if helpful. If you don't find the answer in the records, say "I could not find any relevant expenses for your question.ask me like total spent on rent.."

Context (Expense Records):
{context}

Question:
{question}

Answer:"""
QA_PROMPT = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])

# Cached retriever/chain pair. build_vector_store() resets both to None, and
# get_rag_components() fills them in again on the next call.
_RAG_COMPONENTS = {"retriever": None, "chain": None}

def build_vector_store():
    """Rebuild the FAISS index in ``VECTOR_DIR`` from the current expenses.

    Each expense becomes one sentence describing its date, amount, category
    and description. The cached retriever/chain in ``_RAG_COMPONENTS`` is
    always cleared, so the next ``get_rag_components()`` call reloads the
    fresh index.

    When there are no expenses, any index left on disk is removed. Otherwise
    ``get_rag_components()`` would find the old directory and answer questions
    from records that no longer exist.
    """
    import shutil  # local import: only needed for the empty-data cleanup

    global _RAG_COMPONENTS
    # Invalidate the cache up front so a failure below cannot leave a
    # retriever that points at outdated data.
    _RAG_COMPONENTS = {"retriever": None, "chain": None}

    expenses = app_data.get("expenses", [])
    if not expenses:
        shutil.rmtree(VECTOR_DIR, ignore_errors=True)
        return

    texts = [
        f"On {e.get('date')}, an expense of Rs. {e.get('amount', 0.0):.2f} was recorded in the '{e.get('category', 'Misc')}' category. The description is: '{e.get('text', 'no description')}'."
        for e in expenses
    ]

    store = FAISS.from_texts(texts, embeddings)
    store.save_local(VECTOR_DIR)
    print("✅ Vector store rebuilt successfully.")

def get_rag_components():
    """Return the cached ``(retriever, chain)`` pair, creating it on first use.

    Returns ``(None, None)`` when nothing is cached and no index directory
    exists at ``VECTOR_DIR``.
    """
    # Fast path: a chain has already been built since the last reset.
    if _RAG_COMPONENTS.get("chain") is not None:
        return _RAG_COMPONENTS["retriever"], _RAG_COMPONENTS["chain"]

    if not os.path.exists(VECTOR_DIR):
        return None, None

    # NOTE(review): allow_dangerous_deserialization=True is passed when loading
    # the saved index; this assumes VECTOR_DIR only ever holds files written
    # by build_vector_store() - confirm the directory is not user-supplied.
    index = FAISS.load_local(VECTOR_DIR, embeddings, allow_dangerous_deserialization=True)
    top_k_retriever = index.as_retriever(search_type="similarity", search_kwargs={"k": 5})

    # "stuff" chain driven by the custom QA prompt.
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff", prompt=QA_PROMPT)

    _RAG_COMPONENTS.update(retriever=top_k_retriever, chain=qa_chain)
    print("✅ RAG chain initialized.")

    return _RAG_COMPONENTS["retriever"], _RAG_COMPONENTS["chain"]

# Initialize vector store on startup, so the index saved to VECTOR_DIR is built
# from the expenses currently held in app_data.
build_vector_store()

def _rows_for_category_phrase(df: pd.DataFrame, phrase: str) -> Tuple[str, pd.DataFrame]:
    """Resolve a phrase captured from a chat query to ``(label, matching rows)``.

    A stored category that appears as a whole word inside the phrase wins, so
    "spent on rent" resolves to the "Rent" rows. Otherwise the phrase itself is
    used as a case-insensitive substring of the category, as before.
    """
    phrase = phrase.strip()
    if not phrase or df.empty:
        return phrase.capitalize(), df.iloc[0:0]

    categories = df["category"].fillna("").astype(str)
    named = [
        cat for cat in categories.unique()
        if cat and re.search(rf"\b{re.escape(cat)}\b", phrase, re.IGNORECASE)
    ]
    if named:
        return ", ".join(named), df[categories.isin(named)]
    # The phrase only contains word characters and whitespace, so a literal
    # substring test is equivalent to the regex test used previously.
    return phrase.capitalize(), df[categories.str.contains(phrase, case=False, regex=False)]


def rag_chat_enhanced(user_input: str, chat_history: List):
    """Answer a chat question about the recorded expenses.

    "total ..." and "show/list ... expenses" questions are answered directly
    from the expense table when they name a known category. Everything else
    goes to the retrieval + LLM chain.

    Args:
        user_input: The question typed by the user.
        chat_history: List of ``(question, answer)`` pairs shown so far; may be None.

    Returns:
        ``(chat_history, table_or_None, "")``: the updated history, an optional
        DataFrame of matching expenses, and an empty string for the input box.
    """
    chat_history = chat_history or []

    def reply(answer: str, table=None):
        chat_history.append((user_input, answer))
        return chat_history, table, ""

    # Nothing to answer: leave the history untouched instead of querying the LLM.
    if not user_input or not user_input.strip():
        return chat_history, None, ""

    # Keep the fast, rule-based approach for simple math queries
    total_match = re.search(r"(?:total|sum|how much.*on)\s+([\w\s]+)", user_input, re.IGNORECASE)
    list_match = re.search(r"(?:show|list|display)\s+(?:all\s+)?([\w\s]+)\s+expenses", user_input, re.IGNORECASE)

    df = get_expenses_df()
    if total_match:
        category, cat_df = _rows_for_category_phrase(df, total_match.group(1))
        if not cat_df.empty:
            total = cat_df['amount'].sum()
            return reply(f"The total spending on **{category}** is **₹{total:,.2f}**.")
    elif list_match:
        category, cat_df = _rows_for_category_phrase(df, list_match.group(1))
        if not cat_df.empty:
            return reply(
                f"Here are all expenses for **{category}**:",
                cat_df[['date', 'category', 'amount', 'text']],
            )

    # Fallback to the LLM RAG chain for complex, semantic questions
    retriever, chain = get_rag_components()
    if not retriever or not chain:
        return reply("The AI is not ready. Please add at least one expense to activate it.")

    # 1. Retrieve relevant documents from the vector store
    relevant_docs = retriever.invoke(user_input)
    if not relevant_docs:
        return reply("I couldn't find any expenses related to your question.")

    # 2. Call the LLM chain with the retrieved docs and the question
    result = chain.invoke({"input_documents": relevant_docs, "question": user_input}, return_only_outputs=True)
    return reply(result.get("output_text", "Sorry, I had trouble finding an answer."))

# Data Processing & Visualization

In [None]:
def get_expenses_df() -> pd.DataFrame:
    """Return all expenses as a DataFrame sorted by date, newest first.

    Malformed stored values no longer break every dashboard refresh:
    unparseable amounts become 0.0, unparseable dates become NaT (sorted
    last), and any expected column missing from the records is added empty.

    Returns:
        A DataFrame with at least the columns ``id``, ``date``, ``category``,
        ``amount`` and ``text``; empty (with those columns) when there are no
        expenses.
    """
    columns = ["id", "date", "category", "amount", "text"]
    expenses = app_data.get("expenses", [])
    if not expenses:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(expenses)
    for col in columns:
        if col not in df.columns:
            df[col] = None
    df["amount"] = pd.to_numeric(df["amount"], errors="coerce").fillna(0.0)
    # errors="coerce": one bad date string must not raise for the whole table.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    return df.sort_values(by="date", ascending=False)

def update_dashboard_cards(df: pd.DataFrame):
    """Format the two headline figures for the dashboard cards.

    Returns:
        ``(total_spend, average_transaction)`` as rupee-formatted strings;
        both are "₹0.00" for an empty frame.
    """
    if df.empty:
        return "₹0.00", "₹0.00"
    amounts = df["amount"]
    return tuple(f"₹{figure:,.2f}" for figure in (amounts.sum(), amounts.mean()))

def update_visualizations(df: pd.DataFrame):
    """Build the pie, bar and line figures for the analysis tab.

    Returns:
        ``(pie_fig, bar_fig, line_fig)``, or three ``None`` values when ``df``
        is empty.
    """
    if df.empty:
        return None, None, None

    # Share of spending per category.
    breakdown = px.pie(df, values="amount", names="category", title="Expense Breakdown", hole=0.3)

    # Category totals, largest first.
    per_category = df.groupby("category")["amount"].sum().reset_index()
    per_category = per_category.sort_values("amount", ascending=False)
    ranking = px.bar(per_category, x="category", y="amount", title="Spending by Category", text_auto='.2s')

    # Daily totals over time.
    per_day = df.groupby(df['date'].dt.date)["amount"].sum().reset_index()
    trend = px.line(per_day, x="date", y="amount", title="Spending Over Time", markers=True)

    return breakdown, ranking, trend

# CRUD Operations

In [None]:
def refresh_all_components():
    """Recompute every value shown on the dashboard.

    Returns:
        A 7-tuple: expenses DataFrame, total-spend text, average-transaction
        text, pie figure, bar figure, line figure, budget-alert text.
    """
    expenses_df = get_expenses_df()
    cards = update_dashboard_cards(expenses_df)
    charts = update_visualizations(expenses_df)

    # Budget alerts are evaluated for the current calendar month only.
    month_key = datetime.now().strftime("%Y-%m")
    alert_lines = check_budget_alerts_for_month(month_key)
    if alert_lines:
        alert_text = "\n".join(alert_lines)
    else:
        alert_text = "No budget alerts."

    return (expenses_df, *cards, *charts, alert_text)

def add_expense(text: str, amount: float, category: str, date_str: str):
    """Validate and store a new expense, then refresh the dashboard.

    Args:
        text: Free-text description; may be empty or None.
        amount: Expense amount; required and non-zero.
        category: Category name; required. Stored stripped and capitalised.
        date_str: Date of the expense. Any format pandas can parse is accepted
            and stored as ``YYYY-MM-DD``; blank means today.

    Returns:
        The tuple produced by ``refresh_all_components()``.

    Raises:
        gr.Error: if amount or category is missing, or the date is invalid.
    """
    if not amount or not category or not category.strip():
        raise gr.Error("Amount and Category are required.")

    if date_str and date_str.strip():
        # Reject unparseable dates here: once saved, a bad date would make
        # every later date conversion of the expense table fail.
        try:
            date_value = pd.to_datetime(date_str.strip()).strftime("%Y-%m-%d")
        except (ValueError, TypeError) as err:
            raise gr.Error("Date must be a valid date, e.g. 2024-01-31.") from err
    else:
        date_value = datetime.now().strftime("%Y-%m-%d")

    entry = {
        "id": str(uuid.uuid4()),
        "date": date_value,
        "category": category.strip().capitalize(),
        "amount": float(amount),
        "text": (text or "").strip(),
    }
    app_data["expenses"].append(entry)
    save_data(app_data)
    build_vector_store()
    gr.Info("✅ Manual expense added.")
    return refresh_all_components()

def add_expense_voice(audio_input):
    """Transcribe a recorded expense, parse it and store it.

    Args:
        audio_input: ``(sample_rate, numpy_array)`` from the microphone
            component, or None when nothing was recorded.

    Returns:
        The dashboard tuple from ``refresh_all_components()``. It is returned
        even when no amount could be detected, so every bound output component
        still receives a value.

    Raises:
        gr.Error: if no audio was recorded, or if ``add_expense`` rejects the
            parsed values.
    """
    if audio_input is None:
        raise gr.Error("No audio recorded. Please record your expense.")

    wav_path = save_numpy_audio_to_wav(audio_input)
    try:
        transcription = asr_model.transcribe(wav_path, fp16=False)["text"]
    finally:
        # The WAV file is created with delete=False, so remove it ourselves.
        # Failing to delete a temp file is not worth aborting the request.
        try:
            os.remove(wav_path)
        except OSError:
            pass

    amount = parse_amount(transcription)
    if amount is None:
        gr.Warning("Could not detect an amount in audio. Please add it manually.")
        return refresh_all_components()

    category = parse_category(transcription)
    # add_expense() already refreshes the dashboard; reuse its result rather
    # than recomputing everything a second time.
    refreshed = add_expense(transcription, amount, category, datetime.now().strftime("%Y-%m-%d"))
    gr.Info(f"✅ Voice expense added: {transcription}")
    return refreshed

def update_expense(expense_id: str, date: str, category: str, amount: float, text: str):
    """Overwrite the fields of an existing expense and refresh the dashboard.

    Args:
        expense_id: ID of the expense to change.
        date: New date. Any format pandas can parse; stored as ``YYYY-MM-DD``.
        category: New category; stored stripped and capitalised.
        amount: New amount.
        text: New description; may be empty or None.

    Returns:
        The tuple produced by ``refresh_all_components()``.

    Raises:
        gr.Error: if no expense is selected, a required field is missing or
            invalid, or the ID does not exist. Nothing is saved in these cases.
    """
    if not expense_id:
        raise gr.Error("No expense selected to update.")
    if amount is None or not category or not category.strip():
        raise gr.Error("Amount and Category are required.")

    try:
        if not date or not date.strip():
            raise ValueError("empty date")
        date_value = pd.to_datetime(date.strip()).strftime("%Y-%m-%d")
    except (ValueError, TypeError) as err:
        raise gr.Error("Date must be a valid date, e.g. 2024-01-31.") from err

    target = next((exp for exp in app_data["expenses"] if exp.get("id") == expense_id), None)
    if target is None:
        # Previously an unknown ID still rewrote the file, rebuilt the index
        # and reported success.
        raise gr.Error("The selected expense no longer exists.")

    target.update({
        "date": date_value,
        "category": category.strip().capitalize(),
        "amount": float(amount),
        "text": (text or "").strip(),
    })
    save_data(app_data)
    build_vector_store()
    gr.Info("✅ Expense updated successfully!")
    return refresh_all_components()

def delete_expense(expense_id: str):
    """Remove an expense by ID, refresh the dashboard and clear the edit form.

    Args:
        expense_id: ID of the expense to remove.

    Returns:
        The ``refresh_all_components()`` tuple followed by five empty strings
        that blank the edit-form fields.

    Raises:
        gr.Error: if no expense is selected.
    """
    if not expense_id:
        raise gr.Error("No expense selected to delete.")

    remaining = [exp for exp in app_data["expenses"] if exp.get("id") != expense_id]
    if len(remaining) == len(app_data["expenses"]):
        # Nothing matched: skip the file write and the index rebuild, and do
        # not claim that something was deleted.
        gr.Warning("Expense not found; it may already have been deleted.")
    else:
        app_data["expenses"] = remaining
        save_data(app_data)
        build_vector_store()
        gr.Info("🗑️ Expense deleted successfully!")
    return *refresh_all_components(), "", "", "", "", ""


#Budget Logic

In [None]:
def set_budget(category: str, amount: float):
    """Store a monthly budget for a category and return the budgets table.

    Args:
        category: Category name; stored stripped and capitalised.
        amount: Budget amount; must be a positive number.

    Returns:
        A DataFrame with columns ``Category`` and ``Budget`` listing all budgets.

    Raises:
        gr.Error: if the category is blank or the amount is missing,
            non-numeric or not positive.
    """
    name = (category or "").strip().capitalize()
    try:
        value = None if amount is None else float(amount)
    except (TypeError, ValueError):
        value = None
    # "not value > 0" also rejects NaN, which "value <= 0" would let through.
    if not name or value is None or not value > 0:
        raise gr.Error("Valid category and positive budget required.")

    app_data.setdefault("budgets", {})[name] = value
    save_data(app_data)
    budgets_df = pd.DataFrame(list(app_data.get("budgets", {}).items()), columns=["Category", "Budget"])
    # Report the name exactly as stored, so the message matches the table.
    gr.Info(f"Budget set for {name}: ₹{value:,.2f}")
    return budgets_df

def check_budget_alerts_for_month(year_month: str) -> List[str]:
    """List budget warnings for one month.

    Args:
        year_month: Month to inspect, formatted ``YYYY-MM``.

    Returns:
        One message per budgeted category whose spending in that month has
        reached the budget ("EXCEEDED") or at least 80% of it ("Nearing").
        Empty when there are no expenses at all or none in that month.
    """
    expenses = get_expenses_df()
    if expenses.empty:
        return []

    in_month = expenses[expenses["date"].dt.strftime("%Y-%m") == year_month]
    if in_month.empty:
        return []

    spent_by_category = in_month.groupby("category")["amount"].sum().to_dict()

    messages = []
    for name, limit in app_data.get("budgets", {}).items():
        spent = spent_by_category.get(name, 0.0)
        if spent >= limit:
            messages.append(f"🚨 Budget EXCEEDED for {name}: Spent ₹{spent:,.2f} of ₹{limit:,.2f}")
            continue
        if spent >= 0.8 * limit:
            messages.append(f"⚠️ Nearing budget for {name}: Spent ₹{spent:,.2f} of ₹{limit:,.2f} ({(spent/limit):.0%})")
    return messages

#Gradio UI

In [None]:
# Build the Gradio interface: a dashboard tab (cards, entry forms, budgets,
# editable expense table) and an analysis tab (charts plus the chat assistant).
with gr.Blocks(theme=gr.themes.Soft(), title="# AI Smart Expense Manager") as demo:
    gr.Markdown("FlowFi.ai Prototype V1.0")

    with gr.Tabs():
        with gr.TabItem("Dashboard"):
            # Headline figures.
            with gr.Row():
                total_spend_card = gr.Textbox(label="💰 Total Expenses", interactive=False)
                avg_txn_card = gr.Textbox(label="📊 Average Transaction", interactive=False)
            with gr.Row():
                # Left column: add an expense (voice or manual) and manage budgets.
                with gr.Column(scale=1):
                    gr.Markdown("### Add New Expense")
                    with gr.Tabs():
                        with gr.TabItem("🎤 Voice"):
                            mic_audio = gr.Audio(sources=["microphone"], type="numpy", label="Record your expense")
                            add_voice_btn = gr.Button("Add from Voice", variant="primary")
                        with gr.TabItem("✍️ Manual"):
                            m_text = gr.Textbox(label="Description")
                            m_amount = gr.Number(label="Amount")
                            m_cat = gr.Textbox(label="Category")
                            m_date = gr.Textbox(label="Date", value=datetime.now().strftime("%Y-%m-%d"))
                            add_manual_btn = gr.Button("Add Manually", variant="primary")
                    with gr.Accordion("Set Monthly Budget", open=False):
                        b_category = gr.Textbox(label="Category")
                        b_amount = gr.Number(label="Monthly Budget Amount")
                        set_budget_btn = gr.Button("Set Budget")
                        current_budgets_df = gr.DataFrame(value=pd.DataFrame(list(app_data.get("budgets", {}).items()), columns=["Category", "Budget"]), interactive=False)
                    budget_alerts_box = gr.Textbox(label="🚨 Budget Alerts", interactive=False, lines=3)
                # Right column: the expense table and the edit form for the selected row.
                with gr.Column(scale=2):
                    gr.Markdown("### All Expenses")
                    expense_df_display = gr.DataFrame(value=get_expenses_df, headers=["ID", "Date", "Category", "Amount", "Description"], interactive=False)
                    gr.Markdown("### Edit Selected Expense")
                    edit_id = gr.Textbox(label="Expense ID", interactive=False)
                    edit_date = gr.Textbox(label="Date")
                    edit_cat = gr.Textbox(label="Category")
                    edit_amount = gr.Number(label="Amount")
                    edit_text = gr.Textbox(label="Description")
                    with gr.Row():
                        update_btn = gr.Button("Update Expense", variant="primary")
                        delete_btn = gr.Button("Delete Expense", variant="stop")

        with gr.TabItem("Analysis & AI Chat"):
            gr.Markdown("## 📊 Visual Analysis")
            with gr.Row(): pie_chart, bar_chart = gr.Plot(), gr.Plot()
            with gr.Row(): line_chart = gr.Plot()
            gr.Markdown("## 🤖 Chat with Your AI Expense Analyst")
            chatbot = gr.Chatbot(height=300)
            chat_input = gr.Textbox(label="Your Question")
            chat_table_output = gr.DataFrame(interactive=False, label="Query Result")
            chat_input.submit(rag_chat_enhanced, inputs=[chat_input, chatbot], outputs=[chatbot, chat_table_output, chat_input])

    # Components refreshed after every data change; the order matches the
    # tuple returned by refresh_all_components().
    all_outputs = [expense_df_display, total_spend_card, avg_txn_card, pie_chart, bar_chart, line_chart, budget_alerts_box]
    add_manual_btn.click(add_expense, [m_text, m_amount, m_cat, m_date], all_outputs)
    add_voice_btn.click(add_expense_voice, [mic_audio], all_outputs)

    def on_select_row(df: pd.DataFrame, evt: gr.SelectData):
        """Copy the clicked table row into the edit form.

        Returns the row's id, date (``YYYY-MM-DD``, or "" when the stored date
        is not valid), category, amount and description; five empty strings
        when no row can be resolved.
        """
        blank = ("", "", "", "", "")
        # Decide from the clicked position, not the clicked cell's value: an
        # empty description or a 0 amount is a legitimate cell to click on.
        if evt.index is None:
            return blank
        row_pos = evt.index[0]
        if not 0 <= row_pos < len(df):
            return blank
        selected_row = df.iloc[row_pos]
        when = pd.to_datetime(selected_row['date'], errors="coerce")
        date_text = "" if pd.isna(when) else when.strftime('%Y-%m-%d')
        return selected_row['id'], date_text, selected_row['category'], selected_row['amount'], selected_row['text']

    expense_df_display.select(on_select_row, [expense_df_display], [edit_id, edit_date, edit_cat, edit_amount, edit_text])
    update_btn.click(update_expense, [edit_id, edit_date, edit_cat, edit_amount, edit_text], all_outputs)
    # delete_expense returns the dashboard values plus five blanks for the edit form.
    delete_btn.click(delete_expense, [edit_id], [*all_outputs, edit_id, edit_date, edit_cat, edit_amount, edit_text])
    # Refresh the dashboard after a budget change so the alert box is recomputed.
    set_budget_btn.click(set_budget, [b_category, b_amount], [current_budgets_df]).then(refresh_all_components, None, all_outputs)
    # Populate every component when the page first loads.
    demo.load(refresh_all_components, None, all_outputs)


# Run the app

In [None]:
# Start the app only when this file is run directly, not when it is imported.
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a public Gradio share link -
    # confirm that exposing the expense data outside this machine is intended.
    demo.launch(debug=False, share=True)