# In [None]:
# ---------------------------------------------
# Daily Expense Categorizer (Full Project)
# Machine Learning model that classifies expenses
# ---------------------------------------------

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# ------------------------------------------------
# 1. SAMPLE DATASET (You can replace with your own)
# ------------------------------------------------
# Labelled sample expenses, kept as (description, category) pairs so each
# label sits next to the text it belongs to.
labelled_expenses = [
    ("uber ride to office", "transport"),
    ("grocery store purchase", "food"),
    ("monthly netflix subscription", "entertainment"),
    ("electricity bill payment", "utilities"),
    ("mcdonalds burger meal", "food"),
    ("petrol refill", "transport"),
    ("amazon shopping order", "shopping"),
    ("train ticket", "transport"),
    ("doctor consultation fee", "health"),
    ("gym membership renewal", "fitness"),
]

# Column-oriented view of the pairs above: one list per column, same row order.
data = {
    "description": [description for description, _ in labelled_expenses],
    "category": [category for _, category in labelled_expenses],
}

# Two-column frame ("description", "category") built from the dict above.
df = pd.DataFrame(data)

# ---------------------------------------------
# 2. FEATURE EXTRACTION (Text → Numbers)
# ---------------------------------------------
# TF-IDF turns each description into a sparse numeric row. The vectorizer is
# only created here; it is fitted further down, after the split, so that
# held-out rows cannot influence the learned vocabulary or IDF weights.
vectorizer = TfidfVectorizer()
y = df["category"]

# ---------------------------------------------
# 3. TRAIN / TEST SPLIT
# ---------------------------------------------
# Split the raw text (not the vectorized matrix) using the same test_size and
# random_state as before.
text_train, text_test, y_train, y_test = train_test_split(
    df["description"], y, test_size=0.2, random_state=42
)

# Learn vocabulary and IDF weights from the training rows only, then apply
# that fitted transformation unchanged to the held-out rows.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-dataset matrix, kept so that any later code referring to X still works.
# It is produced by the training-fitted vectorizer (transform, not fit).
X = vectorizer.transform(df["description"])

# ---------------------------------------------
# 4. TRAIN THE MODEL
# ---------------------------------------------
# Multinomial Naive Bayes classifier trained on the training split.
# fit() hands back the fitted estimator, so creation and training are chained.
model = MultinomialNB().fit(X_train, y_train)

# ---------------------------------------------
# 5. TEST ACCURACY
# ---------------------------------------------
# Predict labels for the held-out rows and report the fraction that match.
preds = model.predict(X_test)
accuracy = accuracy_score(y_test, preds)
print("\nMODEL ACCURACY:", accuracy)

# ---------------------------------------------
# 6. PREDICTION FUNCTION
# ---------------------------------------------
def predict_expense(text):
    """Return the category predicted for a single expense description.

    The text is wrapped in a one-element list, transformed with the
    module-level ``vectorizer``, and passed to the module-level ``model``;
    the first (and only) prediction is returned.

    Args:
        text: The expense description to classify.

    Returns:
        The first element of ``model.predict`` for the transformed text.
    """
    # transform() receives a one-element list: a batch containing just `text`.
    features = vectorizer.transform([text])
    predictions = model.predict(features)
    return predictions[0]

# ---------------------------------------------
# 7. TRY NEW EXPENSES
# ---------------------------------------------
print("\nTEST PREDICTIONS:")

# Each entry pairs the label printed on the left with the text that is
# actually sent to the classifier (the two are not always the same wording).
demo_expenses = [
    ("subway sandwich  →", "subway sandwich lunch"),
    ("diesel for bike  →", "diesel for bike"),
    ("broadband bill   →", "payment for internet broadband"),
    ("buy paracetamol  →", "bought paracetamol tablets"),
]

for shown_label, expense_text in demo_expenses:
    print(shown_label, predict_expense(expense_text))

# ---------------------------------------------
# END OF FILE
# ---------------------------------------------