In [1]:
# Presumably a kernel smoke test: prints a fixed greeting and nothing else.
print("Hi")

Hi


In [4]:
# IPython shell escape: hands `conda env` to the system shell. The recorded
# output below shows that conda is not on that shell's PATH.
!conda env

zsh:1: command not found: conda


In [1]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Define category mappings
# Merchant substring -> spending category.
# load_transactions() walks this mapping in insertion order and stops at the
# first merchant whose name occurs (case-insensitively) in a description, so
# the order of the groups below is significant.
MERCHANT_CATEGORIES = {
    # Coffee & quick food
    **dict.fromkeys(('Tim Hortons', 'Starbucks', 'Second Cup'), 'Coffee'),
    # Restaurants
    **dict.fromkeys(
        ('Tst-Globol', 'Pizza Pizza', 'Pizza Hut', 'McDonalds'), 'Restaurant'
    ),
    # Shopping
    **dict.fromkeys(('AMAZON', 'Walmart', 'Costco'), 'Shopping'),
    # Bills & utilities
    **dict.fromkeys(('RENT',), 'Housing'),
    **dict.fromkeys(('HYDRO', 'ROGERS', 'BELL'), 'Utilities'),
}

# Function to load and preprocess data
def load_transactions(file_path, categories=None):
    """Read a transactions CSV and return it cleaned and categorised.

    The CSV must provide ``date``, ``description`` and ``amount`` columns.

    Args:
        file_path: Location of the CSV; passed straight to ``pandas.read_csv``.
        categories: Optional mapping of merchant substring -> category name.
            When omitted, the module-level ``MERCHANT_CATEGORIES`` is used.

    Returns:
        The DataFrame read from ``file_path`` with ``date`` parsed to
        datetimes, ``amount`` converted to numbers (NaN where a value cannot
        be parsed) and an added ``category`` column. A description that
        matches no merchant, or is missing, is categorised as 'Other'.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    if categories is None:
        categories = MERCHANT_CATEGORIES

    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])

    # astype(str): read_csv yields a numeric column when no value carries a
    # currency symbol, and numeric columns have no .str accessor.
    # regex=False: '$' must be taken literally; under the regex default of
    # pandas < 2.0 it is an end-of-string anchor and strips nothing.
    # Commas are removed as thousands separators ("$1,200.00").
    amount_text = (
        df['amount']
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    df['amount'] = pd.to_numeric(amount_text, errors='coerce')

    # Lower-case the merchant names once instead of once per row.
    needles = [(merchant.lower(), label) for merchant, label in categories.items()]

    def _categorise(description):
        # Missing descriptions arrive as NaN (a float), not as a string.
        if not isinstance(description, str):
            return 'Other'
        text = description.lower()
        # First match in mapping order wins.
        # NOTE(review): plain substring matching means short keys such as
        # 'BELL' or 'RENT' also hit longer words that contain them.
        for needle, label in needles:
            if needle in text:
                return label
        return 'Other'

    df['category'] = df['description'].apply(_categorise)
    return df

# Function to analyze spending by category
def analyze_spending_by_category(df):
    """Summarise expenses per category.

    Rows whose ``amount`` is negative are treated as expenses; their sign is
    flipped so the figures read as positive numbers. The frame passed in is
    left untouched.

    Args:
        df: DataFrame with ``amount`` and ``category`` columns.

    Returns:
        DataFrame indexed by category whose columns are
        ``('amount', 'sum')``, ``('amount', 'mean')`` and
        ``('amount', 'count')``, rounded to two decimals.
    """
    is_expense = df['amount'] < 0
    spent = df.loc[is_expense].assign(amount=lambda frame: frame['amount'].abs())

    per_category = spent.groupby('category')
    stats = per_category.agg({'amount': ['sum', 'mean', 'count']})
    return stats.round(2)

# Function to analyze specific merchant spending (e.g., Tim Hortons)
def analyze_merchant_spending(df, merchant_name, regex=False):
    """Summarise, month by month, the transactions of one merchant.

    Args:
        df: DataFrame with ``description``, ``amount`` and a datetime
            ``date`` column.
        merchant_name: Text to look for in ``description``
            (case-insensitive).
        regex: When False (default) ``merchant_name`` is matched literally,
            so names containing ``.``, ``(`` or ``*`` behave as written.
            Pass True to have it interpreted as a regular expression.

    Returns:
        DataFrame indexed by 'YYYY-MM' with ``sum``, ``count`` and ``mean``
        of the absolute amounts, rounded to two decimals.
    """
    # na=False: a missing description would otherwise put NaN into the mask,
    # and pandas refuses to index with a mask that contains NaN.
    is_merchant = df['description'].str.contains(
        merchant_name, case=False, regex=regex, na=False
    )
    merchant_df = df[is_merchant].copy()

    # NOTE(review): the absolute value of every matching row is counted, so
    # positive amounts (e.g. refunds) add to the totals as well.
    merchant_df['amount'] = merchant_df['amount'].abs()

    month_key = merchant_df['date'].dt.strftime('%Y-%m')
    monthly_spending = (
        merchant_df.groupby(month_key)['amount']
        .agg(['sum', 'count', 'mean'])
        .round(2)
    )
    return monthly_spending

# Function to visualize spending patterns
def plot_spending_analysis(df):
    """Draw a two-panel overview of expenses.

    The top panel is a stacked bar chart of monthly spending per category;
    the bottom panel is a pie chart of total spending per category. Only rows
    with a negative ``amount`` are plotted, using their absolute value.

    Args:
        df: DataFrame with ``amount``, ``category`` and a datetime ``date``
            column.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    expenses_df = df[df['amount'] < 0].copy()
    expenses_df['amount'] = expenses_df['amount'].abs()

    if expenses_df.empty:
        # pandas raises when asked to plot a frame without numeric data.
        print("No expenses to plot.")
        return

    fig, (ax_monthly, ax_share) = plt.subplots(2, 1, figsize=(15, 12))

    # Monthly spending by category
    monthly_by_category = expenses_df.pivot_table(
        index=expenses_df['date'].dt.strftime('%Y-%m'),
        columns='category',
        values='amount',
        aggfunc='sum'
    ).fillna(0)
    # ax= is required: without it DataFrame.plot opens a figure of its own
    # and the prepared subplot stays empty.
    monthly_by_category.plot(kind='bar', stacked=True, rot=45, ax=ax_monthly)
    ax_monthly.set_title('Monthly Spending by Category')
    ax_monthly.set_xlabel('Month')
    ax_monthly.set_ylabel('Amount ($)')
    ax_monthly.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Pie chart of total spending by category
    category_totals = expenses_df.groupby('category')['amount'].sum()
    ax_share.pie(category_totals, labels=category_totals.index, autopct='%1.1f%%')
    ax_share.set_title('Total Spending Distribution by Category')

    fig.tight_layout()
    plt.show()

# Main execution
def main(file_path='your_transactions.csv', merchant_name='Tim Hortons'):
    """Run the whole analysis: load, summarise, print and plot.

    Args:
        file_path: CSV of transactions to analyse. The default is a
            placeholder; pass the real path of your export.
        merchant_name: Merchant that gets its own month-by-month summary.

    Returns:
        None. The summaries are printed and the charts are shown.
    """
    # Load the data
    df = load_transactions(file_path)

    # Overall category analysis
    category_summary = analyze_spending_by_category(df)
    print("\nSpending by Category:")
    print(category_summary)

    # Merchant-specific analysis
    merchant_summary = analyze_merchant_spending(df, merchant_name)
    print(f"\n{merchant_name} Spending:")
    print(merchant_summary)

    # Visualize the data
    plot_spending_analysis(df)

if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'pandas'

In [2]:
# Load one billing export (presumably November 2024, judging by the file name).
# Requires the cell that defines load_transactions to have run successfully
# first; the recorded NameError below shows that it had not.
df = load_transactions("./billing/2024-11.csv")


NameError: name 'load_transactions' is not defined