# AI Financial Insights - Data Exploration

This notebook explores financial transaction data and demonstrates the capabilities of the AI Financial Insights system.

## Import Required Libraries

In [None]:
import sys
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from data_prep import load_data, clean_data, prepare_features
from categorisation import TransactionCategorizer
from anomaly_model import AnomalyDetector
from insights_engine import InsightsEngine

# Plot defaults shared by every figure below: seaborn's white-grid style
# first, then a 12x6 inch default canvas for matplotlib figures.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load and Explore Data

In [None]:
# Read the sample statements CSV through the project loader (data_prep),
# then report how many rows arrived and preview the top of the frame.
df = load_data('../data/sample_statements.csv')
print(
    f"Loaded {len(df)} transactions",
    "\nFirst few rows:",
    df.head(),
    sep="\n",
)

In [None]:
# Schema overview of the raw frame: the dtype of every column, followed by
# a per-column count of null entries.
column_types = df.dtypes
null_counts = df.isna().sum()
print("Data Types:", column_types, "\nMissing Values:", null_counts, sep="\n")

## Clean Data

In [None]:
# Run the project's cleaning step on the raw frame. The result is bound to
# a new name, so `df` stays available for comparison.
df_clean = clean_data(df)

# Row count after cleaning, then a preview of the cleaned rows.
print(
    f"Cleaned data: {len(df_clean)} transactions",
    "\nData after cleaning:",
    df_clean.head(),
    sep="\n",
)

## Categorize Transactions

In [None]:
# Categorize each transaction from its 'description' column.
# NOTE(review): this rebinds `df_clean`, so re-running the cell passes the
# already-categorized frame back into the categorizer.
# Presumably categorize_dataframe adds the 'predicted_category' column that
# is selected below -- confirm in categorisation.py.
categorizer = TransactionCategorizer()
df_clean = categorizer.categorize_dataframe(df_clean, column='description')

category_view = df_clean.loc[:, ['description', 'predicted_category', 'amount']]
print("Transaction categories:", category_view, sep="\n")

## Generate Insights

In [None]:
# Build the insights engine on the cleaned, categorized frame; later cells
# reuse this `insights` object.
insights = InsightsEngine(df_clean)

# Mapping of statistic name -> value, as returned by the engine.
stats = insights.get_summary_statistics()

print("Financial Summary:")
for key, value in stats.items():
    # snake_case key -> "Title Case" label.
    label = key.replace('_', ' ').title()
    # Only float values are rendered as dollar amounts with two decimals;
    # anything else (e.g. integer counts) is printed as-is.
    rendered = f"${value:.2f}" if isinstance(value, float) else f"{value}"
    print(f"  {label}: {rendered}")

## Category Breakdown

In [None]:
# Mapping of category -> amount from the insights engine. An empty (falsy)
# result skips both the listing and the chart.
category_breakdown = insights.get_category_breakdown()

if category_breakdown:
    # Text listing, largest amount first.
    print("Spending by Category:")
    ranked = sorted(category_breakdown.items(), key=lambda pair: pair[1], reverse=True)
    for category, amount in ranked:
        print(f"  {category}: ${amount:.2f}")

    # Bar chart of the same data. Bars follow the mapping's own order,
    # not the ranked order used for the listing above.
    categories = list(category_breakdown)
    amounts = [category_breakdown[name] for name in categories]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(categories, amounts, color='steelblue')
    ax.set(xlabel='Category', ylabel='Amount ($)', title='Spending by Category')
    # Slant the tick labels so long category names do not overlap.
    plt.xticks(rotation=45, ha='right')
    fig.tight_layout()
    plt.show()

## Anomaly Detection

In [None]:
# Derive model features from the cleaned frame. `feature_names` is returned
# by prepare_features but is not used in this cell.
df_features, feature_names = prepare_features(df_clean)

# Detection runs on the 'amount_abs' column only; if prepare_features did
# not produce it, the whole cell is a no-op.
if 'amount_abs' in df_features.columns:
    # Double brackets keep the selection 2-D (one feature column).
    X = df_features[['amount_abs']].to_numpy()

    # NOTE(review): contamination=0.15 is presumably the expected share of
    # outliers -- confirm against anomaly_model.AnomalyDetector.
    detector = AnomalyDetector(contamination=0.15)
    anomalies = detector.fit_predict(X)

    # Store the raw flag and a readable label; -1 is treated as an anomaly,
    # every other value as normal.
    df_features['anomaly'] = anomalies
    df_features['anomaly_label'] = df_features['anomaly'].map(
        lambda flag: 'Anomaly' if flag == -1 else 'Normal'
    )

    # Count and select the flagged rows once, then report.
    anomaly_count = sum(anomalies == -1)
    flagged_rows = df_features.loc[df_features['anomaly'] == -1]

    print(f"Total transactions: {len(df_features)}")
    print(f"Anomalies detected: {anomaly_count}")

    if anomaly_count > 0:
        print("\nAnomalous transactions:")
        print(flagged_rows[['description', 'amount', 'anomaly_label']])

## Visualization: Transaction Amounts

In [None]:
# Scatter every transaction amount against its row position in
# `df_features`, with detected anomalies highlighted in red.
#
# Each point is drawn at its actual row position. Drawing normal rows at
# 0..k-1 and anomalies at k..n-1 would bunch all anomalies at the right
# edge and make the "Transaction Index" axis meaningless.
#
# Skipped entirely when the detection step did not add an 'anomaly' column.
if 'anomaly' in df_features.columns:
    # 0-based row position of every transaction, independent of the
    # frame's index labels.
    positions = np.arange(len(df_features))

    # Boolean masks over rows: 1 marks a normal row, -1 an anomaly.
    flags = df_features['anomaly'].to_numpy()
    is_normal = flags == 1
    is_anomaly = flags == -1

    normal = df_features[is_normal]
    anomaly = df_features[is_anomaly]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.scatter(positions[is_normal], normal['amount'],
               label='Normal', color='green', alpha=0.6)
    # Larger, more opaque markers so anomalies stand out among normal points.
    ax.scatter(positions[is_anomaly], anomaly['amount'],
               label='Anomaly', color='red', alpha=0.8, s=100)

    ax.set_xlabel('Transaction Index')
    ax.set_ylabel('Amount ($)')
    ax.set_title('Transactions with Detected Anomalies')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

## Summary Insights

In [None]:
# Ask the insights engine for its generated insights and print them as a
# numbered list starting at 1.
generated_insights = insights.get_insights()

print("Generated Insights:")
numbered_lines = [
    f"{position}. {text}"
    for position, text in enumerate(generated_insights, start=1)
]
for line in numbered_lines:
    print(line)

## Next Steps

1. **Expand Data**: Add more historical transaction data for better pattern recognition
2. **Improve Categorization**: Enhance the categorizer with NLP or machine learning models
3. **Advanced Anomaly Detection**: Experiment with different anomaly detection algorithms
4. **Predictive Analytics**: Build models to forecast future spending trends
5. **Real-time Integration**: Connect to banking APIs for real-time transaction monitoring