In [1]:
# Make the project's `src` package importable no matter which directory the
# notebook kernel was started from. A bare sys.path.append('..') is resolved
# against the current working directory, so it only works when that directory
# is a direct child of the project root; anywhere else the imports below fail
# with "ModuleNotFoundError: No module named 'src'".
import sys
from pathlib import Path

# Search the working directory and each of its ancestors for the folder that
# contains src/utils (the package imported below); the nearest match wins.
_cwd = Path.cwd().resolve()
_project_root = next(
    (p for p in (_cwd, *_cwd.parents) if (p / 'src' / 'utils').is_dir()),
    None,
)
if _project_root is None:
    # Nothing found: keep the original best-effort behaviour.
    sys.path.append('..')
elif str(_project_root) not in sys.path:
    # Guard against piling up duplicate entries when the cell is re-run.
    sys.path.append(str(_project_root))

# Import our custom modules
from src.utils.data_generator import DataGenerator
from src.utils.feature_engineering import FeatureEngineer

# Standard data science imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Create the sample dataset: a generator seeded with 42, asked for 100 users.
generator = DataGenerator(seed=42)
trades_df, transactions_df, labels_df = generator.generate_dataset(n_users=100)

# Report the dimensions of all three objects before previewing any of them.
for caption, frame in (
    ("Trades shape:", trades_df),
    ("Transactions shape:", transactions_df),
    ("Labels shape:", labels_df),
):
    print(caption, frame.shape)

# Show the first rows of each object under its own heading.
for heading, frame in (
    ("\nSample trades:", trades_df),
    ("\nSample transactions:", transactions_df),
    ("\nUser labels:", labels_df),
):
    print(heading)
    display(frame.head())

# Derive the feature table from the trades and transactions generated above.
engineer = FeatureEngineer()
features = engineer.calculate_user_features(
    trades_df,
    transactions_df,
)

# Summarise the result: its dimensions first, then the list of column names.
print("\nEngineered features shape:", features.shape)
print("\nFeature columns:")
display(
    features.columns.tolist()
)

# Scatter the summed deposits against the summed withdrawals, colouring each
# point by its `is_fraudulent` label.
# NOTE(review): `c=` is paired with the x/y values by position, so this assumes
# `features` and `labels_df` have the same length and row order — confirm.
fig, ax = plt.subplots(figsize=(12, 6))
points = ax.scatter(
    features['deposit_amount_sum'],
    features['withdrawal_amount_sum'],
    c=labels_df['is_fraudulent'],
    cmap='coolwarm',
)
ax.set_xlabel('Total Deposits')
ax.set_ylabel('Total Withdrawals')
ax.set_title('Deposits vs Withdrawals by User Type')
# The colour bar is built from the scatter's own mappable so it reflects `c`.
fig.colorbar(points, ax=ax, label='Is Fraudulent')
plt.show()

ModuleNotFoundError: No module named 'src'