# In [None]:
import pandas as pd

# Total number of null cells (summed over every column) in each cleaned dataset.
missing_values_receipts, missing_values_users, missing_values_brands = (
    frame.isna().sum().sum()
    for frame in (df_receipts_clean, df_users_clean, df_brands_clean)
)

# Checking for duplicate rows
# `rewardsReceiptItemList` is exploded further down, so it holds list-like
# values; `DataFrame.duplicated()` has to hash every cell and raises TypeError
# on lists. Compare on the string rendering of that column instead so the
# nested items still take part in the row comparison.
# NOTE(review): two item lists with the same content but different dict key
# order render differently and will not be treated as duplicates.
_receipts_hashable = df_receipts_clean.assign(
    rewardsReceiptItemList=df_receipts_clean['rewardsReceiptItemList'].astype(str)
)
duplicates_receipts = _receipts_hashable.duplicated().sum()
duplicates_users = df_users_clean.duplicated().sum()
# `cpg` is excluded from the comparison entirely (unhashable field);
# errors='ignore' keeps this working when the column is absent.
duplicates_brands = df_brands_clean.drop(columns=['cpg'], errors='ignore').duplicated().sum()

# Count rows whose timestamp is later than the moment this cell is executed.
# NOTE(review): pd.Timestamp.now() is timezone-naive — confirm both columns
# hold naive datetimes, otherwise the comparison will not work.
future_dates_receipts = df_receipts_clean['purchaseDate'].gt(pd.Timestamp.now()).sum()
future_dates_users = df_users_clean['createdDate'].gt(pd.Timestamp.now()).sum()

# Coerce `totalSpent` to numbers (unparseable entries become NaN), store the
# result back on the receipts frame, then count strictly negative amounts.
# NaN never compares below zero, so coerced entries are not counted.
total_spent_numeric = pd.to_numeric(df_receipts_clean['totalSpent'], errors='coerce')
df_receipts_clean['totalSpent'] = total_spent_numeric
negative_spend = df_receipts_clean['totalSpent'].lt(0).sum()

# Checking for orphaned foreign keys
# Receipts whose `userId` does not appear among the users' `_id` values.
# NOTE(review): a null `userId` is also counted here — confirm that is intended.
orphaned_users_in_receipts = (~df_receipts_clean['userId'].isin(df_users_clean['_id'])).sum()

# One entry per receipt item: the item's barcode, or None when the exploded
# value is not a dict or the dict has no 'barcode' key.
item_barcodes = (
    df_receipts_clean['rewardsReceiptItemList']
    .explode()
    .apply(lambda item: item.get('barcode') if isinstance(item, dict) else None)
)
# A missing barcode is not a dangling reference; without this filter every
# receipt with no item list and every item lacking a barcode would be counted
# as an orphaned brand.
item_barcodes = item_barcodes.dropna()
# NOTE(review): assumes item barcodes and `df_brands_clean['barcode']` share a
# dtype (e.g. both strings) — verify, otherwise nothing will match.
orphaned_brands_in_items = (~item_barcodes.isin(df_brands_clean['barcode'])).sum()

# Summary table of data quality issues: one (label, count) pair per check,
# kept side by side so a label can never drift away from its value.
_issue_rows = [
    ("Missing Values (Receipts)", missing_values_receipts),
    ("Missing Values (Users)", missing_values_users),
    ("Missing Values (Brands)", missing_values_brands),
    ("Duplicate Rows (Receipts)", duplicates_receipts),
    ("Duplicate Rows (Users)", duplicates_users),
    ("Duplicate Rows (Brands)", duplicates_brands),
    ("Future Dates (Receipts)", future_dates_receipts),
    ("Future Dates (Users)", future_dates_users),
    ("Negative Spend Values", negative_spend),
    ("Orphaned Users in Receipts", orphaned_users_in_receipts),
    ("Orphaned Brands in Items", orphaned_brands_in_items),
]
_issue_labels, _issue_counts = zip(*_issue_rows)
data_quality_issues = pd.DataFrame({
    "Category": list(_issue_labels),
    "Count": list(_issue_counts),
})

# Displaying the results
# NOTE(review): `ace_tools` is presumably a helper provided by the hosted
# notebook environment this cell was written in — confirm. Where the module
# is not installed, fall back to printing the table so the report is not lost
# to an ImportError.
try:
    import ace_tools as tools
except ImportError:
    tools = None

if tools is not None:
    tools.display_dataframe_to_user(name="Final Data Quality Issues", dataframe=data_quality_issues)
else:
    print("Final Data Quality Issues")
    print(data_quality_issues.to_string(index=False))
