# Debug Data Type Mismatch Issue

This notebook helps diagnose the "orgnbr dtype mismatch" error.

In [None]:
import os, sys
from pathlib import Path

# Locate the project root (the directory that holds the `src` package) and make
# it both the working directory and an import root.
#
# The notebook is expected to start one level below the root, so the root is
# normally the parent of the current directory. Because this cell also changes
# the working directory, a second execution would otherwise start *at* the root
# and climb one more level up; the `src` check makes the cell safe to re-run.
_start_dir = Path.cwd()
project_root = _start_dir if (_start_dir / "src").is_dir() else _start_dir.parent
os.chdir(project_root)

# Only register the root once, so re-running does not stack duplicate entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"Working directory: {os.getcwd()}")
print(f"Python path includes: {project_root}")

In [None]:
# pandas is used by the later cells that build the sample DataFrames.
import pandas as pd
# Project-local modules. These are expected to resolve only after the setup
# cell has inserted the project root into sys.path.
import src.data_quality.core
from src.main import load_database_tables, create_acct_df, create_org_final, create_pers_final

# Reaching this line means every import above succeeded.
print("Modules imported successfully")

In [None]:
# Load the database tables and, when the loader returns a dict, report each
# entry's type, shape and per-column dtypes.
# Any failure is reported in full (message, exception type and traceback) so
# the origin of the error is visible, and the notebook keeps running.
try:
    print("Attempting to load database tables...")
    data = load_database_tables()
    print(f"Data loaded: {type(data)}")
    if isinstance(data, dict):
        for key, value in data.items():
            if value is None:
                print(f"{key}: None")
                continue
            # Entries without a `.shape` attribute are reported as 'N/A'.
            shape = getattr(value, 'shape', 'N/A')
            print(f"{key}: {type(value)}, shape: {shape}")
            if hasattr(value, 'dtypes'):
                print(f"  dtypes: {value.dtypes.to_dict()}")
except Exception as e:
    import traceback
    print(f"Error loading data: {e}")
    print(f"Error type: {type(e)}")
    # The message alone rarely shows which call raised; print the full stack.
    traceback.print_exc()

In [None]:
# Build acct_df through the project helper and report its shape, column names
# and per-column dtypes.
# Any failure is reported in full (message, exception type and traceback) so
# the origin of the error is visible, and the notebook keeps running.
try:
    print("Attempting to create acct_df...")
    acct_df = create_acct_df()
    print(f"acct_df created: {type(acct_df)}")
    if acct_df is not None:
        print(f"Shape: {acct_df.shape}")
        print(f"Columns: {list(acct_df.columns)}")
        print(f"Dtypes: {acct_df.dtypes.to_dict()}")
except Exception as e:
    import traceback
    print(f"Error creating acct_df: {e}")
    print(f"Error type: {type(e)}")
    # The message alone rarely shows which call raised; print the full stack.
    traceback.print_exc()

In [None]:
# Build three small in-memory tables whose join keys deliberately disagree in
# dtype, so the conversion logic can be exercised without a database.
print("Creating sample data to test dtype conversion...")

# Organizations: orgnbr is supplied as Python ints, giving an integer column.
wh_org = pd.DataFrame.from_dict({
    'orgnbr': list(range(1, 6)),
    'orgname': ['Company A', 'Company B', 'Company C', 'Company D', 'Company E'],
})

# Organization-to-address links: orgnbr is supplied as text here, which is the
# mismatch against wh_org that this notebook is about.
orgaddruse = pd.DataFrame.from_dict({
    'orgnbr': ['1', '2', '3'],
    'addrnbr': [101, 102, 103],
    'addrusecd': ['PRI'] * 3,
})

# Addresses: one more row than orgaddruse links to (104 has no link).
wh_addr = pd.DataFrame.from_dict({
    'addrnbr': [101, 102, 103, 104],
    'text1': ['123 Main St', '456 Oak Ave', '789 Pine Rd', '321 Elm St'],
    'cityname': ['Anytown', 'Somewhere', 'Elsewhere', 'Nowhere'],
    'statecd': ['CA', 'NY', 'TX', 'FL'],
    'zipcd': ['12345', '67890', '54321', '09876'],
})

# Report the dtype of both join keys on each side.
print(f"wh_org orgnbr dtype: {wh_org['orgnbr'].dtype}")
print(f"orgaddruse orgnbr dtype: {orgaddruse['orgnbr'].dtype}")
print(f"wh_addr addrnbr dtype: {wh_addr['addrnbr'].dtype}")
print(f"orgaddruse addrnbr dtype: {orgaddruse['addrnbr'].dtype}")

In [None]:
# Run the address-join helper on the sample frames. A failure is reported with
# its message and full traceback instead of stopping the notebook.
try:
    print("Testing create_org_table_with_address with sample data...")
    result = src.data_quality.core.create_org_table_with_address(
        wh_org,
        orgaddruse,
        wh_addr,
    )
    print(f"✅ Success! Result shape: {result.shape}")
    print(f"Result columns: {list(result.columns)}")
    print("\nFirst few rows:")
    print(result.head())
except Exception as exc:
    print(f"❌ Error: {exc}")
    # Imported here because it is only needed on the failure path.
    import traceback
    traceback.print_exc()

In [None]:
# Test the new function signatures with sample data.
# At cell top level locals() is the notebook namespace, so this guard checks
# that the sample-data cell has already been executed in this kernel.
if 'wh_org' in locals() and 'orgaddruse' in locals() and 'wh_addr' in locals():
    print("Testing new function signatures...")
    
    # Create sample data that would normally come from load_database_tables()
    sample_data = {
        'wh_org': wh_org,
        'orgaddruse': orgaddruse, 
        'wh_addr': wh_addr,
        'wh_pers': pd.DataFrame({'persnbr': [1, 2], 'firstname': ['John', 'Jane']}),
        'persaddruse': pd.DataFrame({'persnbr': [1], 'addrnbr': [101], 'addrusecd': ['PRI']}),
        # persnbr is all-None here, so pandas stores that column as object dtype.
        'wh_allroles': pd.DataFrame({'acctnbr': [1001, 1002], 'orgnbr': [1, 2], 'persnbr': [None, None]})
    }
    
    # Create sample acct_df
    sample_acct_df = pd.DataFrame({'acctnbr': [1001, 1002]})
    
    try:
        # Test create_org_final with new signature
        print("Testing create_org_final(data, acct_df)...")
        org_result = create_org_final(sample_data, sample_acct_df)
        print(f"✅ create_org_final worked! Shape: {org_result.shape}")
    except Exception as e:
        import traceback
        print(f"❌ create_org_final failed: {e}")
        # The message alone rarely shows which merge or cast raised; print the
        # full stack so the failing line inside the project code is visible.
        traceback.print_exc()
        
    # Note: create_pers_final would need matching persnbr dtypes to work properly
    print("Note: create_pers_final would need proper persnbr dtype alignment to test")
else:
    print("Sample data not available - run previous cells first")