# Smart Database Connector V3 - Comprehensive Unit Tests

This notebook provides comprehensive testing for the Smart Database Connector V3 with separate test sections for:

1. **NeonDB Tests** - Default connection with full functionality testing
2. **AWS LayeredDB Tests** - Tunnel connection with full functionality testing

Each section tests all class methods and functions independently.

## Setup and Imports

⚠️ **IMPORTANT**: If you see errors about unexpected keyword arguments (`mode`, `database`), please **restart the Jupyter kernel** to reload the updated V3 module.

Import all necessary libraries and the V3 connector

In [28]:
import pandas as pd
import numpy as np
import sys
import os
from datetime import datetime
import importlib

# --- Step 1: Set up the system path to find your package ---
# This ensures Python can locate the 'db_connector' package.
# The script navigates up two directories from the current notebook's location
# to reach the root where the 'db_connector' package resides.
#
# Correct Project Structure:
# /db_population_utils/
# └── /db_connector/
#     ├── __init__.py
#     ├── smart_db_connector_enhanced_V3.py
#     └── /tests/
#         └── test_smart_db_connector_V3_comprehensive.ipynb

# Get the directory of the current notebook, then go up two levels
notebook_dir = os.path.dirname(os.path.abspath("__file__")) # A robust way to get notebook dir
package_path = os.path.abspath(os.path.join(notebook_dir, '..', '..'))

if package_path not in sys.path:
    sys.path.append(package_path)
    print(f"✅ Added '{package_path}' to system path")

# --- Step 2: Force a reload of the module (useful for development) ---
if 'db_connector' in sys.modules:
    importlib.reload(sys.modules['db_connector'])
if 'db_connector.smart_db_connector_enhanced_V3' in sys.modules:
    importlib.reload(sys.modules['db_connector.smart_db_connector_enhanced_V3'])

# --- Step 3: Import the connector class from your package ---
try:
    from db_connector import db_connector
    print("✅ Successfully imported 'db_connector' from the package.")
except ImportError as e:
    print(f"❌ ImportError: {e}. Please check your file structure and __init__.py file.")
    sys.exit()

print(f"📂 Working directory: {os.getcwd()}")
print(f"🕐 Test started at: {datetime.now()}")

# --- Step 4: Comprehensive Test Suite ---
db = None  # Initialize db to None for the finally block
try:
    # --- 4.1: Initialize the default NeonDB connector ---
    print("\n🚀 Initializing the Smart DB Connector (Default: NeonDB)...")
    db = db_connector()

    # --- 4.2: Perform and display health check ---
    print("\n🩺 Performing initial health check...")
    health = db.health_check()
    print("\n--- Health Check Result ---")
    if health.get('status') == 'healthy':
        print("💚 Status: HEALTHY")
        for key, value in health.items():
            if key != 'status':
                print(f"   - {key}: {value}")
    else:
        print("💔 Status: UNHEALTHY")
        for key, value in health.items():
            print(f"   - {key}: {value}")
    print("---------------------------\n")

    # --- 4.3: Explore the database ---
    print("\n🗺️  Exploring database structure...")
    print(f"   Available Schemas: {db.schemas}")
    if db.current_schema:
        print(f"   Current Schema: {db.current_schema}")
        print(f"   Tables in '{db.current_schema}': {db.tables[:5]}...") # Show first 5 tables
    
    # --- 4.4: Run a sample read query ---
    if db.current_schema and 'districts' in db.tables:
        print(f"\n쿼리 실행 중 '{db.current_schema}.districts'...")
        districts_df = db.query("SELECT * FROM districts LIMIT 3")
        print("   Query successful. First 3 rows from 'districts':")
        print(districts_df)
    else:
        print("\nSkipping query test: 'districts' table not found in current schema.")

    # --- 4.5: Test the 'populate' method (write operation) ---
    print("\n📝 Testing the 'populate' method...")
    
    # Create a sample DataFrame to insert
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    new_table_name = f"test_log_{timestamp}"
    
    log_data = {
        'timestamp': [datetime.now() - pd.Timedelta(minutes=x) for x in range(3)],
        'log_level': ['INFO', 'WARNING', 'INFO'],
        'message': ['User logged in', 'High memory usage detected', 'Data processed successfully'],
        'user_id': [101, np.nan, 102]
    }
    log_df = pd.DataFrame(log_data)
    
    print(f"   - Created a sample DataFrame to insert into '{new_table_name}'.")
    
    # Use populate to create and insert into the new table
    populate_result = db.populate(
        df=log_df, 
        table_name=new_table_name, 
        schema=db.current_schema, 
        mode='replace' # Use 'replace' to ensure test is repeatable
    )
    
    if populate_result.get('status') == 'success':
        print(f"   ✅ 'populate' operation successful.")
        
        # Verify the data was inserted correctly
        print(f"   - Verifying data in new table '{new_table_name}'...")
        verify_df = db.query(f"SELECT * FROM {new_table_name}")
        print(f"   - Verification query returned {len(verify_df)} rows.")
        print(verify_df.head())
    else:
        print(f"   ❌ 'populate' operation failed: {populate_result.get('error')}")


except Exception as e:
    print(f"\n❌ An error occurred during the test suite: {e}")

finally:
    # --- Step 5: Always ensure the connection is closed ---
    if db and db.engine:
        print("\n🔒 Closing database connection...")
        db.close()
    else:
        print("\nℹ️ No active connection to close.")


✅ Successfully imported 'db_connector' from the package.
📂 Working directory: /Users/svitlanakovalivska/layered-populate-data-pool-da/db_population_utils/db_connector/tests
🕐 Test started at: 2025-08-21 18:39:16.731993

🚀 Initializing the Smart DB Connector (Default: NeonDB)...
🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🔗 Using default NeonDB connection
✅ NeonDB configuration loaded
   Default schema: test_berlin_data
🔌 Connecting to NeonDB...
✅ Connection successful!
   Database: neondb
   User: neondb_owner

🔍 Auto-discovering database schemas...
✅ Discovered 4 schemas
🎯 Auto-selected default schema: test_berlin_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: NeonDB

🗂️  Discovered 4 schemas:
  📁 dependency_example: 4 tables
       └─ departments (2 columns)
       └─ districts (3 columns)
       └─ employees (4 columns)
       └─ ... and 1 more tables
  📁 nyc_schools: 27 tables
       └─ Audrey_sat_results (10 columns)
       └─ Colleges_Berlin (12 columns

## Test Data Creation

Create standardized test datasets for both database tests

In [29]:
def create_test_dataframes():
    """Create standardized test DataFrames for testing"""
    
    # Test DataFrame 1: Simple product data
    products_df = pd.DataFrame({
        'product_id': range(1, 11),
        'product_name': [f'Product_{i}' for i in range(1, 11)],
        'category': ['Electronics', 'Books', 'Clothing', 'Home', 'Sports'] * 2,
        'price': np.random.uniform(10.0, 500.0, 10).round(2),
        'in_stock': [True, False] * 5,
        'created_date': pd.date_range('2024-01-01', periods=10, freq='D')
    })
    
    # Test DataFrame 2: Sales data
    sales_df = pd.DataFrame({
        'sale_id': range(1, 21),
        'product_id': np.random.choice(range(1, 11), 20),
        'quantity': np.random.randint(1, 10, 20),
        'sale_amount': np.random.uniform(50.0, 1000.0, 20).round(2),
        'customer_id': np.random.choice(range(100, 200), 20),
        'sale_date': pd.date_range('2024-01-01', periods=20, freq='H')
    })
    
    # Test DataFrame 3: Customer data
    customers_df = pd.DataFrame({
        'customer_id': range(100, 110),
        'customer_name': [f'Customer_{i}' for i in range(100, 110)],
        'email': [f'customer{i}@test.com' for i in range(100, 110)],
        'registration_date': pd.date_range('2023-01-01', periods=10, freq='W'),
        'is_active': [True] * 8 + [False] * 2
    })
    
    return {
        'products': products_df,
        'sales': sales_df, 
        'customers': customers_df
    }

# Create test data
test_data = create_test_dataframes()

print("📊 Test DataFrames Created:")
for name, df in test_data.items():
    print(f"   {name}: {len(df)} rows × {len(df.columns)} columns")

# Display sample data
print("\n🔍 Sample Products Data:")
display(test_data['products'].head(3))

📊 Test DataFrames Created:
   products: 10 rows × 6 columns
   sales: 20 rows × 6 columns
   customers: 10 rows × 5 columns

🔍 Sample Products Data:


  'sale_date': pd.date_range('2024-01-01', periods=20, freq='H')


Unnamed: 0,product_id,product_name,category,price,in_stock,created_date
0,1,Product_1,Electronics,354.72,True,2024-01-01
1,2,Product_2,Books,274.12,False,2024-01-02
2,3,Product_3,Clothing,179.86,True,2024-01-03


# 🔗 SECTION 1: NeonDB Tests

Comprehensive testing of all functionality using NeonDB (default connection)

## 1.1 NeonDB Connection Test

In [30]:
print("🧪 NEONDB CONNECTION TEST")
print("=" * 40)

# Test default connection (should connect to NeonDB)
try:
    neon_db = db_connector()  # Using new function name
    
    if neon_db.engine:
        print("✅ NeonDB connection successful!")
        print(f"   Connection type: {neon_db.connection_type}")
        print(f"   Current schema: {neon_db.current_schema}")
        print(f"   Available schemas: {len(neon_db.schemas)}")
    else:
        print("❌ NeonDB connection failed - no engine created")
        
except Exception as e:
    print(f"❌ NeonDB connection error: {e}")
    neon_db = None

🧪 NEONDB CONNECTION TEST
🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🔗 Using default NeonDB connection
✅ NeonDB configuration loaded
   Default schema: test_berlin_data
🔌 Connecting to NeonDB...
✅ Connection successful!
   Database: neondb
   User: neondb_owner

🔍 Auto-discovering database schemas...
✅ Discovered 4 schemas
🎯 Auto-selected default schema: test_berlin_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: NeonDB

🗂️  Discovered 4 schemas:
  📁 dependency_example: 4 tables
       └─ departments (2 columns)
       └─ districts (3 columns)
       └─ employees (4 columns)
       └─ ... and 1 more tables
  📁 nyc_schools: 27 tables
       └─ Audrey_sat_results (10 columns)
       └─ Colleges_Berlin (12 columns)
       └─ Levon_cleaned_sat_scores (8 columns)
       └─ ... and 24 more tables
  📁 public: 15 tables
       └─ audrey_sat_results (10 columns)
       └─ cleaned_sat_results_peter_s (9 columns)
       └─ demo_users (6 columns)
       └─ ... and 12 more

## 1.2 NeonDB Schema Operations Test

In [31]:
if neon_db and neon_db.engine:
    print("🧪 NEONDB SCHEMA OPERATIONS TEST")
    print("=" * 40)
    
    # Test 1: List all schemas
    print("\n1. Testing schemas property:")
    schemas = neon_db.schemas
    print(f"   Available schemas: {schemas}")
    assert isinstance(schemas, list), "schemas should return a list"
    assert len(schemas) > 0, "Should have at least one schema"
    print("   ✅ Schemas property test passed")
    
    # Test 2: Current schema
    print("\n2. Testing current_schema property:")
    current = neon_db.current_schema
    print(f"   Current schema: {current}")
    assert current is not None, "Should have a current schema"
    assert current in schemas, "Current schema should be in available schemas"
    print("   ✅ Current schema test passed")
    
    # Test 3: Schema switching
    print("\n3. Testing schema switching:")
    original_schema = neon_db.current_schema
    
    # Try switching to different schema
    other_schemas = [s for s in schemas if s != original_schema]
    if other_schemas:
        target_schema = other_schemas[0]
        print(f"   Switching from '{original_schema}' to '{target_schema}'...")
        switch_result = neon_db.use(target_schema)
        assert switch_result == True, "Schema switch should return True"
        assert neon_db.current_schema == target_schema, "Current schema should be updated"
        
        # Switch back
        neon_db.use(original_schema)
        assert neon_db.current_schema == original_schema, "Should switch back to original"
        print("   ✅ Schema switching test passed")
    else:
        print("   ⚠️  Only one schema available, skipping switch test")
    
    # Test 4: Invalid schema
    print("\n4. Testing invalid schema handling:")
    invalid_result = neon_db.use('nonexistent_schema_12345')
    assert invalid_result == False, "Invalid schema switch should return False"
    print("   ✅ Invalid schema handling test passed")
    
else:
    print("❌ Skipping schema tests - no NeonDB connection")

🧪 NEONDB SCHEMA OPERATIONS TEST

1. Testing schemas property:
   Available schemas: ['dependency_example', 'nyc_schools', 'public', 'test_berlin_data']
   ✅ Schemas property test passed

2. Testing current_schema property:
   Current schema: test_berlin_data
   ✅ Current schema test passed

3. Testing schema switching:
   Switching from 'test_berlin_data' to 'dependency_example'...
✅ Switched from 'test_berlin_data' to 'dependency_example' (4 tables)
   Tables: departments, districts, employees, neighborhoods
✅ Switched from 'dependency_example' to 'test_berlin_data' (33 tables)
   ✅ Schema switching test passed

4. Testing invalid schema handling:
❌ Schema 'nonexistent_schema_12345' not found. Available: dependency_example, nyc_schools, public, test_berlin_data
   ✅ Invalid schema handling test passed


## 1.3 NeonDB Table Operations Test

In [32]:
if neon_db and neon_db.engine:
    print("🧪 NEONDB TABLE OPERATIONS TEST")
    print("=" * 40)
    
    # Test 1: List tables in current schema
    print("\n1. Testing tables property:")
    tables = neon_db.tables
    print(f"   Tables in '{neon_db.current_schema}': {len(tables)} tables")
    if len(tables) <= 10:
        print(f"   Table names: {tables}")
    else:
        print(f"   First 10 tables: {tables[:10]}")
    assert isinstance(tables, list), "tables should return a list"
    print("   ✅ Tables property test passed")
    
    # Test 2: Get table info for existing table (if available)
    if tables:
        print("\n2. Testing get_table_info method:")
        test_table = tables[0]
        table_info = neon_db.get_table_info(test_table)
        print(f"   Table info for '{test_table}':")
        print(f"   - Schema: {table_info.get('schema', 'N/A')}")
        print(f"   - Columns: {table_info.get('column_count', 0)}")
        if table_info.get('columns'):
            print(f"   - Sample columns: {[col['column_name'] for col in table_info['columns'][:3]]}")
        assert 'schema' in table_info, "Table info should contain schema"
        assert 'column_count' in table_info, "Table info should contain column count"
        print("   ✅ Get table info test passed")
    else:
        print("   ⚠️  No tables available for get_table_info test")
        
else:
    print("❌ Skipping table tests - no NeonDB connection")

🧪 NEONDB TABLE OPERATIONS TEST

1. Testing tables property:
   Tables in 'test_berlin_data': 33 tables
   First 10 tables: ['berlin_pools', 'berlin_venues', 'colleges_berlin', 'crime_statistics', 'districts', 'enhanced_banks_test', 'enhanced_test_table', 'green_spaces', 'hospitals', 'kindergardens']
   ✅ Tables property test passed

2. Testing get_table_info method:
   Table info for 'berlin_pools':
   - Schema: test_berlin_data
   - Columns: 10
   - Sample columns: ['pool_id', 'name', 'pool_type']
   ✅ Get table info test passed


## 1.4 NeonDB Query Operations Test

In [33]:
if neon_db and neon_db.engine:
    print("🧪 NEONDB QUERY OPERATIONS TEST")
    print("=" * 40)
    
    # Test 1: Basic query
    print("\n1. Testing basic query:")
    try:
        basic_query = "SELECT current_database(), current_user, current_schema()"
        result = neon_db.query(basic_query, show_info=False)
        print(f"   Query result shape: {result.shape}")
        print("   Query results:")
        display(result)
        assert isinstance(result, pd.DataFrame), "Query should return DataFrame"
        assert len(result) > 0, "Query should return at least one row"
        print("   ✅ Basic query test passed")
    except Exception as e:
        print(f"   ❌ Basic query failed: {e}")
    
    # Test 2: Query with specific schema
    print("\n2. Testing query with schema specification:")
    try:
        schema_query = "SELECT 1 as test_value, 'schema_test' as test_name"
        result = neon_db.query(schema_query, schema='public', show_info=False)
        print(f"   Schema-specific query result: {result.shape}")
        display(result)
        assert isinstance(result, pd.DataFrame), "Schema query should return DataFrame"
        print("   ✅ Schema-specific query test passed")
    except Exception as e:
        print(f"   ❌ Schema query failed: {e}")
    
    # Test 3: Query existing table (if available)
    if neon_db.tables:
        print("\n3. Testing query on existing table:")
        try:
            test_table = neon_db.tables[0]
            table_query = f"SELECT * FROM {test_table} LIMIT 3"
            result = neon_db.query(table_query, show_info=False)
            print(f"   Table query result: {result.shape}")
            if not result.empty:
                display(result.head())
            print("   ✅ Table query test passed")
        except Exception as e:
            print(f"   ⚠️  Table query note: {e} (table might be empty or restricted)")
    
else:
    print("❌ Skipping query tests - no NeonDB connection")

🧪 NEONDB QUERY OPERATIONS TEST

1. Testing basic query:
   Query result shape: (1, 3)
   Query results:


Unnamed: 0,current_database,current_user,current_schema
0,neondb,neondb_owner,test_berlin_data


   ✅ Basic query test passed

2. Testing query with schema specification:
   Schema-specific query result: (1, 2)


Unnamed: 0,test_value,test_name
0,1,schema_test


   ✅ Schema-specific query test passed

3. Testing query on existing table:
   Table query result: (3, 10)


Unnamed: 0,pool_id,name,pool_type,street,postal_code,latitude,longitude,open_all_year,district,district_id
0,472,Strandbad Lübars,Naturbad,Am Freibad 9,13469,52.61824,13.33519,False,Reinickendorf,12
1,473,Kleine Schwimmhalle Wuhlheide,Hallenbad,An der Wuhlheide 161,12459,52.45993,13.53965,True,Treptow-Köpenick,9
2,474,Kombibad Mariendorf,Kombibad,Ankogelweg 95,12107,52.41972,13.40154,True,Tempelhof-Schöneberg,7


   ✅ Table query test passed


## 1.5 NeonDB Data Population Test

In [34]:
if neon_db and neon_db.engine:
    print("🧪 NEONDB DATA POPULATION TEST")
    print("=" * 40)
    
    # Test 1: Insert test data using insert method
    print("\n1. Testing insert method:")
    try:
        test_table_name = 'neon_test_products_v3'
        insert_result = neon_db.insert(
            df=test_data['products'], 
            table_name=test_table_name, 
            if_exists='replace'
        )
        print(f"   Insert result: {insert_result}")
        assert insert_result['status'] == 'success', "Insert should be successful"
        assert insert_result['rows_inserted'] == len(test_data['products']), "All rows should be inserted"
        print("   ✅ Insert method test passed")
        
        # Verify the inserted data
        print("\n   Verifying inserted data:")
        verify_query = f"SELECT * FROM {test_table_name} ORDER BY product_id"
        verification_result = neon_db.query(verify_query, show_info=False)
        print(f"   Verification query result: {verification_result.shape}")
        display(verification_result.head())
        assert len(verification_result) == len(test_data['products']), "Inserted data should match original"
        print("   ✅ Data verification passed")
        
        # Check if table exists in schema tables list
        print("\n   Checking table exists in schema:")
        current_tables = neon_db.tables
        assert test_table_name in current_tables, f"Table {test_table_name} should be in tables list"
        print(f"   ✅ Table {test_table_name} found in schema tables list")
        
    except Exception as e:
        print(f"   ❌ Insert method failed: {e}")
    
    # Test 2: Populate method (simplified interface)
    print("\n2. Testing populate method:")
    try:
        sales_table_name = 'neon_test_sales_v3'
        populate_result = neon_db.populate(
            df=test_data['sales'],
            table_name=sales_table_name
        )
        print(f"   Populate result: {populate_result}")
        assert populate_result['status'] == 'success', "Populate should be successful"
        print("   ✅ Populate method test passed")
        
        # Query the populated data
        print("\n   Querying populated sales data:")
        sales_query = f"SELECT COUNT(*) as total_sales, AVG(sale_amount) as avg_amount FROM {sales_table_name}"
        sales_summary = neon_db.query(sales_query, show_info=False)
        print("   Sales summary:")
        display(sales_summary)
        print("   ✅ Sales data query passed")
        
        # Verify table was added to tables list
        print("\n   Verifying table in tables list:")
        updated_tables = neon_db.tables
        assert sales_table_name in updated_tables, f"Table {sales_table_name} should be in tables list"
        print(f"   ✅ Table {sales_table_name} confirmed in schema")
        
    except Exception as e:
        print(f"   ❌ Populate method failed: {e}")
    
    # Test 3: Insert with append mode
    print("\n3. Testing insert with append mode:")
    try:
        # Create additional customers data
        additional_customers = pd.DataFrame({
            'customer_id': range(200, 205),
            'customer_name': [f'Additional_Customer_{i}' for i in range(200, 205)],
            'email': [f'additional{i}@test.com' for i in range(200, 205)],
            'registration_date': pd.date_range('2024-01-01', periods=5, freq='D'),
            'is_active': [True] * 5
        })
        
        customers_table = 'neon_test_customers_v3'
        
        # First insert
        first_insert = neon_db.insert(
            df=test_data['customers'],
            table_name=customers_table,
            if_exists='replace'
        )
        
        # Append additional data
        append_insert = neon_db.insert(
            df=additional_customers,
            table_name=customers_table,
            if_exists='append'
        )
        
        print(f"   First insert: {first_insert['rows_inserted']} rows")
        print(f"   Append insert: {append_insert['rows_inserted']} rows")
        
        # Verify total count
        count_query = f"SELECT COUNT(*) as total_customers FROM {customers_table}"
        count_result = neon_db.query(count_query, show_info=False)
        total_customers = count_result.iloc[0]['total_customers']
        expected_total = len(test_data['customers']) + len(additional_customers)
        
        print(f"   Total customers: {total_customers}, Expected: {expected_total}")
        assert total_customers == expected_total, "Append should add to existing data"
        
        # Final table existence check
        print("\n   Final table verification:")
        all_tables = neon_db.tables
        created_tables = [customers_table, sales_table_name, test_table_name]
        for table in created_tables:
            if table in all_tables:
                print(f"   ✅ {table} exists in schema")
            else:
                print(f"   ❌ {table} missing from schema tables list")
        
        print("   ✅ Append mode test passed")
        
    except Exception as e:
        print(f"   ❌ Append mode failed: {e}")
        
else:
    print("❌ Skipping data population tests - no NeonDB connection")

🧪 NEONDB DATA POPULATION TEST

1. Testing insert method:
📝 Inserting 10 rows × 6 columns
   Target: test_berlin_data.neon_test_products_v3
   Action: replace
✅ Insert completed successfully
   Insert result: {'status': 'success', 'rows_inserted': 10, 'table': 'test_berlin_data.neon_test_products_v3', 'schema': 'test_berlin_data'}
   ✅ Insert method test passed

   Verifying inserted data:
   Verification query result: (10, 6)


Unnamed: 0,product_id,product_name,category,price,in_stock,created_date
0,1,Product_1,Electronics,354.72,True,2024-01-01
1,2,Product_2,Books,274.12,False,2024-01-02
2,3,Product_3,Clothing,179.86,True,2024-01-03
3,4,Product_4,Home,292.4,False,2024-01-04
4,5,Product_5,Sports,175.02,True,2024-01-05


   ✅ Data verification passed

   Checking table exists in schema:
   ✅ Table neon_test_products_v3 found in schema tables list

2. Testing populate method:

🗂️  SCHEMA SELECTION
   Available schemas: ['dependency_example', 'nyc_schools', 'public', 'test_berlin_data']
   Current schema: test_berlin_data

📊 SMART POPULATE - PRE-POPULATION ANALYSIS
🎯 Target: test_berlin_data.neon_test_sales_v3
📝 Mode: REPLACE
🔗 Connection: ConnectionType.NEON_DB

📋 DATASET ANALYSIS:
   Rows: 20
   Columns: 6
   Memory usage: 0.00 MB

🔍 COLUMN ANALYSIS:
   sale_id: int64 | Nulls: 0 (0.0%) | Unique: 20
   product_id: int64 | Nulls: 0 (0.0%) | Unique: 10
   quantity: int64 | Nulls: 0 (0.0%) | Unique: 8
   sale_amount: float64 | Nulls: 0 (0.0%) | Unique: 20
   customer_id: int64 | Nulls: 0 (0.0%) | Unique: 19
   sale_date: datetime64[ns] | Nulls: 0 (0.0%) | Unique: 20

✅ DATA QUALITY CHECKS:
   Total null values: 0
   Duplicate rows: 0

🏗️  TABLE STATUS:
   Table exists: Yes
   Current rows: 20
   Final rows

Unnamed: 0,total_sales,avg_amount
0,20,580.581


   ✅ Sales data query passed

   Verifying table in tables list:
   ✅ Table neon_test_sales_v3 confirmed in schema

3. Testing insert with append mode:
📝 Inserting 10 rows × 5 columns
   Target: test_berlin_data.neon_test_customers_v3
   Action: replace
✅ Insert completed successfully
📝 Inserting 5 rows × 5 columns
   Target: test_berlin_data.neon_test_customers_v3
   Action: append
✅ Insert completed successfully
   First insert: 10 rows
   Append insert: 5 rows
   Total customers: 15, Expected: 15

   Final table verification:
   ✅ neon_test_customers_v3 exists in schema
   ✅ neon_test_sales_v3 exists in schema
   ✅ neon_test_products_v3 exists in schema
   ✅ Append mode test passed


## 1.6 NeonDB Complex Query Test

In [35]:
if neon_db and neon_db.engine:
    print("🧪 NEONDB COMPLEX QUERY TEST")
    print("=" * 40)
    
    try:
        # Complex query joining our test tables
        print("\n1. Testing complex JOIN query:")
        complex_query = """
        SELECT 
            p.product_name,
            p.category,
            p.price,
            COUNT(s.sale_id) as total_sales,
            SUM(s.sale_amount) as total_revenue,
            AVG(s.quantity) as avg_quantity
        FROM neon_test_products_v3 p
        LEFT JOIN neon_test_sales_v3 s ON p.product_id = s.product_id
        GROUP BY p.product_id, p.product_name, p.category, p.price
        ORDER BY total_revenue DESC NULLS LAST
        LIMIT 5
        """
        
        join_result = neon_db.query(complex_query, show_info=False)
        print("   Complex query results:")
        display(join_result)
        assert isinstance(join_result, pd.DataFrame), "Complex query should return DataFrame"
        print("   ✅ Complex JOIN query test passed")
        
        # Test aggregation query
        print("\n2. Testing aggregation query:")
        agg_query = """
        SELECT 
            category,
            COUNT(*) as product_count,
            AVG(price) as avg_price,
            MIN(price) as min_price,
            MAX(price) as max_price
        FROM neon_test_products_v3
        GROUP BY category
        ORDER BY avg_price DESC
        """
        
        agg_result = neon_db.query(agg_query, show_info=False)
        print("   Aggregation query results:")
        display(agg_result)
        print("   ✅ Aggregation query test passed")
        
        # Test date-based query
        print("\n3. Testing date-based query:")
        date_query = """
        SELECT 
            DATE(sale_date) as sale_day,
            COUNT(*) as daily_sales,
            SUM(sale_amount) as daily_revenue
        FROM neon_test_sales_v3
        GROUP BY DATE(sale_date)
        ORDER BY sale_day
        LIMIT 10
        """
        
        date_result = neon_db.query(date_query, show_info=False)
        print("   Date-based query results:")
        display(date_result)
        print("   ✅ Date-based query test passed")
        
    except Exception as e:
        print(f"   ⚠️  Complex query note: {e} (may need test tables from previous steps)")
        
else:
    print("❌ Skipping complex query tests - no NeonDB connection")

🧪 NEONDB COMPLEX QUERY TEST

1. Testing complex JOIN query:
   Complex query results:


Unnamed: 0,product_name,category,price,total_sales,total_revenue,avg_quantity
0,Product_2,Books,274.12,4,2808.56,2.75
1,Product_6,Electronics,191.98,3,1751.17,4.333333333333333
2,Product_8,Clothing,321.94,3,1476.3,5.333333333333333
3,Product_5,Sports,175.02,3,1178.53,5.333333333333333
4,Product_3,Clothing,179.86,1,947.32,2.0


   ✅ Complex JOIN query test passed

2. Testing aggregation query:
   Aggregation query results:


Unnamed: 0,category,product_count,avg_price,min_price,max_price
0,Sports,2,326.185,175.02,477.35
1,Electronics,2,273.35,191.98,354.72
2,Books,2,251.1,228.08,274.12
3,Clothing,2,250.9,179.86,321.94
4,Home,2,201.585,110.77,292.4


   ✅ Aggregation query test passed

3. Testing date-based query:
   Date-based query results:


Unnamed: 0,sale_day,daily_sales,daily_revenue
0,2024-01-01,20,11611.62


   ✅ Date-based query test passed


## 1.7 NeonDB Health Check Test

In [36]:
if neon_db and neon_db.engine:
    print("🧪 NEONDB HEALTH CHECK TEST")
    print("=" * 40)
    
    try:
        print("\n1. Testing health_check method:")
        health_status = neon_db.health_check()
        
        print("   Health check results:")
        for key, value in health_status.items():
            print(f"   - {key}: {value}")
        
        # Assertions for health check
        assert 'status' in health_status, "Health check should include status"
        assert health_status['status'] in ['healthy', 'unhealthy'], "Status should be healthy or unhealthy"
        assert 'connection_type' in health_status, "Health check should include connection type"
        assert 'schemas_available' in health_status, "Health check should include schema count"
        
        if health_status['status'] == 'healthy':
            print("   ✅ NeonDB connection is healthy")
        else:
            print(f"   ⚠️  NeonDB connection issue: {health_status.get('error', 'Unknown')}")
        
        print("   ✅ Health check test passed")
        
    except Exception as e:
        print(f"   ❌ Health check failed: {e}")
        
else:
    print("❌ Skipping health check test - no NeonDB connection")

🧪 NEONDB HEALTH CHECK TEST

1. Testing health_check method:
   Health check results:
   - status: healthy
   - connection_type: NeonDB
   - database: neondb
   - version: PostgreSQL 17.5 on aarch64-unknown-linux-gnu, comp...
   - schemas_available: 4
   - current_schema: test_berlin_data
   - tables_in_current_schema: 33
   - connection: active
   ✅ NeonDB connection is healthy
   ✅ Health check test passed


## 1.8 Enhanced Smart Populate Test

Test the new intelligent populate function with reporting and upsert capabilities

In [37]:
if neon_db and neon_db.engine:
    print("🧪 ENHANCED SMART POPULATE TEST")
    print("=" * 45)
    
    # Create sample banks data similar to your example
    banks_data = pd.DataFrame({
        'bank_id': ['B001', 'B002', 'B003', 'B004', 'B005'],
        'district_id': ['01', '02', '01', '03', '02'],
        'name': ['Berlin Central Bank', 'Mitte Bank', 'Kreuzberg Finance', 'Tech Bank', 'Student Bank'],
        'address': ['Unter den Linden 1', 'Friedrichstraße 50', 'Kreuzberg Str. 100', 'Prenzlauer Berg 25', 'Friedrichshain 75'],
        'postal_code': ['10117', '10117', '10961', '10405', '10243'],
        'phone_number': ['+49-30-123-456', '+49-30-789-012', '+49-30-345-678', '+49-30-901-234', '+49-30-567-890'],
        'coordinates': ['52.5170,13.3888', '52.5200,13.3850', '52.4990,13.4180', '52.5320,13.4210', '52.5150,13.4530'],
        'latitude': [52.5170, 52.5200, 52.4990, 52.5320, 52.5150],
        'longitude': [13.3888, 13.3850, 13.4180, 13.4210, 13.4530],
        'neighborhood': ['Mitte', 'Mitte', 'Kreuzberg', 'Prenzlauer Berg', 'Friedrichshain'],
        'district': ['Mitte', 'Mitte', 'Friedrichshain-Kreuzberg', 'Pankow', 'Friedrichshain-Kreuzberg']
    })
    
    print("\\n1. Testing basic enhanced populate (with full reporting):")
    try:
        # Test basic populate with enhanced reporting
        result1 = neon_db.populate(
            df=banks_data,
            table_name='enhanced_banks_test',
            mode='replace',
            show_report=True
        )
        print(f"   ✅ Enhanced populate result: {result1['status']}")
        
    except Exception as e:
        print(f"   ❌ Enhanced populate failed: {e}")
    
    # Test upsert functionality
    print("\\n2. Testing upsert mode with primary key:")
    try:
        # Create updated data with same IDs but changed values
        updated_banks = banks_data.copy()
        updated_banks.loc[0, 'name'] = 'Berlin Central Bank - Updated'
        updated_banks.loc[1, 'phone_number'] = '+49-30-NEW-PHONE'
        
        # Add a new bank
        new_bank = pd.DataFrame({
            'bank_id': ['B006'],
            'district_id': ['03'],
            'name': ['New Berlin Bank'],
            'address': ['New Address 123'],
            'postal_code': ['10115'],
            'phone_number': ['+49-30-NEW-BANK'],
            'coordinates': ['52.5250,13.3900'],
            'latitude': [52.5250],
            'longitude': [13.3900],
            'neighborhood': ['Mitte'],
            'district': ['Mitte']
        })
        
        upsert_data = pd.concat([updated_banks, new_bank], ignore_index=True)
        
        result2 = neon_db.populate(
            df=upsert_data,
            table_name='enhanced_banks_test',
            mode='upsert',
            primary_key=['bank_id'],
            show_report=True
        )
        print(f"   ✅ Upsert result: {result2['status']}")
        
        # Verify upsert worked
        verify_query = "SELECT bank_id, name, phone_number FROM enhanced_banks_test ORDER BY bank_id"
        verification = neon_db.query(verify_query, show_info=False)
        print("\\n   Upsert verification:")
        display(verification)
        
    except Exception as e:
        print(f"   ❌ Upsert test failed: {e}")
    
    # Test append mode
    print("\\n3. Testing append mode:")
    try:
        additional_banks = pd.DataFrame({
            'bank_id': ['B007', 'B008'],
            'district_id': ['04', '05'],
            'name': ['Append Bank 1', 'Append Bank 2'],
            'address': ['Append St. 1', 'Append St. 2'],
            'postal_code': ['10001', '10002'],
            'phone_number': ['+49-30-APP-001', '+49-30-APP-002'],
            'coordinates': ['52.5000,13.4000', '52.5100,13.4100'],
            'latitude': [52.5000, 52.5100],
            'longitude': [13.4000, 13.4100],
            'neighborhood': ['Test Area 1', 'Test Area 2'],
            'district': ['Test District 1', 'Test District 2']
        })
        
        result3 = neon_db.populate(
            df=additional_banks,
            table_name='enhanced_banks_test',
            mode='append',
            show_report=True
        )
        print(f"   ✅ Append result: {result3['status']}")
        
        # Final count verification
        count_query = "SELECT COUNT(*) as total_banks FROM enhanced_banks_test"
        final_count = neon_db.query(count_query, show_info=False)
        print(f"\\n   Final bank count: {final_count.iloc[0]['total_banks']}")
        
    except Exception as e:
        print(f"   ❌ Append test failed: {e}")
    
    # Cleanup
    print("\\n4. Cleanup enhanced test table:")
    try:
        cleanup_query = "DROP TABLE IF EXISTS enhanced_banks_test"
        neon_db.query(cleanup_query, show_info=False)
        print("   ✅ Enhanced test table cleaned up")
    except Exception as e:
        print(f"   ⚠️  Cleanup warning: {e}")
        
else:
    print("❌ Skipping enhanced populate tests - no NeonDB connection")

🧪 ENHANCED SMART POPULATE TEST
\n1. Testing basic enhanced populate (with full reporting):

🗂️  SCHEMA SELECTION
   Available schemas: ['dependency_example', 'nyc_schools', 'public', 'test_berlin_data']
   Current schema: test_berlin_data

📊 SMART POPULATE - PRE-POPULATION ANALYSIS
🎯 Target: test_berlin_data.enhanced_banks_test
📝 Mode: REPLACE
🔗 Connection: ConnectionType.NEON_DB

📋 DATASET ANALYSIS:
   Rows: 5
   Columns: 11
   Memory usage: 0.00 MB

🔍 COLUMN ANALYSIS:
   bank_id: object | Nulls: 0 (0.0%) | Unique: 5
   district_id: object | Nulls: 0 (0.0%) | Unique: 3
   name: object | Nulls: 0 (0.0%) | Unique: 5
   address: object | Nulls: 0 (0.0%) | Unique: 5
   postal_code: object | Nulls: 0 (0.0%) | Unique: 4
   phone_number: object | Nulls: 0 (0.0%) | Unique: 5
   coordinates: object | Nulls: 0 (0.0%) | Unique: 5
   latitude: float64 | Nulls: 0 (0.0%) | Unique: 5
   longitude: float64 | Nulls: 0 (0.0%) | Unique: 5
   neighborhood: object | Nulls: 0 (0.0%) | Unique: 4
   district

Unnamed: 0,bank_id,name,phone_number
0,B001,Berlin Central Bank,+49-30-123-456
1,B002,Mitte Bank,+49-30-789-012
2,B003,Kreuzberg Finance,+49-30-345-678
3,B004,Tech Bank,+49-30-901-234
4,B005,Student Bank,+49-30-567-890


\n3. Testing append mode:

🗂️  SCHEMA SELECTION
   Available schemas: ['dependency_example', 'nyc_schools', 'public', 'test_berlin_data']
   Current schema: test_berlin_data

📊 SMART POPULATE - PRE-POPULATION ANALYSIS
🎯 Target: test_berlin_data.enhanced_banks_test
📝 Mode: APPEND
🔗 Connection: ConnectionType.NEON_DB

📋 DATASET ANALYSIS:
   Rows: 2
   Columns: 11
   Memory usage: 0.00 MB

🔍 COLUMN ANALYSIS:
   bank_id: object | Nulls: 0 (0.0%) | Unique: 2
   district_id: object | Nulls: 0 (0.0%) | Unique: 2
   name: object | Nulls: 0 (0.0%) | Unique: 2
   address: object | Nulls: 0 (0.0%) | Unique: 2
   postal_code: object | Nulls: 0 (0.0%) | Unique: 2
   phone_number: object | Nulls: 0 (0.0%) | Unique: 2
   coordinates: object | Nulls: 0 (0.0%) | Unique: 2
   latitude: float64 | Nulls: 0 (0.0%) | Unique: 2
   longitude: float64 | Nulls: 0 (0.0%) | Unique: 2
   neighborhood: object | Nulls: 0 (0.0%) | Unique: 2
   district: object | Nulls: 0 (0.0%) | Unique: 2

✅ DATA QUALITY CHECKS:
   

## 1.9 NeonDB Cleanup

In [93]:
if neon_db and neon_db.engine:
    print("🧹 NEONDB CLEANUP")
    print("=" * 20)
    
    # Clean up test tables
    test_tables = [
        'neon_test_products_v3',
        'neon_test_sales_v3', 
        'neon_test_customers_v3'
    ]
    
    for table in test_tables:
        try:
            cleanup_query = f"DROP TABLE IF EXISTS {table}"
            neon_db.query(cleanup_query, show_info=False)
            print(f"   ✅ Dropped table: {table}")
        except Exception as e:
            print(f"   ⚠️  Could not drop {table}: {e}")
    
    # Close connection
    neon_db.close()
    print("   ✅ NeonDB connection closed")
    
print("\n🎉 NEONDB TESTS COMPLETED")
print("=" * 30)

🧹 NEONDB CLEANUP
   ✅ Dropped table: neon_test_products_v3
   ✅ Dropped table: neon_test_sales_v3
   ✅ Dropped table: neon_test_customers_v3
🔒 Database connection closed
   ✅ NeonDB connection closed

🎉 NEONDB TESTS COMPLETED


# 🚇 SECTION 2: AWS LayeredDB Tests

Comprehensive testing of all functionality using AWS LayeredDB (tunnel connection)

## 2.1 AWS LayeredDB Connection Test

In [38]:
print("🧪 AWS LAYEREDDB CONNECTION TEST")
print("=" * 40)

# Note: This will require manual input or pre-configured credentials
print("⚠️  AWS LayeredDB connection requires:")
print("   1. SSH tunnel running on localhost:5433")
print("   2. Valid database credentials")
print("   3. Run: ./connect-db.sh before this test\n")

# Attempt AWS connection
try:
    # Method 1: Auto-prompt for credentials
    print("🔐 Attempting AWS LayeredDB connection...")
    print("   (Will prompt for credentials if not provided)")
    
    # You can uncomment and modify this line to test with specific credentials:
    # aws_db = db_connector(database='layereddb', username='your_username', password='your_password')
    
    # For interactive testing:
    aws_db = db_connector(database='layereddb')  # Will prompt for credentials
    
    if aws_db.engine:
        print("✅ AWS LayeredDB connection successful!")
        print(f"   Connection type: {aws_db.connection_type}")
        print(f"   Current schema: {aws_db.current_schema}")
        print(f"   Available schemas: {len(aws_db.schemas)}")
        
        if aws_db.tunnel_manager:
            tunnel_status = aws_db.tunnel_manager.get_tunnel_status()
            print(f"   Tunnel status: {tunnel_status.status}")  # Access as attribute, not dict
    else:
        print("❌ AWS LayeredDB connection failed - no engine created")
        aws_db = None
        
except Exception as e:
    print(f"❌ AWS LayeredDB connection error: {e}")
    print("💡 This is expected if:")
    print("   - SSH tunnel is not running")
    print("   - Credentials are not provided")
    print("   - Database is not accessible")
    aws_db = None

# Alternative: Skip AWS tests if connection fails
if not aws_db:
    print("\n⚠️  AWS LayeredDB connection not available.")
    print("   Remaining AWS tests will be skipped.")
    print("   To test AWS functionality:")
    print("   1. Start SSH tunnel: ./connect-db.sh")
    print("   2. Restart this notebook section")

🧪 AWS LAYEREDDB CONNECTION TEST
⚠️  AWS LayeredDB connection requires:
   1. SSH tunnel running on localhost:5433
   2. Valid database credentials
   3. Run: ./connect-db.sh before this test

🔐 Attempting AWS LayeredDB connection...
   (Will prompt for credentials if not provided)
🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🚇 AWS LayeredDB connection requested

🔐 AWS LayeredDB CONNECTION SETUP
----------------------------------------
Please provide your AWS database credentials:
🚇 Tunnel Status: Connected
✅ AWS LayeredDB configuration loaded
   Tunnel: Tunnel is active on localhost:5433
🔌 Connecting to AWS LayeredDB...
✅ Connection successful!
   Database: layereddb
   User: svitlana_kovalivska

🔍 Auto-discovering database schemas...
✅ Discovered 2 schemas
🎯 Auto-selected default schema: berlin_source_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: AWS LayeredDB
🚇 Tunnel Status: Connected (localhost:5433)

🗂️  Discovered 2 schemas:
  🎯 [CURRENT] berlin_source_

## 2.2 AWS LayeredDB Schema Operations Test

In [39]:
if aws_db and aws_db.engine:
    print("🧪 AWS LAYEREDDB SCHEMA OPERATIONS TEST")
    print("=" * 40)
    
    # Test 1: List all schemas
    print("\n1. Testing schemas property:")
    aws_schemas = aws_db.schemas
    print(f"   Available schemas: {aws_schemas}")
    assert isinstance(aws_schemas, list), "schemas should return a list"
    assert len(aws_schemas) > 0, "Should have at least one schema"
    print("   ✅ AWS schemas property test passed")
    
    # Test 2: Current schema
    print("\n2. Testing current_schema property:")
    aws_current = aws_db.current_schema
    print(f"   Current schema: {aws_current}")
    assert aws_current is not None, "Should have a current schema"
    assert aws_current in aws_schemas, "Current schema should be in available schemas"
    print("   ✅ AWS current schema test passed")
    
    # Test 3: Schema switching
    print("\n3. Testing schema switching:")
    original_aws_schema = aws_db.current_schema
    
    # Try switching to different schema
    other_aws_schemas = [s for s in aws_schemas if s != original_aws_schema]
    if other_aws_schemas:
        target_aws_schema = other_aws_schemas[0]
        print(f"   Switching from '{original_aws_schema}' to '{target_aws_schema}'...")
        aws_switch_result = aws_db.use(target_aws_schema)
        assert aws_switch_result == True, "AWS schema switch should return True"
        assert aws_db.current_schema == target_aws_schema, "AWS current schema should be updated"
        
        # Switch back
        aws_db.use(original_aws_schema)
        assert aws_db.current_schema == original_aws_schema, "Should switch back to original"
        print("   ✅ AWS schema switching test passed")
    else:
        print("   ⚠️  Only one schema available, skipping switch test")
    
    # Test 4: Invalid schema
    print("\n4. Testing invalid schema handling:")
    aws_invalid_result = aws_db.use('nonexistent_aws_schema_12345')
    assert aws_invalid_result == False, "Invalid schema switch should return False"
    print("   ✅ AWS invalid schema handling test passed")
    
else:
    print("❌ Skipping AWS schema tests - no AWS LayeredDB connection")

🧪 AWS LAYEREDDB SCHEMA OPERATIONS TEST

1. Testing schemas property:
   Available schemas: ['berlin_source_data', 'public']
   ✅ AWS schemas property test passed

2. Testing current_schema property:
   Current schema: berlin_source_data
   ✅ AWS current schema test passed

3. Testing schema switching:
   Switching from 'berlin_source_data' to 'public'...
✅ Switched from 'berlin_source_data' to 'public' (22 tables)
✅ Switched from 'public' to 'berlin_source_data' (11 tables)
   ✅ AWS schema switching test passed

4. Testing invalid schema handling:
❌ Schema 'nonexistent_aws_schema_12345' not found. Available: berlin_source_data, public
   ✅ AWS invalid schema handling test passed


## 2.3 AWS LayeredDB Table Operations Test

In [40]:
if aws_db and aws_db.engine:
    print("🧪 AWS LAYEREDDB TABLE OPERATIONS TEST")
    print("=" * 40)
    
    # Test 1: List tables in current schema
    print("\n1. Testing tables property:")
    aws_tables = aws_db.tables
    print(f"   Tables in '{aws_db.current_schema}': {len(aws_tables)} tables")
    if len(aws_tables) <= 10:
        print(f"   Table names: {aws_tables}")
    else:
        print(f"   First 10 tables: {aws_tables[:10]}")
    assert isinstance(aws_tables, list), "tables should return a list"
    print("   ✅ AWS tables property test passed")
    
    # Test 2: Get table info for existing table (if available)
    if aws_tables:
        print("\n2. Testing get_table_info method:")
        aws_test_table = aws_tables[0]
        aws_table_info = aws_db.get_table_info(aws_test_table)
        print(f"   Table info for '{aws_test_table}':")
        print(f"   - Schema: {aws_table_info.get('schema', 'N/A')}")
        print(f"   - Columns: {aws_table_info.get('column_count', 0)}")
        if aws_table_info.get('columns'):
            print(f"   - Sample columns: {[col['column_name'] for col in aws_table_info['columns'][:3]]}")
        assert 'schema' in aws_table_info, "Table info should contain schema"
        assert 'column_count' in aws_table_info, "Table info should contain column count"
        print("   ✅ AWS get table info test passed")
    else:
        print("   ⚠️  No tables available for get_table_info test")
        
else:
    print("❌ Skipping AWS table tests - no AWS LayeredDB connection")

🧪 AWS LAYEREDDB TABLE OPERATIONS TEST

1. Testing tables property:
   Tables in 'berlin_source_data': 11 tables
   First 10 tables: ['aws_test_customers_v3', 'aws_test_products_v3', 'aws_test_sales_v3', 'berlin_venues', 'crime_statistics', 'districts', 'districts_pop_stat', 'neighborhoods', 'test_table_clara_neagu', 'test_table_peter_scheinsohn']
   ✅ AWS tables property test passed

2. Testing get_table_info method:
   Table info for 'aws_test_customers_v3':
   - Schema: berlin_source_data
   - Columns: 5
   - Sample columns: ['customer_id', 'customer_name', 'email']
   ✅ AWS get table info test passed


## 2.4 AWS LayeredDB Query Operations Test

In [41]:
if aws_db and aws_db.engine:
    print("🧪 AWS LAYEREDDB QUERY OPERATIONS TEST")
    print("=" * 40)
    
    # Test 1: Basic query
    print("\n1. Testing basic query:")
    try:
        aws_basic_query = "SELECT current_database(), current_user, current_schema(), version()"
        aws_result = aws_db.query(aws_basic_query, show_info=False)
        print(f"   Query result shape: {aws_result.shape}")
        print("   Query results:")
        display(aws_result)
        assert isinstance(aws_result, pd.DataFrame), "Query should return DataFrame"
        assert len(aws_result) > 0, "Query should return at least one row"
        print("   ✅ AWS basic query test passed")
    except Exception as e:
        print(f"   ❌ AWS basic query failed: {e}")
    
    # Test 2: Query with specific schema
    print("\n2. Testing query with schema specification:")
    try:
        aws_schema_query = "SELECT 1 as test_value, 'aws_schema_test' as test_name, NOW() as current_time"
        aws_schema_result = aws_db.query(aws_schema_query, schema='public', show_info=False)
        print(f"   Schema-specific query result: {aws_schema_result.shape}")
        display(aws_schema_result)
        assert isinstance(aws_schema_result, pd.DataFrame), "Schema query should return DataFrame"
        print("   ✅ AWS schema-specific query test passed")
    except Exception as e:
        print(f"   ❌ AWS schema query failed: {e}")
    
    # Test 3: Query existing table (if available)
    if aws_db.tables:
        print("\n3. Testing query on existing table:")
        try:
            aws_table_test = aws_db.tables[0]
            aws_table_query = f"SELECT * FROM {aws_table_test} LIMIT 5"
            aws_table_result = aws_db.query(aws_table_query, show_info=False)
            print(f"   Table query result: {aws_table_result.shape}")
            if not aws_table_result.empty:
                display(aws_table_result.head())
            print("   ✅ AWS table query test passed")
        except Exception as e:
            print(f"   ⚠️  AWS table query note: {e} (table might be empty or restricted)")
    
else:
    print("❌ Skipping AWS query tests - no AWS LayeredDB connection")

🧪 AWS LAYEREDDB QUERY OPERATIONS TEST

1. Testing basic query:
   Query result shape: (1, 4)
   Query results:


Unnamed: 0,current_database,current_user,current_schema,version
0,layereddb,svitlana_kovalivska,berlin_source_data,"PostgreSQL 16.9 on x86_64-pc-linux-gnu, compil..."


   ✅ AWS basic query test passed

2. Testing query with schema specification:
   Schema-specific query result: (1, 3)


Unnamed: 0,test_value,test_name,current_time
0,1,aws_schema_test,2025-08-21 16:43:32.670643+00:00


   ✅ AWS schema-specific query test passed

3. Testing query on existing table:
   Table query result: (5, 5)


Unnamed: 0,customer_id,customer_name,email,registration_date,is_active
0,100,Customer_100,customer100@test.com,2023-01-01,True
1,101,Customer_101,customer101@test.com,2023-01-08,True
2,102,Customer_102,customer102@test.com,2023-01-15,True
3,103,Customer_103,customer103@test.com,2023-01-22,True
4,104,Customer_104,customer104@test.com,2023-01-29,True


   ✅ AWS table query test passed


## 2.5 AWS LayeredDB Data Population Test

In [42]:
if aws_db and aws_db.engine:
    print("🧪 AWS LAYEREDDB DATA POPULATION TEST")
    print("=" * 40)
    
    # Test 1: Insert test data using insert method
    print("\n1. Testing insert method:")
    try:
        aws_test_table_name = 'aws_test_products_v3'
        aws_insert_result = aws_db.insert(
            df=test_data['products'], 
            table_name=aws_test_table_name, 
            if_exists='replace'
        )
        print(f"   Insert result: {aws_insert_result}")
        assert aws_insert_result['status'] == 'success', "Insert should be successful"
        assert aws_insert_result['rows_inserted'] == len(test_data['products']), "All rows should be inserted"
        print("   ✅ AWS insert method test passed")
        
        # Verify the inserted data
        print("\n   Verifying inserted data:")
        aws_verify_query = f"SELECT * FROM {aws_test_table_name} ORDER BY product_id"
        aws_verification_result = aws_db.query(aws_verify_query, show_info=False)
        print(f"   Verification query result: {aws_verification_result.shape}")
        display(aws_verification_result.head())
        assert len(aws_verification_result) == len(test_data['products']), "Inserted data should match original"
        print("   ✅ AWS data verification passed")
        
        # Check if table exists in AWS schema tables list
        print("\n   Checking table exists in AWS schema:")
        aws_current_tables = aws_db.tables
        assert aws_test_table_name in aws_current_tables, f"Table {aws_test_table_name} should be in AWS tables list"
        print(f"   ✅ Table {aws_test_table_name} found in AWS schema tables list ({aws_db.current_schema})")
        
    except Exception as e:
        print(f"   ❌ AWS insert method failed: {e}")
    
    # Test 2: Populate method (simplified interface)
    print("\n2. Testing populate method:")
    try:
        aws_sales_table_name = 'aws_test_sales_v3'
        aws_populate_result = aws_db.populate(
            df=test_data['sales'],
            table_name=aws_sales_table_name
        )
        print(f"   Populate result: {aws_populate_result}")
        assert aws_populate_result['status'] == 'success', "Populate should be successful"
        print("   ✅ AWS populate method test passed")
        
        # Query the populated data
        print("\n   Querying populated sales data:")
        aws_sales_query = f"SELECT COUNT(*) as total_sales, AVG(sale_amount) as avg_amount FROM {aws_sales_table_name}"
        aws_sales_summary = aws_db.query(aws_sales_query, show_info=False)
        print("   Sales summary:")
        display(aws_sales_summary)
        print("   ✅ AWS sales data query passed")
        
        # Verify table was added to AWS tables list
        print("\n   Verifying table in AWS tables list:")
        aws_updated_tables = aws_db.tables
        assert aws_sales_table_name in aws_updated_tables, f"Table {aws_sales_table_name} should be in AWS tables list"
        print(f"   ✅ Table {aws_sales_table_name} confirmed in AWS schema ({aws_db.current_schema})")
        
    except Exception as e:
        print(f"   ❌ AWS populate method failed: {e}")
    
    # Test 3: Insert with append mode
    print("\n3. Testing insert with append mode:")
    try:
        # Create additional test data specifically for AWS
        aws_additional_data = pd.DataFrame({
            'customer_id': range(300, 305),
            'customer_name': [f'AWS_Customer_{i}' for i in range(300, 305)],
            'email': [f'aws{i}@layereddb.com' for i in range(300, 305)],
            'registration_date': pd.date_range('2024-02-01', periods=5, freq='D'),
            'is_active': [True] * 5
        })
        
        aws_customers_table = 'aws_test_customers_v3'
        
        # First insert
        aws_first_insert = aws_db.insert(
            df=test_data['customers'],
            table_name=aws_customers_table,
            if_exists='replace'
        )
        
        # Append additional data
        aws_append_insert = aws_db.insert(
            df=aws_additional_data,
            table_name=aws_customers_table,
            if_exists='append'
        )
        
        print(f"   First insert: {aws_first_insert['rows_inserted']} rows")
        print(f"   Append insert: {aws_append_insert['rows_inserted']} rows")
        
        # Verify total count
        aws_count_query = f"SELECT COUNT(*) as total_customers FROM {aws_customers_table}"
        aws_count_result = aws_db.query(aws_count_query, show_info=False)
        aws_total_customers = aws_count_result.iloc[0]['total_customers']
        aws_expected_total = len(test_data['customers']) + len(aws_additional_data)
        
        print(f"   Total customers: {aws_total_customers}, Expected: {aws_expected_total}")
        assert aws_total_customers == aws_expected_total, "Append should add to existing data"
        
        # Final AWS table existence check
        print(f"\n   Final table verification in schema '{aws_db.current_schema}':")
        aws_all_tables = aws_db.tables
        aws_created_tables = [aws_customers_table, aws_sales_table_name, aws_test_table_name]
        for table in aws_created_tables:
            if table in aws_all_tables:
                print(f"   ✅ {table} exists in AWS schema ({aws_db.current_schema})")
            else:
                print(f"   ❌ {table} missing from AWS schema tables list")
        
        print("   ✅ AWS append mode test passed")
        
    except Exception as e:
        print(f"   ❌ AWS append mode failed: {e}")
        
else:
    print("❌ Skipping AWS data population tests - no AWS LayeredDB connection")

🧪 AWS LAYEREDDB DATA POPULATION TEST

1. Testing insert method:
📝 Inserting 10 rows × 6 columns
   Target: berlin_source_data.aws_test_products_v3
   Action: replace
✅ Insert completed successfully
   Insert result: {'status': 'success', 'rows_inserted': 10, 'table': 'berlin_source_data.aws_test_products_v3', 'schema': 'berlin_source_data'}
   ✅ AWS insert method test passed

   Verifying inserted data:
   Verification query result: (10, 6)


Unnamed: 0,product_id,product_name,category,price,in_stock,created_date
0,1,Product_1,Electronics,354.72,True,2024-01-01
1,2,Product_2,Books,274.12,False,2024-01-02
2,3,Product_3,Clothing,179.86,True,2024-01-03
3,4,Product_4,Home,292.4,False,2024-01-04
4,5,Product_5,Sports,175.02,True,2024-01-05


   ✅ AWS data verification passed

   Checking table exists in AWS schema:
   ✅ Table aws_test_products_v3 found in AWS schema tables list (berlin_source_data)

2. Testing populate method:

🗂️  SCHEMA SELECTION
   Available schemas: ['berlin_source_data', 'public']
   Current schema: berlin_source_data

📊 SMART POPULATE - PRE-POPULATION ANALYSIS
🎯 Target: berlin_source_data.aws_test_sales_v3
📝 Mode: REPLACE
🔗 Connection: ConnectionType.AWS_LAYERED_DB

📋 DATASET ANALYSIS:
   Rows: 20
   Columns: 6
   Memory usage: 0.00 MB

🔍 COLUMN ANALYSIS:
   sale_id: int64 | Nulls: 0 (0.0%) | Unique: 20
   product_id: int64 | Nulls: 0 (0.0%) | Unique: 10
   quantity: int64 | Nulls: 0 (0.0%) | Unique: 8
   sale_amount: float64 | Nulls: 0 (0.0%) | Unique: 20
   customer_id: int64 | Nulls: 0 (0.0%) | Unique: 19
   sale_date: datetime64[ns] | Nulls: 0 (0.0%) | Unique: 20

✅ DATA QUALITY CHECKS:
   Total null values: 0
   Duplicate rows: 0

🏗️  TABLE STATUS:
   Table exists: Yes
   Current rows: 20
   Fin

Unnamed: 0,total_sales,avg_amount
0,20,580.581


   ✅ AWS sales data query passed

   Verifying table in AWS tables list:
   ✅ Table aws_test_sales_v3 confirmed in AWS schema (berlin_source_data)

3. Testing insert with append mode:
📝 Inserting 10 rows × 5 columns
   Target: berlin_source_data.aws_test_customers_v3
   Action: replace
✅ Insert completed successfully
📝 Inserting 5 rows × 5 columns
   Target: berlin_source_data.aws_test_customers_v3
   Action: append
✅ Insert completed successfully
   First insert: 10 rows
   Append insert: 5 rows
   Total customers: 15, Expected: 15

   Final table verification in schema 'berlin_source_data':
   ✅ aws_test_customers_v3 exists in AWS schema (berlin_source_data)
   ✅ aws_test_sales_v3 exists in AWS schema (berlin_source_data)
   ✅ aws_test_products_v3 exists in AWS schema (berlin_source_data)
   ✅ AWS append mode test passed


## 2.6 AWS LayeredDB Complex Query Test

In [43]:
if aws_db and aws_db.engine:
    print("🧪 AWS LAYEREDDB COMPLEX QUERY TEST")
    print("=" * 40)
    
    try:
        # Complex query joining our AWS test tables
        print("\n1. Testing complex JOIN query:")
        aws_complex_query = """
        SELECT 
            p.product_name,
            p.category,
            p.price,
            COUNT(s.sale_id) as total_sales,
            COALESCE(SUM(s.sale_amount), 0) as total_revenue,
            COALESCE(AVG(s.quantity), 0) as avg_quantity
        FROM aws_test_products_v3 p
        LEFT JOIN aws_test_sales_v3 s ON p.product_id = s.product_id
        GROUP BY p.product_id, p.product_name, p.category, p.price
        ORDER BY total_revenue DESC
        LIMIT 5
        """
        
        aws_join_result = aws_db.query(aws_complex_query, show_info=False)
        print("   AWS complex query results:")
        display(aws_join_result)
        assert isinstance(aws_join_result, pd.DataFrame), "Complex query should return DataFrame"
        print("   ✅ AWS complex JOIN query test passed")
        
        # Test aggregation query with PostgreSQL-compatible ROUND
        print("\n2. Testing aggregation query:")
        aws_agg_query = """
        SELECT 
            category,
            COUNT(*) as product_count,
            CAST(ROUND(CAST(AVG(price) AS numeric), 2) AS float) as avg_price,
            MIN(price) as min_price,
            MAX(price) as max_price
        FROM aws_test_products_v3
        GROUP BY category
        ORDER BY avg_price DESC
        """
        
        aws_agg_result = aws_db.query(aws_agg_query, show_info=False)
        print("   AWS aggregation query results:")
        display(aws_agg_result)
        print("   ✅ AWS aggregation query test passed")
        
        # Test window function query
        print("\n3. Testing window function query:")
        aws_window_query = """
        SELECT 
            customer_id,
            sale_amount,
            sale_date,
            ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY sale_amount DESC) as sale_rank,
            SUM(sale_amount) OVER (PARTITION BY customer_id) as customer_total
        FROM aws_test_sales_v3
        ORDER BY customer_id, sale_rank
        LIMIT 10
        """
        
        aws_window_result = aws_db.query(aws_window_query, show_info=False)
        print("   AWS window function query results:")
        display(aws_window_result)
        print("   ✅ AWS window function query test passed")
        
        # Test database-specific functions
        print("\n4. Testing PostgreSQL-specific functions:")
        postgres_query = """
        SELECT 
            category,
            STRING_AGG(product_name, ', ') as products,
            EXTRACT(EPOCH FROM NOW()) as current_timestamp_epoch
        FROM aws_test_products_v3
        GROUP BY category
        LIMIT 3
        """
        
        postgres_result = aws_db.query(postgres_query, show_info=False)
        print("   PostgreSQL-specific query results:")
        display(postgres_result)
        print("   ✅ PostgreSQL-specific functions test passed")
        
    except Exception as e:
        print(f"   ⚠️  AWS complex query note: {e} (may need test tables from previous steps)")
        
else:
    print("❌ Skipping AWS complex query tests - no AWS LayeredDB connection")

🧪 AWS LAYEREDDB COMPLEX QUERY TEST

1. Testing complex JOIN query:
   AWS complex query results:


Unnamed: 0,product_name,category,price,total_sales,total_revenue,avg_quantity
0,Product_2,Books,274.12,4,2808.56,2.75
1,Product_6,Electronics,191.98,3,1751.17,4.333333333333333
2,Product_8,Clothing,321.94,3,1476.3,5.333333333333333
3,Product_5,Sports,175.02,3,1178.53,5.333333333333333
4,Product_3,Clothing,179.86,1,947.32,2.0


   ✅ AWS complex JOIN query test passed

2. Testing aggregation query:
   AWS aggregation query results:


Unnamed: 0,category,product_count,avg_price,min_price,max_price
0,Sports,2,326.19,175.02,477.35
1,Electronics,2,273.35,191.98,354.72
2,Books,2,251.1,228.08,274.12
3,Clothing,2,250.9,179.86,321.94
4,Home,2,201.59,110.77,292.4


   ✅ AWS aggregation query test passed

3. Testing window function query:
   AWS window function query results:


Unnamed: 0,customer_id,sale_amount,sale_date,sale_rank,customer_total
0,100,371.37,2024-01-01 14:00:00,1,371.37
1,103,947.32,2024-01-01 09:00:00,1,947.32
2,112,290.79,2024-01-01 13:00:00,1,290.79
3,118,103.12,2024-01-01 07:00:00,1,103.12
4,126,466.36,2024-01-01 10:00:00,1,466.36
5,137,212.85,2024-01-01 08:00:00,1,212.85
6,150,921.97,2024-01-01 18:00:00,1,921.97
7,153,552.62,2024-01-01 04:00:00,1,552.62
8,155,583.31,2024-01-01 06:00:00,1,583.31
9,157,771.69,2024-01-01 12:00:00,1,771.69


   ✅ AWS window function query test passed

4. Testing PostgreSQL-specific functions:
   PostgreSQL-specific query results:


Unnamed: 0,category,products,current_timestamp_epoch
0,Sports,"Product_5, Product_10",1755794765.083355
1,Electronics,"Product_1, Product_6",1755794765.083355
2,Home,"Product_4, Product_9",1755794765.083355


   ✅ PostgreSQL-specific functions test passed


## 2.7 AWS LayeredDB Health Check Test

In [44]:
if aws_db and aws_db.engine:
    print("🧪 AWS LAYEREDDB HEALTH CHECK TEST")
    print("=" * 40)
    
    try:
        print("\n1. Testing health_check method:")
        aws_health_status = aws_db.health_check()
        
        print("   AWS health check results:")
        for key, value in aws_health_status.items():
            print(f"   - {key}: {value}")
        
        # Assertions for health check
        assert 'status' in aws_health_status, "Health check should include status"
        assert aws_health_status['status'] in ['healthy', 'unhealthy'], "Status should be healthy or unhealthy"
        assert 'connection_type' in aws_health_status, "Health check should include connection type"
        assert 'schemas_available' in aws_health_status, "Health check should include schema count"
        
        # AWS-specific checks
        if 'tunnel_status' in aws_health_status:
            print(f"   AWS Tunnel Status: {aws_health_status['tunnel_status']}")
            assert aws_health_status['tunnel_status'] in ['Connected', 'Not Connected'], "Tunnel status should be valid"
        
        if aws_health_status['status'] == 'healthy':
            print("   ✅ AWS LayeredDB connection is healthy")
        else:
            print(f"   ⚠️  AWS LayeredDB connection issue: {aws_health_status.get('error', 'Unknown')}")
        
        print("   ✅ AWS health check test passed")
        
        # Test tunnel manager separately if available
        if aws_db.tunnel_manager:
            print("\n2. Testing tunnel manager:")
            tunnel_status = aws_db.tunnel_manager.get_tunnel_status()
            print("   Tunnel manager status:")
            print(f"   - active: {tunnel_status.active}")
            print(f"   - host: {tunnel_status.host}")
            print(f"   - port: {tunnel_status.port}")
            print(f"   - status: {tunnel_status.status}")
            print(f"   - message: {tunnel_status.message}")
            
            assert hasattr(tunnel_status, 'active'), "Tunnel status should include active flag"
            assert hasattr(tunnel_status, 'host'), "Tunnel status should include host"
            assert hasattr(tunnel_status, 'port'), "Tunnel status should include port"
            print("   ✅ Tunnel manager test passed")
        
    except Exception as e:
        print(f"   ❌ AWS health check failed: {e}")
        
else:
    print("❌ Skipping AWS health check test - no AWS LayeredDB connection")

🧪 AWS LAYEREDDB HEALTH CHECK TEST

1. Testing health_check method:
   AWS health check results:
   - status: healthy
   - connection_type: AWS LayeredDB
   - database: layereddb
   - version: PostgreSQL 16.9 on x86_64-pc-linux-gnu, compiled b...
   - schemas_available: 2
   - current_schema: berlin_source_data
   - tables_in_current_schema: 11
   - connection: active
   - tunnel_status: Connected
   - tunnel_active: True
   AWS Tunnel Status: Connected
   ✅ AWS LayeredDB connection is healthy
   ✅ AWS health check test passed

2. Testing tunnel manager:
   Tunnel manager status:
   - active: True
   - host: localhost
   - port: 5433
   - status: Connected
   - message: Tunnel is active on localhost:5433
   ✅ Tunnel manager test passed


## 2.8 AWS LayeredDB Cleanup

In [25]:
print("📋 COMPREHENSIVE TEST SUMMARY")
print("=" * 40)

print(f"\n🕐 Test completed at: {datetime.now()}")
print(f"📂 Test location: {current_dir}")

print("\n🔗 NeonDB Tests:")
if 'neon_db' in locals() and neon_db and hasattr(neon_db, 'engine'):
    print("   ✅ Connection: Successful")
    print("   ✅ Schema Operations: Tested")
    print("   ✅ Table Operations: Tested")
    print("   ✅ Query Operations: Tested")
    print("   ✅ Data Population: Tested")
    print("   ✅ Enhanced Populate: Tested")
    print("   ✅ Complex Queries: Tested")
    print("   ✅ Health Check: Tested")
    print("   ✅ Cleanup: Completed")
else:
    print("   ❌ Connection: Failed or Skipped")

print("\n🚇 AWS LayeredDB Tests:")
if 'aws_db' in locals() and aws_db and hasattr(aws_db, 'engine'):
    print("   ✅ Connection: Successful")
    print("   ✅ Schema Operations: Tested")
    print("   ✅ Table Operations: Tested")
    print("   ✅ Query Operations: Tested")
    print("   ✅ Data Population: Tested")
    print("   ✅ Complex Queries: Tested")
    print("   ✅ Health Check: Tested")
    print("   ✅ Cleanup: Completed")
else:
    print("   ❌ Connection: Failed or Skipped")
    print("       (Expected if SSH tunnel not running or credentials not provided)")

print("\n💡 Enhanced V3 Features Tested:")
print("   ✅ New db_connector() function name")
print("   ✅ Enhanced populate() with mode parameter")
print("   ✅ Comprehensive data analysis reporting")
print("   ✅ Automatic table creation with constraints")
print("   ✅ Upsert functionality with primary keys")
print("   ✅ PostgreSQL compatibility fixes")
print("   ✅ AWS default schema (berlin_source_data)")

print("\n💡 Test Coverage:")
test_methods = [
    'db_connector() constructor',
    'schemas property',
    'tables property', 
    'current_schema property',
    'use() method',
    'query() method',
    'insert() method',
    'populate() method (enhanced)',
    'get_table_info() method',
    'health_check() method',
    'close() method',
    'TunnelManager functionality',
    'DatabaseConfig functionality',
    'Error handling and validation'
]

for method in test_methods:
    print(f"   ✅ {method}")

print("\n🎯 Key V3 Improvements:")
print("   ✅ Intelligent database switching (NeonDB default, AWS when params provided)")
print("   ✅ Enhanced populate with replace/append/upsert modes")
print("   ✅ Interactive schema/table selection prompts")
print("   ✅ Comprehensive pre/post population reporting")
print("   ✅ Automatic primary key detection and constraints")
print("   ✅ Optimal data type mapping for table creation")
print("   ✅ SSH tunnel detection and management")
print("   ✅ Production-ready error handling and validation")

print("\n🚀 Smart Database Connector V3 - All Tests Completed!")
print("=" * 55)

print("\n📝 Usage Examples:")
print("# Simple connection")
print("db = db_connector()  # NeonDB with test_berlin_data schema")
print("")
print("# AWS connection") 
print("db = db_connector(database='layereddb', username='user', password='pass')")
print("")
print("# Enhanced populate")
print("db.populate(df, 'banks', mode='upsert', primary_key=['bank_id'])")
print("")
print("# Interactive populate")
print("db.populate(df)  # Will prompt for schema and table")

📋 COMPREHENSIVE TEST SUMMARY

🕐 Test completed at: 2025-08-21 18:38:40.194553
📂 Test location: /Users/svitlanakovalivska/layered-populate-data-pool-da/db_population_utils/db_connector/tests

🔗 NeonDB Tests:
   ❌ Connection: Failed or Skipped

🚇 AWS LayeredDB Tests:
   ✅ Connection: Successful
   ✅ Schema Operations: Tested
   ✅ Table Operations: Tested
   ✅ Query Operations: Tested
   ✅ Data Population: Tested
   ✅ Complex Queries: Tested
   ✅ Health Check: Tested
   ✅ Cleanup: Completed

💡 Enhanced V3 Features Tested:
   ✅ New db_connector() function name
   ✅ Enhanced populate() with mode parameter
   ✅ Comprehensive data analysis reporting
   ✅ Automatic table creation with constraints
   ✅ Upsert functionality with primary keys
   ✅ PostgreSQL compatibility fixes
   ✅ AWS default schema (berlin_source_data)

💡 Test Coverage:
   ✅ db_connector() constructor
   ✅ schemas property
   ✅ tables property
   ✅ current_schema property
   ✅ use() method
   ✅ query() method
   ✅ insert() met

# 📋 Test Summary and Results

Final summary of all test results

In [45]:
print("📋 COMPREHENSIVE TEST SUMMARY")
print("=" * 40)

print(f"\n🕐 Test completed at: {datetime.now()}")
print(f"📂 Test location: {current_dir}")

print("\n🔗 NeonDB Tests:")
if 'neon_db' in locals() and neon_db and hasattr(neon_db, 'engine'):
    print("   ✅ Connection: Successful")
    print("   ✅ Schema Operations: Tested")
    print("   ✅ Table Operations: Tested")
    print("   ✅ Query Operations: Tested")
    print("   ✅ Data Population: Tested")
    print("   ✅ Complex Queries: Tested")
    print("   ✅ Health Check: Tested")
    print("   ✅ Cleanup: Completed")
else:
    print("   ❌ Connection: Failed or Skipped")

print("\n🚇 AWS LayeredDB Tests:")
if 'aws_db' in locals() and aws_db and hasattr(aws_db, 'engine'):
    print("   ✅ Connection: Successful")
    print("   ✅ Schema Operations: Tested")
    print("   ✅ Table Operations: Tested")
    print("   ✅ Query Operations: Tested")
    print("   ✅ Data Population: Tested")
    print("   ✅ Complex Queries: Tested")
    print("   ✅ Health Check: Tested")
    print("   ✅ Cleanup: Completed")
else:
    print("   ❌ Connection: Failed or Skipped")
    print("       (Expected if SSH tunnel not running or credentials not provided)")

print("\n💡 Test Coverage:")
test_methods = [
    'SmartDbConnectorV3.__init__()',
    'schemas property',
    'tables property', 
    'current_schema property',
    'use() method',
    'query() method',
    'insert() method',
    'populate() method',
    'get_table_info() method',
    'health_check() method',
    'close() method',
    'TunnelManager functionality',
    'DatabaseConfig functionality',
    'Error handling and validation'
]

for method in test_methods:
    print(f"   ✅ {method}")

print("\n🎯 Key Features Verified:")
print("   ✅ Intelligent database switching (NeonDB default, AWS when params provided)")
print("   ✅ Auto-discovery of schemas and tables")
print("   ✅ Schema switching with validation")
print("   ✅ Multiple insert modes (replace, append)")
print("   ✅ Complex query support with JOIN and window functions")
print("   ✅ SSH tunnel detection and management")
print("   ✅ Comprehensive error handling and health checks")
print("   ✅ Production-ready connection management")

print("\n🚀 Smart Database Connector V3 - All Tests Completed!")
print("=" * 55)

📋 COMPREHENSIVE TEST SUMMARY

🕐 Test completed at: 2025-08-21 18:46:28.736509
📂 Test location: /Users/svitlanakovalivska/layered-populate-data-pool-da/db_population_utils/db_connector/tests

🔗 NeonDB Tests:
   ✅ Connection: Successful
   ✅ Schema Operations: Tested
   ✅ Table Operations: Tested
   ✅ Query Operations: Tested
   ✅ Data Population: Tested
   ✅ Complex Queries: Tested
   ✅ Health Check: Tested
   ✅ Cleanup: Completed

🚇 AWS LayeredDB Tests:
   ✅ Connection: Successful
   ✅ Schema Operations: Tested
   ✅ Table Operations: Tested
   ✅ Query Operations: Tested
   ✅ Data Population: Tested
   ✅ Complex Queries: Tested
   ✅ Health Check: Tested
   ✅ Cleanup: Completed

💡 Test Coverage:
   ✅ SmartDbConnectorV3.__init__()
   ✅ schemas property
   ✅ tables property
   ✅ current_schema property
   ✅ use() method
   ✅ query() method
   ✅ insert() method
   ✅ populate() method
   ✅ get_table_info() method
   ✅ health_check() method
   ✅ close() method
   ✅ TunnelManager functionality

In [46]:
import pandas as pd
import numpy as np
import sys
import os
from datetime import datetime
import importlib

# - Set up the system path to find your package ---
# This ensures Python can locate the 'db_connector' package.
# The script navigates up two directories from the current notebook's location
# to reach the root where the 'db_connector' package resides.
#
# Correct Project Structure:
# /db_population_utils/
# └── /db_connector/
#     ├── __init__.py
#     ├── smart_db_connector_enhanced_V3.py
#     └── /tests/
#         └── test_smart_db_connector_V3_comprehensive.ipynb

# Get the directory of the current notebook, then go up two levels
notebook_dir = os.path.dirname(os.path.abspath("__file__")) # A robust way to get notebook dir
package_path = os.path.abspath(os.path.join(notebook_dir, '..', '..'))

if package_path not in sys.path:
    sys.path.append(package_path)
    print(f"✅ Added '{package_path}' to system path")
"""
Example usage of Interactive Smart DB Connector
"""

from db_connector import *

def main():
    """Example of how to use the interactive connector"""
    
    print("🚀 INTERACTIVE DB CONNECTOR EXAMPLE")
    print("=" * 40)
    
    # Method 1: Simple connection (prompts for credentials if needed)
    # If not AWS connection, will fallback to Neon database
    db = db_connector()
    
    if db.engine:
        print("\n✅ Successfully connected!")
        
        # Show available schemas
        print(f"Available schemas: {db.schemas}")
        
        # Switch to test_berlin_data schema by default if available
        if db.schemas:
            # Default to test_berlin_data schema
            if 'test_berlin_data' in db.schemas:
                target_schema = 'test_berlin_data'
                print(f"🎯 Using default schema: {target_schema}")
            else:
                target_schema = db.schemas[0]
                print(f"⚠️  test_berlin_data not found, using: {target_schema}")
            
            db.use(target_schema)
            
            # Show tables in current schema
            print(f"Tables in {target_schema}: {len(db.tables)} tables")
            
            # Example query
            try:
                result = db.query("SELECT current_schema() as schema, current_user as user")
                print("\nConnection info:")
                print(result)
            except Exception as e:
                print(f"Query error: {e}")
        
        # Clean up
        db.close()
        
    else:
        print("❌ Failed to connect to database")

if __name__ == "__main__":
    main()


🚀 INTERACTIVE DB CONNECTOR EXAMPLE
🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🔗 Using default NeonDB connection
✅ NeonDB configuration loaded
   Default schema: test_berlin_data
🔌 Connecting to NeonDB...
✅ Connection successful!
   Database: neondb
   User: neondb_owner

🔍 Auto-discovering database schemas...
✅ Discovered 4 schemas
🎯 Auto-selected default schema: test_berlin_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: NeonDB

🗂️  Discovered 4 schemas:
  📁 dependency_example: 4 tables
       └─ departments (2 columns)
       └─ districts (3 columns)
       └─ employees (4 columns)
       └─ ... and 1 more tables
  📁 nyc_schools: 27 tables
       └─ Audrey_sat_results (10 columns)
       └─ Colleges_Berlin (12 columns)
       └─ Levon_cleaned_sat_scores (8 columns)
       └─ ... and 24 more tables
  📁 public: 15 tables
       └─ audrey_sat_results (10 columns)
       └─ cleaned_sat_results_peter_s (9 columns)
       └─ demo_users (6 columns)
       └─ ... a

In [47]:
import pandas as pd
import numpy as np
import sys
import os
from datetime import datetime
import importlib

def setup_path_and_imports():
    """Handles path setup and module reloading for the test."""
    print("--- Setting up environment ---")
    # This ensures Python can locate the 'db_connector' package.
    # The script navigates up two directories from the current notebook's location
    # to reach the root where the 'db_connector' package resides.
    #
    # Correct Project Structure:
    # /db_population_utils/
    # └── /db_connector/
    #     ├── __init__.py
    #     ├── smart_db_connector_enhanced_V3.py
    #     └── /tests/
    #         └── test_smart_db_connector_V3_comprehensive.ipynb
    
    notebook_dir = os.path.dirname(os.path.abspath("__file__"))
    package_path = os.path.abspath(os.path.join(notebook_dir, '..', '..'))

    if package_path not in sys.path:
        sys.path.append(package_path)
        print(f"✅ Added '{package_path}' to system path")

    # Force a reload of the module to get the latest changes
    if 'db_connector' in sys.modules:
        importlib.reload(sys.modules['db_connector'])
    if 'db_connector.smart_db_connector_enhanced_V3' in sys.modules:
        importlib.reload(sys.modules['db_connector.smart_db_connector_enhanced_V3'])
    
    try:
        from db_connector import db_connector
        print("✅ Successfully imported 'db_connector' from the package.")
        return db_connector
    except ImportError as e:
        print(f"❌ ImportError: {e}. Please check your file structure and __init__.py file.")
        return None

def main():
    """
    Main function to run an interactive and comprehensive test of the Smart DB Connector.
    """
    db_connector_class = setup_path_and_imports()
    if not db_connector_class:
        sys.exit(1)

    print(f"\n📂 Working directory: {os.getcwd()}")
    print(f"🕐 Test started at: {datetime.now()}")
    
    db = None
    try:
        # --- 1. Initialize the connector ---
        print("\n🚀 Initializing the Smart DB Connector (Default: NeonDB)...")
        db = db_connector_class()
        if not db.engine:
            print("❌ Failed to connect to the database.")
            return

        # --- 2. Perform and display health check ---
        print("\n🩺 Performing initial health check...")
        health = db.health_check()
        print("\n--- Health Check Result ---")
        status = health.get('status', 'unhealthy')
        print(f"Status: {'💚 HEALTHY' if status == 'healthy' else '💔 UNHEALTHY'}")
        for key, value in health.items():
            if key != 'status':
                print(f"   - {key}: {value}")
        print("---------------------------\n")

        # --- 3. Explore and run a simple query ---
        print("\n🗺️  Exploring database structure...")
        print(f"   Available Schemas: {db.schemas}")
        if db.current_schema:
            print(f"   🎯 Current Schema: {db.current_schema}")
            print(f"   Tables in '{db.current_schema}': {db.tables[:5]}...")
            
            print(f"\n🔍 Running a simple query in '{db.current_schema}'...")
            info_df = db.query("SELECT current_schema() as schema, current_user as user")
            print("   Query successful. Connection info:")
            print(info_df)
        else:
            print("   No schemas found to explore.")

        # --- 4. Demonstrate the 'populate' method ---
        print("\n\n📝 Demonstrating the 'populate' method...")
        
        # Create a sample DataFrame to insert
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        new_table_name = f"test_population_log_{timestamp}"
        
        log_data = {
            'event_id': range(3),
            'timestamp': [datetime.now() - pd.Timedelta(minutes=x) for x in range(3)],
            'log_level': ['SUCCESS', 'ERROR', 'SUCCESS'],
            'details': ['User authentication successful', 'Failed to process payment', 'Data sync complete'],
            'user_id': [25, 42, 25]
        }
        log_df = pd.DataFrame(log_data)
        
        print(f"\n   - A sample DataFrame has been created to populate the table '{new_table_name}'.")
        print(log_df.head())
        
        # Use populate, which will show a pre-population report
        populate_result = db.populate(
            df=log_df, 
            table_name=new_table_name, 
            schema=db.current_schema, 
            mode='replace' # 'replace' ensures the test is clean every time
        )
        
        if populate_result.get('status') == 'success':
            print(f"\n   ✅ 'populate' operation completed successfully.")
            
            # Verify the data was inserted correctly
            print(f"   - Verifying data in new table '{new_table_name}'...")
            verify_df = db.query(f"SELECT * FROM {new_table_name}")
            print(f"   - Verification query returned {len(verify_df)} rows.")
            print(verify_df.head())
        else:
            print(f"\n   ❌ 'populate' operation failed: {populate_result.get('error')}")

    except Exception as e:
        print(f"\n❌ An unexpected error occurred during the test suite: {e}")

    finally:
        # --- 5. Always ensure the connection is closed ---
        if db and db.engine:
            print("\n\n🔒 Closing database connection...")
            db.close()
        else:
            print("\n\nℹ️ No active connection was made or it was already closed.")

if __name__ == "__main__":
    main()


--- Setting up environment ---
✅ Successfully imported 'db_connector' from the package.

📂 Working directory: /Users/svitlanakovalivska/layered-populate-data-pool-da/db_population_utils/db_connector/tests
🕐 Test started at: 2025-08-21 18:50:49.115276

🚀 Initializing the Smart DB Connector (Default: NeonDB)...
🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🔗 Using default NeonDB connection
✅ NeonDB configuration loaded
   Default schema: test_berlin_data
🔌 Connecting to NeonDB...
✅ Connection successful!
   Database: neondb
   User: neondb_owner

🔍 Auto-discovering database schemas...
✅ Discovered 4 schemas
🎯 Auto-selected default schema: test_berlin_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: NeonDB

🗂️  Discovered 4 schemas:
  📁 dependency_example: 4 tables
       └─ departments (2 columns)
       └─ districts (3 columns)
       └─ employees (4 columns)
       └─ ... and 1 more tables
  📁 nyc_schools: 27 tables
       └─ Audrey_sat_results (10 columns)
     

# 3. Simple usage

In [None]:
#Simple usage example of the db_connector for NeonDB
from db_connector import db_connector
db = db_connector()

🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🔗 Using default NeonDB connection
✅ NeonDB configuration loaded
   Default schema: test_berlin_data
🔌 Connecting to NeonDB...
✅ Connection successful!
   Database: neondb
   User: neondb_owner

🔍 Auto-discovering database schemas...
✅ Discovered 4 schemas
🎯 Auto-selected default schema: test_berlin_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: NeonDB

🗂️  Discovered 4 schemas:
  📁 dependency_example: 4 tables
       └─ departments (2 columns)
       └─ districts (3 columns)
       └─ employees (4 columns)
       └─ ... and 1 more tables
  📁 nyc_schools: 27 tables
       └─ Audrey_sat_results (10 columns)
       └─ Colleges_Berlin (12 columns)
       └─ Levon_cleaned_sat_scores (8 columns)
       └─ ... and 24 more tables
  📁 public: 15 tables
       └─ audrey_sat_results (10 columns)
       └─ cleaned_sat_results_peter_s (9 columns)
       └─ demo_users (6 columns)
       └─ ... and 12 more tables
  🎯 [CURRENT] tes

In [50]:
# Simple usage example of the db_connector for AWS LayeredDB
from db_connector import db_connector
aws_db = db_connector(database='layereddb')

🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🚇 AWS LayeredDB connection requested

🔐 AWS LayeredDB CONNECTION SETUP
----------------------------------------
Please provide your AWS database credentials:
🚇 Tunnel Status: Connected
✅ AWS LayeredDB configuration loaded
   Tunnel: Tunnel is active on localhost:5433
🔌 Connecting to AWS LayeredDB...
✅ Connection successful!
   Database: layereddb
   User: svitlana_kovalivska

🔍 Auto-discovering database schemas...
✅ Discovered 2 schemas
🎯 Auto-selected default schema: berlin_source_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: AWS LayeredDB
🚇 Tunnel Status: Connected (localhost:5433)

🗂️  Discovered 2 schemas:
  🎯 [CURRENT] berlin_source_data: 11 tables
       └─ aws_test_customers_v3 (5 columns)
       └─ aws_test_products_v3 (6 columns)
       └─ aws_test_sales_v3 (6 columns)
       └─ ... and 8 more tables
  📁 public: 22 tables
       └─ aws_test_customers_v3 (5 columns)
       └─ aws_test_products_v3 (6 columns

# 4. Complex usage

In [None]:
#NeonDB Test Suite
import pandas as pd
import numpy as np
import sys
import os
from datetime import datetime
import importlib

def setup_path_and_imports():
    """Handles path setup and module reloading for the test."""
    print("--- Setting up environment ---")
    # This ensures Python can locate the 'db_connector' package.
    # The script navigates up two directories from the current notebook's location
    # to reach the root where the 'db_connector' package resides.
    #
    # Correct Project Structure:
    # /db_population_utils/
    # └── /db_connector/
    #     ├── __init__.py
    #     ├── smart_db_connector_enhanced_V3.py
    #     └── /tests/
    #         └── test_smart_db_connector_V3_comprehensive.ipynb
    
    notebook_dir = os.path.dirname(os.path.abspath("__file__"))
    package_path = os.path.abspath(os.path.join(notebook_dir, '..', '..'))

    if package_path not in sys.path:
        sys.path.append(package_path)
        print(f"✅ Added '{package_path}' to system path")

    # Force a reload of the module to get the latest changes
    if 'db_connector' in sys.modules:
        importlib.reload(sys.modules['db_connector'])
    if 'db_connector.smart_db_connector_enhanced_V3' in sys.modules:
        importlib.reload(sys.modules['db_connector.smart_db_connector_enhanced_V3'])
    
    try:
        from db_connector import db_connector
        print("✅ Successfully imported 'db_connector' from the package.")
        return db_connector
    except ImportError as e:
        print(f"❌ ImportError: {e}. Please check your file structure and __init__.py file.")
        return None

def main():
    """
    Main function to run an interactive and comprehensive test of the Smart DB Connector.
    """
    db_connector_class = setup_path_and_imports()
    if not db_connector_class:
        sys.exit(1)

    print(f"\n📂 Working directory: {os.getcwd()}")
    print(f"🕐 Test started at: {datetime.now()}")
    
    db = None
    try:
        # --- 1. Initialize the connector ---
        print("\n🚀 Initializing the Smart DB Connector (Default: NeonDB)...")
        db = db_connector_class()
        if not db.engine:
            print("❌ Failed to connect to the database.")
            return

        # --- 2. Perform and display health check ---
        print("\n🩺 Performing initial health check...")
        health = db.health_check()
        print("\n--- Health Check Result ---")
        status = health.get('status', 'unhealthy')
        print(f"Status: {'💚 HEALTHY' if status == 'healthy' else '💔 UNHEALTHY'}")
        for key, value in health.items():
            if key != 'status':
                print(f"   - {key}: {value}")
        print("---------------------------\n")

        # --- 3. Explore and run a simple query ---
        print("\n🗺️  Exploring database structure...")
        print(f"   Available Schemas: {db.schemas}")
        if db.current_schema:
            print(f"   🎯 Current Schema: {db.current_schema}")
            print(f"   Tables in '{db.current_schema}': {db.tables[:5]}...")
            
            print(f"\n🔍 Running a simple query in '{db.current_schema}'...")
            info_df = db.query("SELECT current_schema() as schema, current_user as user")
            print("   Query successful. Connection info:")
            print(info_df)
        else:
            print("   No schemas found to explore.")

        # --- 4. Demonstrate the 'populate' method ---
        print("\n\n📝 Demonstrating the 'populate' method...")
        
        # Create a sample DataFrame to insert
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        new_table_name = f"test_population_log_{timestamp}"
        
        log_data = {
            'event_id': range(3),
            'timestamp': [datetime.now() - pd.Timedelta(minutes=x) for x in range(3)],
            'log_level': ['SUCCESS', 'ERROR', 'SUCCESS'],
            'details': ['User authentication successful', 'Failed to process payment', 'Data sync complete'],
            'user_id': [25, 42, 25]
        }
        log_df = pd.DataFrame(log_data)
        
        print(f"\n   - A sample DataFrame has been created to populate the table '{new_table_name}'.")
        print(log_df.head())
        
        # Use populate, which will show a pre-population report
        populate_result = db.populate(
            df=log_df, 
            table_name=new_table_name, 
            schema=db.current_schema, 
            mode='replace' # 'replace' ensures the test is clean every time
        )
        
        if populate_result.get('status') == 'success':
            print(f"\n   ✅ 'populate' operation completed successfully.")
            
            # Verify the data was inserted correctly
            print(f"   - Verifying data in new table '{new_table_name}'...")
            verify_df = db.query(f"SELECT * FROM {new_table_name}")
            print(f"   - Verification query returned {len(verify_df)} rows.")
            print(verify_df.head())
        else:
            print(f"\n   ❌ 'populate' operation failed: {populate_result.get('error')}")

    except Exception as e:
        print(f"\n❌ An unexpected error occurred during the test suite: {e}")

    finally:
        # --- 5. Always ensure the connection is closed ---
        if db and db.engine:
            print("\n\n🔒 Closing database connection...")
            db.close()
        else:
            print("\n\nℹ️ No active connection was made or it was already closed.")

if __name__ == "__main__":
    main()


In [None]:
# Comprehensive test suite for the Smart DB Connector V3
# This suite tests all features, including NeonDB and AWS LayeredDB connections,
# data population, complex queries, and health checks.
import pandas as pd
import numpy as np
import sys
import os
from datetime import datetime
import importlib

def setup_path_and_imports():
    """Handles path setup and module reloading for the test."""
    print("--- Setting up environment ---")
    # This ensures Python can locate the 'db_connector' package.
    # The script navigates up two directories from the current notebook's location
    # to reach the root where the 'db_connector' package resides.
    #
    # Correct Project Structure:
    # /db_population_utils/
    # └── /db_connector/
    #     ├── __init__.py
    #     ├── smart_db_connector_enhanced_V3.py
    #     └── /tests/
    #         └── test_smart_db_connector_V3_comprehensive.ipynb
    
    notebook_dir = os.path.dirname(os.path.abspath("__file__"))
    package_path = os.path.abspath(os.path.join(notebook_dir, '..', '..'))

    if package_path not in sys.path:
        sys.path.append(package_path)
        print(f"✅ Added '{package_path}' to system path")

    # Force a reload of the module to get the latest changes
    if 'db_connector' in sys.modules:
        importlib.reload(sys.modules['db_connector'])
    if 'db_connector.smart_db_connector_enhanced_V3' in sys.modules:
        importlib.reload(sys.modules['db_connector.smart_db_connector_enhanced_V3'])
    
    try:
        from db_connector import db_connector
        print("✅ Successfully imported 'db_connector' from the package.")
        return db_connector
    except ImportError as e:
        print(f"❌ ImportError: {e}. Please check your file structure and __init__.py file.")
        return None

def run_aws_test(db_connector_class):
    """Runs a separate test for the AWS LayeredDB connection."""
    print("\n" + "="*50)
    print("--- ЗАПУСК ТЕСТА AWS ---")
    print("This will prompt for your AWS username and password.")
    print("Ensure your SSH tunnel is active before proceeding.")
    print("="*50)
    
    aws_db = None
    try:
        # Initialize with 'layereddb' to trigger AWS connection
        aws_db = db_connector_class(database='layereddb')
        
        if not aws_db.engine:
            print("❌ Failed to connect to the AWS database. The connector might have fallen back to NeonDB.")
            return

        print("\n🩺 Performing AWS health check...")
        health = aws_db.health_check()
        print("\n--- AWS Health Check Result ---")
        status = health.get('status', 'unhealthy')
        print(f"Status: {'💚 HEALTHY' if status == 'healthy' else '💔 UNHEALTHY'}")
        for key, value in health.items():
            if key != 'status':
                print(f"   - {key}: {value}")
        print("---------------------------\n")

        if aws_db.current_schema:
            print(f"   🎯 AWS Current Schema: {aws_db.current_schema}")
            print(f"   Tables in '{aws_db.current_schema}': {aws_db.tables[:5]}...")
        
    except Exception as e:
        print(f"❌ An error occurred during the AWS test: {e}")
    finally:
        if aws_db and aws_db.engine:
            print("\n🔒 Closing AWS database connection...")
            aws_db.close()

def main():
    """
    Main function to run an interactive and comprehensive test of the Smart DB Connector.
    """
    db_connector_class = setup_path_and_imports()
    if not db_connector_class:
        sys.exit(1)

    print(f"\n📂 Working directory: {os.getcwd()}")
    print(f"🕐 Test started at: {datetime.now()}")
    
    db = None
    try:
        # --- 1. Initialize the connector (DEFAULTS TO NEONDB) ---
        print("\n🚀 Initializing the Smart DB Connector (Default: NeonDB)...")
        db = db_connector_class()
        if not db.engine:
            print("❌ Failed to connect to the default database.")
            return

        # --- 2. Perform and display health check ---
        print("\n🩺 Performing initial health check...")
        health = db.health_check()
        print("\n--- Health Check Result ---")
        status = health.get('status', 'unhealthy')
        print(f"Status: {'💚 HEALTHY' if status == 'healthy' else '💔 UNHEALTHY'}")
        for key, value in health.items():
            if key != 'status':
                print(f"   - {key}: {value}")
        print("---------------------------\n")

        # --- 3. Explore and run a simple query ---
        print("\n🗺️  Exploring database structure...")
        print(f"   Available Schemas: {db.schemas}")
        if db.current_schema:
            print(f"   🎯 Current Schema: {db.current_schema}")
            print(f"   Tables in '{db.current_schema}': {db.tables[:5]}...")
            
            print(f"\n🔍 Running a simple query in '{db.current_schema}'...")
            info_df = db.query("SELECT current_schema() as schema, current_user as user")
            print("   Query successful. Connection info:")
            print(info_df)
        else:
            print("   No schemas found to explore.")

        # --- 4. Demonstrate the 'populate' method ---
        print("\n\n📝 Demonstrating the 'populate' method...")
        
        # Create a sample DataFrame to insert
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        new_table_name = f"test_population_log_{timestamp}"
        
        log_data = {
            'event_id': range(3),
            'timestamp': [datetime.now() - pd.Timedelta(minutes=x) for x in range(3)],
            'log_level': ['SUCCESS', 'ERROR', 'SUCCESS'],
            'details': ['User authentication successful', 'Failed to process payment', 'Data sync complete'],
            'user_id': [25, 42, 25]
        }
        log_df = pd.DataFrame(log_data)
        
        print(f"\n   - A sample DataFrame has been created to populate the table '{new_table_name}'.")
        print(log_df.head())
        
        # Use populate, which will show a pre-population report
        populate_result = db.populate(
            df=log_df, 
            table_name=new_table_name, 
            schema=db.current_schema, 
            mode='replace' # 'replace' ensures the test is clean every time
        )
        
        if populate_result.get('status') == 'success':
            print(f"\n   ✅ 'populate' operation completed successfully.")
            
            # Verify the data was inserted correctly
            print(f"   - Verifying data in new table '{new_table_name}'...")
            verify_df = db.query(f"SELECT * FROM {new_table_name}")
            print(f"   - Verification query returned {len(verify_df)} rows.")
            print(verify_df.head())
        else:
            print(f"\n   ❌ 'populate' operation failed: {populate_result.get('error')}")

    except Exception as e:
        print(f"\n❌ An unexpected error occurred during the default test suite: {e}")

    finally:
        # --- 5. Always ensure the connection is closed ---
        if db and db.engine:
            print("\n\n🔒 Closing default database connection...")
            db.close()
        else:
            print("\n\nℹ️ No active connection was made for the default test.")

    # --- 6. Run the separate test for AWS ---
    run_aws_test(db_connector_class)


if __name__ == "__main__":
    main()


--- Setting up environment ---
✅ Successfully imported 'db_connector' from the package.

📂 Working directory: /Users/svitlanakovalivska/layered-populate-data-pool-da/db_population_utils/db_connector/tests
🕐 Test started at: 2025-08-21 18:58:38.022425

🚀 Initializing the Smart DB Connector (Default: NeonDB)...
🌟 SMART DATABASE CONNECTOR V3 - INITIALIZING...
🔗 Using default NeonDB connection
✅ NeonDB configuration loaded
   Default schema: test_berlin_data
🔌 Connecting to NeonDB...
✅ Connection successful!
   Database: neondb
   User: neondb_owner

🔍 Auto-discovering database schemas...
✅ Discovered 4 schemas
🎯 Auto-selected default schema: test_berlin_data

📊 SMART DB CONNECTOR V3 - CONNECTION SUMMARY
🔗 Connection Type: NeonDB

🗂️  Discovered 4 schemas:
  📁 dependency_example: 4 tables
       └─ departments (2 columns)
       └─ districts (3 columns)
       └─ employees (4 columns)
       └─ ... and 1 more tables
  📁 nyc_schools: 27 tables
       └─ Audrey_sat_results (10 columns)
     