In [0]:
# Check files in Volume
#
# Sanity-check cell: lists the volume, reports each CSV file with its size,
# then reads the first CSV with Spark to confirm the files are loadable.
# `dbutils` and `spark` are not defined in this cell; they are presumably the
# globals injected by the Databricks notebook runtime.
volume_path = "/Volumes/workspace/default/olist_data/"
separator = "=" * 80

print(f"Checking files in Volume: {volume_path}")
print(separator)

# Step 1: list the volume. Only the listing call sits inside this `try`, so the
# volume-setup hints below are shown only when the listing itself failed.
try:
    files = dbutils.fs.ls(volume_path)
except Exception as e:
    # Broad catch is deliberate: this is a top-level diagnostic cell, and the
    # goal is a readable hint rather than a raw stack trace.
    print(f"\n❌ Error accessing Volume: {e}")
    print("\nMake sure:")
    print("  1. You created the volume 'olist_data'")
    print("  2. You uploaded all 9 CSV files")
    print("  3. Your SQL Warehouse is running")
else:
    # Match the extension case-insensitively so uploads named e.g. "X.CSV"
    # are not silently skipped.
    csv_files = [f for f in files if f.name.lower().endswith(".csv")]

    if not csv_files:
        # Report the empty case on its own instead of first printing a
        # green "Found 0 CSV files" line.
        print("\n❌ No CSV files found in the Volume!")
        print(f"Please upload your files to: {volume_path}")
    else:
        print(f"\n✅ Found {len(csv_files)} CSV files:\n")
        for f in csv_files:
            # Convert the reported size (treated as bytes) to MB for display.
            size_mb = f.size / (1024 * 1024)
            print(f"  📄 {f.name:<50} ({size_mb:.2f} MB)")

        # Step 2: test-read one file. Only the first CSV is exercised.
        sample = csv_files[0]
        print(f"\n{separator}")
        print(f"🧪 Testing read on: {sample.name}")
        print(separator)

        try:
            test_df = spark.read.csv(
                sample.path,
                header=True,
                inferSchema=False,  # Read as strings to avoid type issues
            )

            print(f"✅ Successfully read {sample.name}")
            print(f"   Rows: {test_df.count():,}")
            print(f"   Columns: {len(test_df.columns)}")

            print("\n📊 Schema:")
            test_df.printSchema()

            print("\n👀 First 3 rows:")
            test_df.show(3, truncate=False)

            print("\n✅ All good! Your files are ready to be processed!")
        except Exception as e:
            # The listing already succeeded, so do not blame the volume here.
            print(f"\n❌ Error reading {sample.name}: {e}")
            print("The Volume is reachable; the problem is with reading this file.")