In [0]:
import boto3
import os
from pprint import pprint

In [0]:

# Configure S3 access
# The bucket name is mandatory: fail fast with a clear message instead of
# letting the first S3 call reject Bucket=None with an opaque validation error.
s3_bucket = os.getenv('BUCKET_NAME')
if not s3_bucket:
    raise RuntimeError("BUCKET_NAME environment variable is not set")
base_prefix = "iot-sitewise/"  # root key prefix under which all directories live
directories = ["agg", "asset_metadata", "index", "raw"]

# Create S3 client using environment variables.
# The session token is forwarded together with the key pair so that temporary
# credentials (STS / SSO / assumed roles) work; passing only the key and secret
# of a temporary credential set is rejected by S3. Any variable that is unset
# is passed as None, which boto3 treats as "not provided".
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    aws_session_token=os.getenv('AWS_SESSION_TOKEN'),
    region_name=os.getenv('AWS_REGION', 'us-east-1')
)

# Function to list sample paths from each directory
def list_sample_paths(directory, max_samples=3):
    """Print up to ``max_samples`` example object keys found under a directory.

    Lists the first page (at most 100 keys) of objects under
    ``{base_prefix}{directory}/`` in ``s3_bucket`` and prints a numbered list
    of sample keys. Samples are spread across distinct first-level
    subdirectories where possible; if there are fewer distinct subdirectories
    than ``max_samples``, the remaining slots are filled with further keys in
    listing order.

    Args:
        directory: Name of the directory below ``base_prefix`` to sample.
        max_samples: Maximum number of keys to print. Values <= 0 print none.

    Returns:
        None. All results are written to stdout.
    """
    print(f"\n===== SAMPLE PATHS FROM {directory.upper()} =====")

    prefix = f"{base_prefix}{directory}/"
    response = s3_client.list_objects_v2(
        Bucket=s3_bucket,
        Prefix=prefix,
        MaxKeys=100  # Get enough to sample from
    )

    # Keys ending in "/" are folder placeholder objects, not data files,
    # so they must not be reported as samples.
    keys = [
        obj['Key']
        for obj in response.get('Contents', [])
        if not obj['Key'].endswith('/')
    ]
    if not keys:
        print(f"No files found in {directory}")
        return

    for i, sample in enumerate(_pick_sample_keys(keys, prefix, max_samples), 1):
        print(f"Sample {i}: {sample}")


def _pick_sample_keys(keys, prefix, max_samples):
    """Pick up to ``max_samples`` keys, preferring one per first-level subdirectory."""
    limit = max(max_samples, 0)  # guard against negative slice arithmetic below
    samples = []
    leftovers = []
    dirs_seen = set()

    for key in keys:
        # First path component after the prefix; "" for files directly in the directory
        parts = key[len(prefix):].split('/')
        subdir = parts[0] if len(parts) > 1 else ""

        if subdir not in dirs_seen and len(samples) < limit:
            samples.append(key)
            dirs_seen.add(subdir)
        else:
            leftovers.append(key)

    # Top up with already-seen subdirectories so that a directory whose files
    # all share one subdirectory still yields max_samples samples.
    samples.extend(leftovers[:limit - len(samples)])
    return samples

In [0]:
 # List directory structure
for directory in directories:
    list_sample_paths(directory)

# Additional: print hierarchy levels for better understanding
print("\n===== DIRECTORY STRUCTURE ANALYSIS =====")
for directory in directories:
    print(f"\nAnalyzing {directory}...")
    prefix = f"{base_prefix}{directory}/"
    # With Delimiter='/', S3 groups everything below the next "/" into
    # CommonPrefixes (subdirectories) and returns only direct children in Contents.
    response = s3_client.list_objects_v2(
        Bucket=s3_bucket,
        Prefix=prefix,
        Delimiter='/'
    )

    # Print common prefixes (subdirectories)
    first_level = response.get('CommonPrefixes', [])
    if first_level:
        print(f"First level subdirectories in {directory}:")
        for common_prefix in first_level:
            subdir = common_prefix['Prefix']
            print(f"  - {subdir}")

            # Check next level. No MaxKeys here: keys and common prefixes share
            # the same limit, so a small MaxKeys lets a handful of files that
            # sort before the first sub-prefix hide every subdirectory. The
            # output is capped to 5 entries by the slice below instead.
            sub_response = s3_client.list_objects_v2(
                Bucket=s3_bucket,
                Prefix=subdir,
                Delimiter='/'
            )
            second_level = sub_response.get('CommonPrefixes', [])
            if second_level:
                print("    Second level (showing up to 5):")
                for sub_prefix in second_level[:5]:
                    print(f"    - {sub_prefix['Prefix']}")
    else:
        # If no subdirectories, list a few files directly.
        # Keys ending in "/" are folder placeholder objects, not files.
        direct_files = [
            obj['Key']
            for obj in response.get('Contents', [])
            if not obj['Key'].endswith('/')
        ]
        if direct_files:
            print(f"Files directly in {directory} (showing up to 5):")
            for key in direct_files[:5]:
                print(f"  - {key}")
        else:
            print(f"No files found in {directory}")