In [None]:
import boto3
import os

## Test Connection Script

In [None]:
# Bucket whose reachability is being checked
BUCKET_NAME = 'schulich-project-roadrunner'

# S3 client used for the check
s3 = boto3.client('s3')

# Probe the bucket with head_bucket; any exception is reported rather than raised
try:
    s3.head_bucket(Bucket=BUCKET_NAME)
except Exception as err:
    print(f"Failed to connect to the bucket: {BUCKET_NAME}")
    print(err)
else:
    print(f"Successfully connected to the bucket: {BUCKET_NAME}")

## Download the Data

In [None]:
# Example Script to download the data
import boto3
import os

# Download settings: local destination directory and source bucket
LOCAL_DOWNLOAD_PATH = './vehicle_data/'  # Change this to the directory where the vehicle data should be stored
BUCKET_NAME = 'schulich-project-roadrunner'

# S3 client that the download function below reads as a module-level name
s3 = boto3.client('s3')

def bulk_download_from_s3(bucket_name, user_type, country, year, months, encrypted_vins,
                          local_download_path, s3_client=None):
    """Download all CSV objects under the selected S3 prefixes, mirroring the key layout locally.

    For every combination of month and encrypted VIN, objects are listed under
    the prefix ``{user_type}/{country}/{year}/{month}/[{encrypted_vin}/]`` and
    each key ending in ``.csv`` is saved to ``local_download_path/<key>``.
    Files that already exist locally are skipped, so the function can be
    re-run to resume an interrupted download.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        user_type: First path component of the prefix.
        country: Second path component of the prefix.
        year: Third path component of the prefix.
        months: Iterable of month path components; each one is processed in turn.
        encrypted_vins: Iterable of VIN path components. A falsy entry
            (e.g. ``None``) omits the VIN component, so every object under
            the month prefix is listed.
        local_download_path: Root directory for the downloaded files; created
            if it does not exist.
        s3_client: Optional client object providing ``get_paginator`` and
            ``download_file``. When omitted, the module-level ``s3`` client
            is used.

    Returns:
        None. Progress is reported with ``print``.
    """
    client = s3 if s3_client is None else s3_client

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(local_download_path, exist_ok=True)

    # The paginator follows continuation tokens for us, page by page
    paginator = client.get_paginator('list_objects_v2')

    for month in months:
        for encrypted_vin in encrypted_vins:
            # Build the S3 prefix; the VIN component is optional
            prefix = f"{user_type}/{country}/{year}/{month}/"
            if encrypted_vin:
                prefix += f"{encrypted_vin}/"

            for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
                # A page without matching objects has no 'Contents' entry
                for obj in page.get('Contents', []):
                    key = obj['Key']

                    # Only CSV files are of interest
                    if not key.endswith('.csv'):
                        continue

                    # Preserve the bucket's key structure below the local root
                    local_file_path = os.path.join(local_download_path, key)

                    if os.path.exists(local_file_path):
                        print(f"File {local_file_path} already exists. Skipping download.")
                        continue

                    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

                    print(f"Downloading {key} to {local_file_path}")
                    client.download_file(bucket_name, key, local_file_path)

# Kick off the download with the example parameters below.
# For instance, year '2024' with months ['07', '08'] and encrypted_vins ['abc123', None]
# downloads the CSVs for those months; a None entry selects all VINs for that year and month.
encrypted_vins = ['-0aLNtbYFtsTgXFS7', '-efOV-tY9RQqfGdWO', '-zId26lhoDxoP475C']  # Use [None] to download all VINs
months = ['07', '08']
year = '2024'
country = 'CA'  # Two-letter country code, e.g., 'US', 'CA'
user_type = 'IN'  # 'IN' for individual or 'FM' for fleet manager
bulk_download_from_s3(
    bucket_name=BUCKET_NAME,
    user_type=user_type,
    country=country,
    year=year,
    months=months,
    encrypted_vins=encrypted_vins,
    local_download_path=LOCAL_DOWNLOAD_PATH,
)
