In [5]:
from google.cloud import bigquery, storage
from google.oauth2 import service_account
from google.api_core import exceptions

# Path to the service account key file; update this for your machine.
KEY_PATH = "C:/Users/sambo/Downloads/boreal-fort-437820-k4-0b11a4b50131.json"

# Project-level settings shared by the functions below. These are plain
# assignments that cannot fail, so they live outside the try block.
project_id = "boreal-fort-437820-k4"
bucket_name = "nyc-yellow-cabs"
dataset_name = "yellow_cab_data"

# Build the credentials and the two API clients. Only the calls that can
# actually fail (reading the key file, constructing the clients) are guarded.
try:
    credentials = service_account.Credentials.from_service_account_file(
        KEY_PATH,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    storage_client = storage.Client(project=project_id, credentials=credentials)
    bigquery_client = bigquery.Client(project=project_id, credentials=credentials)
except Exception as e:
    print(f"Error initializing clients: {e}")
    # `exit` is an interactive helper injected by the `site` module and is not
    # guaranteed to exist (e.g. under `python -S`); raising SystemExit is the
    # portable equivalent and needs no extra import.
    raise SystemExit(1)

def create_dataset(dataset_name):
    """Ensure the BigQuery dataset exists in the configured project.

    Args:
        dataset_name: Dataset name without the project prefix.

    Returns:
        The fully qualified ``project.dataset`` ID on success, or ``None``
        when the API call raises a ``GoogleAPIError``.
    """
    full_id = f"{project_id}.{dataset_name}"

    spec = bigquery.Dataset(full_id)
    spec.location = "US"

    try:
        # exists_ok=True tolerates a dataset that is already present.
        bigquery_client.create_dataset(spec, exists_ok=True)
    except exceptions.GoogleAPIError as err:
        print(f"Error creating dataset: {err}")
        return None

    print(f"Dataset {full_id} created or already exists.")
    return full_id

def _table_name_from_blob(file_name):
    """Turn a GCS object name into a conservative BigQuery table name."""
    # Strip only the final extension. A dot that belongs to a folder component
    # (i.e. the text after it still contains "/") is not an extension.
    head, dot, tail = file_name.rpartition(".")
    stem = head if dot and "/" not in tail else file_name
    # Replace everything that is not a letter, digit or underscore (folder
    # separators, dashes, inner dots, spaces, ...) with "_" so distinct
    # object names keep distinct, valid table names.
    return "".join(ch if ch.isalnum() or ch == "_" else "_" for ch in stem)


def load_csv_to_bigquery(dataset_id, gcs_bucket_name, file_name):
    """Load one CSV object from GCS into a BigQuery table, replacing its rows.

    The table name is derived from the object name: the final extension is
    dropped and every character other than letters, digits and underscores is
    replaced with ``_`` (``final-nyc.csv`` -> ``final_nyc``,
    ``trips.2023.csv`` -> ``trips_2023``, ``raw/jan.csv`` -> ``raw_jan``).
    Earlier versions cut the name at the first dot, which made multi-dot
    names collide and overwrite each other under WRITE_TRUNCATE.

    Args:
        dataset_id: Fully qualified ``project.dataset`` ID of the target dataset.
        gcs_bucket_name: Name of the bucket holding the CSV object.
        file_name: Object name (path inside the bucket) of the CSV file.

    Returns:
        None. Failures are reported on stdout and do not propagate, so a
        caller looping over many files continues with the next one.
    """
    uri = f"gs://{gcs_bucket_name}/{file_name}"
    table_name = _table_name_from_blob(file_name)
    table_id = f"{dataset_id}.{table_name}"

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,  # first row is the header
        autodetect=True,  # let BigQuery infer the schema
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )

    try:
        print(f"Loading {file_name} into table {table_id}...")
        load_job = bigquery_client.load_table_from_uri(uri, table_id, job_config=job_config)
        load_job.result()  # Waits for the job to complete
        print(f"Loaded {file_name} into table {table_id}.")
    except exceptions.BadRequest as e:
        print(f"Error loading {file_name}: {e}")
    except exceptions.Forbidden as e:
        print(f"Permission denied when loading {file_name}: {e}")
    except Exception as e:
        # Best-effort boundary: report and move on rather than abort the batch.
        print(f"Unexpected error when loading {file_name}: {e}")

def main():
    """Create the target dataset, then load every CSV object in the bucket.

    Stops early if the dataset cannot be created or accessed. Bucket-level
    errors are reported on stdout instead of being raised.
    """
    target_dataset = create_dataset(dataset_name)
    if not target_dataset:
        print("Failed to create or access dataset. Exiting.")
        return

    try:
        source_bucket = storage_client.bucket(bucket_name)
        for obj in source_bucket.list_blobs():
            # Case-insensitive extension check; non-CSV objects are skipped.
            if not obj.name.lower().endswith('.csv'):
                continue
            load_csv_to_bigquery(target_dataset, bucket_name, obj.name)
    except exceptions.NotFound:
        print(f"Bucket {bucket_name} not found.")
    except exceptions.Forbidden:
        print(f"Permission denied to access bucket {bucket_name}.")
    except Exception as err:
        print(f"Unexpected error: {err}")

# Entry point: run the load only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Dataset boreal-fort-437820-k4.yellow_cab_data created or already exists.
Loading avg_fare_by_day.csv into table boreal-fort-437820-k4.yellow_cab_data.avg_fare_by_day...
Loaded avg_fare_by_day.csv into table boreal-fort-437820-k4.yellow_cab_data.avg_fare_by_day.
Loading avg_fare_by_hour.csv into table boreal-fort-437820-k4.yellow_cab_data.avg_fare_by_hour...
Loaded avg_fare_by_hour.csv into table boreal-fort-437820-k4.yellow_cab_data.avg_fare_by_hour.
Loading avg_fare_by_passenger.csv into table boreal-fort-437820-k4.yellow_cab_data.avg_fare_by_passenger...
Loaded avg_fare_by_passenger.csv into table boreal-fort-437820-k4.yellow_cab_data.avg_fare_by_passenger.
Loading fare_amount_by_distance.csv into table boreal-fort-437820-k4.yellow_cab_data.fare_amount_by_distance...
Loaded fare_amount_by_distance.csv into table boreal-fort-437820-k4.yellow_cab_data.fare_amount_by_distance.
Loading final-nyc.csv into table boreal-fort-437820-k4.yellow_cab_data.final_nyc...
Loaded final-nyc.csv into t