Parameters

In [2]:
import os
# Prefer a credentials path that is already exported in the environment so the
# notebook is not tied to one machine; fall back to the original local key file
# when the variable is unset or empty, which keeps existing runs unchanged.
GOOGLE_APPLICATION_CREDENTIALS = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "/Users/Vishal/Downloads/e2e-fraud-detection-debf1c9863af.json"
)
# Re-export the resolved path so clients built without an explicit key file
# can pick the credentials up from the environment.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_APPLICATION_CREDENTIALS

In [3]:
# Google Cloud project that owns every resource created in this notebook.
PROJECT_ID = "e2e-fraud-detection"
# BigQuery dataset that will hold the fraud tables.
BQ_DATASET = "fraud_dataset"
# The GCS bucket name is set to the project ID.
BUCKET_NAME = PROJECT_ID

In [4]:
# Echo the resolved configuration, one "NAME: value" pair per line
print(
    f"GOOGLE_APPLICATION_CREDENTIALS: {GOOGLE_APPLICATION_CREDENTIALS}",
    f"PROJECT_ID: {PROJECT_ID}",
    f"BUCKET_NAME: {BUCKET_NAME}",
    f"BQ_DATASET: {BQ_DATASET}",
    sep="\n",
)

GOOGLE_APPLICATION_CREDENTIALS: /Users/Vishal/Downloads/e2e-fraud-detection-debf1c9863af.json
PROJECT_ID: e2e-fraud-detection
BUCKET_NAME: e2e-fraud-detection
BQ_DATASET: fraud_dataset


### Title:
"GCS Bucket Creation with Error Handling"

### Description:
"A utility function that creates a GCS bucket with a configurable location and storage class. It validates the bucket name, optionally authenticates with a service-account key file, and raises dedicated exceptions for bucket-name conflicts and authentication failures."

In [6]:
import re
import os 
from google.cloud import storage
from google.api_core.exceptions import Conflict, GoogleAPIError


class BucketNameConflictError(Exception):
    """Signals that the requested GCS bucket name is already in use."""


class AuthenticationError(Exception):
    """Signals that a Google Cloud API call was rejected with HTTP 401 or 403."""


def create_gcs_bucket(bucket_name: str, project_id: str,
                      credentials_path: str = None,
                      location: str = 'US', storage_class: str = 'STANDARD') -> None:
    """Creates a new bucket in Google Cloud Storage with robust error handling.

    On success a one-line confirmation (name, location, storage class) is printed.

    Args:
        bucket_name: Name for the new bucket. Only names of 3-63 characters made of
                     lowercase letters, digits and hyphens, starting and ending with
                     a letter or digit, are accepted by this function.
        project_id: The ID of the Google Cloud project where the bucket will be created.
        credentials_path: Path to the JSON credentials file for authentication.
                          If None (or empty), credentials are inferred from the environment.
        location: Location where the bucket will be created. Defaults to 'US'.
        storage_class: Storage class for the bucket. Defaults to 'STANDARD'.

    Raises:
        ValueError: If bucket_name does not pass the naming check.
        BucketNameConflictError: If the API reports a conflict for the bucket name.
        AuthenticationError: If the API responds with HTTP 401 or 403.
        GoogleAPIError: Any other API failure; it is printed and then re-raised.
    """

    # Validate the whole string. `re.match` with a trailing `$` would also accept
    # a name that ends in a newline, because `$` matches just before a final "\n".
    # NOTE(review): GCS itself may permit more characters (e.g. dots/underscores);
    # this check is intentionally left as strict as before - verify before relaxing.
    if not re.fullmatch(r"[a-z0-9][a-z0-9\-]{1,61}[a-z0-9]", bucket_name):
        raise ValueError("Invalid bucket name. Please follow GCS naming conventions.")

    # Create a storage client
    if credentials_path:
        storage_client = storage.Client.from_service_account_json(credentials_path, project=project_id)
    else:
        # Credentials will be automatically inferred from the environment
        storage_client = storage.Client(project=project_id)

    try:
        bucket = storage.Bucket(client=storage_client, name=bucket_name)
        bucket.storage_class = storage_class
        new_bucket = storage_client.create_bucket(bucket, location=location)

        print(f'Bucket {new_bucket.name} created in location {new_bucket.location} with storage class {new_bucket.storage_class}')

    except Conflict as exc:
        # Must be handled before GoogleAPIError, which would otherwise catch it too.
        raise BucketNameConflictError(f'The bucket name "{bucket_name}" is already in use.') from exc
    except GoogleAPIError as exc:
        # Not every GoogleAPIError carries an HTTP status: only API *call* errors
        # define `code`, so read it defensively instead of risking AttributeError
        # inside the handler.
        status = getattr(exc, "code", None)
        if status in (401, 403):
            raise AuthenticationError(f'Authentication error: {exc}') from exc
        print(f'Error creating bucket: {exc}')
        raise


In [7]:
create_gcs_bucket(
    bucket_name=BUCKET_NAME,
    project_id=PROJECT_ID,
    credentials_path=GOOGLE_APPLICATION_CREDENTIALS,
)


Bucket e2e-fraud-detection created in location US with storage class STANDARD


### Title:
"Efficient BigQuery Dataset Creation"

### Description:
"A function that creates a BigQuery dataset with an optional geographic location and description. If the dataset already exists, it reports the conflict instead of raising an error."

In [10]:
from google.cloud import bigquery
from google.api_core.exceptions import Conflict

def create_bigquery_dataset(dataset_id: str, project_id: str ='e2e-fraud-detection',
                            location: str = 'US', description: str = None,
                            credentials_path: str = None) -> None:
    """Creates a new dataset in Google BigQuery.

    The outcome is reported on stdout. Failures are printed rather than raised,
    so this function returns None in every case.

    Args:
        dataset_id: The ID of the dataset to create.
        project_id: The ID of the Google Cloud project where the dataset will reside.
        location: The geographic location where the dataset should be created (e.g., 'US', 'EU').
        description: An optional description of the dataset.
        credentials_path: Optional path to a JSON service-account key file. If None
                          (or empty), credentials are inferred from the environment.
    """

    # Same client-construction options as create_gcs_bucket.
    if credentials_path:
        client = bigquery.Client.from_service_account_json(credentials_path, project=project_id)
    else:
        client = bigquery.Client(project=project_id)

    # Build the reference directly; the `Client.dataset()` helper is deprecated.
    dataset_ref = bigquery.DatasetReference(project_id, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)

    if description:
        dataset.description = description

    if location:
        dataset.location = location

    try:
        dataset = client.create_dataset(dataset)
        print(f'Dataset {dataset.dataset_id} created in project {project_id} at location {dataset.location}.')
    except Conflict:
        # An existing dataset is treated as a non-fatal condition.
        print(f'Dataset {dataset_id} already exists.')
    except Exception as e:
        # NOTE(review): every other failure is only printed, so callers cannot
        # detect it programmatically; kept as-is to preserve existing behaviour.
        print(f'Error creating dataset: {e}')



In [11]:
# Example usage: create the dataset in the project configured in PROJECT_ID
# instead of relying on the function's built-in default project.
create_bigquery_dataset(
    BQ_DATASET,
    project_id=PROJECT_ID,
    description='Dataset for storing fraudulent data.',
)

Dataset fraud_dataset created in project e2e-fraud-detection at location US.
