# Create S3 Bucket

In [13]:
import boto3
import sagemaker

session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

s3 = boto3.Session().client(service_name="s3", region_name=region)

In [14]:
setup_s3_bucket_passed = False

In [15]:
print("Default bucket: {}".format(bucket))

Default bucket: sagemaker-us-east-1-705927414280


# Verify S3_BUCKET Bucket Creation

In [16]:
%%bash

aws s3 ls s3://${bucket}/

2023-03-17 13:01:57 aws-athena-query-results-705927414280-us-east-1
2023-03-18 02:29:33 gunviolence-kbaum215
2023-03-19 21:59:56 official-gunviolence-kbaum215
2023-03-05 21:10:59 sagemaker-studio-705927414280-q3srkpw595r
2023-03-05 20:36:33 sagemaker-studio-705927414280-vo6gix3myc
2023-03-05 20:42:01 sagemaker-us-east-1-705927414280


In [17]:
from botocore.client import ClientError

response = None

try:
    response = s3.head_bucket(Bucket=bucket)
    print(response)
    setup_s3_bucket_passed = True
except ClientError as e:
    print("[ERROR] Cannot find bucket {} in {} due to {}.".format(bucket, response, e))

{'ResponseMetadata': {'RequestId': 'AH80QD0W8JD760VA', 'HostId': 'jjyIh0tdWa6eAJ4CkJQSpEY+2NQH6VR9ZGn6+spcmRu2TLV7dzky2KGntdqE2SL1sy8K/htrTNE=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'jjyIh0tdWa6eAJ4CkJQSpEY+2NQH6VR9ZGn6+spcmRu2TLV7dzky2KGntdqE2SL1sy8K/htrTNE=', 'x-amz-request-id': 'AH80QD0W8JD760VA', 'date': 'Fri, 07 Apr 2023 13:19:30 GMT', 'x-amz-bucket-region': 'us-east-1', 'x-amz-access-point-alias': 'false', 'content-type': 'application/xml', 'server': 'AmazonS3'}, 'RetryAttempts': 0}}


In [18]:
%store setup_s3_bucket_passed

Stored 'setup_s3_bucket_passed' (bool)


In [19]:
%store

Stored variables and their in-db values:
ingest_create_athena_db_passed             -> True
s3_private_path                            -> 's3://sagemaker-us-east-1-705927414280/gun_violenc
s3_private_path_1                          -> 's3://sagemaker-us-east-1-705927414280/census2010_
s3_private_path_2                          -> 's3://sagemaker-us-east-1-705927414280/state_abbre
s3_private_path_3                          -> 's3://sagemaker-us-east-1-705927414280/congress_da
s3_private_path_4                          -> 's3://sagemaker-us-east-1-705927414280/state_incom
s3_private_path_5                          -> 's3://sagemaker-us-east-1-705927414280/cities_data
s3_private_path_6                          -> 's3://sagemaker-us-east-1-705927414280/zipcodes'
s3_private_path_7                          -> 's3://sagemaker-us-east-1-705927414280/adjusted_gr
s3_private_path_8                          -> 's3://sagemaker-us-east-1-705927414280/temp'
s3_private_path_tsv                        

# Create Folders If They Do Not Already Exist

In [20]:
def create_folder(bucket_name, folder_name):
    s3 = boto3.client('s3')
    folder_key = f"{folder_name}/"
    s3.put_object(Bucket=bucket_name, Key=folder_key)

def get_existing_folders(bucket_name):
    s3 = boto3.client('s3')
    existing_folders = set()
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get('Contents', []):
            folder = obj['Key'].split('/')[0]
            existing_folders.add(folder)
    return existing_folders

def create_folders_if_not_exists(bucket_name, folder_names):
    existing_folders = get_existing_folders(bucket_name)
    for folder_name in folder_names:
        if folder_name not in existing_folders:
            create_folder(bucket_name, folder_name)
            print(f"Created folder: {folder_name}")
        else:
            print(f"Folder {folder_name} already exists")

bucket_name = bucket 
folder_names = [
    'gun_violence_data',
    'census2010_data',
    'state_abbrev_data',
    'congress_data',
    'state_income_data',
    'cities_data',
]

create_folders_if_not_exists(bucket_name, folder_names)

Folder gun_violence_data already exists
Folder census2010_data already exists
Folder state_abbrev_data already exists
Folder congress_data already exists
Folder state_income_data already exists
Folder cities_data already exists


# Put The Files Into Their Appropriate Folders

In [21]:
def check_file_exists_in_folder(bucket_name, folder_name, file_name):
    s3 = boto3.client('s3')
    key_prefix = f"{folder_name}/{file_name}"
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix)

    for obj in response.get('Contents', []):
        if obj['Key'] == key_prefix:
            return True
    return False

def upload_file_to_folder(bucket_name, folder_name, file_path, file_name):
    if not check_file_exists_in_folder(bucket_name, folder_name, file_name):
        s3 = boto3.client('s3')
        key = f"{folder_name}/{file_name}"
        s3.upload_file(file_path, bucket_name, key)
        print(f"Uploaded {file_name} to {folder_name}")
    else:
        print(f"File {file_name} already exists in {folder_name}")
    

# Upload the file to the specified folder
upload_file_to_folder(bucket_name, 'gun_violence_data', '../data/gun_violence.csv', 'gun_violence.csv')
upload_file_to_folder(bucket_name, 'census2010_data', '../data/sub_est2018_all.csv', 'sub_est2018_all.csv')
upload_file_to_folder(bucket_name, 'state_abbrev_data', '../data/state_abbrev_map.csv', 'state_abbrev_map.csv')
upload_file_to_folder(bucket_name, 'congress_data', '../data/Congress_2013-2018.csv', 'Congress_2013-2018.csv')
upload_file_to_folder(bucket_name, 'state_income_data', '../data/all_states_income.csv', 'all_states_income.csv')
upload_file_to_folder(bucket_name, 'cities_data', '../data/uscities.csv', 'uscities.csv')

File gun_violence.csv already exists in gun_violence_data
File sub_est2018_all.csv already exists in census2010_data
File state_abbrev_map.csv already exists in state_abbrev_data
File Congress_2013-2018.csv already exists in congress_data
File all_states_income.csv already exists in state_income_data


# Release Resources

In [22]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [23]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}

<IPython.core.display.Javascript object>