In [1]:
import sys
import os
import io
import csv
import boto3
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.dynamicframe import DynamicFrame
from botocore.client import Config

In [2]:
# Address of the S3-compatible endpoint used throughout this notebook.
endpoint = "http://glue.dev.s3.local:9000"
# Export the endpoint as an environment variable: the `!aws s3 ls` shell cell
# reads it as $TEST_S3_ENDPOINT_URL and the boto3 cell reads it via os.getenv.
os.environ["TEST_S3_ENDPOINT_URL"] = endpoint
# Reuse the running SparkContext if there is one, otherwise start a new one.
sc = SparkContext.getOrCreate()
# Wrap the SparkContext in a GlueContext and take its SparkSession.
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
# NOTE(review): job.init is left disabled, so `job` is constructed but never
# initialised in this cell -- confirm whether later cells rely on it.
# job.init("JobName")

In [3]:
# Apply the S3A connector settings to the Hadoop configuration behind the
# Spark context: custom endpoint, path-style access, and signer override.
hadoop_conf = sc._jsc.hadoopConfiguration()
s3a_settings = (
    ("fs.s3a.endpoint", endpoint),
    ("fs.s3a.path.style.access", "true"),
    ("fs.s3a.signing-algorithm", "S3SignerType"),
)
# Settings are applied in the order listed above.
for option, setting in s3a_settings:
    hadoop_conf.set(option, setting)

In [4]:
# Shell escape: list `sample/` with the AWS CLI, pointing the CLI at the
# endpoint stored in the TEST_S3_ENDPOINT_URL environment variable.
!aws s3 ls --endpoint-url $TEST_S3_ENDPOINT_URL sample/

2022-01-14 15:23:30       2470 iris.csv


In [5]:
# Build a boto3 S3 resource against the endpoint published in the
# TEST_S3_ENDPOINT_URL environment variable, using path-style addressing.
s3_endpoint_url = os.environ.get("TEST_S3_ENDPOINT_URL")
path_style_config = Config(s3={"addressing_style": "path"})
s3 = boto3.resource(
    "s3",
    endpoint_url=s3_endpoint_url,
    region_name="ap-northeast-1",
    use_ssl=False,
    config=path_style_config,
)

# Handle to the bucket this notebook works with.
bucket_name = "sample"
bucket = s3.Bucket(bucket_name)
# Bucket creation is intentionally left disabled here.
# bucket.create(ACL="public-read-write")

In [6]:
# Load the CSV data under the bucket root through Glue's S3 connection,
# comma-separated with a header row, then preview it as a Spark DataFrame.
p = "s3://sample/"
csv_format_options = {"separator": ",", "withHeader": True}
dynamic_frame = glueContext.create_dynamic_frame.from_options(
    connection_type="s3",
    connection_options={"paths": [p]},
    format="csv",
    format_options=csv_format_options,
)
# Convert the Glue DynamicFrame into a Spark DataFrame before displaying it.
df = dynamic_frame.toDF()
df.show()

+-----------------+----------------+-----------------+----------------+
|sepal length (cm)|sepal width (cm)|petal length (cm)|petal width (cm)|
+-----------------+----------------+-----------------+----------------+
|              5.1|             3.5|              1.4|             0.2|
|              4.9|             3.0|              1.4|             0.2|
|              4.7|             3.2|              1.3|             0.2|
|              4.6|             3.1|              1.5|             0.2|
|              5.0|             3.6|              1.4|             0.2|
|              5.4|             3.9|              1.7|             0.4|
|              4.6|             3.4|              1.4|             0.3|
|              5.0|             3.4|              1.5|             0.2|
|              4.4|             2.9|              1.4|             0.2|
|              4.9|             3.1|              1.5|             0.1|
|              5.4|             3.7|              1.5|          