# In [None]:
import os, posixpath, socket
import pyspark
from pyspark.sql import SparkSession

# ---------------------------------------------------------------------------
# Spark session settings (read by the SparkSession builder in this file)
# ---------------------------------------------------------------------------

# Application identity: passed to appName() and to "spark.app.id".
APP_NAME: str = "test-hgkim2"
APP_ID: str = "test-python.py"

# Driver sizing.
# NOTE(review): the max result size (10g) is larger than the driver heap (5g);
# confirm this is intentional.
DRIVER_MEMORY: str = "5g"
DRIVER_MAX_RESULT_SIZE: str = "10g"

# Executor sizing: number of executors, cores per executor, heap per executor.
EXECUTOR_INSTANCES: int = 3
EXECUTOR_CORES: int = 4
EXECUTOR_MEMORY: str = "8g"

# Value used for "spark.sql.shuffle.partitions".
SHUFFLE_PARTITIONS: int = 2000

# Build (or reuse, via getOrCreate) the SparkSession for this notebook.
#
# The chain is wrapped in parentheses rather than backslash continuations so
# that each group of settings can carry its own comment.
spark_session = (
    SparkSession.builder
    .appName(APP_NAME)
    # Kubernetes API server used as the Spark master.
    .master("k8s://https://172.17.***.56:6443")
    .config("spark.app.id", APP_ID)
    # Base path under which the Spark UI is served behind the notebook proxy.
    .config('spark.ui.proxyBase', '/user/manager/proxy/4040')
    .config("spark.sql.sources.partitionOverwriteMode", "dynamic")
    .config("spark.kryoserializer.buffer.max", "1024m")
    .config("spark.kubernetes.container.image", "encore.encore/library/spark-3.2.0-base:1.2.0-20211209")
    # --- Resource sizing (values come from the constants defined above) ---
    .config("spark.executor.instances", EXECUTOR_INSTANCES)
    .config("spark.executor.memory", EXECUTOR_MEMORY)
    .config("spark.executor.cores", EXECUTOR_CORES)
    .config("spark.driver.memory", DRIVER_MEMORY)
    .config("spark.driver.maxResultSize", DRIVER_MAX_RESULT_SIZE)
    .config("spark.sql.shuffle.partitions", SHUFFLE_PARTITIONS)
    # --- Kubernetes namespace and service account ---
    .config("spark.kubernetes.namespace", "spark")
    .config("spark.kubernetes.authenticate.driver.serviceAccountName", "jupyter")
    # --- Driver networking ---
    # Fixed ports so that executors can reach the driver on known ports.
    .config("spark.driver.port", "2222")
    # FIX: the key was misspelled "spark.driver.blockManaer.port", which Spark
    # does not recognise, so the block manager port was never actually pinned.
    .config("spark.driver.blockManager.port", "7777")
    # Advertise this host's resolved IP address, but bind on all interfaces.
    .config("spark.driver.host", socket.gethostbyname(socket.gethostname()))
    .config("spark.driver.bindAddress", "0.0.0.0")
    # Fixed executor count; dynamic allocation is explicitly disabled.
    .config("spark.dynamicAllocation.enabled", "false")
    # --- Executor scratch volume ---
    # Persistent volume claim named "spark-local-dir-localdirpvc", mounted
    # read-write at /data. Per the Spark-on-Kubernetes docs, a volume name
    # starting with "spark-local-dir-" is used as executor local storage and
    # claimName "OnDemand" requests a per-executor claim.
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-localdirpvc.options.claimName", "OnDemand")
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-localdirpvc.options.storageClass", "rook-ceph-block")
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-localdirpvc.options.sizeLimit", "10Gi")
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-localdirpvc.mount.path", "/data")
    .config("spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-localdirpvc.mount.readOnly", "false")
    # --- S3A object storage ---
    # NOTE(review): the access/secret key values look like placeholders; real
    # credentials should not be committed in source. Consider loading them
    # from the environment or a secret store.
    .config("spark.hadoop.fs.s3a.access.key", "access-key")
    .config("spark.hadoop.fs.s3a.secret.key", "secret-key")
    # Plain-HTTP endpoint with path-style access and SSL disabled.
    .config("spark.hadoop.fs.s3a.endpoint", "http://10.***.81.57:80")
    .config("spark.hadoop.com.amazonaws.services.s3.enableV4", "true")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
    .getOrCreate()
)