# AWS Glue Studio Notebook
##### You are now running an AWS Glue Studio notebook. To start using your notebook, you need to start an AWS Glue Interactive Session.


#### Optional: Run this cell to see available notebook commands ("magics").


In [None]:
%help

####  Run this cell to set up and start your interactive session.


In [1]:
# Session configuration magics (the kernel output shows them being applied before the session is created).
# Idle timeout is given in minutes: 2880 minutes = 48 hours.
%idle_timeout 2880
# Glue runtime version for the session.
%glue_version 3.0
# Worker type and worker count for the session.
%worker_type G.1X
%number_of_workers 3

# NOTE(review): `sys` and `getResolvedOptions` are not used in the cells visible here,
# and the wildcard import hides which transforms are actually needed - confirm before pruning.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
# Reuse the active SparkContext if there is one, otherwise create it.
sc = SparkContext.getOrCreate()
# GlueContext wraps the SparkContext; later cells use `glueContext` to read and write DynamicFrames.
glueContext = GlueContext(sc)
# Spark session exposed by the GlueContext.
spark = glueContext.spark_session
# NOTE(review): `job` is created but never used in the cells visible here - confirm whether it is needed.
job = Job(glueContext)

Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 0.37.0 
Current idle_timeout is 2800 minutes.
idle_timeout has been set to 2880 minutes.
Setting Glue version to: 3.0
Previous worker type: G.1X
Setting new worker type to: G.1X
Previous number of workers: 5
Setting new number of workers to: 3
Authenticating with environment variables and user-defined glue_role_arn: arn:aws:iam::254872174412:role/AWS-Glue-HartB
Trying to create a Glue session for the kernel.
Worker Type: G.1X
Number of Workers: 3
Session ID: da642451-4a96-4657-935c-548b1a52ae48
Job Type: glueetl
Applying the following default arguments:
--glue_kernel_version 0.37.0
--enable-glue-datacatalog true
Waiting for session da642451-4a96-46

#### Example: Create a DynamicFrame from a table in the AWS Glue Data Catalog and display its schema (the active cell below reads Parquet files directly from Amazon S3 instead)


In [None]:
# Template example, left disabled: load a DynamicFrame from a Data Catalog table and print its schema.
# 'database_name' and 'table_name' are placeholders that must be replaced before uncommenting.
#dyf = glueContext.create_dynamic_frame.from_catalog(database='database_name', table_name='table_name')
#dyf.printSchema()


In [2]:
# Read the Parquet files under the S3 prefix into a DynamicFrame.
# No format_options are passed: "withHeader" and "optimizePerformance" are CSV reader
# options in AWS Glue and have no documented effect when format="parquet".
dyf = glueContext.create_dynamic_frame_from_options(
    connection_type="s3",
    connection_options={"paths": ["s3://bossa-nova-trusted/etl_gestao_emails/adsdasdada/"]},
    format="parquet",
)




#### Example: Convert the DynamicFrame to a Spark DataFrame and display a sample of the data


In [6]:
# Convert the DynamicFrame into a Spark DataFrame.
df = dyf.toDF()
# Row preview is disabled, so no sample rows are printed by this cell.
#df.show()
# Bare expression: the cell output is the DataFrame's repr (column names and types), not its rows.
df

DataFrame[id: string, email_id: string, lead_id: string, list_id: string, ip_id: string, copy_id: string, email_address: string, date_sent: string, is_read: string, is_failed: string, viewed_in_browser: string, date_read: string, tracking_hash: string, retry_count: string, source: string, source_id: string, tokens: string, open_count: string, last_opened: string, open_details: string, generated_sent_date: string]


In [10]:
from awsglue.dynamicframe import DynamicFrame

# Wrap the Spark DataFrame back into a DynamicFrame named "nested".
# This reuses the `glueContext` created in the session-setup cell; SparkContext and
# GlueContext are already imported there, so they are not re-imported here.
# Note: this rebinds `dyf`, replacing the DynamicFrame that was read from S3.
dyf = DynamicFrame.fromDF(df, glueContext, "nested")




#### Example: Write the data in the DynamicFrame to a location in Amazon S3 and a table for it in the AWS Glue Data Catalog


In [11]:
# Build an S3 sink with catalog updates enabled (enableUpdateCatalog / updateBehavior).
s3output = glueContext.getSink(
    # Where the data goes.
    connection_type="s3",
    path="s3://bossa-nova-trusted/etl_gestao_emails/zadasdas/",
    # How it is laid out on disk: no partition columns, snappy compression.
    partitionKeys=[],
    compression="snappy",
    # Data Catalog behaviour on write.
    enableUpdateCatalog=True,
    updateBehavior="UPDATE_IN_DATABASE",
    transformation_ctx="s3output",
)

# Catalog database/table the sink is associated with, then the output format.
s3output.setCatalogInfo(catalogDatabase="demo", catalogTableName="populations")
s3output.setFormat("glueparquet")

# Perform the write. As the cell's last expression, the returned DynamicFrame is echoed as output.
s3output.writeFrame(dyf)


<awsglue.dynamicframe.DynamicFrame object at 0x7f85c6052b90>
