### Importing Libraries

In [None]:
from pyspark.sql import SparkSession
from simple_salesforce import Salesforce
from pyspark.sql.functions import to_timestamp, col, lit, array
from pyspark import SparkConf, SparkContext
from pyspark.sql.types import *
import json
import snowflake.connector
import os
from datetime import datetime
import hmac 
import hashlib
import urllib.parse
from pyspark.sql import functions as F
from cryptography.fernet import Fernet
import re

"""
from pyspark.sql import SparkSession
from simple_salesforce import Salesforce
from pyspark import SparkConf, SparkContext
from pyspark.sql.types import *
import json
import snowflake.connector
import os
from datetime import datetime
import hmac 
import hashlib
import urllib.parse
from pyspark.sql import functions as F
from cryptography.fernet import Fernet
import re
"""


'\nfrom pyspark.sql import SparkSession\nfrom simple_salesforce import Salesforce\nfrom pyspark import SparkConf, SparkContext\nfrom pyspark.sql.types import *\nimport json\nimport snowflake.connector\nimport os\nfrom datetime import datetime\nimport hmac \nimport hashlib\nimport urllib.parse\nfrom pyspark.sql import functions as F\nfrom cryptography.fernet import Fernet\nimport re\n'

### Loading Credentials, Creating Spark Context, and Creating Connections

#### Load configuration properties from the Config file

In [None]:
# Placeholder configuration: each entry maps a credential name to itself.
# The names double as the secret keys used when the real values are loaded.
config = {
    name: name
    for name in (
        'user',
        'password',
        'account',
        'warehouse',
        'database',
        'schema',
        'role',
        'username_sf',
        'password_sf',
        'securityToken_sf',
        'authEndPoint_sf',
        'username_mail',
        'password_mail',
        'enc_key',
    )
}

In [None]:
# Loading connection credentials from Databricks secrets into config dictionary

# Replace every placeholder in `config` with the secret stored under the same
# key in the "snow_salesforce_creds" secret scope.
for secret_name in tuple(config):
    config[secret_name] = dbutils.secrets.get(scope="snow_salesforce_creds", key=secret_name)

#### Set up SparkSession

In [None]:
# Key/value Spark settings applied on top of the application name and master.
spark_settings = [
    ("spark.jars.packages",
     "net.snowflake:snowflake-jdbc:3.13.3,net.snowflake:spark-snowflake_2.12:2.9.0-spark_3.0"),
    ("spark.driver.memory", "10g"),
    ("spark.executor.memory", "6g"),
    ("spark.driver.maxResultSize", "6g"),
    ("spark.driver.extraClassPath", "postgresql-42.2.24.jar"),
]

conf = SparkConf().setAppName("Salesforce_ETL").setMaster("local[*]")
for setting_name, setting_value in spark_settings:
    conf = conf.set(setting_name, setting_value)

# Obtain the SparkContext through getOrCreate and wrap it in a SparkSession.
sc = SparkContext.getOrCreate(conf=conf)
spark = SparkSession(sc)

#### Define connection properties

In [None]:
# Salesforce client built from the Salesforce entries of `config`.
sf = Salesforce(
    username=config['username_sf'],
    password=config['password_sf'],
    security_token=config['securityToken_sf'],
    instance_url=config['authEndPoint_sf'],
)

# Options handed to the Spark "snowflake" data source for reads and writes.
snow_options = dict(
    sfURL=f"{config['account']}.snowflakecomputing.com",
    sfUser=config['user'],
    sfPassword=config['password'],
    sfDatabase=config['database'],
    sfSchema=config['schema'],
    sfWarehouse=config['warehouse'],
    sfRole=config['role'],
)

# Direct Snowflake connection used for statements issued through cursors.
# NOTE(review): no role is passed here although snow_options carries sfRole —
# confirm that the connection's default role is the intended one.
snowflake_login = {
    field: config[field]
    for field in ('user', 'password', 'account', 'warehouse', 'database', 'schema')
}
conn = snowflake.connector.connect(**snowflake_login)

# Notification messages collected during the run (mailed at the end of the job).
msg_to_send = []
# Module-level list intended for descriptions of detected schema changes.
Schema_change_vals = []

In [None]:
# Display the Snowflake schema currently set in snow_options.
snow_options["sfSchema"]

'[REDACTED]'

### Dummy Variables

In [None]:
# Fixed placeholder values available for overwriting sensitive fields.
dummy_json_str = dict(
    _id="6138763984fe5172962f8147",
    balance="$3,677.47",
    about="Lorem ipsum Aliqua ad elit elit veniam in mollit officia",
    registered="2018-05-04T02:28:05 -02:00",
    latitude=60.763774,
)

# JSON text form of the placeholder document above.
dummy_json = json.dumps(dummy_json_str)

# Column expression: a one-element array holding the literal 'Lorem Ipsum'.
dummy_array = array(lit('Lorem Ipsum'))

dummy_mail, dummy_string = 'dummy@mail.com', 'Lorem ipsum'
dummy_phone, dummy_ip = '+000000000000', '127.0.0.1'
dummy_date = '1660-01-01'


### Main Methods

#### Encryption

In [None]:
def split_email(email):
    """Split an address at its first '@'.

    Returns a 2-tuple: the text before the first '@', and the remainder
    prefixed with '@'. Raises IndexError when `email` contains no '@'.
    """
    pieces = email.split('@', 1)
    local_part = pieces[0]
    domain_part = "@" + pieces[1]
    return local_part, domain_part

def is_valid_email(email):
    """Return True when `email` matches the notebook's address pattern.

    The pattern requires a local part, an '@', a domain, and a final
    dot-separated label of at least two ASCII letters.
    """
    return re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', email) is not None

def hash_value(text: str) -> str:
    """Return a keyed SHA-256 hex digest of `text`.

    Falsy input (None, empty string) is hashed as the string 'null'. When the
    value passes is_valid_email, only the part before the '@' is hashed and
    the original '@domain' suffix is appended to the digest unchanged.
    The key appended before hashing is config['enc_key'].
    """
    def _salted_digest(plain: str) -> str:
        # SHA-256 over the UTF-8 bytes of the value followed by the key bytes.
        return hashlib.sha256(plain.encode() + config['enc_key'].encode()).hexdigest()

    value = text or 'null'
    if not is_valid_email(value):
        return _salted_digest(value)

    local_part, domain = split_email(value)
    return _salted_digest(local_part) + domain

# Spark UDF wrapping hash_value; returns a string column.
# `udf` is reached through the `F` alias because the notebook imports only
# to_timestamp, col, lit and array by name from pyspark.sql.functions, so a
# bare `udf` would depend on the runtime pre-defining it.
encrypt_data_udf = F.udf(hash_value, StringType())

"""
# Example usage:

text_to_hash = 'world@gmail.com'
result = hash_value(text_to_hash)
print(result)
"""

"\n# Example usage:\n\ntext_to_hash = 'world@gmail.com'\nresult = hash_value(text_to_hash)\nprint(result)\n"

#### Anonymize

In [None]:
def anonymise(pg_df, table_name=None):
    """Mask personal data in a dataframe read from the 'contact' table.

    Parameters
    ----------
    pg_df : DataFrame
        Data read from the source table.
    table_name : str, optional
        Name of the table the data belongs to. When omitted, the module-level
        variable `tab` (the migration loop variable) is used, which preserves
        the behaviour of existing one-argument calls.

    Returns
    -------
    DataFrame
        For table 'contact': the dataframe with name/e-mail columns replaced by
        their hash (encrypt_data_udf) and date/phone columns replaced by fixed
        placeholder literals. For any other table: `pg_df` unchanged.
    """
    if table_name is None:
        # Backward-compatible fallback for callers that pass only the dataframe.
        table_name = tab

    if table_name != 'contact':
        return pg_df

    # (column, placeholder) pairs, applied in this order.
    # A placeholder of None means "hash the existing value".
    contact_rules = (
        ("lastname", None),
        ("firstname", None),
        ("name", None),
        ("email", None),
        ("birthdate", dummy_date),
        ("phone", dummy_phone),
        ("fax", dummy_phone),
        ("mobilephone", dummy_phone),
        ("fromname__c", None),
        ("fromemail__c", None),
        ("fromphone__c", dummy_phone),
        ("frommobile__c", dummy_phone),
    )

    for column_name, placeholder in contact_rules:
        if placeholder is None:
            masked = encrypt_data_udf(col(column_name))
        else:
            masked = lit(placeholder)
        pg_df = pg_df.withColumn(column_name, masked)

    return pg_df

#### Reading data from tables

In [None]:
# Uncomment below for DEV

"""
snow_options["sfSchema"] = "SALESFORCE_PARALLEL"

conn = snowflake.connector.connect(
    user = config['user'],
    password = config['password'],
    account= config['account'],
    warehouse= config['warehouse'],
    database= config['database'],
    schema= 'SALESFORCE_PARALLEL' #config['schema']
)

"""

'\nsnow_options["sfSchema"] = "[REDACTED]_PARALLEL"\n\nconn = snowflake.connector.connect(\n    user = config[\'user\'],\n    password = config[\'password\'],\n    account= config[\'account\'],\n    warehouse= config[\'warehouse\'],\n    database= config[\'database\'],\n    schema= \'[REDACTED]_PARALLEL\' #config[\'schema\']\n)\n\n'

##### Determine Salesforce table field type

In [None]:
# Define the __get_field_type function to determine the field type
def __get_field_type(field_type):
    """Map a Salesforce field type name to the Spark SQL type for that column.

    Recognised names are "string", "int", "double", "date" and "datetime".
    Any other name falls back to StringType.

    To keep Salesforce's own date/datetime text instead of standardised
    values, remove the "date" and "datetime" entries below (and the matching
    conversion in read_src_sf_table) so those fields fall back to StringType.
    """
    spark_type_by_name = {
        "string": StringType,
        "int": IntegerType,
        "double": DoubleType,
        "date": DateType,
        "datetime": TimestampType,
        # Add more entries for other field types as needed.
    }
    # Unrecognised names use the default type; change StringType here if a
    # different default is more appropriate.
    return spark_type_by_name.get(field_type, StringType)()

##### Reading data from source table

In [None]:
# Read Data from Salesforce Object(Table)

def read_src_sf_table(table: str, query_extension: str = None ) -> 'DataFrame':
    """Read a Salesforce object into a Spark DataFrame.

    The object's field list is taken from its describe() result; every field is
    selected and typed through __get_field_type.

    Parameters
    ----------
    table : str
        Salesforce object name.
    query_extension : str, optional
        SOQL text appended after the FROM clause (for example a WHERE, ORDER BY
        or LIMIT clause). Omitted or None means no extra clause.

    Returns
    -------
    DataFrame or None
        The records as a DataFrame. If anything fails, the error is printed and
        None is returned, so callers must be prepared for None.
    """
    # An omitted extension must contribute nothing to the query; formatting
    # None directly would insert the literal text "None" after the object name.
    soql_suffix = '' if query_extension is None else query_extension

    try:
        # Object description supplies the field names and Salesforce types.
        fields = sf.__getattr__(table).describe()["fields"]
        schema = StructType(
            [StructField(field["name"], __get_field_type(field["type"]), True) for field in fields]
        )

        field_names = [field["name"] for field in fields]
        data = sf.query_all(f"SELECT {' , '.join(field_names)} FROM {table} {soql_suffix} ")

        # One list per record, values ordered like `fields` / `schema`.
        rows = [
            [_convert_sf_value(field["type"], record.get(field["name"])) for field in fields]
            for record in data["records"]
        ]

        return spark.createDataFrame(rows, schema)

    except Exception as e:
        print("Error occured -> ", e)
        return None


def _convert_sf_value(field_type, field_value):
    """Convert Salesforce date/datetime text to Python date/datetime objects.

    Other types and None are returned unchanged. Remove this conversion (and
    the date/datetime branches in __get_field_type) to keep Salesforce's own
    datetime text.
    """
    if field_value is None:
        return field_value
    if field_type == "date":
        return datetime.strptime(field_value, "%Y-%m-%d").date()
    if field_type == "datetime":
        # Keep only 'YYYY-MM-DD HH:MM:SS'; anything after the seconds
        # (fraction, offset) is dropped.
        return datetime.strptime(field_value.replace('T', ' ')[:19], "%Y-%m-%d %H:%M:%S")
    return field_value

In [None]:
"""
dat = read_src_sf_table('accounthistory')
display(dat)
"""

"\ndat = read_src_sf_table('accounthistory')\ndisplay(dat)\n"

##### Reading data from target table

In [None]:
# Read Data from Snowflake Database table

def read_tgt_snw_table(query: str, snow_options: dict) -> 'DataFrame':
    """Run `query` through the Spark "snowflake" data source.

    `snow_options` supplies the connector options. Returns the resulting
    DataFrame; on any error the exception is printed and None is returned.
    """
    try:
        reader = spark.read.format("snowflake").options(**snow_options)
        return reader.option("query", query).load()
    except Exception as err:
        print("Error occured -> ", err)

##### Get source database tables to migrate

In [None]:
# Get all database tables to migrate

def get_all_tabs_to_migrate() -> list:
    """Return the fixed list of Salesforce object names selected for migration."""
    return [
        'account',
        'userrole',
        'user',
        'opportunity',
        'deploiement__c',
        'deploiement__history',
        'accounthistory',
    ]
     

In [None]:
# Get all target database tables
def get_all_tgt_tabs_to_migrate(snow_options: dict) -> list:
    """List the base tables of the target Snowflake schema, lower-cased.

    The schema and database are taken from snow_options['sfSchema'] and
    snow_options['sfDatabase']; the query orders the names alphabetically.
    """
    schema_name = snow_options['sfSchema']
    database_name = snow_options['sfDatabase']
    tables_query = (
        "(SELECT DISTINCT LOWER(TABLE_NAME) as TABLE_NAME "
        "FROM INFORMATION_SCHEMA.TABLES "
        f"WHERE TABLE_SCHEMA = '{schema_name}' "
        f"AND TABLE_CATALOG = '{database_name}' "
        "AND TABLE_TYPE = 'BASE TABLE' "
        "ORDER BY LOWER(TABLE_NAME) ASC) all_tgt_tabs"
    )
    table_rows = read_tgt_snw_table(tables_query, snow_options).select("TABLE_NAME").collect()
    return [table_row["TABLE_NAME"] for table_row in table_rows]

In [None]:
# Display the Snowflake schema currently set in snow_options.
snow_options['sfSchema']

'[REDACTED]'

#### Schema change detection and handling

##### Schema change detection

In [None]:
def check_schema_difference(table_to_check: str) -> bool:
    """Report whether the Salesforce object and its Snowflake table differ in schema.

    One row is read from each side and the (lower-cased name, type, nullable)
    triples of their columns are compared, after mapping some source types to
    the type names expected on the Snowflake side.

    A change is reported only when the mapped source schema has entries missing
    from the target AND the two sides have a different number of columns.

    Parameters
    ----------
    table_to_check : str
        Name used both as the Salesforce object and as the Snowflake table.

    Returns
    -------
    bool
        True when a schema change is detected, otherwise False. On True, a
        description of the differing columns is appended to the module-level
        list `Schema_change_vals`.
    """
    # Source (Salesforce) side: column metadata from a one-row read.
    src_sf_df = read_src_sf_table(table_to_check, ' LIMIT 1')
    src_sf_metadata = {
        (field.name.lower(), str(field.dataType), field.nullable) for field in src_sf_df.schema
    }

    # Target (Snowflake) side: column metadata from a one-row read.
    tar_snow_query = f"(SELECT * FROM {table_to_check} LIMIT 1) {table_to_check}"
    tar_snow_df = read_tgt_snw_table(tar_snow_query, snow_options)
    tar_snow_metadata = {
        (field.name.lower(), str(field.dataType), field.nullable) for field in tar_snow_df.schema
    }

    # Source type names rewritten to the type names compared on the Snowflake side.
    data_type_mapping = {
        'LongType()': 'DecimalType(38,0)',
        'bigint': 'DecimalType(38,0)',
        'IntegerType()': 'DecimalType(38,0)',
        'ArrayType(StringType(), True)': 'StringType()',
        'ShortType()': 'DecimalType(38,0)'
    }

    mapped_src_sf_metadata = {
        (column_name.lower(), data_type_mapping.get(data_type, data_type), is_nullable)
        for column_name, data_type, is_nullable in src_sf_metadata
    }

    # Entries present in the (mapped) source schema but absent from the target.
    schema_diff = mapped_src_sf_metadata - tar_snow_metadata
    print(f"schema_diff -> {schema_diff}")

    if len(schema_diff) == 0 or len(src_sf_df.columns) == len(tar_snow_df.columns):
        return False

    chng_str = f"Database Table {table_to_check}\n" + "".join(
        f"Column: {column_name}, Data Type: {data_type}, Nullable: {is_nullable}\n"
        for column_name, data_type, is_nullable in schema_diff
    )
    # Record on the module-level list. Re-creating the list locally here would
    # shadow it, and the description would be lost when the function returns.
    Schema_change_vals.append(chng_str)
    return True
    

In [None]:
#check_schema_difference('AccountHistory')

##### Schema change handling

In [None]:
def handle_schema_change(data_df: 'DataFrame', table_name: str, versioning_schema: str) -> None:
    """Replace a Snowflake table with freshly loaded data after a schema change.

    Steps:
      1. Write `data_df` to a new table named '<table_name>__<YYYYmmddHHMMSS>'.
      2. Move the existing table into `versioning_schema` under that same
         timestamped name.
      3. Rename the new table to `table_name`.

    On success a notification is appended to `msg_to_send`. Errors are printed,
    not raised.

    Parameters
    ----------
    data_df : DataFrame
        Full data set to store under the new schema.
    table_name : str
        Table being replaced (in snow_options' database and schema).
    versioning_schema : str
        Schema (same database) that receives the previous version of the table.
    """
    try:
        current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        temp_new_table = table_name + '__' + current_timestamp
        print(temp_new_table)

        # Save the data under the timestamped name.
        data_df.write.format("snowflake").options(**snow_options).option("dbtable", temp_new_table).mode("append").save()

        database = snow_options['sfDatabase']
        live_schema = snow_options['sfSchema']
        live_name = f"{database}.{live_schema}.{table_name}"
        staged_name = f"{database}.{live_schema}.{temp_new_table}"
        archived_name = f"{database}.{versioning_schema}.{temp_new_table}"

        cursor = conn.cursor()
        try:
            # ALTER TABLE ... RENAME is DDL. Snowflake commits each DDL
            # statement on its own, so a surrounding BEGIN/ROLLBACK cannot undo
            # it. A failed second rename is compensated explicitly instead.
            cursor.execute(f"ALTER TABLE {live_name} RENAME TO {archived_name}")
            try:
                cursor.execute(f"ALTER TABLE {staged_name} RENAME TO {live_name}")
            except Exception:
                # Put the previous table back so `table_name` keeps existing.
                cursor.execute(f"ALTER TABLE {archived_name} RENAME TO {live_name}")
                raise

            msg = f"Schema change detected and new table created at {temp_new_table}"
            msg_to_send.append(msg)
        except Exception as e:
            print("Rolling Back...\nError occurred -> ", e)
        finally:
            cursor.close()

    except Exception as e:
        print("Error occured -> ", e)


#### Saving Data to Database

##### UPSERT target table

In [None]:
"""
This method essentially updates (MERGE) the target table
"""

def upsert_target_table(tab: str, merge_key: str = None) -> bool:
    """MERGE the rows staged in TEMP_TABLE into the target table, then drop TEMP_TABLE.

    Rows whose `merge_key` matches an existing row update every column of that
    row; unmatched rows are inserted. The column list is read from the target
    table with SHOW COLUMNS.

    Parameters
    ----------
    tab : str
        Target table name (upper-cased and quoted in the MERGE statement).
    merge_key : str, optional
        Column used to match source and target rows. Defaults to 'id'.

    Returns
    -------
    bool
        True when the merge and cleanup completed. False when any step failed;
        in that case a ROLLBACK is issued and a message is appended to
        `msg_to_send`.
    """
    if merge_key is None:
        merge_key = 'id'

    database = snow_options['sfDatabase']
    schema = snow_options['sfSchema']

    cursor = conn.cursor()
    try:
        cursor.execute("BEGIN TRANSACTION")

        try:
            # Column names of the target table (third field of each SHOW COLUMNS row).
            cursor.execute(f"SHOW COLUMNS IN {database}.{schema}.{tab}")
            column_names = [column_info[2] for column_info in cursor.fetchall()]

            # Every identifier is double-quoted so reserved words are accepted.
            column_mappings = ", ".join(
                f'"target"."{name}" = "source"."{name}"' for name in column_names
            )
            insert_columns = ", ".join(f'"{name}"' for name in column_names)
            insert_values = ", ".join(f'"source"."{name}"' for name in column_names)

            merge_query = (
                f'MERGE INTO "{database}"."{schema}"."{tab.upper()}" AS "target" '
                f'USING {database}.{schema}.TEMP_TABLE AS "source" '
                f'ON "target"."{merge_key}" = "source"."{merge_key}" '
                f'WHEN MATCHED THEN UPDATE SET {column_mappings} '
                f'WHEN NOT MATCHED THEN INSERT ({insert_columns}) '
                f'VALUES ({insert_values})'
            )
            cursor.execute(merge_query)

            # Remove the staging table to free up the database.
            # NOTE(review): DROP TABLE is DDL; per Snowflake's transaction rules
            # it commits the open transaction, so the MERGE is effectively
            # committed here rather than by the COMMIT below.
            cursor.execute(f"DROP TABLE {database}.{schema}.TEMP_TABLE")

            print("\nDrop table Query executed")

            cursor.execute("COMMIT")

            return True
        except Exception as e:
            msg_to_send.append(f"Could not Merge UPDATE table {tab} because of the following error\n{e}\n, and consequently the table was not migrated.\n Go take a look")
            cursor.execute("ROLLBACK")
            print("Rolling Back...\nError occurred -> ", e)
            return False
    finally:
        # Release the cursor on every path (success, failure, or raised error).
        cursor.close()


##### Saving data to target table

In [None]:
# Save Data to Snowflake Database table

def load_tgt_snw_table(data_df: 'DataFrame', database_table: str, snow_options: dict, write_mode: str = None, updating: str = None) -> 'DataFrame':
    """Write `data_df` to Snowflake, either directly or as an upsert.

    Direct load (`updating` is None): the data is written to `database_table`
    with `write_mode`, which defaults to "append".

    Upsert (`updating` is anything other than None): `write_mode` is ignored,
    the data overwrites TEMP_TABLE, and upsert_target_table(database_table)
    merges it into the real table.

    Nothing is returned. Any exception is printed, not raised.
    """
    try:
        is_upsert = updating is not None

        if is_upsert:
            # Stage the rows; the MERGE into `database_table` happens afterwards.
            destination, mode = "TEMP_TABLE", "overwrite"
        else:
            destination = database_table
            mode = "append" if write_mode is None else write_mode

        print('About to save')
        writer = data_df.write.format("snowflake").options(**snow_options)
        writer.option("dbtable", destination).mode(f"{mode}").save()
        print('saved')

        if is_upsert:
            print('UPDATE ')
            upsert_target_table(database_table)

    except Exception as err:
        print("Error occured -> ", err)

### Sending Email

In [None]:
def send_mail(subject: str, message: str, any_link: str=None) -> None:
    """Send a notification e-mail through the SMTP2GO relay.

    The mail has a plain-text part containing `message` and an HTML part
    showing `any_link` (or 'No link' when none is given). Login credentials
    come from config['username_mail'] and config['password_mail'].

    Parameters
    ----------
    subject : str
        Subject line.
    message : str
        Body text; converted with str().
    any_link : str, optional
        Text displayed as the file location in the HTML part.

    Raises
    ------
    smtplib.SMTPException, OSError
        Connection, TLS, login or delivery failures are not caught here.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    if any_link is None:
        any_link = 'No link'

    username = config['username_mail']
    password = config['password_mail']

    sender = 'my_email@mail.com'
    recipient = 'my_email@mail.com'

    msg = MIMEMultipart('mixed')
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = recipient

    msg.attach(MIMEText(str(message)))
    msg.attach(MIMEText(f"<br><b>File Located at:</b> <a href='#'>{any_link}</a>", 'html'))

    # The context manager closes the connection even when TLS, login or
    # sending fails. Ports 8025, 587 and 25 can also be used.
    with smtplib.SMTP('mail.smtp2go.com', 2525) as mailServer:
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        mailServer.login(username, password)
        mailServer.sendmail(sender, recipient, msg.as_string())

### Testing DB Connections and getting source and target tables

In [None]:
# Smoke test: list the Salesforce objects to migrate and the tables found in
# the Snowflake target schema.
all_sf_tabs = get_all_tabs_to_migrate()
print("Source tables -> ", all_sf_tabs, '\n\n of length -> ', len(all_sf_tabs), '\n-----')

# This list comes from Snowflake, so it is labelled as the target side.
all_snw_tabs = get_all_tgt_tabs_to_migrate(snow_options)
print("\n\nTarget tables -> ", all_snw_tabs, '\n\n of length -> ', len(all_snw_tabs))


Source tables ->  ['account', 'userrole', 'user', 'opportunity', 'deploiement__c', 'deploiement__history', 'accounthistory'] 

 of length ->  7 
-----


Source tables ->  ['account', 'accountcontactrelation', 'accounthistory', 'accountteammember', 'asset', 'case', 'casefeed', 'contact', 'deploiement', 'deploiement__c', 'deploiement__history', 'emailmessage', 'emailmessagerelation', 'event', 'knowledgearticle', 'kyc', 'lead', 'leadhistory', 'opportunity', 'opportunitylineitem', 'order_c', 'orderitem', 'profile', 'role', 'sf_users', 'sf_zoom_call_log', 'task', 'user', 'userrole', 'zoom_app__zoom_call_log__c'] 

 of length ->  30


### Core Migration method

In [None]:
salesforce_tables = get_all_tabs_to_migrate()
all_snw_tabs = get_all_tgt_tabs_to_migrate(snow_options)

# Target tables excluded from the run.
deleted_salesforce_objects = ['deploiement', 'kyc', 'order_c', 'role', 'sf_users', 'sf_zoom_call_log']
print(len(salesforce_tables))
all_snw_tabs = sorted(set(all_snw_tabs) - set(deleted_salesforce_objects))

# Move 'leadhistory' to the end of the processing order. The membership check
# keeps the run from failing when that table is not in the target schema.
if 'leadhistory' in all_snw_tabs:
    all_snw_tabs.remove('leadhistory')
    all_snw_tabs.append('leadhistory')

# Tables without a usable watermark column: these are fully reloaded.
no_created_at = []


def _to_soql_datetime(value) -> str:
    """Format a watermark value as a SOQL datetime literal (YYYY-MM-DDTHH:MM:SSZ).

    datetime values are formatted directly, so fractional seconds no longer
    break the conversion. Values carrying a UTC offset are shifted to UTC first.
    Anything else is parsed from its 'YYYY-MM-DD HH:MM:SS' text form.
    NOTE(review): naive datetimes are labelled 'Z' as they are — confirm the
    Snowflake column holds UTC.
    """
    if not isinstance(value, datetime):
        value = datetime.strptime(str(value), "%Y-%m-%d %H:%M:%S")
    elif value.utcoffset() is not None:
        value = (value - value.utcoffset()).replace(tzinfo=None)
    return value.strftime("%Y-%m-%dT%H:%M:%SZ")


snow_options['sfWarehouse'] = 'COMPUTE_WH'

# Every table of the target schema is refreshed. To migrate only the objects in
# `salesforce_tables`, iterate over that list and load each with "overwrite".
for tab in all_snw_tabs:
    print(f"Now processing table {tab}...")

    # Watermark column used for incremental loads.
    max_col = 'LASTMODIFIEDDATE' if tab == 'userrole' else 'CreatedDate'

    if check_schema_difference(tab):
        # Schema changed: reload everything into a new table and swap it in.
        print('handle')
        sf_df = read_src_sf_table(tab)
        sf_df = anonymise(sf_df)
        handle_schema_change(sf_df, tab, 'TEMP_HOLD')
        continue

    print('get max and proceed')

    if tab in no_created_at:
        # No watermark column: select all and overwrite.
        sf_df = read_src_sf_table(tab)
        sf_df = anonymise(sf_df)
        load_tgt_snw_table(sf_df, tab, snow_options, "overwrite")
        continue

    # Highest watermark already present in the target table.
    cursor = conn.cursor()
    try:
        cursor.execute(f"SELECT MAX({max_col}) FROM {snow_options['sfDatabase']}.{snow_options['sfSchema']}.{tab} ")
        max_date = cursor.fetchone()[0]
    finally:
        cursor.close()

    if max_date is None:
        # Empty target table: there is no watermark, so read every record.
        sf_query = f" ORDER BY {max_col} ASC "
    else:
        max_date = _to_soql_datetime(max_date)
        sf_query = f" WHERE {max_col} >= {max_date} ORDER BY {max_col} ASC "
    print('max_date ', max_date, ' Type ', type(max_date))

    sf_df = read_src_sf_table(tab, sf_query)
    sf_df = anonymise(sf_df)

    # Stage the rows and MERGE them into the target table.
    load_tgt_snw_table(sf_df, tab, snow_options, "overwrite", "update")


# Mail everything collected during the run, if there is anything to report.
if len(msg_to_send) > 0:
    any_link = 'https://databricks_notebook_URL'
    sbj = 'Salesforce ETL - job from Databrick! '
    final_msg = ''.join(f"{note}\n" for note in msg_to_send)

    send_mail(sbj, final_msg, any_link)


    

7
Now processing table account...
schema_diff -> set()
get max and proceed
max_date  2023-07-26T13:00:15Z  Type  <class 'str'>
About to save
saved
UPDATE 

Drop table Query executed
Now processing table accountcontactrelation...
schema_diff -> set()
get max and proceed
max_date  2023-07-26T13:04:10Z  Type  <class 'str'>
About to save
saved
UPDATE 

Drop table Query executed
Now processing table accounthistory...
schema_diff -> set()
get max and proceed
max_date  2023-07-26T13:05:11Z  Type  <class 'str'>
About to save
saved
UPDATE 

Drop table Query executed
Now processing table accountteammember...
schema_diff -> set()
get max and proceed
max_date  2023-07-26T13:05:09Z  Type  <class 'str'>
About to save
saved
UPDATE 


In [None]:
"""

tab = 'task'
sf_df = read_src_sf_table(tab)
#display(sf_df)
snow_options['sfWarehouse'] = 'COMPUTE_WH'
load_tgt_snw_table(sf_df, tab, snow_options, "overwrite")

"""

### Delete all below

In [None]:
# Reset both message holders to empty lists and display them.
final_msg, msg_to_send = [], []
(final_msg, msg_to_send)

### End Spark

In [None]:
# Uncomment the code below after DEV
#spark.stop()

# Close the connection
#cursor.close()
#conn.close()