In [0]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import plotly.graph_objects as go

from datetime import date, datetime, timedelta
from pyspark.sql.functions import current_date, year, col, when

import pyspark.sql.functions as F
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from sklearn.feature_selection import SelectKBest
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import f1_score, classification_report, auc, roc_auc_score, roc_curve, accuracy_score, recall_score, precision_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold, cross_val_score, GridSearchCV
import pickle
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture

# Session-level Spark settings, applied once at the top of the notebook.
# NOTE(review): judging by the keys, these raise the pivot distinct-value cap and
# switch off two Databricks Delta read-time checks — confirm against the docs.
for _conf_key, _conf_value in (
    ('spark.sql.pivotMaxValues', u'50000'),
    ('spark.databricks.delta.formatCheck.enabled', False),
    ('spark.databricks.delta.checkLatestSchemaOnRead', False),
):
    spark.conf.set(_conf_key, _conf_value)

In [0]:
# #--------------------------------------------CDP Connections -------------------------------------------------------
# Read the login pair from the secret scope (username first, then password) so
# that neither value is written into the notebook itself.
prodUsername, prodPassword = (
    dbutils.secrets.get(scope="US_DSAA_PROD_GROUP_Snowflake_Admin", key=secret_key)
    for secret_key in ("Prod-Username", "Prod-Password")
)

def get_data_snowflake(query, Schema = "PHCDW_DSAA", Role="PROD_PHCDW_PROD_CYRUS_GREY_PHCDW_CDM_RWC"):
    """Load the result of ``query`` through the Spark Snowflake connector.

    The connection options are assembled on every call from the notebook-level
    ``prodUsername`` / ``prodPassword`` values plus the two overridable
    arguments; URL, database and warehouse are fixed.

    Args:
        query: Value passed to the reader as its ``query`` option.
        Schema: Value used for the ``sfSchema`` option.
        Role: Value used for the ``sfRole`` option.

    Returns:
        Whatever ``spark.read ... .load()`` returns (presumably a Spark
        DataFrame — confirm against the connector documentation).
    """
    connector_options = {
        "sfURL": "bayer_cphcdp_prod.us-east-1.snowflakecomputing.com:443",
        "sfUser": prodUsername,
        "sfPassword": prodPassword,
        "sfRole": Role,
        "sfSchema": Schema,
        "sfDatabase": "PHCDW",
        "sfWarehouse": "PROD_CYRUS_BI_WH",
        "purge": "off",
        "autopushdown": "on",
    }

    # Same call sequence as a single chained expression: format -> options -> option -> load.
    reader = spark.read.format("net.snowflake.spark.snowflake")
    reader = reader.options(**connector_options)
    reader = reader.option('query', query)
    return reader.load()

# print("CDP Connection")

In [0]:
# Connector options for the PHCDW_DSAA schema. Going by the name, this dict is
# meant for write calls; nothing is written in this cell.
prodUsername, prodPassword = (
    dbutils.secrets.get(scope="US_DSAA_PROD_GROUP_Snowflake_Admin", key=secret_key)
    for secret_key in ("Prod-Username", "Prod-Password")
)

sfOptions_write = {
    "sfURL": "bayer_cphcdp_prod.us-east-1.snowflakecomputing.com:443",
    "sfUser": prodUsername,
    "sfPassword": prodPassword,
    "sfRole": "PROD_PHCDW_PROD_CYRUS_GREY_PHCDW_DSAA_RWC",
    "sfSchema": "PHCDW_DSAA",
    "sfDatabase": "PHCDW",
    "sfWarehouse": "PROD_CYRUS_BI_WH",
    "purge": "off",
    "autopushdown": "on",
}


In [0]:
# Connector options for the PHCDW_CDM schema. Going by the name, this dict is
# meant for write calls; nothing is written in this cell.
# NOTE(review): "writ" looks like a typo for "write"; the name is left as-is
# because code outside this view may reference it.
prodUsername, prodPassword = (
    dbutils.secrets.get(scope="US_DSAA_PROD_GROUP_Snowflake_Admin", key=secret_key)
    for secret_key in ("Prod-Username", "Prod-Password")
)

sfOptions_writ_cdm = {
    "sfURL": "bayer_cphcdp_prod.us-east-1.snowflakecomputing.com:443",
    "sfUser": prodUsername,
    "sfPassword": prodPassword,
    "sfRole": "PROD_PHCDW_PROD_CYRUS_GREY_PHCDW_CDM_RWC",
    "sfSchema": "PHCDW_CDM",
    "sfDatabase": "PHCDW",
    "sfWarehouse": "PROD_CYRUS_BI_WH",
    "purge": "off",
    "autopushdown": "on",
}


In [0]:
# %sql
# CREATE DATABASE IF NOT EXISTS jivi_new_writer_model;
# CREATE DATABASE IF NOT EXISTS kolvaltry_new_writer_model;

In [0]:
# Database name constant for this notebook.
# NOTE(review): presumably the value handed to save_sdf as its DB_NAME argument — confirm at the call sites.
DB_NAME = 'heme_data'

# Echo the value so the cell output records which database name is in effect.
print(f'DB_NAME: {DB_NAME}')

In [0]:
# Save Data to Delta Lake
def save_sdf(sdf, DB_NAME, dlt_table):
    """Drop ``DB_NAME.dlt_table`` if it exists, then write ``sdf`` to that table.

    This is a best-effort helper: any ``Exception`` raised by the drop or the
    write is caught and printed rather than re-raised, so the function returns
    ``None`` whether or not the save succeeded. Check the printed status line
    to tell the two outcomes apart.

    Args:
        sdf: Object exposing ``.write.saveAsTable(name, mode=...)``
            (presumably a Spark DataFrame).
        DB_NAME: Qualifier placed before the table name.
        dlt_table: Table name placed after the qualifier.
    """
    try:
        # The drop runs first; a failure here skips the write entirely.
        spark.sql(f'drop table if exists {DB_NAME}.{dlt_table}')
        table_writer = sdf.write
        table_writer.saveAsTable(f'{DB_NAME}.{dlt_table}', mode='overwrite')
    except Exception as err:
        print(f'Table {DB_NAME}.{dlt_table} save failed')
        print(err)
    else:
        print(f'Table {DB_NAME}.{dlt_table} saved')