In [0]:
import json
import requests
from io import BytesIO
import datetime
from pyspark.sql.types import StructField, IntegerType, StringType, StructType, DoubleType, DateType, DecimalType, BooleanType, TimestampType

In [0]:
# Set KeyVault Variables:
# Every value below is fetched with dbutils.secrets.get from the secret scope named in
# `secret_scope`, so nothing sensitive is written into the notebook itself.
secret_scope         = "carbonScope"
storage_account_name = dbutils.secrets.get(scope=secret_scope, key="storageAccountName")
sas_token            = dbutils.secrets.get(scope=secret_scope, key="sasToken")
# NOTE(review): the secret key is called "connectionStringTotesys", but the value is passed as the
# `password` option of the SQL Server readers defined later in this notebook (for both the
# framework-j1-db and totesys databases) - confirm the secret holds a bare password.
db_password          = dbutils.secrets.get(scope=secret_scope, key="connectionStringTotesys")

In [0]:
# Set Spark configuration:
# Configure access to <storage_account_name>.dfs.core.windows.net with a fixed SAS token:
#   1. select "SAS" as the account auth type,
#   2. register the FixedSASTokenProvider class as the token provider,
#   3. hand that provider the token read from the secret scope.
spark.conf.set(f"fs.azure.account.auth.type.{storage_account_name}.dfs.core.windows.net", "SAS") 
spark.conf.set(f"fs.azure.sas.token.provider.type.{storage_account_name}.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider") 
spark.conf.set(f"fs.azure.sas.fixed.token.{storage_account_name}.dfs.core.windows.net", sas_token) 

In [0]:
# Data-lake layer roots:
# All six layers live under the same container and folder and differ only in the final path
# segment, so the constants are produced from the layer names in one unpacking assignment.
# NOTE(review): the storage account ("sjpcarbon") is hard-coded here, while the Spark SAS
# settings are keyed on `storage_account_name` from the secret scope - confirm they match.
RAW, TRANSFORM, CURATE, BRONZE, SILVER, GOLD = (
    f"abfss://carbonone@sjpcarbon.dfs.core.windows.net/framework-j1/{layer}"
    for layer in ("RAW", "TRANSFORM", "CURATE", "BRONZE", "SILVER", "GOLD")
)

# Current date as a "YYYY/MM/DD" path fragment (naive local time, fixed when this cell runs).
date_path       = f"{datetime.datetime.now():%Y/%m/%d}"

In [0]:
# Iterate through an object to create a schema:
def create_struct(obj):
    """Build a Spark ``StructType`` from a sequence of column descriptions.

    Args:
        obj: Iterable of mappings. Each entry must provide ``'columnName'``
            (the field name) and ``'dataType'`` (one of the type names listed
            in ``type_dict`` below).

    Returns:
        A ``StructType`` with one nullable ``StructField`` per entry, in the
        order the entries were supplied.

    Raises:
        KeyError: If an entry lacks ``'columnName'``/``'dataType'`` or names a
            data type that has no mapping. For the latter the message lists
            the offending column and the supported type names.
    """
    # Map strings to dataTypes:
    # NOTE(review): DecimalType() is created with PySpark's default precision/scale
    # (10, 0 per the PySpark docs) - confirm that suits the source decimal columns.
    type_dict = {
        'int': IntegerType(),
        'string': StringType(),
        'decimal': DecimalType(),
        'double': DoubleType(),  # imported by this notebook but previously unmapped
        'boolean': BooleanType(),
        'date': DateType(),
        'timestamp': TimestampType(),
    }

    # Create Schema
    fields = []
    for value in obj:
        column_name = value['columnName']
        data_type = value['dataType']
        if data_type not in type_dict:
            # Same exception type as the plain dict lookup, but with enough
            # context to locate the bad metadata row.
            raise KeyError(
                f"Unsupported dataType {data_type!r} for column {column_name!r}; "
                f"expected one of {sorted(type_dict)}"
            )
        fields.append(StructField(column_name, type_dict[data_type], True))
    return StructType(fields)

# Run a query against framework-j1-db (metadata) and return its 'entityNames' value:
def query_entityNames(query):
  """Execute ``query`` on the framework-j1-db database and return a single value.

  The query is sent through Spark's ``sqlserver`` reader, every result row is
  collected, and the ``entityNames`` field of the first row is returned.

  Args:
    query: SQL text passed unchanged as the reader's ``query`` option.

  Returns:
    The ``entityNames`` value of the first collected row.

  Raises:
    IndexError: If the query yields no rows (there is no first row to index).
  """
  connection_options = (
    ("host", "framework-j1-sv.database.windows.net"),
    ("port", "1433"),
    ("user", "adminsjp"),
    ("password", db_password),
    ("database", "framework-j1-db"),
    ("query", query),
  )

  # Apply the options in the listed order, then load and collect the result.
  reader = spark.read.format("sqlserver")
  for option_name, option_value in connection_options:
    reader = reader.option(option_name, option_value)

  collected_rows = reader.load().collect()
  return collected_rows[0]['entityNames']

# Run a query against the totesys database and hand back the resulting dataframe:
def query_totesys(query):
  """Load the result of ``query`` from the totesys database as a Spark DataFrame.

  Args:
    query: SQL text passed unchanged as the reader's ``query`` option.

  Returns:
    The DataFrame produced by ``load()``; nothing is collected here.
  """
  settings = {
    "host": "framework-j1-sv.database.windows.net",
    "port": "1433",
    "user": "adminsjp",
    "password": db_password,
    "database": "totesys",
    "query": query,
  }

  totesys_reader = spark.read.format("sqlserver")
  for key in settings:
    totesys_reader = totesys_reader.option(key, settings[key])
  return totesys_reader.load()

# Shared lookup used by entity_bronze / entity_silver:
def _entity_location(e, column):
  """Return the value of ``column`` from sourceEntity for the entity named ``e``.

  Args:
    e: Entity name matched against ``sourceEntity.entityName``.
    column: Column to select; only called with fixed names from this notebook.

  Returns:
    The ``column`` value of the first matching row.

  Raises:
    IndexError: If no row matches ``e``. Same exception type as indexing an
      empty result, but the message names the entity.
  """
  # The reader's `query` option takes plain SQL text, so the name has to be embedded
  # as a literal. Doubling single quotes keeps a name such as O'Brien inside the
  # string literal instead of terminating it early.
  escaped_name = str(e).replace("'", "''")
  q = f"""
  SELECT {column} FROM sourceEntity
  WHERE entityName = '{escaped_name}'
  """
  rows = (spark.read
    .format("sqlserver")
    .option("host", "framework-j1-sv.database.windows.net")
    .option("port", "1433")
    .option("user", "adminsjp")
    .option("password", db_password)
    .option("database", "framework-j1-db")
    .option("query", q)
    .load()
  ).collect()
  if not rows:
    raise IndexError(f"No sourceEntity row found for entityName {e!r} (column {column})")
  return rows[0][column]

# Function to return the bronze location for the given entity:
def entity_bronze(e):
  """Return ``bronzeLocation`` from sourceEntity for entity ``e``.

  Raises IndexError when the entity has no row in sourceEntity.
  """
  return _entity_location(e, "bronzeLocation")

# Function to return the silver location for the given entity:
def entity_silver(e):
  """Return ``silverLocation`` from sourceEntity for entity ``e``.

  Raises IndexError when the entity has no row in sourceEntity.
  """
  return _entity_location(e, "silverLocation")