In [1]:
//import org.apache.commons.lang3.time.{DateFormatUtils, FastDateFormat}
//import org.apache.spark.sql.functions._
import org.apache.spark.sql._
//import org.apache.spark.sql.types._
import org.apache.log4j._
import com.tccc.dna.synapse.dataset.NewYorkTaxiYellow
import com.tccc.dna.synapse.{StorageFormat, AzStorage}
import com.tccc.dna.synapse.StorageFormat._
import com.tccc.dna.synapse.Logs._

import com.tccc.dna.synapse.spark.{SynapseSpark, DataFrames, Partitions, Writers, Catalogs}
// Resolve the name of the running notebook; it becomes the last segment of the logger name below.
val notebookName = SynapseSpark.getCurrentNotebookName

// Switch the SparkContext log level to DEBUG for this session.
sc.setLogLevel("DEBUG")

// Notebook-specific log4j logger, also set to DEBUG.
val log = LogManager.getLogger(s"com.aravind.notebook.$notebookName")
log.setLevel(Level.DEBUG)

In [2]:
// Storage account and container that hold the source data.
val tcccStorageAcct = "xxx"
val tcccContainer = "tlfs"

// Locations of the NYC yellow taxi trip data, one per storage format.
val yellowTaxiCsvPath = "/poc/csv/nyc_yellow_taxi_trips"
val yellowTaxiParquetPath = "/poc/parquet/nyc_yellow_taxi_trips"
val yellowTaxiDeltaPath = "/poc/delta/nyc_yellow_taxi_trips"

// Log every location that already exists. The guard is evaluated lazily, so each
// path is checked and (if present) logged before the next one is looked at.
// NOTE(review): ifFileExists receives only the path; presumably it resolves against a
// default storage account/container — confirm that matches tcccStorageAcct/tcccContainer.
for {
    path <- Seq(yellowTaxiCsvPath, yellowTaxiParquetPath, yellowTaxiDeltaPath)
    if AzStorage.ifFileExists(path)
} logDebug(log, s"$path exists.")

In [3]:
// Schema (database) name used for the catalog tables.
val schemaName = "silver"

// All table names share one stem: the "mt_" prefix marks a managed table,
// and the suffix names the storage format backing the table.
val yellowTaxiTableStem = "mt_nyc_yellow_taxi_trips"
val yellowTaxiCsvBackedTable = s"${yellowTaxiTableStem}_csv"
val yellowTaxiParquetBackedTable = s"${yellowTaxiTableStem}_parquet"
val yellowTaxiDeltaBackedTable = s"${yellowTaxiTableStem}_delta"

// Columns the tables are partitioned by.
val partitionCols = Array("puYear", "puMonth")

// Create the Parquet-backed table from the CSV source, but only when the catalog
// does not already contain it.
if (!Catalogs.isTableExists(schemaName, yellowTaxiParquetBackedTable)) {
    // The source frame is cached for the load. `df` is only visible inside this block,
    // so it must be released here; otherwise the cached data stays pinned for the
    // remainder of the Spark session with no handle left to unpersist it.
    val df = DataFrames.getDataFrame(tcccStorageAcct, tcccContainer, yellowTaxiCsvPath, StorageFormat.Csv, Map("header"->"true")).cache

    try {
        //Issues CETAS. Physical plan command: CreateDataSourceTableAsSelectCommand
        // NOTE(review): "header" is a CSV option; presumably it has no effect on a
        // Parquet write — confirm against Writers.initialLoad and drop it if so.
        Writers.initialLoad(df, schemaName, yellowTaxiParquetBackedTable, StorageFormat.Parquet, SaveMode.Overwrite,
            partitionColNames = partitionCols, dbComment = "Silver Zone",
            otherWriterOpts = Map("header"->"true"))
    } finally {
        // Release the cache whether the load succeeded or threw.
        df.unpersist()
    }

    // Log the catalog metadata of the freshly created table.
    val info = Catalogs.getTableInfo(schemaName, yellowTaxiParquetBackedTable)
    logDebug(log, info.toString)
}

// Create the Delta-backed table from the CSV source, unless the catalog already has it.
if (!Catalogs.isTableExists(schemaName, yellowTaxiDeltaBackedTable)) {
    // Read the CSV files, treating the first row as the header.
    val csvReadOpts = Map("header"->"true")
    val sourceDf = DataFrames.getDataFrame(tcccStorageAcct, tcccContainer, yellowTaxiCsvPath, StorageFormat.Csv, csvReadOpts)

    // Issues CETAS. Physical plan command: CreateDataSourceTableAsSelectCommand
    Writers.initialLoad(
        sourceDf, schemaName, yellowTaxiDeltaBackedTable, StorageFormat.Delta, SaveMode.Overwrite,
        partitionColNames = partitionCols,
        dbComment = "Silver Zone")

    // Log the catalog metadata of the freshly created table.
    val tableInfo = Catalogs.getTableInfo(schemaName, yellowTaxiDeltaBackedTable)
    logDebug(log, tableInfo.toString)
}