# In [None]:
from pyspark.sql import functions as f
from pyspark.sql import SparkSession
from pyspark.sql.types import (StructType, StructField, StringType,
                               IntegerType, BooleanType, TimestampType,
                              ArrayType, MapType)

from google.cloud import bigquery
from pyspark.sql.window import Window


# Start (or reuse) the Spark session for this job. The BigQuery connector
# artifact is requested through the spark.jars.packages setting.
spark = (
    SparkSession.builder
    .appName("DimTickerDetails")
    .config(
        "spark.jars.packages",
        "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.24.0",
    )
    .getOrCreate()
)

# Read every raw ticker-details Parquet file under the bucket prefix into a
# single DataFrame. No reader options are passed: "header" is a CSV reader
# option and has no effect on a Parquet read.
ticker_Df = spark.read.parquet(
    "gs://stocks-pipeline/raw-data/ticker_details/*.parquet"
)

# Shape the raw rows into the dimension layout:
#   1. tickerKey - sequential row number assigned in 'ticker' order
#   2. isActive  - 1 when active is true, otherwise 0 (including NULL)
#   3. placeholder strings for missing identifier / type values
#   4. the original 'active' column is dropped once isActive exists
ticker_order = Window.orderBy('ticker')

ticker_Df = (
    ticker_Df
    .withColumn("tickerKey", f.row_number().over(ticker_order))
    .selectExpr("*", "CASE WHEN active = true THEN 1 ELSE 0 END AS isActive")
    .fillna(
        value='Not Assigned',
        subset=["cik", "composite_figi", "share_class_figi"],
    )
    .fillna(value='Unknown', subset=["type"])
    .drop("active")
)


# Publish the dimension to BigQuery, replacing the table contents on each run
# (WRITE_TRUNCATE). The try/finally guarantees the Spark session is stopped
# even when the pandas conversion or the load job raises.
try:
    client = bigquery.Client(project='noted-span-377814')

    # Build the references with the DatasetReference constructor;
    # Client.dataset() is deprecated in google-cloud-bigquery.
    dataset_ref = bigquery.DatasetReference(client.project, 'Stocks_DW')
    table_ref = dataset_ref.table('DimTickerDetails')

    job_config = bigquery.LoadJobConfig(write_disposition='WRITE_TRUNCATE')

    # toPandas() collects the whole DataFrame onto the driver before upload.
    job = client.load_table_from_dataframe(
        ticker_Df.toPandas(), table_ref, job_config=job_config
    )

    # result() blocks until the load job finishes and raises if it failed.
    print(job.result())
finally:
    spark.stop()