# In [None]:
from pyspark.sql import functions as f
from pyspark.sql import SparkSession
from pyspark.sql.types import (StructType, StructField, StringType,
                               IntegerType, BooleanType, TimestampType,
                              ArrayType, MapType, DateType)

from google.cloud import bigquery
from pyspark.sql.window import Window

# Build (or reuse) the Spark session for this job. The spark-bigquery
# connector is requested via the spark.jars.packages setting.
spark = (
    SparkSession.builder
    .appName("DimTickerTypes")
    .config(
        "spark.jars.packages",
        "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.24.0",
    )
    .getOrCreate()
)

# Load the raw ticker-type records from the Parquet files under the GCS prefix.
# recursiveFileLookup makes Spark descend into nested directories.
# No "header" option is set: that is a CSV reader option and has no effect
# on a Parquet read.
types_df = (
    spark.read
    .option("recursiveFileLookup", "true")
    .parquet("gs://stocks-pipeline/raw-data/ticker_types")
)

# show() and printSchema() write to stdout themselves and return None, so
# they are called directly; wrapping them in print() would emit a stray "None".
types_df.show()
types_df.printSchema()
    

# Surrogate key: sequential row number following the ordering of `code`.
# NOTE(review): a window without partitionBy is evaluated in a single
# partition -- presumably acceptable because this dimension is small; confirm.
type_key = f.row_number().over(Window.orderBy('code'))
types_df = types_df.withColumn("TypeKey", type_key)

# Rename the source columns to the warehouse naming convention.
column_renames = {
    "code": "TypeCode",
    "description": "Description",
    "locale": "Locale",
}
for source_name, target_name in column_renames.items():
    types_df = types_df.withColumnRenamed(source_name, target_name)

# Replace missing descriptions with a placeholder and discard the
# asset_class column, which is not part of the output.
types_df = (
    types_df
    .fillna('Not Available', subset=["Description"])
    .drop("asset_class")
)
    
 # create a BigQuery client and dataset reference
# Upload the dimension to BigQuery and always release the Spark session,
# even when the upload fails.
try:
    client = bigquery.Client(project='noted-span-377814')
    # DatasetReference is the documented replacement for the deprecated
    # Client.dataset() helper.
    dataset_ref = bigquery.DatasetReference(client.project, 'Stocks_DW')

    # Target table for the load job.
    table_ref = dataset_ref.table('DimTickerTypes')

    # NOTE(review): WRITE_APPEND adds rows on every run. TypeKey is regenerated
    # with row_number() on each run, so re-running would presumably duplicate
    # rows and surrogate keys -- confirm whether WRITE_TRUNCATE is intended.
    job_config = bigquery.LoadJobConfig(write_disposition='WRITE_APPEND')

    # toPandas() collects the whole DataFrame onto the driver before upload.
    job = client.load_table_from_dataframe(types_df.toPandas(), table_ref, job_config=job_config)

    # Block until the load job finishes; result() raises if the job failed,
    # so a broken load is not silently ignored.
    job.result()
finally:
    spark.stop()
