In [None]:
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions._
import scala.util.{Failure, Success, Try}
import org.apache.spark.sql.types._
import org.apache.spark.sql.SparkSession

In [None]:
// Spark session used by every cell below. Sets spark.cores.max and
// spark.executor.cores to 6 and spark.executor.memory to 36g; getOrCreate()
// reuses an already-running session if the kernel has one.
val spark = {
    val sessionBuilder = SparkSession.builder().appName("FinancialSheets_Before_Ensemble")
    sessionBuilder
        .config("spark.executor.memory", "36g")
        .config("spark.executor.cores", 6)
        .config("spark.cores.max", 6)
        .getOrCreate()
}

// MongoDB connection string passed to the connector as both read and write URI.
val mongoUrl = "mongodb+srv://xxxxxxxxxxxxxxxxxxxxxx/"

In [None]:
// Loads the coreEngine.ReportRiskPremiumCalcurate_New collection through the
// "mongodb" data source, with a $match pipeline on updateDate >= '20220414',
// and discards the Mongo-generated _id column.
val reportRiskPremiumCalcurate = {
    // NOTE(review): the write URI looks unnecessary for a read — confirm before removing.
    val readerOptions = Map(
        "spark.mongodb.read.connection.uri" -> mongoUrl,
        "spark.mongodb.write.connection.uri" -> mongoUrl,
        "database" -> "coreEngine",
        "collection" -> "ReportRiskPremiumCalcurate_New",
        "aggregation.pipeline" -> "{ $match: { updateDate: { $gte: '20220414' } } }"
    )
    spark.read.format("mongodb").options(readerOptions).load().drop("_id")
}

// Loads the coreEngine.StressTest collection (updateDate >= '20220414'),
// drops _id and stockFullName, and exposes its riskPremium column as "hfrp".
val stressTest = {
    val readerOptions = Map(
        "spark.mongodb.read.connection.uri" -> mongoUrl,
        "spark.mongodb.write.connection.uri" -> mongoUrl,
        "database" -> "coreEngine",
        "collection" -> "StressTest",
        "aggregation.pipeline" -> "{ $match: { updateDate: { $gte: '20220414' } } }"
    )
    val rawStressTest = spark.read.format("mongodb").options(readerOptions).load()
    // Presumably renamed so it does not clash with the report's own riskPremium
    // column once the two frames are joined — confirm against the report schema.
    rawStressTest
        .drop("_id", "stockFullName")
        .withColumnRenamed("riskPremium", "hfrp")
}

// Loads the coreEngine.PbrPerYield collection (update_date >= '20220414') and
// projects it to (stockCode, per, pbr, updateDate), with per/pbr cast to Double.
//
// The projection is done in a single select: the renames and casts that used to
// be four separate withColumn/withColumnRenamed steps are expressed as aliases,
// and the former trailing drop("_id") is gone because the select already
// excludes _id.
val pbrPerYield = (
    spark.read.format("mongodb")
    .option("spark.mongodb.read.connection.uri", mongoUrl)
    .option("spark.mongodb.write.connection.uri", mongoUrl)
    .option("database", "coreEngine")
    .option("aggregation.pipeline", "{ $match: { update_date: { $gte: '20220414' } } }")
    .option("collection", "PbrPerYield").load()
    .select(
        col("stock_code").as("stockCode"),
        col("per").cast(DoubleType).as("per"),
        col("pbr").cast(DoubleType).as("pbr"),
        col("update_date").as("updateDate")
    )
    )

In [None]:
// Left-joins the report with PBR/PER on (stockCode, updateDate), then renames:
// the matched per/pbr become fixed_per/fixed_pbr, the join date becomes
// RegUpdateDate, and tempUpdateDate takes over the updateDate name.
val reportRiskPremiumWithPbrPerFixed = {
    val joinKeys = Seq("stockCode", "updateDate")
    // Applied in order: updateDate must be freed up before tempUpdateDate can take its name.
    // NOTE(review): assumes the report collection carries a tempUpdateDate column — confirm.
    val renames = Seq(
        "per" -> "fixed_per",
        "pbr" -> "fixed_pbr",
        "updateDate" -> "RegUpdateDate",
        "tempUpdateDate" -> "updateDate"
    )
    val reportWithValuation = reportRiskPremiumCalcurate.join(pbrPerYield, joinKeys, "left")
    renames.foldLeft(reportWithValuation) { case (frame, (from, to)) =>
        frame.withColumnRenamed(from, to)
    }
}

In [None]:
// Window over each stockCode, rows ordered by updateDate.
val partition = Window.partitionBy("stockCode").orderBy("updateDate")
// Row frame running from the first row of the partition through the current row;
// used with last(..., ignoreNulls = true) to carry the latest non-null value forward.
// Window.currentRow replaces the bare literal 0 (same value, self-describing).
val ff = partition.rowsBetween(Window.unboundedPreceding, Window.currentRow)

In [None]:
// Builds the combined report / PBR-PER / stress-test frame:
//   1. outer-joins the report with pbrPerYield on (stockCode, updateDate),
//   2. left-joins stressTest on the same keys,
//   3. forward-fills every column listed below within the `ff` window
//      (last non-null value up to the current row),
//   4. keeps only rows whose filled RegUpdateDate is non-null.
//
// The forward fill is applied in ONE select rather than one withColumn per
// column: each withColumn adds its own projection, and long chains of them
// produce very large plans. Column order of the joined frame is preserved.
val reportRiskPremiumWithPbrPer = {
    val joinKeys = Seq("stockCode", "updateDate")

    // Columns whose nulls are replaced by the most recent non-null value.
    val forwardFilledColumns = Seq(
        "RegUpdateDate", "FSPctRank",
        "TF1", "TF2", "TF3", "TT",
        "VaRTF1", "VaRTF2", "VaRTF3",
        "basicReturn", "bsnsYear", "corpCls", "corpCode", "event",
        "expectedLossFN1", "expectedLossFN2", "expectedLossFN3",
        "expectedProfit", "expectedRisk",
        "grade", "loanAvailable", "marketCap", "plbtEvent", "predict",
        "priceEvent", "profitLoss", "quarter",
        "r", "r_s", "rank", "rceptNo",
        "reGrade", "reLoanAvailable", "rePredict", "reRank",
        "recoveryFN1", "recoveryFN2", "recoveryFN3",
        "reprtCode", "riskPremium", "stockName", "threshold",
        "fixed_per", "fixed_pbr", "per", "pbr",
        "stressWarn", "stressAlert",
        "lockWarn30", "lockWarn60", "lockAlert30", "lockAlert60",
        "balanceRateLoan", "hfrp", "stress"
    )

    val joined = reportRiskPremiumWithPbrPerFixed
        .join(pbrPerYield, joinKeys, "outer")
        .join(stressTest, joinKeys, "left")

    // Fail loudly if an expected column is absent instead of silently skipping it.
    val missing = forwardFilledColumns.filterNot(joined.columns.contains)
    require(
        missing.isEmpty,
        s"Columns to forward-fill are missing from the joined data: ${missing.mkString(", ")}"
    )

    val toFill = forwardFilledColumns.toSet
    val projection = joined.columns.map { name =>
        if (toFill(name)) last(col(name), ignoreNulls = true).over(ff).as(name)
        else col(name)
    }

    joined
        .select(projection: _*)
        .where(col("RegUpdateDate").isNotNull)
}

In [None]:
// DISABLED: the write pipeline below is wrapped in a triple-quoted string
// literal, so this cell only evaluates to a String and nothing is saved.
// Removing the surrounding """ would run it: rows of reportRiskPremiumWithPbrPer
// with updateDate > "20230630" would be written in append mode to
// coreEngine.ReportRiskPremiumWithPbrPerStress.
// NOTE(review): upsertDocument / idFieldList presumably make the write an upsert
// keyed on (updateDate, stockCode) — confirm against the connector docs before
// re-enabling.
"""
(
    reportRiskPremiumWithPbrPer
    .where(col("updateDate") > "20230630")
    .write.format("mongodb")
    .mode("append")
    .option("upsertDocument", "true")
    .option("idFieldList", "updateDate,stockCode")
    .option("spark.mongodb.read.connection.uri", mongoUrl)
    .option("spark.mongodb.write.connection.uri", mongoUrl)
    .option("database", "coreEngine")
    .option("collection", "ReportRiskPremiumWithPbrPerStress")
    .save()
    )
    """