In [None]:
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions._
import scala.util.{Failure, Success, Try}
import org.apache.spark.sql.types._
import org.apache.spark.sql.SparkSession

In [None]:
// Spark session for the "StressTest" application, reusing an active session if one exists.
val spark = {
    // Values are given as strings; the builder stores every setting as a string anyway.
    val sessionSettings = Seq(
        "spark.cores.max" -> "4",
        "spark.executor.cores" -> "4",
        "spark.executor.memory" -> "36g"
    )
    val configuredBuilder =
        sessionSettings.foldLeft(SparkSession.builder().appName("StressTest")) {
            case (builder, (key, value)) => builder.config(key, value)
        }
    configuredBuilder.getOrCreate()
}

// Option map for CSV files that carry a header row.
val csv_options = Map("header" -> "true")

In [None]:
val mongoUrl = "mongodb+srv://xxxxxxxxxxxxxxxxxxxxxx/"

// Reads one collection of the "coreEngine" database, keeps only the requested
// fields and drops rows whose updateDate is not greater than "20220101".
// NOTE(review): the write URI is passed on a read path exactly as before; it is
// presumably unused here — confirm against the connector docs before removing it.
def loadCoreEngineCollection(collection: String, fields: String*): org.apache.spark.sql.DataFrame = {
    val readerOptions = Map(
        "spark.mongodb.read.connection.uri" -> mongoUrl,
        "spark.mongodb.write.connection.uri" -> mongoUrl,
        "database" -> "coreEngine",
        "collection" -> collection
    )
    (
        spark.read
        .format("mongodb")
        .options(readerOptions)
        .load()
        .select(fields.map(name => col(name)): _*)
        .where(col("updateDate") > "20220101")
    )
}

// Risk premium per stock and date, with riskPremium cast to double.
val priceRiskPremium = (
    loadCoreEngineCollection("PriceRiskPremium", "stockCode", "stockFullName", "riskPremium", "updateDate")
    .withColumn("riskPremium", col("riskPremium").cast(DoubleType))
    )

// Credit loan balance rate per stock and date, with balanceRateLoan cast to double
// and stockCode left-padded with "0" to 6 characters.
// NOTE(review): only this frame pads stockCode; presumably PriceRiskPremium already
// stores 6-character codes — verify, since the two frames are joined on stockCode.
val creditLoanRate = (
    loadCoreEngineCollection("CreditLoanRate", "stockCode", "balanceRateLoan", "updateDate")
    .withColumn("balanceRateLoan", col("balanceRateLoan").cast(DoubleType))
    .withColumn("stockCode", lpad(col("stockCode"), 6, "0"))
    )

In [None]:
// Rows of the same stock, ordered by update date.
val partition = Window.partitionBy("stockCode").orderBy("updateDate")
// Trailing frames: the current row plus the 30 (resp. 60) rows before it.
// NOTE(review): these frames span 31 and 61 rows — confirm that is the intended lookback.
val ff30 = partition.rowsBetween(-30, Window.currentRow)
val ff60 = partition.rowsBetween(-60, Window.currentRow)

// Stress indicators per stock and date:
//   stress       = riskPremium * 100000 * balanceRateLoan
//   stressWarn   = 1 when stress >= 1, otherwise null
//   stressAlert  = 1 when balanceRateLoan >= 5, otherwise null
//   lock*30/60   = first non-null flag found inside the trailing frame
// Finally every null in a numeric column is replaced by 0.
val stress = {
    val joined = priceRiskPremium.join(creditLoanRate, Seq("stockCode", "updateDate"), "left")

    // `when` without `otherwise` yields null for rows that do not match.
    val flagged = (
        joined
        .withColumn("stress", col("riskPremium") * 100000 * col("balanceRateLoan"))
        .withColumn("stressWarn", when(col("stress") >= 1, 1))
        .withColumn("stressAlert", when(col("balanceRateLoan") >= 5, 1))
    )

    // (output column, source flag, frame) — listed in the order the columns are appended.
    val lockColumns = Seq(
        ("lockWarn30", "stressWarn", ff30),
        ("lockWarn60", "stressWarn", ff60),
        ("lockAlert30", "stressAlert", ff30),
        ("lockAlert60", "stressAlert", ff60)
    )

    val locked = lockColumns.foldLeft(flagged) {
        case (frame, (target, source, window)) =>
            frame.withColumn(target, first(col(source), ignoreNulls = true).over(window))
    }

    // NOTE(review): fill(0) is not limited to the flag columns; null riskPremium,
    // balanceRateLoan and stress values become 0 as well — confirm that is wanted.
    locked.na.fill(0)
}

In [None]:
// (
//     stress
// //     .where(col("alert") === 1)
// //     .where(col("stockCode").isin(Seq("003610", "109860", "004890", "001080", "001070", "017390", "016710", "004690", "004360", "003380", "032190", "003100"):_*))
//     .where(col("updateDate") > "20220101")
//     .orderBy("stockCode", "updateDate")
//     .withColumn("riskPremium", col("riskPremium").cast(DoubleType))
//     .coalesce(1).write.option("header", "true").csv("s3n://available-raw/Output_20230707_3.csv")
// )

In [None]:
// Disabled write of `stress` rows with updateDate > "20230630" to the
// "StressTest" collection of the "coreEngine" database (append mode,
// upsertDocument = true, idFieldList = updateDate,stockCode).
// The statement is wrapped in a triple-quoted string literal, so this cell only
// evaluates to a String and the write itself never runs.
"""
(
    stress
    .where(col("updateDate") > "20230630")
    .write.format("mongodb")
    .mode("append")
    .option("upsertDocument", "true")
    .option("idFieldList", "updateDate,stockCode")
    .option("spark.mongodb.read.connection.uri", mongoUrl)
    .option("spark.mongodb.write.connection.uri", mongoUrl)
    .option("database", "coreEngine")
    .option("collection", "StressTest")
    .save()
    )
    """

In [None]:
// Stop the Spark session once the notebook's work is done.
spark.stop()