In [None]:
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions._
import scala.util.{Failure, Success, Try}
import org.apache.spark.sql.types._
import org.apache.spark.sql.SparkSession

In [None]:
// Build (or reuse, via getOrCreate) the SparkSession for the "MDD" job.
// The three resource settings are handed to the builder exactly as written here.
val spark = {
    val mddSessionBuilder = SparkSession.builder().appName("MDD")
    mddSessionBuilder
        .config("spark.cores.max", 4)
        .config("spark.executor.cores", 4)
        .config("spark.executor.memory", "36g")
        .getOrCreate()
}

// Header-aware CSV options; not referenced by any other cell visible in this notebook.
val csv_options = Map("header" -> "true")

In [None]:
// Connection string for the MongoDB cluster (host/credentials redacted in this notebook).
val mongoUrl = "mongodb+srv://xxxxxxxxxxxxxxxxxxxxxx/"

// Price rows from coreEngine.Price with updateDate >= '20230101', reduced to the
// columns used downstream:
//   - closingPrice is cast to an integer
//   - market is 0 when classify == "KOSDAQ" and 1 for every other value
val stockPrice = {
    // All connector options in one place; the write URI is passed to the reader as well,
    // mirroring the original configuration.
    val priceReaderOptions = Map(
        "spark.mongodb.read.connection.uri" -> mongoUrl,
        "spark.mongodb.write.connection.uri" -> mongoUrl,
        "database" -> "coreEngine",
        "aggregation.pipeline" -> "{ $match: { updateDate: { $gte: '20230101'} } }",
        "collection" -> "Price"
    )

    val rawPrices = spark.read.format("mongodb").options(priceReaderOptions).load()
    val marketCode = when(col("classify") === "KOSDAQ", 0).otherwise(1)

    rawPrices
        .withColumn("closingPrice", col("closingPrice").cast(IntegerType))
        .withColumn("market", marketCode)
        .select("stockCode", "stockFullName", "market", "closingPrice", "updateDate")
}

In [None]:
// Per-stock ordering by updateDate; every frame below is a trailing row-based window.
val partition = Window.partitionBy("stockCode").orderBy("updateDate")
// rowsBetween(-N, 0) covers the current row plus the N rows before it (N + 1 rows in total).
val w_5 = partition.rowsBetween(-5, 0)
val w_22 = partition.rowsBetween(-22, 0)
val w_65 = partition.rowsBetween(-65, 0)
val w_126 = partition.rowsBetween(-126, 0)

// Drawdown ratios per stock and date for the 5/22/65/126-row horizons, restricted to
// rows with updateDate after "20230630".
// NOTE(review): each ratio is (window min - window max) / window max, which does not
// check whether the low came after the high inside the frame -- confirm this is the
// intended drawdown definition.
val mdd = {
    val horizons = Seq(5 -> w_5, 22 -> w_22, 65 -> w_65, 126 -> w_126)

    // Add max<N>Price, min<N>Price and mdd<N>Days for every horizon, in ascending order.
    val withDrawdowns = horizons.foldLeft(stockPrice) { case (prices, (days, frame)) =>
        val peakName = s"max${days}Price"
        val troughName = s"min${days}Price"
        prices
            .withColumn(peakName, max(col("closingPrice")).over(frame))
            .withColumn(troughName, min(col("closingPrice")).over(frame))
            .withColumn(s"mdd${days}Days", (col(troughName) - col(peakName)) / col(peakName))
    }

    // The date cut-off is applied after the window columns are defined, so rows on or
    // before the cut-off can still fall inside the frames of later rows.
    val afterCutoff = col("updateDate") > "20230630"
    // Only the 5/22/65 horizons are null-checked; mdd126Days is not (as in the original).
    val shortHorizonsPresent = Seq("mdd5Days", "mdd22Days", "mdd65Days")
        .map(name => col(name).isNotNull)
        .reduce(_ and _)

    withDrawdowns
        .where(afterCutoff)
        .where(shortHorizonsPresent)
        .select("stockCode", "stockFullName", "market", "closingPrice", "updateDate", "mdd5Days", "mdd22Days", "mdd65Days", "mdd126Days")
}

In [None]:
// Disabled persistence step. The whole expression is wrapped in a triple-quoted string,
// so this cell only evaluates to a String and nothing is written.
// The quoted code would save `mdd` to coreEngine.PriceMDDCalcurate through the "mongodb"
// format in append mode, with upsertDocument=true and idFieldList "updateDate,stockCode".
// Its updateDate filter repeats the one already applied when `mdd` is built.
// NOTE(review): the collection name is spelled "PriceMDDCalcurate" -- confirm it matches
// the real collection before re-enabling this cell.
"""
(
    mdd.where(col("updateDate") > "20230630")
    .write.format("mongodb")
    .mode("append")
    .option("upsertDocument", "true")
    .option("idFieldList", "updateDate,stockCode")
    .option("spark.mongodb.read.connection.uri", mongoUrl)
    .option("spark.mongodb.write.connection.uri", mongoUrl)
    .option("database", "coreEngine")
    .option("collection", "PriceMDDCalcurate")
    .save()
    )
    """

In [None]:
// Stop the SparkSession created at the top of the notebook; run this cell last.
spark.stop()