In [1]:
# Silver-layer stock analytics: derive technical indicators from the bronze stock table
# Cell 1 - Read Bronze Data
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Load the full bronze-layer stock table into a DataFrame
df_bronze = spark.table("bronze_stock_data")

# Sanity check: overall row count, followed by a per-symbol breakdown
bronze_row_count = df_bronze.count()
print(f"Total records in bronze: {bronze_row_count}")

rows_per_symbol = df_bronze.groupBy("symbol").count()
rows_per_symbol.show()

StatementMeta(, 2528c9ee-e6f2-4ff9-8c77-ad6acc999d56, 3, Finished, Available, Finished)

Total records in bronze: 25
+------------+-----+
|      symbol|count|
+------------+-----+
|RELIANCE.BSE|   25|
+------------+-----+



In [3]:
# Cell 2 - Calculate Technical Indicators
# Base window: rows of one symbol, ordered by timestamp.
window_spec = Window.partitionBy("symbol").orderBy("timestamp")
# Trailing row-based frames built on the base window: the current row plus
# the 19 (resp. 13) rows before it. Early rows simply get a shorter frame.
window_20 = window_spec.rowsBetween(-19, 0)
window_14 = window_spec.rowsBetween(-13, 0)

# NOTE(review): the captured preview shows repeated (symbol, timestamp) rows.
# Row-based frames and lag() treat every copy as its own bar, and ordering
# among tied timestamps is not fixed - confirm whether bronze should be
# de-duplicated before this step.

# Difference between this row's close and the previous row's close;
# NULL on the first row of each symbol, where lag() has nothing to return.
close_delta = F.col("close") - F.col("prev_close")

df_silver = (
    df_bronze
    .withColumn("prev_close", F.lag("close", 1).over(window_spec))
    .withColumn("sma_20", F.avg("close").over(window_20))
    .withColumn("volume_avg_20", F.avg("volume").over(window_20))
    .withColumn("high_14", F.max("high").over(window_14))
    .withColumn("low_14", F.min("low").over(window_14))
    .withColumn("price_change", close_delta)
    .withColumn("price_change_pct", (close_delta / F.col("prev_close")) * 100)
)

print("✅ Technical indicators calculated")
df_silver.show(5)

StatementMeta(, 2528c9ee-e6f2-4ff9-8c77-ad6acc999d56, 5, Finished, Available, Finished)

✅ Technical indicators calculated
+------------+-------------------+------+------+------+------+------+-------------------+--------------------+----------+--------------------+----------+------+-------------+-------+------+------------+----------------+
|      symbol|          timestamp|  open|  high|   low| close|volume|     last_refreshed|      ingestion_time|      date|     processing_time|prev_close|sma_20|volume_avg_20|high_14|low_14|price_change|price_change_pct|
+------------+-------------------+------+------+------+------+------+-------------------+--------------------+----------+--------------------+----------+------+-------------+-------+------+------------+----------------+
|RELIANCE.BSE|2024-01-15 15:15:00|2495.0|2500.0|2490.0|2500.0| 98000|2024-01-15 15:20:00|2025-09-25T08:16:...|2024-01-15|2025-09-25 08:16:...|      NULL|2500.0|      98000.0| 2500.0|2490.0|        NULL|            NULL|
|RELIANCE.BSE|2024-01-15 15:15:00|2495.0|2500.0|2490.0|2500.0| 98000|2024-01-15 15:20:

In [4]:
# Cell 3 - RSI Calculation
# Builds df_rsi from df_silver by adding: gain, loss, avg_gain, avg_loss, rs, rsi.
# Relies on df_silver and window_14 from the indicator cell.
price_delta = F.col("price_change")
avg_gain_col = F.col("avg_gain")
avg_loss_col = F.col("avg_loss")

df_rsi = (
    df_silver
    # Split each row's price change into a non-negative gain and loss.
    # A NULL price_change fails both comparisons, so that row gets 0 / 0.
    .withColumn("gain", F.when(price_delta > 0, price_delta).otherwise(0))
    .withColumn("loss", F.when(price_delta < 0, -price_delta).otherwise(0))
    # 14-period average (simple mean over the trailing window_14 frame)
    .withColumn("avg_gain", F.avg("gain").over(window_14))
    .withColumn("avg_loss", F.avg("loss").over(window_14))
    # Relative strength is only defined when the frame contains losses;
    # guarding here avoids dividing by zero. NULL when avg_loss is 0.
    .withColumn("rs", F.when(avg_loss_col > 0, avg_gain_col / avg_loss_col))
    # RSI = 100 - 100 / (1 + rs). With no losses in the frame the index is
    # at its maximum (100) as long as there were gains; if there were neither
    # gains nor losses (flat prices) it is left NULL as undefined.
    .withColumn(
        "rsi",
        F.when(avg_loss_col > 0, 100 - (100 / (1 + F.col("rs"))))
        .when(avg_gain_col > 0, F.lit(100.0)),
    )
)

print("✅ RSI calculated")

StatementMeta(, 2528c9ee-e6f2-4ff9-8c77-ad6acc999d56, 6, Finished, Available, Finished)

✅ RSI calculated


In [5]:
# Cell 4 - Bollinger Bands & Volume Analysis
# Adds to df_rsi: std_20, bb_upper, bb_lower, bb_position, volume_ratio,
# unusual_volume; then persists the result as the silver table.
# Relies on df_rsi and window_20 from the earlier cells.

# Half-width of the band: two standard deviations of close over window_20.
band_half_width = 2 * F.col("std_20")

df_final = (
    df_rsi
    .withColumn("std_20", F.stddev("close").over(window_20))
    .withColumn("bb_upper", F.col("sma_20") + band_half_width)
    .withColumn("bb_lower", F.col("sma_20") - band_half_width)
    # Where close sits inside the band: 0 at the lower band, 1 at the upper.
    .withColumn(
        "bb_position",
        (F.col("close") - F.col("bb_lower")) / (F.col("bb_upper") - F.col("bb_lower")),
    )
    # Current volume relative to its trailing 20-row average.
    .withColumn("volume_ratio", F.col("volume") / F.col("volume_avg_20"))
    # Flag rows trading at more than twice the average volume;
    # a NULL ratio falls through to False.
    .withColumn("unusual_volume", F.when(F.col("volume_ratio") > 2.0, True).otherwise(False))
)

# Save to Silver layer
df_final.write.mode("overwrite").option("mergeSchema", "true").saveAsTable("silver_stock_analytics")

# Report the row count of the table that was just written. Calling
# df_final.count() here would re-execute the whole lazy lineage (bronze read
# plus every window computation) a second time just to print a number.
saved_count = spark.table("silver_stock_analytics").count()
print(f"✅ Saved {saved_count} records to silver_stock_analytics")

StatementMeta(, 2528c9ee-e6f2-4ff9-8c77-ad6acc999d56, 7, Finished, Available, Finished)

✅ Saved 25 records to silver_stock_analytics
