In [1]:
from pyspark.sql import *
from pyspark.sql.functions import *

In [2]:
# Obtain the SparkSession for this notebook: getOrCreate() hands back the
# already-active session when there is one, otherwise it starts a new
# session registered under the application name "test".
spark = (
    SparkSession.builder
    .appName("test")
    .getOrCreate()
)

In [3]:
# Student roster: one [student_id, student_name] row per student.
data = [[1, "Steve"], [2, "David"], [3, "Aryan"]]

# Column names are supplied explicitly; column types are inferred from the rows.
student_df = spark.createDataFrame(
    data,
    schema=["student_id", "student_name"],
)
student_df.show()

+----------+------------+
|student_id|student_name|
+----------+------------+
|         1|       Steve|
|         2|       David|
|         3|       Aryan|
+----------+------------+



In [4]:
# Per-subject results: one [student_id, subject_name, score] row for every
# (student, subject) pair; rows are laid out one student per line.
data = [
    [1, "pyspark", 90], [1, "sql", 100],
    [2, "pyspark", 70], [2, "sql", 60],
    [3, "pyspark", 30], [3, "sql", 20],
]

# Column names are supplied explicitly; column types are inferred from the rows.
score_df = spark.createDataFrame(
    data,
    schema=["student_id", "subject_name", "score"],
)
score_df.show()

+----------+------------+-----+
|student_id|subject_name|score|
+----------+------------+-----+
|         1|     pyspark|   90|
|         1|         sql|  100|
|         2|     pyspark|   70|
|         2|         sql|   60|
|         3|     pyspark|   30|
|         3|         sql|   20|
+----------+------------+-----+



In [7]:
# Inner-join every student to their score rows. Both sides are aliased so
# that all later column references can be qualified ("students.<col>" /
# "scores.<col>"). The join condition is built from those same aliases
# instead of reaching back to the un-aliased DataFrames: this keeps the
# cell internally consistent and is the form that stays unambiguous when
# both sides share lineage (e.g. a self-join).
# Because the join is an inner join, a student with no score rows does not
# appear in join_df (and therefore not in agg_df either).
join_df = student_df.alias("students").join(
    score_df.alias("scores"),
    on=col("students.student_id") == col("scores.student_id"),
    how="inner",
)

# Collapse to one row per student, carrying the mean of that student's scores.
agg_df = join_df.groupBy("students.student_id", "students.student_name").agg(
    avg(col("scores.score")).alias("average_score")
)
agg_df.show()

+----------+------------+-------------+
|student_id|student_name|average_score|
+----------+------------+-------------+
|         1|       Steve|         95.0|
|         2|       David|         65.0|
|         3|       Aryan|         25.0|
+----------+------------+-------------+



In [12]:
# Grade each student from their average score.
#
# The branches of a when(...).when(...) chain are tested in order and the
# first one that matches wins, so each branch only needs its LOWER bound:
# reaching the ">= 60" branch already implies the average was not ">= 70".
# Stating each threshold exactly once avoids accidental gaps or overlaps
# when a single cut-off is edited later.
#
#   average >= 70 -> "Distinction"
#   average >= 60 -> "First Class"
#   average >= 50 -> "Second Class"
#   average >= 40 -> "Third Class"
#   otherwise     -> "Fail"
avg_score_col = col("average_score")
status_col = (
    when(avg_score_col >= 70, "Distinction")
    .when(avg_score_col >= 60, "First Class")
    .when(avg_score_col >= 50, "Second Class")
    .when(avg_score_col >= 40, "Third Class")
    # Anything below 40 lands here; so does a null average, because a
    # comparison against null never evaluates to true.
    .otherwise("Fail")
)
agg_df.withColumn("status", status_col).show()

+----------+------------+-------------+-----------+
|student_id|student_name|average_score|     status|
+----------+------------+-------------+-----------+
|         1|       Steve|         95.0|Distinction|
|         2|       David|         65.0|First Class|
|         3|       Aryan|         25.0|       Fail|
+----------+------------+-------------+-----------+

