In [1]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.types import *

# Obtain the SparkSession used by every cell below; getOrCreate() hands back
# an already-running session if one exists instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("chapter-08-join")
    .getOrCreate()
)

In [2]:
# Column names, in the same order as the fields of each record below.
columns = ["empId", "name", "mgrId"]

# Employee records as (empId, name, mgrId) tuples.
# mgrId holds the empId of the employee's manager; None means no manager.
data = [
    (100, "Kathy", None),
    (101, "Ken", 100),
    (102, "Ben", 100),
    (201, "Dan", 101),
    (202, "Jane", 101),
    (901, "Wendy", None),
]
# Build the employee DataFrame; column types are inferred from the tuples,
# and the list of names supplies the column headers.
df = spark.createDataFrame(data, columns)

In [3]:
# Print the employee DataFrame to confirm the rows were loaded as written.
df.show()

+-----+-----+-----+
|empId| name|mgrId|
+-----+-----+-----+
|  100|Kathy| null|
|  101|  Ken|  100|
|  102|  Ben|  100|
|  201|  Dan|  101|
|  202| Jane|  101|
|  901|Wendy| null|
+-----+-----+-----+



In [11]:
# Two aliases of the same DataFrame so a self-join can tell its two sides
# apart: "df1" is used as the employee side, "df2" as the manager side.
df1, df2 = df.alias("df1"), df.alias("df2")

In [5]:
# df2 is only an alias of df, so this prints the same rows as df.show().
df2.show()

+-----+-----+-----+
|empId| name|mgrId|
+-----+-----+-----+
|  100|Kathy| null|
|  101|  Ken|  100|
|  102|  Ben|  100|
|  201|  Dan|  101|
|  202| Jane|  101|
|  901|Wendy| null|
+-----+-----+-----+



In [19]:
# Self-join: match each employee row (df1) to the row of their manager (df2)
# by comparing the employee's mgrId with the manager's empId.
# A left join keeps employees whose mgrId is null; their mgr_name is null.
df3 = df1.join(
    df2,
    on=F.col("df1.mgrId") == F.col("df2.empId"),
    how="left",
)
(
    df3.select(
        F.col("df1.empId").alias("empId"),
        F.col("df1.name").alias("name"),
        F.col("df2.name").alias("mgr_name"),
    )
    # Sort on the projected column; after the select, the "df1.name"
    # qualifier is no longer part of the output and only resolves because
    # the analyzer reaches back into the pre-projection plan.
    .sort("name")
    .show()
)


+-----+-----+--------+
|empId| name|mgr_name|
+-----+-----+--------+
|  102|  Ben|   Kathy|
|  201|  Dan|     Ken|
|  202| Jane|     Ken|
|  100|Kathy|    null|
|  101|  Ken|   Kathy|
|  901|Wendy|    null|
+-----+-----+--------+



In [14]:
# Register df as the temporary view "empTab" so it can be queried with
# spark.sql. createOrReplaceTempView returns None, so its result is not
# assigned: the view is referenced by name inside SQL text, not through a
# Python variable.
df.createOrReplaceTempView("empTab")

In [15]:
# Sanity check: read every row back through the "empTab" view.
all_employees = spark.sql("select * from empTab")
all_employees.show()

+-----+-----+-----+
|empId| name|mgrId|
+-----+-----+-----+
|  100|Kathy| null|
|  101|  Ken|  100|
|  102|  Ben|  100|
|  201|  Dan|  101|
|  202| Jane|  101|
|  901|Wendy| null|
+-----+-----+-----+



In [18]:
# SQL form of the self-join: empTab is read twice, as emp1 (the employee)
# and emp2 (the manager). The left join keeps employees with a null mgrId,
# whose mgrName comes back null.
manager_lookup_sql = """
    select 
        emp1.empId,
        emp1.name,
        emp2.name as mgrName
    from empTab as emp1 
    left join empTab as emp2 
        on emp1.mgrId = emp2.empId
    order by emp1.name
"""
spark.sql(manager_lookup_sql).show()

+-----+-----+-------+
|empId| name|mgrName|
+-----+-----+-------+
|  102|  Ben|  Kathy|
|  201|  Dan|    Ken|
|  202| Jane|    Ken|
|  100|Kathy|   null|
|  101|  Ken|  Kathy|
|  901|Wendy|   null|
+-----+-----+-------+

