In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

In [2]:
spark = SparkSession.builder.appName("test").getOrCreate()

In [3]:
data = [
    [1, "Alice"],
    [2, "Bob"],
    [3, "Charlie"],
    [4, "David"],
    [5, "Eve"],
]
df = spark.createDataFrame(data, ["id", "name"])
df.show()

+---+-------+
| id|   name|
+---+-------+
|  1|  Alice|
|  2|    Bob|
|  3|Charlie|
|  4|  David|
|  5|    Eve|
+---+-------+



In [4]:
# DOES NOT WORK
df1 = df.withColumn("id_change", when(col("id") % 2 == 1, col("id") + 1).otherwise(col("id") - 1))
df1.select(col("id").alias("id"), "name").orderBy("id_change").show()

+---+-------+
| id|   name|
+---+-------+
|  2|    Bob|
|  1|  Alice|
|  4|  David|
|  3|Charlie|
|  5|    Eve|
+---+-------+



In [5]:
from pyspark.sql import Window

In [6]:
df1 = (
    df
    .withColumn("prev_seat", lag("name").over(Window.orderBy("id")))
    .withColumn("next_seat", lead("name").over(Window.orderBy("id")))
)
df1.show()

+---+-------+---------+---------+
| id|   name|prev_seat|next_seat|
+---+-------+---------+---------+
|  1|  Alice|     null|      Bob|
|  2|    Bob|    Alice|  Charlie|
|  3|Charlie|      Bob|    David|
|  4|  David|  Charlie|      Eve|
|  5|    Eve|    David|     null|
+---+-------+---------+---------+



In [7]:
df2 = (
    df1
    .withColumn("exchanged_seat",
                when(col("id") % 2 == 1, coalesce("next_seat", "name"))
                .when(col("id") % 1 == 1, coalesce("prev_seat", "name"))
                .otherwise("name")
               )
)
df2 = df2.withColumnRenamed("name", "original_set").drop("next_seat", "prev_seat", "id_change")
df2.show()

+---+------------+--------------+
| id|original_set|exchanged_seat|
+---+------------+--------------+
|  1|       Alice|           Bob|
|  2|         Bob|          name|
|  3|     Charlie|         David|
|  4|       David|          name|
|  5|         Eve|           Eve|
+---+------------+--------------+



In [8]:
df.createOrReplaceTempView("student_seats")

In [9]:
query = """
SELECT
    id,
    name AS original_seat,
    CASE
        WHEN id % 2 = 1 
            THEN COALESCE(LEAD(name) OVER (ORDER BY id), name)
        WHEN id % 2 = 0 
            THEN COALESCE(LAG(name) OVER (ORDER BY id), name)
        ELSE name
    END AS new_seat
FROM student_seats
"""
spark.sql(query).show()

+---+-------------+--------+
| id|original_seat|new_seat|
+---+-------------+--------+
|  1|        Alice|     Bob|
|  2|          Bob|   Alice|
|  3|      Charlie|   David|
|  4|        David| Charlie|
|  5|          Eve|     Eve|
+---+-------------+--------+

