In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Reuse the active Spark session if there is one; otherwise create it.
spark = SparkSession.builder.getOrCreate()

# Sample orders as (customer_id, order_date, amount) tuples.
# order_date is supplied as a yyyy-MM-dd string and converted below.
data = [
    (1, "2023-01-01", 100),
    (1, "2023-01-05", 120),
    (1, "2023-01-10", 110),
    (2, "2023-02-01", 200),
    (2, "2023-02-03", 250),
    (3, "2023-03-01", 300),
]

# Build the frame from the raw tuples first, then turn the string
# order_date column into a date with F.to_date so it can be ordered
# chronologically downstream.
order_columns = ["customer_id", "order_date", "amount"]
orders_raw = spark.createDataFrame(data, order_columns)
df = orders_raw.withColumn("order_date", F.to_date(F.col("order_date")))

df.show()


+-----------+----------+------+
|customer_id|order_date|amount|
+-----------+----------+------+
|          1|2023-01-01|   100|
|          1|2023-01-05|   120|
|          1|2023-01-10|   110|
|          2|2023-02-01|   200|
|          2|2023-02-03|   250|
|          3|2023-03-01|   300|
+-----------+----------+------+



In [0]:
# Per-customer window whose rows are ordered by order date.
window = Window.partitionBy("customer_id").orderBy("order_date")

# Amount of the same customer's immediately preceding order. The first
# order in each partition has no predecessor, so lag yields null there.
prev_amount = F.lag("amount").over(window)

# Keep only orders whose amount is strictly greater than the customer's
# previous order amount. Rows with a null previous amount do not satisfy
# the comparison, so each customer's first order is excluded.
(
    df.withColumn("prev_purchase_amt", prev_amount)
    .filter(F.col("amount") > F.col("prev_purchase_amt"))
    .show()
)

+-----------+----------+------+-----------------+
|customer_id|order_date|amount|prev_purchase_amt|
+-----------+----------+------+-----------------+
|          1|2023-01-05|   120|              100|
|          2|2023-02-03|   250|              200|
+-----------+----------+------+-----------------+



Time: O(N log N), dominated by sorting each customer's orders by date for the window; Space: O(N)