In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Sample sales orders, one tuple per order:
# (SOId, SODate as 'yyyy-MM-dd' text, ItemId, ItemQty, ItemValue).
data = [
    (1, "2024-01-01", "I1", 10, 1000),
    (2, "2024-01-15", "I2", 20, 2000),
    (3, "2024-02-01", "I3", 10, 1500),
    (4, "2024-02-15", "I4", 20, 2500),
    (5, "2024-03-01", "I5", 30, 3000),
    (6, "2024-03-10", "I6", 40, 3500),
    (7, "2024-03-20", "I7", 20, 2500),
    (8, "2024-03-30", "I8", 10, 1000),
]

# Only column names are supplied; Spark infers the column types from the
# Python values, so SODate is a string column at this point.
schema = ["SOId", "SODate", "ItemId", "ItemQty", "ItemValue"]

df1 = spark.createDataFrame(data, schema)
display(df1)

SOId,SODate,ItemId,ItemQty,ItemValue
1,2024-01-01,I1,10,1000
2,2024-01-15,I2,20,2000
3,2024-02-01,I3,10,1500
4,2024-02-15,I4,20,2500
5,2024-03-01,I5,30,3000
6,2024-03-10,I6,40,3500
7,2024-03-20,I7,20,2500
8,2024-03-30,I8,10,1000


In [0]:
# Replace the text SODate column with a date-typed column of the same name,
# then print the schema to confirm the new type.
df1 = df1.withColumn(
    "SODate",
    col("SODate").cast("date"),
)
df1.printSchema()


root
 |-- SOId: long (nullable = true)
 |-- SODate: date (nullable = true)
 |-- ItemId: string (nullable = true)
 |-- ItemQty: long (nullable = true)
 |-- ItemValue: long (nullable = true)



In [0]:
# Keep only the order value together with the calendar year and month
# extracted from SODate.
# NOTE: this rebinds df1 to a frame that no longer contains SODate, so this
# cell cannot be re-run on its own -- re-run the cells above it first.
df1 = df1.select(
    col("ItemValue"),
    year(col("SODate")).alias("year"),
    month(col("SODate")).alias("month"),
)
display(df1)


ItemValue,year,month
1000,2024,1
2000,2024,1
1500,2024,2
2500,2024,2
3000,2024,3
3500,2024,3
2500,2024,3
1000,2024,3


In [0]:
# Total order value per calendar month.
#
# * The value column is referenced by its exact name, 'ItemValue'. The previous
#   spelling 'itemValue' only resolved because Spark's column resolution is
#   case-insensitive by default (spark.sql.caseSensitive=false).
# * Rows are sorted by year and then month so the result stays chronological
#   when the data covers more than one year (sorting by month alone would put
#   January of every year ahead of any February).
# * `sum` is pyspark.sql.functions.sum, brought in by the wildcard import at
#   the top of the notebook -- not Python's builtin sum.
# The grouping order (month, year) is kept so the output columns are unchanged.
df_agg = (
    df1.groupBy(col('month'), col('year'))
    .agg(sum(col('ItemValue')).alias('total_sum'))
    .orderBy(col('year'), col('month'))
)
display(df_agg)

month,year,total_sum
1,2024,3000
2,2024,4000
3,2024,10000


In [0]:
from pyspark.sql.functions import *

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import lag

In [0]:
# Attach the previous month's total to every monthly row as column 'lag'.
#
# The window has to be ordered by year first and month second. Ordering by
# (month, year) sorts January of every year before any February, so with
# multi-year data lag() would return the same month of another year instead
# of the chronologically previous month.
#
# The window has no partitionBy, so Spark evaluates it over the whole frame in
# a single partition; that is fine here because df_agg holds one row per month.
# The earliest month has no predecessor, so its 'lag' value is null.
df_lag = df_agg.select(
    '*',
    lag(col('total_sum'))
    .over(Window.orderBy(col('year'), col('month')))
    .alias('lag'),
)
display(df_lag)

month,year,total_sum,lag
1,2024,3000,
2,2024,4000,3000.0
3,2024,10000,4000.0


In [0]:
# Change versus the previous month, expressed as a percentage of the CURRENT
# month's total: (total_sum - lag) * 100 / total_sum.
# The earliest month has a null 'lag', so its percentage is null as well.
# NOTE(review): a conventional month-over-month growth rate divides by the
# previous month's total (col('lag')) instead of col('total_sum') -- confirm
# which definition is intended before relying on these numbers.
df_percentage = df_lag.withColumn(
    'percentage',
    (col('total_sum') - col('lag')) * 100 / col('total_sum'),
)
# Render the result, as every other cell in this notebook does.
display(df_percentage)



month,year,total_sum,lag,percentage
1,2024,3000,,
2,2024,4000,3000.0,25.0
3,2024,10000,4000.0,60.0


In [0]:
# Final report: the helper column holding the previous month's total is only
# needed to compute 'percentage', so remove it before showing the result.
df_drop = df_percentage.drop("lag")
display(df_drop)

month,year,total_sum,percentage
1,2024,3000,
2,2024,4000,25.0
3,2024,10000,60.0
