### PySpark Timestamp Difference (seconds, minutes, hours)

In [1]:
from pyspark.sql import SparkSession

In [2]:
spark = SparkSession.builder\
.appName('PySpark-Timestamp')\
.getOrCreate()

In [3]:
dates = [("1","2020-07-01 12:01:19.111"),
    ("2","2020-06-24 12:01:19.222"),
    ("3","2020-11-16 16:44:55.406"),
    ("4","2020-11-16 16:50:59.406")
    ]

In [4]:
df = spark.createDataFrame(data=dates, schema=["id","from_timestamp"])

In [5]:
from pyspark.sql.functions import *

In [6]:
df2 = df.withColumn('from_timestamp',to_timestamp(col('from_timestamp')))\
.withColumn('end_timestamp',current_timestamp())\
.withColumn("DiffInSeconds",col("end_timestamp").cast("long") - col('from_timestamp').cast("long"))

In [7]:
df2.show(truncate=False)

+---+-----------------------+-----------------------+-------------+
|id |from_timestamp         |end_timestamp          |DiffInSeconds|
+---+-----------------------+-----------------------+-------------+
|1  |2020-07-01 12:01:19.111|2021-03-07 10:59:21.183|21509882     |
|2  |2020-06-24 12:01:19.222|2021-03-07 10:59:21.183|22114682     |
|3  |2020-11-16 16:44:55.406|2021-03-07 10:59:21.183|9569666      |
|4  |2020-11-16 16:50:59.406|2021-03-07 10:59:21.183|9569302      |
+---+-----------------------+-----------------------+-------------+



In [8]:
df2.withColumn('DiffInMinutes',round(col('DiffInSeconds')/60))\
.show(truncate=False)

+---+-----------------------+-----------------------+-------------+-------------+
|id |from_timestamp         |end_timestamp          |DiffInSeconds|DiffInMinutes|
+---+-----------------------+-----------------------+-------------+-------------+
|1  |2020-07-01 12:01:19.111|2021-03-07 11:05:18.121|21510239     |358504.0     |
|2  |2020-06-24 12:01:19.222|2021-03-07 11:05:18.121|22115039     |368584.0     |
|3  |2020-11-16 16:44:55.406|2021-03-07 11:05:18.121|9570023      |159500.0     |
|4  |2020-11-16 16:50:59.406|2021-03-07 11:05:18.121|9569659      |159494.0     |
+---+-----------------------+-----------------------+-------------+-------------+



In [9]:
df2.withColumn('DiffInHours',round(col('DiffInSeconds')/3600))\
.show(truncate=False)

+---+-----------------------+-----------------------+-------------+-----------+
|id |from_timestamp         |end_timestamp          |DiffInSeconds|DiffInHours|
+---+-----------------------+-----------------------+-------------+-----------+
|1  |2020-07-01 12:01:19.111|2021-03-07 11:06:46.397|21510327     |5975.0     |
|2  |2020-06-24 12:01:19.222|2021-03-07 11:06:46.397|22115127     |6143.0     |
|3  |2020-11-16 16:44:55.406|2021-03-07 11:06:46.397|9570111      |2658.0     |
|4  |2020-11-16 16:50:59.406|2021-03-07 11:06:46.397|9569747      |2658.0     |
+---+-----------------------+-----------------------+-------------+-----------+



In [11]:
#Difference between two timestamps when input has just timestamp

data= [("12:01:19.000","13:01:19.000"),
    ("12:01:19.000","12:02:19.000"),
    ("16:44:55.406","17:44:55.406"),
    ("16:50:59.406","16:44:59.406")]
df3 = spark.createDataFrame(data=data, schema=["from_timestamp","to_timestamp"])

In [13]:
#Date & Time is not in PySpark Default Format

df3 = spark.createDataFrame(
        data=[("1","07-01-2019 12:01:19.406")], 
        schema=["id","input_timestamp"]
        )
df3.withColumn("input_timestamp",to_timestamp(col("input_timestamp"),"MM-dd-yyyy HH:mm:ss.SSS")) \
    .withColumn("current_timestamp",current_timestamp().alias("current_timestamp")) \
    .withColumn("DiffInSeconds",current_timestamp().cast("long") - col("input_timestamp").cast("long")) \
    .withColumn("DiffInMinutes",round(col("DiffInSeconds")/60)) \
    .withColumn("DiffInHours",round(col("DiffInSeconds")/3600)) \
    .withColumn("DiffInDays",round(col("DiffInSeconds")/24*3600)) \
    .show(truncate=False)

+---+-----------------------+-----------------------+-------------+-------------+-----------+------------+
|id |input_timestamp        |current_timestamp      |DiffInSeconds|DiffInMinutes|DiffInHours|DiffInDays  |
+---+-----------------------+-----------------------+-------------+-------------+-----------+------------+
|1  |2019-07-01 12:01:19.406|2021-03-07 11:13:20.101|53133121     |885552.0     |14759.0    |7.96996815E9|
+---+-----------------------+-----------------------+-------------+-------------+-----------+------------+

