In [1]:
from clickzetta.zettapark.session import Session

In [2]:
import json
# Read the connection parameters from the JSON config file.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so the file
# is decoded the same way regardless of the platform's default encoding.
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session from the loaded parameters.
session = Session.builder.configs(config).create()

In [4]:
# Sample rows: (id, timestamp string with millisecond precision).
# Both fields are supplied as plain Python strings.
dates = [
    ("1", "2019-07-01 12:01:19.111"),
    ("2", "2019-06-24 12:01:19.222"),
    ("3", "2019-11-16 16:44:55.406"),
    ("4", "2019-11-16 16:50:59.406"),
]

# Build the base DataFrame used by the following cell.
df = session.createDataFrame(
    data=dates,
    schema=["id", "from_timestamp"],
)

In [5]:
from clickzetta.zettapark.functions import *
# Parse the string column into a timestamp, stamp every row with the current
# time, and subtract the two after casting both to "long".
df2 = (
    df.withColumn("from_timestamp", to_timestamp(col("from_timestamp")))
    .withColumn("end_timestamp", current_timestamp())
    .withColumn(
        "DiffInSeconds",
        col("end_timestamp").cast("long") - col("from_timestamp").cast("long"),
    )
)
df2.show()

# Same calculation, expressed with unix_timestamp() instead of cast("long").
(
    df.withColumn("from_timestamp", to_timestamp(col("from_timestamp")))
    .withColumn("end_timestamp", current_timestamp())
    .withColumn(
        "DiffInSeconds",
        unix_timestamp("end_timestamp") - unix_timestamp("from_timestamp"),
    )
    .show()
)

# Derive coarser units from the seconds column computed in df2.
# NOTE(review): `round` is presumably the column function brought in by the
# star import, not the Python builtin — confirm.
df2.withColumn("DiffInMinutes", round(col("DiffInSeconds") / 60)).show()

df2.withColumn("DiffInHours", round(col("DiffInSeconds") / 3600)).show()

--------------------------------------------------------------------------------------------
|id  |from_timestamp                    |end_timestamp                     |diffinseconds  |
--------------------------------------------------------------------------------------------
|1   |2019-07-01 12:01:19.111000+08:00  |2024-12-24 15:37:10.099166+08:00  |173072151      |
|2   |2019-06-24 12:01:19.222000+08:00  |2024-12-24 15:37:10.099166+08:00  |173676951      |
|3   |2019-11-16 16:44:55.406000+08:00  |2024-12-24 15:37:10.099166+08:00  |161131935      |
|4   |2019-11-16 16:50:59.406000+08:00  |2024-12-24 15:37:10.099166+08:00  |161131571      |
--------------------------------------------------------------------------------------------

--------------------------------------------------------------------------------------------
|id  |from_timestamp                    |end_timestamp                     |diffinseconds  |
---------------------------------------------------------------------

In [6]:
# Difference between two timestamps when the input strings carry only a
# time of day (no date part).
data = [
    ("12:01:19.000", "13:01:19.000"),
    ("12:01:19.000", "12:02:19.000"),
    ("16:44:55.406", "17:44:55.406"),
    ("16:50:59.406", "16:44:59.406"),
]
df3 = session.createDataFrame(
    data=data,
    schema=["from_timestamp", "to_timestamp"],
)

# Both columns are parsed with an explicit time-only format.
# Note the operand order: DiffInSeconds is from_timestamp minus to_timestamp,
# so rows whose to_timestamp is the later value yield negative differences.
(
    df3.withColumn("from_timestamp", to_timestamp(col("from_timestamp"), "HH:mm:ss.SSS"))
    .withColumn("to_timestamp", to_timestamp(col("to_timestamp"), "HH:mm:ss.SSS"))
    .withColumn(
        "DiffInSeconds",
        col("from_timestamp").cast("long") - col("to_timestamp").cast("long"),
    )
    .withColumn("DiffInMinutes", round(col("DiffInSeconds") / 60))
    .withColumn("DiffInHours", round(col("DiffInSeconds") / 3600))
    .show()
)

---------------------------------------------------------------------------------------------------------------------
|from_timestamp                    |to_timestamp                      |diffinseconds  |diffinminutes  |diffinhours  |
---------------------------------------------------------------------------------------------------------------------
|1970-01-01 12:01:19+08:00         |1970-01-01 13:01:19+08:00         |-3600          |-60.0          |-1.0         |
|1970-01-01 12:01:19+08:00         |1970-01-01 12:02:19+08:00         |-60            |-1.0           |0.0          |
|1970-01-01 16:44:55.406000+08:00  |1970-01-01 17:44:55.406000+08:00  |-3600          |-60.0          |-1.0         |
|1970-01-01 16:50:59.406000+08:00  |1970-01-01 16:44:59.406000+08:00  |360            |6.0            |0.0          |
---------------------------------------------------------------------------------------------------------------------



In [7]:
# Build a one-row DataFrame whose timestamp string is written month-first.
df3 = session.createDataFrame(
    data=[("1", "07-01-2019 12:01:19.406")],
    schema=["id", "input_timestamp"],
)

# Parse the string with an explicit format, add the current time, then derive
# the elapsed time in seconds, minutes, hours and days.
df3 = (
    df3.withColumn(
        "input_timestamp",
        to_timestamp(col("input_timestamp"), "MM-dd-yyyy HH:mm:ss.SSS"),
    )
    .withColumn("current_timestamp", current_timestamp())
    .withColumn(
        "DiffInSeconds",
        col("current_timestamp").cast("long") - col("input_timestamp").cast("long"),
    )
    .withColumn("DiffInMinutes", round(col("DiffInSeconds") / 60))
    .withColumn("DiffInHours", round(col("DiffInSeconds") / 3600))
    .withColumn("DiffInDays", round(col("DiffInSeconds") / (24 * 3600)))
)

# Show the result.
df3.show()


---------------------------------------------------------------------------------------------------------------------------------------
|id  |input_timestamp                   |current_timestamp                 |diffinseconds  |diffinminutes  |diffinhours  |diffindays  |
---------------------------------------------------------------------------------------------------------------------------------------
|1   |2019-07-01 12:01:19.406000+08:00  |2024-12-24 15:37:11.451376+08:00  |173072152      |2884536.0      |48076.0      |2003.0      |
---------------------------------------------------------------------------------------------------------------------------------------



In [8]:
# The same timestamp difference expressed in SQL: two fixed timestamps one day
# apart, reported in seconds, minutes and hours. Queries run in this order.
for query in (
    "select unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19') DiffInSeconds",
    "select (unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19'))/60 DiffInMinutes",
    "select (unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19'))/3600 DiffInHours",
):
    session.sql(query).show()

-----------------
|diffinseconds  |
-----------------
|86400          |
-----------------

-----------------
|diffinminutes  |
-----------------
|1440.0         |
-----------------

---------------
|diffinhours  |
---------------
|24.0         |
---------------



In [9]:
# Close the session now that all examples have run.
session.close()