In [1]:
from clickzetta.zettapark.session import Session,Row
from clickzetta.zettapark.functions import *

In [2]:
import json

# Read the connection parameters from the JSON config file.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# depending on the platform's default encoding (which differs across systems).
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session from the loaded parameters.
session = Session.builder.configs(config).create()

In [3]:
# Sample rows: a string id paired with a date written as a yyyy-MM-dd string.
data = [
    ("1", "2019-07-01"),
    ("2", "2019-06-24"),
    ("3", "2019-08-24"),
]

In [4]:
# Turn the sample rows into a DataFrame with columns "id" and "date",
# then print it.
df = session.createDataFrame(
    data=data,
    schema=["id", "date"],
)
df.show()

-------------------
|id  |date        |
-------------------
|1   |2019-07-01  |
|2   |2019-06-24  |
|3   |2019-08-24  |
-------------------



In [5]:
from clickzetta.zettapark import functions as F

# Day difference between each row's date and today. With the row date passed
# before current_date(), the recorded output shows positive day counts.
elapsed_days = F.datediff("day", F.col("date"), F.current_date())

df.select(
    F.col("date"),
    F.current_date().alias("current_date"),
    elapsed_days.alias("datediff"),
).show()

----------------------------------------
|date        |current_date  |datediff  |
----------------------------------------
|2019-07-01  |2024-12-26    |2005      |
|2019-06-24  |2024-12-26    |2012      |
|2019-08-24  |2024-12-26    |1951      |
----------------------------------------



In [6]:
# Name the shared column expressions once instead of repeating them per column.
today = F.current_date()
row_date = F.col("date")
months_diff = F.months_between(today, row_date)
years_diff = months_diff / F.lit(12)  # months -> years

# NOTE(review): current_date() is passed first here, and the recorded output
# shows negative differences for every row - confirm this sign is intended.
(
    df.withColumn("datesDiff", F.datediff("day", today, row_date))
    .withColumn("monthsDiff", months_diff)
    .withColumn("monthsDiff_round", F.round(months_diff, 2))
    .withColumn("yearsDiff", years_diff)
    .withColumn("yearsDiff_round", F.round(years_diff, 2))
    .show()
)

------------------------------------------------------------------------------------------------------
|id  |date        |datesdiff  |monthsdiff  |monthsdiff_round  |yearsdiff           |yearsdiff_round  |
------------------------------------------------------------------------------------------------------
|1   |2019-07-01  |-2005      |-65         |-65               |-5.416666666666667  |-5.42            |
|2   |2019-06-24  |-2012      |-66         |-66               |-5.5                |-5.5             |
|3   |2019-08-24  |-1951      |-64         |-64               |-5.333333333333333  |-5.33            |
------------------------------------------------------------------------------------------------------



In [7]:
# Same ids as before, but the dates are written as MM-dd-yyyy strings.
data2 = [
    ("1", "07-01-2019"),
    ("2", "06-24-2019"),
    ("3", "08-24-2019"),
]
df2 = session.createDataFrame(data=data2, schema=["id", "date"])

# Parse the string column with an explicit MM-dd-yyyy pattern and pair it with
# today's date.
# NOTE(review): this projection is neither assigned nor followed by .show(),
# so it contributes nothing to the cell's recorded output.
df2.select(
    to_date(col("date"), "MM-dd-yyyy").alias("date"),
    current_date().alias("endDate"),
)

# SQL variant: day difference divided by 365, rounded to two decimals.
# Dividing by a fixed 365 does not account for leap days, so the result is an
# approximation of the elapsed years.
years_diff_sql = """
SELECT 
    ROUND(DATEDIFF(CURRENT_DATE(), '2019-07-01') / 365, 2) AS years_diff
"""
session.sql(years_diff_sql).show()


--------------
|years_diff  |
--------------
|5.49        |
--------------



In [8]:
# Close the session now that every example cell has run.
session.close()