In [1]:
from clickzetta.zettapark.session import Session
from clickzetta.zettapark.functions import *
from clickzetta.zettapark import functions as F

In [2]:
import json

# Read the connection parameters from the JSON config file. The encoding is
# pinned to UTF-8 so the file is decoded the same way on every platform
# instead of depending on the locale's default encoding.
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session that the later cells use to build DataFrames.
session = Session.builder.configs(config).create()

In [3]:
# Sample rows as [id, date-string] pairs; both fields are plain strings.
data = [
    ["1", "2020-02-01"],
    ["2", "2019-03-01"],
    ["3", "2021-03-01"],
]

In [4]:
# Build the demo DataFrame from the sample rows, naming the two columns
# "id" and "input", then print it.
df = session.createDataFrame(
    data,
    ["id", "input"],
)
df.show()

-------------------
|id  |input       |
-------------------
|1   |2020-02-01  |
|2   |2019-03-01  |
|3   |2021-03-01  |
-------------------



In [5]:
# current_date(): takes no column argument, so a single displayed row
# (show(1)) is enough to see the value.
df.select(
    current_date().alias("current_date"),
).show(1)

----------------
|current_date  |
----------------
|2024-12-26    |
----------------



In [6]:
# date_format(): render the "input" column with the "MM-dd-yyyy" pattern,
# shown next to the original value for comparison.
df.select(
    col("input"),
    date_format(col("input"), "MM-dd-yyyy").alias("date_format_"),
).show()

-----------------------------
|input       |date_format_  |
-----------------------------
|2020-02-01  |02-01-2020    |
|2019-03-01  |03-01-2019    |
|2021-03-01  |03-01-2021    |
-----------------------------



In [7]:
# to_date(): parse the "input" strings into dates.
# The pattern uses the four-letter year field ("yyyy") to match the 4-digit
# years in the sample data and the other format patterns in this notebook.
df.select(
    col("input"),
    to_date(col("input"), "yyyy-MM-dd").alias("to_date"),
).show()

---------------------------
|input       |to_date     |
---------------------------
|2020-02-01  |2020-02-01  |
|2019-03-01  |2019-03-01  |
|2021-03-01  |2021-03-01  |
---------------------------



In [8]:
# months_between(): called with the current date first and the "input"
# date second.
# NOTE(review): the recorded output shows negative whole numbers even though
# the current date is later than every input date — confirm the engine's
# argument order / sign convention before relying on the sign.
df.select(
    col("input"),
    months_between(current_date(), col("input")).alias("months_between"),
).show()

-------------------------------
|input       |months_between  |
-------------------------------
|2020-02-01  |-58             |
|2019-03-01  |-69             |
|2021-03-01  |-45             |
-------------------------------



In [9]:
# trunc(): truncate "input" to three different units.
# - The "Month_Year" alias holds the YEAR truncation (name kept as-is).
# - The recorded output shows NULL for the "DAY" unit.
#   NOTE(review): presumably DAY is not a supported unit here — confirm.
df.select(
    F.col("input"),
    F.trunc(F.col("input"), F.lit("MONTH")).alias("Month_Trunc"),
    F.trunc(F.col("input"), F.lit("YEAR")).alias("Month_Year"),
    F.trunc(F.col("input"), F.lit("DAY")).alias("Day_Trunc"),
).show()


-----------------------------------------------------
|input       |month_trunc  |month_year  |day_trunc  |
-----------------------------------------------------
|2020-02-01  |2020-02-01   |2020-01-01  |NULL       |
|2019-03-01  |2019-03-01   |2019-01-01  |NULL       |
|2021-03-01  |2021-03-01   |2021-01-01  |NULL       |
-----------------------------------------------------



In [10]:
# add_months(), date_add(), date_sub(): shift "input" by whole months
# (+3 / -3) and by days (+4 / -4). A negative month count is used for the
# "sub_months" column.
df.select(
    col("input"),
    add_months(col("input"), 3).alias("add_months"),
    add_months(col("input"), -3).alias("sub_months"),
    date_add(col("input"), 4).alias("date_add"),
    date_sub(col("input"), 4).alias("date_sub"),
).show()

------------------------------------------------------------------
|input       |add_months  |sub_months  |date_add    |date_sub    |
------------------------------------------------------------------
|2020-02-01  |2020-05-01  |2019-11-01  |2020-02-05  |2020-01-28  |
|2019-03-01  |2019-06-01  |2018-12-01  |2019-03-05  |2019-02-25  |
|2021-03-01  |2021-06-01  |2020-12-01  |2021-03-05  |2021-02-25  |
------------------------------------------------------------------



In [11]:
# year(), month(), weekofyear(): extract calendar fields from "input".
# NOTE(review): a next_day(col("input"), "Sunday").alias("next_day") column
# was commented out in this cell; the reason is not recorded — confirm that
# next_day is supported before re-enabling it.
df.select(
    col("input"),
    year(col("input")).alias("year"),
    month(col("input")).alias("month"),
    weekofyear(col("input")).alias("weekofyear"),
).show()

------------------------------------------
|input       |year  |month  |weekofyear  |
------------------------------------------
|2020-02-01  |2020  |2      |5           |
|2019-03-01  |2019  |3      |9           |
|2021-03-01  |2021  |3      |9           |
------------------------------------------



In [12]:
# dayofweek(), dayofmonth(), dayofyear(): day-level fields of "input".
# NOTE(review): in the recorded output 2020-02-01 (a Saturday) maps to 7,
# so the week numbering appears to start at Sunday = 1 — confirm.
df.select(
    col("input"),
    dayofweek(col("input")).alias("dayofweek"),
    dayofmonth(col("input")).alias("dayofmonth"),
    dayofyear(col("input")).alias("dayofyear"),
).show()

---------------------------------------------------
|input       |dayofweek  |dayofmonth  |dayofyear  |
---------------------------------------------------
|2020-02-01  |7          |1           |32         |
|2019-03-01  |6          |1           |60         |
|2021-03-01  |2          |1           |60         |
---------------------------------------------------



In [13]:
# Space-separated timestamp strings (note the differing width of the last
# field: "06" vs "406"), loaded into a second DataFrame and printed.
data = [
    ["1", "02-01-2020 11 01 19 06"],
    ["2", "03-01-2019 12 01 19 406"],
    ["3", "03-01-2021 12 01 19 406"],
]
df2 = session.createDataFrame(
    data,
    ["id", "input"],
)
df2.show()

--------------------------------
|id  |input                    |
--------------------------------
|1   |02-01-2020 11 01 19 06   |
|2   |03-01-2019 12 01 19 406  |
|3   |03-01-2021 12 01 19 406  |
--------------------------------



In [14]:
# current_timestamp(): takes no column argument, so one displayed row is
# enough.
df2.select(
    current_timestamp().alias("current_timestamp"),
).show(1)

# to_timestamp(): parse the space-separated "input" strings using an
# explicit pattern that mirrors their layout.
df2.select(
    col("input"),
    to_timestamp(col("input"), "MM-dd-yyyy HH mm ss SSS").alias("to_timestamp"),
).show()

------------------------------------
|current_timestamp                 |
------------------------------------
|2024-12-26 11:52:40.629325+08:00  |
------------------------------------

--------------------------------------------------------------
|input                    |to_timestamp                      |
--------------------------------------------------------------
|02-01-2020 11 01 19 06   |2020-02-01 11:01:19.060000+08:00  |
|03-01-2019 12 01 19 406  |2019-03-01 12:01:19.406000+08:00  |
|03-01-2021 12 01 19 406  |2021-03-01 12:01:19.406000+08:00  |
--------------------------------------------------------------



In [15]:
# hour(), minute(), second(): extract time-of-day fields from timestamp
# strings written as "yyyy-MM-dd HH:mm:ss.fraction".
data = [
    ["1", "2020-02-01 11:01:19.06"],
    ["2", "2019-03-01 12:01:19.406"],
    ["3", "2021-03-01 12:01:19.406"],
]
df3 = session.createDataFrame(
    data,
    ["id", "input"],
)

df3.select(
    col("input"),
    hour(col("input")).alias("hour"),
    minute(col("input")).alias("minute"),
    second(col("input")).alias("second"),
).show()

----------------------------------------------------
|input                    |hour  |minute  |second  |
----------------------------------------------------
|2020-02-01 11:01:19.06   |11    |1       |19      |
|2019-03-01 12:01:19.406  |12    |1       |19      |
|2021-03-01 12:01:19.406  |12    |1       |19      |
----------------------------------------------------



In [16]:
# Close the session created at the top of the notebook now that the demo
# queries are done.
session.close()