In [1]:
import json
from clickzetta.zettapark.session import Session
# 从配置文件中读取参数
with open('security/config-uat.json', 'r') as config_file:
    config = json.load(config_file)

# 创建会话
session = Session.builder.configs(config).create()

In [2]:
data = [('James','Smith','M',3000),
  ('Anna','Rose','F',4100),
  ('Robert','Williams','M',6200), 
]

columns = ["firstname","lastname","gender","salary"]
df = session.createDataFrame(data=data, schema = columns)
df.show()


if 'salary1' not in df.columns:
    print("aa")

------------------------------------------
|firstname  |lastname  |gender  |salary  |
------------------------------------------
|James      |Smith     |M       |3000    |
|Anna       |Rose      |F       |4100    |
|Robert     |Williams  |M       |6200    |
------------------------------------------

aa


In [3]:
# Add new constanct column
from clickzetta.zettapark.functions import lit
df.withColumn("bonus_percent", lit(0.3)) \
  .show()

----------------------------------------------------------------
|firstname  |lastname  |gender  |salary  |bonus_percent        |
----------------------------------------------------------------
|James      |Smith     |M       |3000    |0.30000001192092896  |
|Anna       |Rose      |F       |4100    |0.30000001192092896  |
|Robert     |Williams  |M       |6200    |0.30000001192092896  |
----------------------------------------------------------------



In [4]:
#Add column from existing column
df.withColumn("bonus_amount", df.salary*0.3) \
  .show()

---------------------------------------------------------------
|firstname  |lastname  |gender  |salary  |bonus_amount        |
---------------------------------------------------------------
|James      |Smith     |M       |3000    |900.0000357627869   |
|Anna       |Rose      |F       |4100    |1230.0000488758087  |
|Robert     |Williams  |M       |6200    |1860.0000739097595  |
---------------------------------------------------------------



In [5]:
#Add column by concatinating existing columns
from clickzetta.zettapark.functions import concat_ws
df.withColumn("name", concat_ws("firstname",'lastname')) \
  .show()

-----------------------------------------------------
|firstname  |lastname  |gender  |salary  |name      |
-----------------------------------------------------
|James      |Smith     |M       |3000    |Smith     |
|Anna       |Rose      |F       |4100    |Rose      |
|Robert     |Williams  |M       |6200    |Williams  |
-----------------------------------------------------



In [6]:
#Add current date
from clickzetta.zettapark.functions import current_date
df.withColumn("current_date", current_date()) \
  .show()

---------------------------------------------------------
|firstname  |lastname  |gender  |salary  |current_date  |
---------------------------------------------------------
|James      |Smith     |M       |3000    |2024-12-26    |
|Anna       |Rose      |F       |4100    |2024-12-26    |
|Robert     |Williams  |M       |6200    |2024-12-26    |
---------------------------------------------------------



In [7]:
from clickzetta.zettapark.functions import when
df.withColumn("grade", \
   when((df.salary < 4000), lit("A")) \
     .when((df.salary >= 4000) & (df.salary <= 5000), lit("B")) \
     .otherwise(lit("C")) \
  ).show()
    
# Add column using select
df.select("firstname","salary", lit(0.3).alias("bonus")).show()
df.select("firstname","salary", lit(df.salary * 0.3).alias("bonus_amount")).show()
df.select("firstname","salary", current_date().alias("today_date")).show()

#Add columns using SQL
df.createOrReplaceTempView("PER")
session.sql("select firstname,salary, '0.3' as bonus from PER").show()
session.sql("select firstname,salary, salary * 0.3 as bonus_amount from PER").show()
session.sql("select firstname,salary, current_date() as today_date from PER").show()

--------------------------------------------------
|firstname  |lastname  |gender  |salary  |grade  |
--------------------------------------------------
|James      |Smith     |M       |3000    |A      |
|Anna       |Rose      |F       |4100    |B      |
|Robert     |Williams  |M       |6200    |C      |
--------------------------------------------------

--------------------------------------------
|firstname  |salary  |bonus                |
--------------------------------------------
|James      |3000    |0.30000001192092896  |
|Anna       |4100    |0.30000001192092896  |
|Robert     |6200    |0.30000001192092896  |
--------------------------------------------

-------------------------------------------
|firstname  |salary  |bonus_amount        |
-------------------------------------------
|James      |3000    |900.0000357627869   |
|Anna       |4100    |1230.0000488758087  |
|Robert     |6200    |1860.0000739097595  |
-------------------------------------------

-----------------

In [8]:
session.sql("select firstname,salary, " +
          "case  when salary < 4000 then 'A' "+
          "else 'B' END as grade from PER").show()

------------------------------
|firstname  |salary  |grade  |
------------------------------
|James      |3000    |A      |
|Anna       |4100    |B      |
|Robert     |6200    |B      |
------------------------------



In [9]:
session.close()