In [1]:
from clickzetta.zettapark.session import Session,Row

In [2]:
import json
# Read the connection parameters from the config file.
# JSON is UTF-8 by specification, so the encoding is passed explicitly rather
# than relying on the platform default (which is not UTF-8 on every OS/locale).
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session from the loaded parameters.
session = Session.builder.configs(config).create()

In [4]:
# Sample rows as (name, gender, salary). The rows deliberately include a None
# gender, an empty-string gender and a None salary so the later cells have
# NULL / blank values to work with.
data = [
    ("James", "M", 60000),
    ("Michael", "M", 70000),
    ("Robert", None, 400000),
    ("Maria", "F", 500000),
    ("Jen", "", None),
]

# Column names, in the same order as the tuple fields above.
columns = ["name", "gender", "salary"]

df = session.createDataFrame(data=data, schema=columns)
df.show()

-----------------------------
|name     |gender  |salary  |
-----------------------------
|James    |M       |60000   |
|Michael  |M       |70000   |
|Robert   |NULL    |400000  |
|Maria    |F       |500000  |
|Jen      |        |NULL    |
-----------------------------



In [5]:
# Using when / otherwise
from clickzetta.zettapark.functions import when, col

# Build the conditional expression once and reuse it in both variants below:
#   "M"  -> "Male", "F" -> "Female", NULL -> "", anything else -> unchanged.
gender_label = (
    when(df.gender == "M", "Male")
    .when(df.gender == "F", "Female")
    .when(df.gender.isNull(), "")
    .otherwise(df.gender)
)

# Variant 1: append the derived column with withColumn.
df2 = df.withColumn("new_gender", gender_label)
df2.show()

# Variant 2: same result via select, keeping every original column and
# aliasing the expression to the new column name.
df2 = df.select(col("*"), gender_label.alias("new_gender"))
df2.show()

------------------------------------------
|name     |gender  |salary  |new_gender  |
------------------------------------------
|James    |M       |60000   |Male        |
|Michael  |M       |70000   |Male        |
|Robert   |NULL    |400000  |            |
|Maria    |F       |500000  |Female      |
|Jen      |        |NULL    |            |
------------------------------------------

------------------------------------------
|name     |gender  |salary  |new_gender  |
------------------------------------------
|James    |M       |60000   |Male        |
|Michael  |M       |70000   |Male        |
|Robert   |NULL    |400000  |            |
|Maria    |F       |500000  |Female      |
|Jen      |        |NULL    |            |
------------------------------------------



In [6]:
# Using SQL CASE WHEN
from clickzetta.zettapark.functions import expr

# The CASE expression is defined once and shared by all three variants below.
# Every fragment ends with a space so that adjacent string literals never fuse
# two SQL tokens together (e.g. "''" directly followed by "ELSE").
case_sql = (
    "CASE WHEN gender = 'M' THEN 'Male男' "
    "WHEN gender = 'F' THEN 'Female女' "
    "WHEN gender IS NULL THEN '' "
    "ELSE gender END"
)

# Variant 1: withColumn + expr.
df3 = df.withColumn("new_gender", expr(case_sql))
df3.show()

# Variant 2: select + expr with an alias (built here but not displayed).
df4 = df.select(col("*"), expr(case_sql).alias("new_gender"))

# Variant 3: plain SQL against a temporary view of the same DataFrame.
df.createOrReplaceTempView("EMP")
session.sql(f"select name, {case_sql} as new_gender from EMP").show()

------------------------------------------
|name     |gender  |salary  |new_gender  |
------------------------------------------
|James    |M       |60000   |Male男       |
|Michael  |M       |70000   |Male男       |
|Robert   |NULL    |400000  |            |
|Maria    |F       |500000  |Female女     |
|Jen      |        |NULL    |            |
------------------------------------------

------------------------
|name     |new_gender  |
------------------------
|James    |Male男       |
|Michael  |Male男       |
|Robert   |            |
|Maria    |Female女     |
|Jen      |            |
------------------------



In [7]:
# Close the session that was created from the config file earlier in this notebook.
session.close()