In [1]:
import json
from clickzetta.zettapark.session import Session
# 从配置文件中读取参数
with open('security/config-uat.json', 'r') as config_file:
    config = json.load(config_file)

# 创建会话
session = Session.builder.configs(config).create()

In [2]:
data=[("James","Bond","100",None),
      ("Ann","Varsa","200",'F'),
      ("Tom Cruise","XXX","400",''),
      ("Tom Brand",None,"400",'M')] 
columns=["fname","lname","id","gender"]
df=session.createDataFrame(data,columns)

In [3]:
#alias
from clickzetta.zettapark.functions import expr
df.select(df.fname.alias("first_name"), \
          df.lname.alias("last_name"), \
          expr(" fname ||','|| lname").alias("fullName") \
   ).show()

-------------------------------------------
|first_name  |last_name  |fullname        |
-------------------------------------------
|James       |Bond       |James,Bond      |
|Ann         |Varsa      |Ann,Varsa       |
|Tom Cruise  |XXX        |Tom Cruise,XXX  |
|Tom Brand   |NULL       |NULL            |
-------------------------------------------



In [4]:
#asc, desc
df.sort(df.fname.asc()).show()
df.sort(df.fname.desc()).show()

#cast
df.select(df.fname,df.id.cast("int")).printSchema()

#between
df.filter(df.id.between(100,300)).show()

-------------------------------------
|fname       |lname  |id   |gender  |
-------------------------------------
|Ann         |Varsa  |200  |F       |
|James       |Bond   |100  |NULL    |
|Tom Brand   |NULL   |400  |M       |
|Tom Cruise  |XXX    |400  |        |
-------------------------------------

-------------------------------------
|fname       |lname  |id   |gender  |
-------------------------------------
|Tom Cruise  |XXX    |400  |        |
|Tom Brand   |NULL   |400  |M       |
|James       |Bond   |100  |NULL    |
|Ann         |Varsa  |200  |F       |
-------------------------------------

root
 |-- `fname`: StringType() (nullable = False)
 |-- `cast(id as int)`: IntegerType() (nullable = True)
--------------------------------
|fname  |lname  |id   |gender  |
--------------------------------
|James  |Bond   |100  |NULL    |
|Ann    |Varsa  |200  |F       |
--------------------------------



In [5]:
#like
df.filter(df.fname.like('%Cruise')).show()

-------------------------------------
|fname       |lname  |id   |gender  |
-------------------------------------
|Tom Cruise  |XXX    |400  |        |
-------------------------------------



In [6]:
#startswith, endswith()
df.filter(df.fname.startswith("T")).show()
df.filter(df.fname.endswith("Cruise")).show()

-------------------------------------
|fname       |lname  |id   |gender  |
-------------------------------------
|Tom Cruise  |XXX    |400  |        |
|Tom Brand   |NULL   |400  |M       |
-------------------------------------

-------------------------------------
|fname       |lname  |id   |gender  |
-------------------------------------
|Tom Cruise  |XXX    |400  |        |
-------------------------------------



In [7]:
#eqNullSafe

#isNull & isNotNull
df.filter(df.lname.isNull()).show()
df.filter(df.lname.isNotNull()).show()

#like , rlike
df.select(df.fname,df.lname,df.id) \
  .filter(df.fname.like("%om")) 

#over

#substr
df.select(df.fname.substr(1,2).alias("substr")).show()

#when & otherwise
from clickzetta.zettapark.functions import when
df.select(df.fname,df.lname,when(df.gender=="M","Male") \
              .when(df.gender=="F","Female") \
              .when(df.gender==None ,"") \
              .otherwise(df.gender).alias("new_gender") \
    ).show()

#isin
li=["100","200"]
df.select(df.fname,df.lname,df.id) \
  .filter(df.id.isin(li)) \
  .show()



------------------------------------
|fname      |lname  |id   |gender  |
------------------------------------
|Tom Brand  |NULL   |400  |M       |
------------------------------------

-------------------------------------
|fname       |lname  |id   |gender  |
-------------------------------------
|James       |Bond   |100  |NULL    |
|Ann         |Varsa  |200  |F       |
|Tom Cruise  |XXX    |400  |        |
-------------------------------------

----------
|substr  |
----------
|Ja      |
|An      |
|To      |
|To      |
----------

-----------------------------------
|fname       |lname  |new_gender  |
-----------------------------------
|James       |Bond   |NULL        |
|Ann         |Varsa  |Female      |
|Tom Cruise  |XXX    |            |
|Tom Brand   |NULL   |Male        |
-----------------------------------

-----------------------
|fname  |lname  |id   |
-----------------------
|James  |Bond   |100  |
|Ann    |Varsa  |200  |
-----------------------



In [8]:
session.close()