In [1]:
import json
from clickzetta.zettapark.session import Session
# Read the connection parameters from the config file.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so the file
# is decoded the same way on every platform instead of depending on the
# machine's locale default.
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session from the loaded parameters.
session = Session.builder.configs(config).create()

In [2]:
# Sample rows, one tuple per person, in the same order as `columns` below.
# The start date and the graduated flag are stored as strings here; the
# following cell casts them to other types.
simpleData = [
    ("James", 34, "2006-01-01", "true", "M", 3000.60),
    ("Michael", 33, "1980-01-10", "true", "F", 3300.80),
    # NOTE: this date is written differently from the other two rows
    # (not year-first); the recorded output of the cast cell shows NULL
    # for this row's jobstartdate.
    ("Robert", 37, "06-01-1992", "false", "M", 5000.50),
]

columns = [
    "firstname",
    "age",
    "jobStartDate",
    "isGraduated",
    "gender",
    "salary",
]

# Build the DataFrame, then print its schema followed by its rows.
df = session.createDataFrame(data=simpleData, schema=columns)
df.printSchema()
df.show()

root
 |-- `firstname`: StringType() (nullable = False)
 |-- `age`: IntegerType() (nullable = False)
 |-- `jobstartdate`: StringType() (nullable = False)
 |-- `isgraduated`: StringType() (nullable = False)
 |-- `gender`: StringType() (nullable = False)
 |-- `salary`: FloatType() (nullable = False)
-----------------------------------------------------------------------------
|firstname  |age  |jobstartdate  |isgraduated  |gender  |salary             |
-----------------------------------------------------------------------------
|James      |34   |2006-01-01    |true         |M       |3000.60009765625   |
|Michael    |33   |1980-01-10    |true         |F       |3300.800048828125  |
|Robert     |37   |06-01-1992    |false        |M       |5000.5             |
-----------------------------------------------------------------------------



In [3]:
from clickzetta.zettapark.functions import col
from clickzetta.zettapark.types import StringType,BooleanType,DateType
# 1) Cast with the DataFrame API: withColumn + Column.cast.
df2 = (
    df
    .withColumn("age", col("age").cast(StringType()))
    .withColumn("isGraduated", col("isGraduated").cast(BooleanType()))
    .withColumn("jobStartDate", col("jobStartDate").cast(DateType()))
)
df2.printSchema()

# 2) Cast with SQL expressions through selectExpr. Only the three listed
#    expressions are selected, so df3 has just these three columns.
df3 = df2.selectExpr(
    "cast(age as int) age",
    "cast(isGraduated as string) isGraduated",
    "cast(jobStartDate as string) jobStartDate",
)
df3.printSchema()
df3.show()

# 3) Cast inside a SQL query: register df3 as a temporary view and apply the
#    STRING / BOOLEAN / DATE functions to its columns.
df3.createOrReplaceTempView("CastExample")
df4 = session.sql(
    "SELECT STRING(age),BOOLEAN(isGraduated),DATE(jobStartDate) from CastExample"
)
df4.printSchema()
df4.show()

root
 |-- `firstname`: StringType() (nullable = False)
 |-- `gender`: StringType() (nullable = False)
 |-- `salary`: FloatType() (nullable = False)
 |-- `age`: StringType() (nullable = False)
 |-- `isgraduated`: BooleanType() (nullable = True)
 |-- `jobstartdate`: DateType() (nullable = True)
root
 |-- `age`: IntegerType() (nullable = True)
 |-- `isgraduated`: StringType() (nullable = True)
 |-- `jobstartdate`: StringType() (nullable = True)
------------------------------------
|age  |isgraduated  |jobstartdate  |
------------------------------------
|34   |true         |2006-01-01    |
|33   |true         |1980-01-10    |
|37   |false        |NULL          |
------------------------------------

root
 |-- `string(age)`: StringType() (nullable = True)
 |-- ```boolean``(isgraduated)`: BooleanType() (nullable = True)
 |-- ```date``(jobstartdate)`: DateType() (nullable = True)
---------------------------------------------------------------
|string(age)  |`boolean`(isgraduated)  |`date`(jo

In [4]:
# Close the session created in the first cell now that the examples are done.
session.close()