In [1]:
from clickzetta.zettapark.session import Session

In [2]:
import json

# Read the connection parameters from the JSON configuration file.
# The encoding is pinned to UTF-8 so that decoding the file does not depend
# on the platform's default locale encoding.
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session from the loaded settings.
session = Session.builder.configs(config).create()

In [4]:
# Column names, listed in the same order as the fields of each row tuple.
columns = ["firstname", "lastname", "gender", "salary"]

# Sample rows that the later cells operate on.
data = [
    ('James', 'Smith', 'M', 3000),
    ('Anna', 'Rose', 'F', 4100),
    ('Robert', 'Williams', 'M', 6200),
]

# Build the DataFrame from the rows and column names, then print it.
df = session.createDataFrame(data=data, schema=columns)
df.show()


------------------------------------------
|firstname  |lastname  |gender  |salary  |
------------------------------------------
|James      |Smith     |M       |3000    |
|Anna       |Rose      |F       |4100    |
|Robert     |Williams  |M       |6200    |
------------------------------------------



In [5]:
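# Original RDD-based mapPartitions version, kept commented out for reference only;
# the cells below produce the same name/bonus columns with the DataFrame API instead.
# #Example 1 mapPartitions()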
# def reformat(partitionData):
#     for row in partitionData:
#         yield [row.firstname+","+row.lastname,row.salary*10/100]
# df.rdd.mapPartitions(reformat).toDF().show()

# #Example 2 mapPartitions()
# def reformat2(partitionData):
#   updatedData = []
#   for row in partitionData:
#     name=row.firstname+","+row.lastname
#     bonus=row.salary*10/100
#     updatedData.append([name,bonus])
#   return iter(updatedData)

# df2=df.rdd.mapPartitions(reformat2).toDF("name","bonus")
# df2.show()


In [6]:
from clickzetta.zettapark.functions import concat_ws, col, lit

# Example 1: use the DataFrame API in place of mapPartitions.
# "name" is firstname and lastname joined with a comma separator.
name_col = concat_ws(lit(","), col("firstname"), col("lastname")).alias("name")
# "bonus" is salary * 10 / 100.
bonus_col = (col("salary") * 10 / 100).alias("bonus")
df2 = df.select(name_col, bonus_col)

# Print the resulting DataFrame.
df2.show()

---------------------------
|name             |bonus  |
---------------------------
|James,Smith      |300.0  |
|Anna,Rose        |410.0  |
|Robert,Williams  |620.0  |
---------------------------



In [7]:
from clickzetta.zettapark.functions import col, concat_ws, lit

# Use the DataFrame API in place of mapPartitions.
# Build the two projected columns first, then select them in order.
projected_columns = (
    concat_ws(lit(","), col("firstname"), col("lastname")).alias("name"),
    (col("salary") * 10 / 100).alias("bonus"),
)
df2 = df.select(*projected_columns)

# Print the result.
df2.show()

# The second example does not need a conversion to Pandas.
def reformat2(df, bonus_percent=10):
    """Project ``df`` down to a combined name column and a bonus column.

    Args:
        df: DataFrame exposing ``firstname``, ``lastname`` and ``salary``
            columns.
        bonus_percent: Percentage of ``salary`` reported as the bonus.
            Defaults to 10, so calling ``reformat2(df)`` yields
            ``salary * 10 / 100``.

    Returns:
        The result of ``df.select`` with two columns: ``name`` (firstname and
        lastname joined with a comma) and ``bonus``.
    """
    return df.select(
        concat_ws(lit(","), col("firstname"), col("lastname")).alias("name"),
        # Multiply before dividing so the default produces the expression
        # salary * 10 / 100.
        (col("salary") * bonus_percent / 100).alias("bonus"),
    )

# Call reformat2 on the sample DataFrame and print the result.
df2 = reformat2(df)
df2.show()


---------------------------
|name             |bonus  |
---------------------------
|James,Smith      |300.0  |
|Anna,Rose        |410.0  |
|Robert,Williams  |620.0  |
---------------------------

---------------------------
|name             |bonus  |
---------------------------
|James,Smith      |300.0  |
|Anna,Rose        |410.0  |
|Robert,Williams  |620.0  |
---------------------------



In [8]:
# Close the session that was created from the config file earlier in the notebook.
session.close()