In [1]:
from clickzetta.zettapark.session import Session
from clickzetta.zettapark.functions import col

In [2]:
import json
# Read the connection parameters from the JSON config file.
# The encoding is pinned explicitly: without it, open() decodes with the
# platform's locale encoding, so the same file could load differently (or
# fail) on another machine if it contains any non-ASCII characters.
with open('security/config-uat.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create the session from the loaded parameters.
session = Session.builder.configs(config).create()

In [3]:
# Sample employee rows. Each tuple follows the field order of empColumns
# below. Note that year_joined and emp_dept_id are given as strings, while
# emp_id, superior_emp_id and salary are integers.
emp = [
    (1, "Smith", -1, "2018", "10", "M", 3000),
    (2, "Rose", 1, "2010", "20", "M", 4000),
    (3, "Williams", 1, "2010", "10", "M", 1000),
    (4, "Jones", 2, "2005", "10", "F", 2000),
    (5, "Brown", 2, "2010", "40", "", -1),
    (6, "Brown", 2, "2010", "50", "", -1),
]

# Column names for the employee rows, in tuple order.
empColumns = [
    "emp_id",
    "name",
    "superior_emp_id",
    "year_joined",
    "emp_dept_id",
    "gender",
    "salary",
]

In [4]:
# Build the employee DataFrame from the in-memory rows, naming the columns
# with empColumns, then print its schema followed by its contents.
empDF = session.createDataFrame(
    data=emp,
    schema=empColumns,
)
empDF.printSchema()
empDF.show()

root
 |-- `emp_id`: IntegerType() (nullable = False)
 |-- `name`: StringType() (nullable = False)
 |-- `superior_emp_id`: IntegerType() (nullable = False)
 |-- `year_joined`: StringType() (nullable = False)
 |-- `emp_dept_id`: StringType() (nullable = False)
 |-- `gender`: StringType() (nullable = False)
 |-- `salary`: IntegerType() (nullable = False)
-------------------------------------------------------------------------------------
|emp_id  |name      |superior_emp_id  |year_joined  |emp_dept_id  |gender  |salary  |
-------------------------------------------------------------------------------------
|1       |Smith     |-1               |2018         |10           |M       |3000    |
|2       |Rose      |1                |2010         |20           |M       |4000    |
|3       |Williams  |1                |2010         |10           |M       |1000    |
|4       |Jones     |2                |2005         |10           |F       |2000    |
|5       |Brown     |2                |2010 

In [5]:
# Sample department rows as (dept_name, dept_id) pairs. dept_id is an
# integer here, whereas the employee rows carry emp_dept_id as a string.
dept = [
    ("Finance", 10),
    ("Marketing", 20),
    ("Sales", 30),
    ("IT", 40),
]

# Column names for the department rows, in tuple order.
deptColumns = ["dept_name", "dept_id"]
# Build the department DataFrame from the in-memory rows, naming the columns
# with deptColumns, then print its schema followed by its contents.
deptDF = session.createDataFrame(
    data=dept,
    schema=deptColumns,
)
deptDF.printSchema()
deptDF.show()

root
 |-- `dept_name`: StringType() (nullable = False)
 |-- `dept_id`: IntegerType() (nullable = False)
-----------------------
|dept_name  |dept_id  |
-----------------------
|Finance    |10       |
|Marketing  |20       |
|Sales      |30       |
|IT         |40       |
-----------------------



In [6]:
# Join employees to departments on emp_dept_id == dept_id once per join-type
# string and print each result. The strings are passed to join() verbatim and
# in the same order as the original cell.
# NOTE(review): several entries (e.g. "outer" / "full" / "fullouter") look like
# alternate spellings of the same join kind -- confirm against the join() docs.
join_types = (
    "inner",
    "outer",
    "full",
    "fullouter",
    "left",
    "leftouter",
    "right",
    "rightouter",
    "leftsemi",
    "leftanti",
)

for how in join_types:
    # The join condition is rebuilt on every pass, exactly as each original
    # statement did.
    joined = empDF.join(deptDF, empDF.emp_dept_id == deptDF.dept_id, how)
    joined.show()

# Self-join example (employee -> superior). It was left disabled in the
# original notebook; the reason is not recorded here.
# empDF.alias("emp1").join(empDF.alias("emp2"), \
#     col("emp1.superior_emp_id") == col("emp2.emp_id"),"inner") \
#     .select(col("emp1.emp_id"),col("emp1.name"), \
#       col("emp2.emp_id").alias("superior_emp_id"), \
#       col("emp2.name").alias("superior_emp_name")) \
#    .show()



-----------------------------------------------------------------------------------------------------------
|emp_id  |name      |superior_emp_id  |year_joined  |emp_dept_id  |gender  |salary  |dept_name  |dept_id  |
-----------------------------------------------------------------------------------------------------------
|1       |Smith     |-1               |2018         |10           |M       |3000    |Finance    |10       |
|2       |Rose      |1                |2010         |20           |M       |4000    |Marketing  |20       |
|3       |Williams  |1                |2010         |10           |M       |1000    |Finance    |10       |
|4       |Jones     |2                |2005         |10           |F       |2000    |Finance    |10       |
|5       |Brown     |2                |2010         |40           |        |-1      |IT         |40       |
-----------------------------------------------------------------------------------------------------------

---------------------------

In [7]:
# Register both DataFrames as temporary views so the SQL below can refer to
# them by the names EMP and DEPT.
empDF.createOrReplaceTempView("EMP")
deptDF.createOrReplaceTempView("DEPT")

# Keep the query result itself in joinDF / joinDF2 and display it in a
# separate step. Previously .show() was chained onto the assignment, so the
# variables received show()'s return value (it prints and returns nothing)
# instead of the joined DataFrame.

# Implicit join: comma-separated tables filtered in the WHERE clause.
joinDF = session.sql("select * from EMP e, DEPT d where e.emp_dept_id == d.dept_id")
joinDF.show()

# The same join written with explicit INNER JOIN ... ON syntax.
joinDF2 = session.sql("select * from EMP e INNER JOIN DEPT d ON e.emp_dept_id == d.dept_id")
joinDF2.show()

-----------------------------------------------------------------------------------------------------------
|emp_id  |name      |superior_emp_id  |year_joined  |emp_dept_id  |gender  |salary  |dept_name  |dept_id  |
-----------------------------------------------------------------------------------------------------------
|1       |Smith     |-1               |2018         |10           |M       |3000    |Finance    |10       |
|2       |Rose      |1                |2010         |20           |M       |4000    |Marketing  |20       |
|3       |Williams  |1                |2010         |10           |M       |1000    |Finance    |10       |
|4       |Jones     |2                |2005         |10           |F       |2000    |Finance    |10       |
|5       |Brown     |2                |2010         |40           |        |-1      |IT         |40       |
-----------------------------------------------------------------------------------------------------------

---------------------------

In [8]:
# Close the session that was created from the config file earlier in the notebook.
session.close()