In [1]:
# Run findspark.init() before the pyspark imports in the cells below so that the
# pyspark package can be imported in this kernel.
# NOTE(review): presumably relies on SPARK_HOME (or a default install path) — confirm on this machine.
import findspark
findspark.init()

In [2]:
# Windows setup note (commands kept from the original notebook):
#   winutils.exe  chmod 777  C:\spark-temp
#   winutils.exe  chmod 777  C:\spark

from pyspark.conf import SparkConf
from pyspark.sql import SparkSession

# Build the configuration for a session on the standalone cluster master.
config = SparkConf()
config.setMaster("spark://192.168.11.71:7077").setAppName("SparkDatabaseMetaDataServer")
# For a local single-core run, use instead:
#   config.setMaster("local[1]").setAppName("HiveApp")

# Resource sizing; every value is passed as a string for consistency.
config.set("spark.executor.memory", "4g")
config.set("spark.executor.cores", "2")
config.set("spark.cores.max", "2")
config.set("spark.driver.memory", "4g")

# Remote Hive metastore (thrift endpoint) that holds the database/table definitions.
config.set("hive.metastore.uris", "thrift://192.168.93.128:9083")

# Default location for managed databases and tables.
# The Spark "Hive Tables" documentation marks hive.metastore.warehouse.dir as deprecated
# since Spark 2.0.0 and says to use spark.sql.warehouse.dir instead, so the
# HDFS warehouse path is set through the supported key.
config.set("spark.sql.warehouse.dir", "hdfs://192.168.93.128:9000/user/hive/warehouse")

# Optional: directory for Spark temporary files on the driver machine.
#   config.set("spark.local.dir", "c:/spark-temp")

# Create (or reuse) the session with Hive support so spark.sql() talks to the metastore.
spark = (
    SparkSession.builder
    .config(conf=config)
    .enableHiveSupport()
    .getOrCreate()
)

In [3]:
# Smoke-test the metastore connection: list databases, list tables in the
# current database, then read a table (invoices) that is expected to exist already.
for statement in ("show databases", "show tables", "select * from invoices"):
    spark.sql(statement).show()

+------------+
|databaseName|
+------------+
|     default|
|      joindb|
|    ordersdb|
+------------+

+--------+-------------+-----------+
|database|    tableName|isTemporary|
+--------+-------------+-----------+
| default|       brands|      false|
| default|employees_ext|      false|
| default|      example|      false|
| default|   hive_table|      false|
| default|     invoices|      false|
| default|          src|      false|
| default|         test|      false|
+--------+-------------+-----------+

+---+------+
| id|amount|
+---+------+
|  1|  1000|
|  2|  2000|
+---+------+



In [9]:
# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE DATABASE raises an
# "already exists" error once joindb is present in the metastore.
spark.sql("create database if not exists joindb")

DataFrame[]

In [10]:
# List the databases again to confirm that joindb is registered in the metastore.
spark.sql("show databases").show()

+------------+
|databaseName|
+------------+
|     default|
|      joindb|
|    ordersdb|
+------------+



In [11]:
# Sample data for the join examples.

# Each product row is (product_id, product_name, brand_id).
products = [
    (1, 'iPhone', 100),
    (2, 'Galaxy', 200),
    (3, 'Redme', 300),  # orphan record: brand_id 300 has no entry in `brands`
    (4, 'Pixel', 400),
]

# Each brand row is (brand_id, brand_name).
brands = [
    (100, "Apple"),
    (200, "Samsung"),
    (400, "Google"),
    (500, "Sony"),  # no product references this brand
]

# Column names are supplied as the schema; column types are inferred from the tuples.
productDf = spark.createDataFrame(products, ["product_id", "product_name", "brand_id"])
brandDf = spark.createDataFrame(brands, ["brand_id", "brand_name"])

# Display products first, then brands.
for frame in (productDf, brandDf):
    frame.show()

+----------+------------+--------+
|product_id|product_name|brand_id|
+----------+------------+--------+
|         1|      iPhone|     100|
|         2|      Galaxy|     200|
|         3|       Redme|     300|
|         4|       Pixel|     400|
+----------+------------+--------+

+--------+----------+
|brand_id|brand_name|
+--------+----------+
|     100|     Apple|
|     200|   Samsung|
|     400|    Google|
|     500|      Sony|
+--------+----------+



In [12]:
# Persist both DataFrames as tables in the joindb database, registered in the
# Hive metastore. mode="overwrite" replaces any existing table of the same name,
# so this cell can be re-run safely.
brandDf.write.saveAsTable("joindb.brands", mode="overwrite")
productDf.write.saveAsTable("joindb.products", mode="overwrite")


In [13]:
# Read joindb.brands back through SQL to verify the table that saveAsTable wrote.
spark.sql("select * from joindb.brands").show()

+--------+----------+
|brand_id|brand_name|
+--------+----------+
|     100|     Apple|
|     200|   Samsung|
|     400|    Google|
|     500|      Sony|
+--------+----------+



In [14]:
# Read joindb.products back through SQL to verify the table that saveAsTable wrote.
spark.sql("select * from joindb.products").show()

+----------+------------+--------+
|product_id|product_name|brand_id|
+----------+------------+--------+
|         1|      iPhone|     100|
|         2|      Galaxy|     200|
|         3|       Redme|     300|
|         4|       Pixel|     400|
+----------+------------+--------+



In [15]:
# Inner join on brand_id: only products whose brand_id exists in joindb.brands are returned,
# so the product 'Redme' (brand_id 300) and the brand 'Sony' (brand_id 500) are absent from the result.
# NOTE(review): `select *` returns brand_id twice (once per table); list the columns
# explicitly if the duplicate name becomes a problem downstream.
spark.sql("select * from joindb.products p INNER JOIN joindb.brands b ON p.brand_id=b.brand_id").show()

+----------+------------+--------+--------+----------+
|product_id|product_name|brand_id|brand_id|brand_name|
+----------+------------+--------+--------+----------+
|         1|      iPhone|     100|     100|     Apple|
|         2|      Galaxy|     200|     200|   Samsung|
|         4|       Pixel|     400|     400|    Google|
+----------+------------+--------+--------+----------+

