In [0]:
# Import PySpark and its SQL entry points, then define the application name and master URL
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import Row
# Application name handed to the SparkSession builder in the next cell
appName = "spark hive example"
# Master URL handed to the SparkSession builder in the next cell
master = "local"
     

In [0]:
# Build a SparkSession with Hive support enabled (getOrCreate reuses an
# existing session if one is already active) so spark.sql() can work with
# Hive databases and tables.
spark = (
    SparkSession.builder
    .master(master)
    .appName(appName)
    .enableHiveSupport()
    .getOrCreate()
)

# Create the working database if it is missing, print its metadata, and
# switch to it. The CREATE and USE statements return an empty DataFrame,
# so their .show() calls print an empty grid.
spark.sql("CREATE DATABASE IF NOT EXISTS spark_hive").show()
spark.sql("DESCRIBE DATABASE spark_hive").show(truncate=False)
spark.sql("USE spark_hive").show()

++
||
++
++

+-------------------------+---------------------------------------+
|database_description_item|database_description_value             |
+-------------------------+---------------------------------------+
|Catalog Name             |spark_catalog                          |
|Namespace Name           |spark_hive                             |
|Comment                  |                                       |
|Location                 |dbfs:/user/hive/warehouse/spark_hive.db|
|Owner                    |root                                   |
+-------------------------+---------------------------------------+

++
||
++
++



In [0]:
# List the databases known to the session to confirm spark_hive was created
df = spark.sql("show databases")
df.show()
     

+------------+
|databaseName|
+------------+
|     default|
|  spark_hive|
+------------+



In [0]:
# Location of the employees CSV file on DBFS
csv_file_path = "dbfs:/FileStore/tables/employees.csv"

# Read the CSV, taking column names from the first line.
# inferSchema=True asks Spark to derive column types from the data; without
# it every column is loaded as a string, and later numeric filters such as
# "EmployeeID > 275" would depend on an implicit string-to-number cast.
datafile = spark.read.csv(csv_file_path, header=True, inferSchema=True)

# Preview the first 5 rows to verify the file was parsed as expected
datafile.show(5)



+----------+---------+---------+--------+--------------+--------------------+-----------------+-------------+------+---------+-------+-------------+
|EmployeeID|ManagerID|FirstName|LastName|      FullName|            JobTitle|OrganizationLevel|MaritalStatus|Gender|Territory|Country|        Group|
+----------+---------+---------+--------+--------------+--------------------+-----------------+-------------+------+---------+-------+-------------+
|       274|     NULL|  Stephen|   Jiang| Stephen Jiang|North American Sa...|                2|            M|     M|     NULL|   NULL|         NULL|
|       275|      274|  Michael|  Blythe|Michael Blythe|Sales Representative|                3|            S|     M|Northeast|     US|North America|
|       276|      274|    Linda|Mitchell|Linda Mitchell|Sales Representative|                3|            M|     F|Southwest|     US|North America|
|       277|      274|  Jillian|  Carson|Jillian Carson|Sales Representative|                3|           

In [0]:
# Save the DataFrame as the Hive table "employees_table".
# mode("overwrite") replaces a table left behind by an earlier run; the
# default save mode raises an error when the table already exists, which
# made this cell fail on every re-run of the notebook.
datafile.write.mode("overwrite").saveAsTable("employees_table")

In [0]:
# Query the Hive table with Spark SQL. The notebook writes the data to
# "employees_table", so that is the name to read back; "employees" is never
# created by this notebook.
result = spark.sql("SELECT * FROM employees_table")

# Show the first 5 rows of the query result
result.show(5)


+----------+---------+---------+--------+--------------+--------------------+-----------------+-------------+------+---------+-------+-------------+
|EmployeeID|ManagerID|FirstName|LastName|      FullName|            JobTitle|OrganizationLevel|MaritalStatus|Gender|Territory|Country|        Group|
+----------+---------+---------+--------+--------------+--------------------+-----------------+-------------+------+---------+-------+-------------+
|       274|     NULL|  Stephen|   Jiang| Stephen Jiang|North American Sa...|                2|            M|     M|     NULL|   NULL|         NULL|
|       275|      274|  Michael|  Blythe|Michael Blythe|Sales Representative|                3|            S|     M|Northeast|     US|North America|
|       276|      274|    Linda|Mitchell|Linda Mitchell|Sales Representative|                3|            M|     F|Southwest|     US|North America|
|       277|      274|  Jillian|  Carson|Jillian Carson|Sales Representative|                3|           

In [0]:
# Filter the Hive table with Spark SQL, keeping rows whose EmployeeID exceeds 275
result = spark.sql(
    "SELECT * FROM employees_table WHERE EmployeeID > 275"
)
# Print at most 5 of the matching rows
result.show(n=5)


+----------+---------+---------+------------+-------------------+--------------------+-----------------+-------------+------+---------+-------+-------------+
|EmployeeID|ManagerID|FirstName|    LastName|           FullName|            JobTitle|OrganizationLevel|MaritalStatus|Gender|Territory|Country|        Group|
+----------+---------+---------+------------+-------------------+--------------------+-----------------+-------------+------+---------+-------+-------------+
|       276|      274|    Linda|    Mitchell|     Linda Mitchell|Sales Representative|                3|            M|     F|Southwest|     US|North America|
|       277|      274|  Jillian|      Carson|     Jillian Carson|Sales Representative|                3|            S|     F|  Central|     US|North America|
|       278|      274|  Garrett|      Vargas|     Garrett Vargas|Sales Representative|                3|            M|     M|   Canada|     CA|North America|
|       279|      274|     Tsvi|      Reiter|       