## System Environment Variables

In [1]:
import os
import sys
# Make the Spark driver and the worker processes use the same interpreter
# that is running this kernel.
os.environ.update(
    PYSPARK_PYTHON=sys.executable,
    PYSPARK_DRIVER_PYTHON=sys.executable,
)

## SparkSession

In [2]:
from pyspark.sql import SparkSession

# Build the SparkSession (or reuse one that already exists), putting the
# MySQL Connector/J jar on "spark.jars" for the JDBC reads in this notebook.
# NOTE: the jar path is relative - presumably resolved against the directory
# the kernel was started from; confirm the jar is located there.
spark = (
    SparkSession.builder
    .appName('Read JDBC')
    .config("spark.jars", "mysql-connector-java-8.0.13.jar")
    .getOrCreate()
)

## Read table using jdbc()

In [3]:
# Connection settings for the local MySQL server.
jdbc_url = "jdbc:mysql://127.0.0.1:3306"
jdbc_database = "employee"
jdbc_table = "employee"

# Credentials are never stored in the notebook: they are read from the
# MYSQL_USER / MYSQL_PASSWORD environment variables, and the reader is
# prompted for any value that is missing (the password prompt does not echo).
from getpass import getpass

jdbc_credentials = {
    "user": os.environ.get("MYSQL_USER") or input("MySQL user: "),
    "password": os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password: "),
    "driver": "com.mysql.cj.jdbc.Driver",
}

# DataFrameReader.jdbc(url, table, properties=...) loads the whole table;
# the credentials and driver class travel in the `properties` dict.
df = spark.read.jdbc(
    url=jdbc_url + "/" + jdbc_database,
    table=jdbc_table,
    properties=jdbc_credentials,
)
df.show()

+----------+---------+--------+----------+--------------------+--------+
|EmployeeID|FirstName|LastName|Department|            Position|  Salary|
+----------+---------+--------+----------+--------------------+--------+
|         1|     John|     Doe|        IT|   Software Engineer|75000.00|
|         2|     Jane|   Smith|        HR|          HR Manager|80000.00|
|         3|  Michael| Johnson|   Finance|          Accountant|65000.00|
|         4|    Emily|Williams| Marketing|Marketing Specialist|70000.00|
|         5|   Daniel|   Brown|        IT|System Administrator|70000.00|
|         6|   Olivia|   Jones|   Finance|   Financial Analyst|72000.00|
|         7|     Liam|  Miller| Marketing|Marketing Coordin...|60000.00|
|         8|   Sophia|   Davis|        HR|        HR Assistant|55000.00|
|         9|    Aiden|Anderson|        IT|Database Administ...|78000.00|
|        10|      Mia|Martinez|   Finance|   Senior Accountant|85000.00|
|        11|    Ethan|  Taylor| Marketing|     Prod

## Read from MySQL with JDBC Options

In [4]:
# Same table, this time through the generic format("jdbc") reader.
# The user, password and driver options are taken from the jdbc_credentials
# dict defined in the jdbc() cell earlier in this notebook, so the secrets
# are not repeated here.
df = (
    spark.read
    .format("jdbc")
    .option("url", "jdbc:mysql://localhost:3306/employee")
    .options(**jdbc_credentials)
    .option("dbtable", "employee")
    .load()
)
df.printSchema()

root
 |-- EmployeeID: integer (nullable = true)
 |-- FirstName: string (nullable = true)
 |-- LastName: string (nullable = true)
 |-- Department: string (nullable = true)
 |-- Position: string (nullable = true)
 |-- Salary: decimal(10,2) (nullable = true)



## Read Specific Columns from MySQL

In [5]:
# Push a SQL query down to MySQL with the "query" option so that only the
# selected columns and matching rows are returned to Spark.
# The user, password and driver options are taken from the jdbc_credentials
# dict defined in the jdbc() cell earlier in this notebook, so the secrets
# are not repeated here.
df = (
    spark.read
    .format("jdbc")
    .option("url", "jdbc:mysql://localhost:3306/employee")
    .options(**jdbc_credentials)
    .option("query", "select FirstName,Position,Salary from employee where Salary=72000.00")
    .load()
)

df.show()

+---------+--------------------+--------+
|FirstName|            Position|  Salary|
+---------+--------------------+--------+
|   Olivia|   Financial Analyst|72000.00|
|    Caden|    Network Engineer|72000.00|
|    Logan|Public Relations ...|72000.00|
+---------+--------------------+--------+

