In [1]:

import os

from pyspark.sql import SparkSession

# Build the single SparkSession shared by every cell in this notebook.
#
# The session is created exactly once: a second `builder...getOrCreate()`
# call in the same kernel just returns the session that is already running,
# so any app name or jar options passed to it cannot reconfigure that
# session and only mislead the reader.
spark = (
    SparkSession
    .builder
    .appName("Writing to Multiple Sinks")
    .config("spark.streaming.stopGracefullyOnShutdown", True)
    # Kafka connector resolved at start-up; not used by the cells visible
    # here -- TODO confirm whether it is still needed.
    .config("spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0")
    # PostgreSQL JDBC driver jar used by the JDBC read/write cells below.
    .config("spark.jars", "/opt/spark/jars/postgresql-42.5.6.jar")
    .config("spark.sql.shuffle.partitions", 8)
    .master("local[*]")
    .getOrCreate()
)

# Last expression of the cell: display the session summary.
spark

In [2]:
# Look up the jar list the running SparkContext was configured with.
configured_jars = spark.sparkContext.getConf().get("spark.jars")
print(configured_jars)


/opt/spark/jars/postgresql-42.5.6.jar


In [35]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Schema of one device event: every column is declared nullable.
schema = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in [
            ("eventId", StringType()),
            ("eventOffset", IntegerType()),
            ("eventPublisher", StringType()),
            ("customerId", StringType()),
            ("eventTime", StringType()),
            ("deviceId", StringType()),
            ("temperature", IntegerType()),
            ("measure", StringType()),
            ("status", StringType()),
        ]
    ]
)

# A single sample event; values are listed in schema column order.
data = [
    (
        "e3cb26d3-41b2-49a2-84f3-0156ed8d7502",
        10001,
        "device",
        "CI00103",
        "2023-01-05 11:13:53.643364",
        "D001",
        15,
        "C",
        "SUCCESS",
    ),
]

# Turn the sample rows into a Spark DataFrame with the explicit schema.
df = spark.createDataFrame(data=data, schema=schema)

In [36]:
# Preview the rows currently held in `df`.
df.show()

+--------------------+-----------+--------------+----------+--------------------+--------+-----------+-------+-------+
|             eventId|eventOffset|eventPublisher|customerId|           eventTime|deviceId|temperature|measure| status|
+--------------------+-----------+--------------+----------+--------------------+--------+-----------+-------+-------+
|e3cb26d3-41b2-49a...|      10001|        device|   CI00103|2023-01-05 11:13:...|    D001|         15|      C|SUCCESS|
+--------------------+-----------+--------------+----------+--------------------+--------+-----------+-------+-------+



In [37]:

# Database credentials are taken from the environment so real secrets never
# have to be committed with the notebook; the fallbacks are the values that
# were previously hard-coded here.
pg_user = os.environ.get("POSTGRES_USER", "postgres")
pg_password = os.environ.get("POSTGRES_PASSWORD", "postgres")

# Append the rows of `df` to the `public.events` table over JDBC.
# NOTE: "append" mode is not idempotent -- every re-run of this cell inserts
# the same rows again.
try:
    (
        df.write
        .mode("append")
        .format("jdbc")
        .option("driver", "org.postgresql.Driver")
        .option("url", "jdbc:postgresql://postgres:5432/db")
        .option("dbtable", "public.events")
        .option("user", pg_user)
        .option("password", pg_password)
        .save()
    )
    print("Data successfully written to PostgreSQL!")
except Exception as e:
    # Report the failure in the cell output instead of aborting the notebook.
    print(f"Write operation failed: {e}")


Data successfully written to PostgreSQL!


In [38]:

# JDBC connection settings for the PostgreSQL instance.
# Credentials are read from the environment so real secrets never have to be
# committed with the notebook; the fallbacks are the values that were
# previously hard-coded here.
url = "jdbc:postgresql://postgres:5432/db"
properties = {
    "user": os.environ.get("POSTGRES_USER", "postgres"),
    "password": os.environ.get("POSTGRES_PASSWORD", "postgres"),
    "driver": "org.postgresql.Driver",
}

# Read the `events` table back to verify the connection and the stored rows.
# NOTE: on success this rebinds `df` to the table contents, replacing the
# DataFrame that was built in memory earlier in the notebook.
try:
    df = spark.read.jdbc(url=url, table="events", properties=properties)
    df.show()
    print("Connection successful!")
except Exception as e:
    # Report the failure in the cell output instead of aborting the notebook.
    print(f"Connection failed: {e}")

+--------------------+-----------+--------------+----------+--------------------+--------+--------+-------+-----------+
|             eventid|eventoffset|eventpublisher|customerid|           eventtime|deviceid| measure| status|temperature|
+--------------------+-----------+--------------+----------+--------------------+--------+--------+-------+-----------+
|        test_event_2|       1002|    publisher2| customer2|          2025-01-02| device2|measure2|  ERROR|         24|
|        test_event_1|       1001|    publisher1| customer1|          2025-01-01| device1|measure1|     OK|         22|
|        test_event_2|       1002|    publisher2| customer2|          2025-01-02| device2|measure2|  ERROR|         24|
|        test_event_1|       1001|    publisher1| customer1|          2025-01-01| device1|measure1|     OK|         22|
|e3cb26d3-41b2-49a...|      10001|        device|   CI00103|2023-01-05 11:13:...|    D001|       C|SUCCESS|         15|
+--------------------+-----------+------

In [39]:
# Print the column names and types of `df` as a tree.
# In the recorded run the column names are lower-case and the two integer
# columns are reported as `long`.
df.printSchema()

root
 |-- eventid: string (nullable = true)
 |-- eventoffset: long (nullable = true)
 |-- eventpublisher: string (nullable = true)
 |-- customerid: string (nullable = true)
 |-- eventtime: string (nullable = true)
 |-- deviceid: string (nullable = true)
 |-- measure: string (nullable = true)
 |-- status: string (nullable = true)
 |-- temperature: long (nullable = true)

