In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
import numpy as np

In [2]:
spark = SparkSession.builder.config("spark.sql.warehouse.dir", "temp").appName("SparkSQL").getOrCreate()

In [3]:
data = [("US", 10), ("UK", 20), ("India", 30), ("China", 40), ("Japan", 50)]
rdd = spark.sparkContext.parallelize(data)

In [4]:
df = rdd.toDF()
df.printSchema()
df.show(truncate=False)

root
 |-- _1: string (nullable = true)
 |-- _2: long (nullable = true)

+-----+---+
|_1   |_2 |
+-----+---+
|US   |10 |
|UK   |20 |
|India|30 |
|China|40 |
|Japan|50 |
+-----+---+



In [5]:
cols = ["country", "id"]
df = rdd.toDF(cols)
df.printSchema()
df.show(truncate=False)

root
 |-- country: string (nullable = true)
 |-- id: long (nullable = true)

+-------+---+
|country|id |
+-------+---+
|US     |10 |
|UK     |20 |
|India  |30 |
|China  |40 |
|Japan  |50 |
+-------+---+



In [6]:
cols = ["country", "id"]
df = spark.createDataFrame(rdd, schema=cols)
df.printSchema()
df.show(truncate=False)

root
 |-- country: string (nullable = true)
 |-- id: long (nullable = true)

+-------+---+
|country|id |
+-------+---+
|US     |10 |
|UK     |20 |
|India  |30 |
|China  |40 |
|Japan  |50 |
+-------+---+



In [7]:
from pyspark.sql.types import StructType, StructField, StringType


data = [("US", 10), ("UK", 20), ("India", 30), ("China", 40), ("Japan", None)]
rdd = spark.sparkContext.parallelize(data)

colSchema = StructType([
    StructField('country', StringType(), True),
    StructField('id', StringType(), False)
])

df = spark.createDataFrame(rdd, schema=colSchema)

df.printSchema()
try:
    df.show(truncate=False)
except Exception as e:
    print(f"ValueError: field id: This field is not nullable, but got None")

root
 |-- country: string (nullable = true)
 |-- id: string (nullable = false)

ValueError: field id: This field is not nullable, but got None
