In [0]:
%python

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType
from pyspark.sql.functions import when, col

# Obtain (or reuse) the Spark session used by this notebook.
spark = SparkSession.builder.appName("CSV with Schema").getOrCreate()

# Explicit schema for the actors CSV, built column by column.
# Every column is declared nullable.
schema = (
    StructType()
    .add("imdb_title_id", StringType(), True)   # IMDb title ID
    .add("ordering", IntegerType(), True)       # Actor's order within the film
    .add("imdb_name_id", StringType(), True)    # IMDb ID of the actor
    .add("category", StringType(), True)        # Category (e.g. actor, actress)
    .add("job", StringType(), True)             # Job (e.g. actor, director)
    .add("characters", StringType(), True)      # Character played by the actor
)

# Load the actors file with the schema above; the first line is a header row.
file_path = "dbfs:/FileStore/tables/Files/actors.csv"
actors_df = (
    spark.read
    .schema(schema)
    .option("header", True)
    .csv(file_path)
)
display(actors_df)

# Switch to the CSV with bad records; it is read with the same schema so the
# three CSV parser modes can be compared side by side.
file_path = "dbfs:/FileStore/tables/Files/bad_data.csv"

# Baseline read with no explicit mode.
bad_df = spark.read.schema(schema).option("header", True).csv(file_path)

# PERMISSIVE mode (the default).
print("Tryb PERMISSIVE:")
df_permissive = (
    spark.read
    .schema(schema)
    .options(header=True, mode="PERMISSIVE")
    .csv(file_path)
)
df_permissive.show()

# DROPMALFORMED mode.
print("Tryb DROPMALFORMED:")
df_dropmalformed = (
    spark.read
    .schema(schema)
    .options(header=True, mode="DROPMALFORMED")
    .csv(file_path)
)
df_dropmalformed.show()

# FAILFAST mode. Both the read and show() stay inside the try so that any
# failure from either step is caught and reported instead of stopping the cell.
print("Tryb FAILFAST:")
try:
    df_failfast = (
        spark.read
        .schema(schema)
        .options(header=True, mode="FAILFAST")
        .csv(file_path)
    )
    df_failfast.show()
except Exception as err:
    print(f"Błąd w trybie FAILFAST: {err}")

imdb_title_id,ordering,imdb_name_id,category,job,characters
tt0000009,1,nm0063086,actress,,[Miss Geraldine Holbrook (Miss Jerry)]
tt0000009,2,nm0183823,actor,,[Mr. Hamilton]
tt0000009,3,nm1309758,actor,,[Chauncey Depew - the Director of the New York Central Railroad]
tt0000009,4,nm0085156,director,,
tt0000574,1,nm0846887,actress,,[Kate Kelly]
tt0000574,2,nm0846894,actor,,[School Master]
tt0000574,3,nm3002376,actor,,[Steve Hart]
tt0000574,4,nm0170118,actress,,
tt0000574,5,nm0846879,director,,
tt0000574,6,nm0317210,producer,producer,


Tryb PERMISSIVE:
+-------------+--------+--------------------+--------+----+----------+
|imdb_title_id|ordering|        imdb_name_id|category| job|characters|
+-------------+--------+--------------------+--------+----+----------+
|            1|    null|                  25|    null|null|      null|
|            2|    null|thirty  # Niepraw...|    null|null|      null|
|            3|    null|30        # Braku...|    null|null|      null|
|            4|    null|                  40|    null|null|      null|
|            5|    null|       # Brakując...|    null|null|      null|
+-------------+--------+--------------------+--------+----+----------+

Tryb DROPMALFORMED:
+-------------+--------+------------+--------+---+----------+
|imdb_title_id|ordering|imdb_name_id|category|job|characters|
+-------------+--------+------------+--------+---+----------+
+-------------+--------+------------+--------+---+----------+

Tryb FAILFAST:
Błąd w trybie FAILFAST: An error occurred while calling o53