## Part 1: Date Functions in PySpark

In [7]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_date, current_timestamp, date_add, date_sub, datediff, months_between

# Start (or reuse) the Spark session used by this notebook
spark = (
    SparkSession.builder
    .appName("PySparkDateFunctions")
    .getOrCreate()
)

# Sample rows: an integer ID paired with a date written as a string
data = [(1, "2024-01-01"), (2, "2023-06-15"), (3, "2022-12-31")]
columns = ["ID", "Date"]

# Build the DataFrame and cast the string column to a date type in one chain
df = (
    spark.createDataFrame(data, columns)
    .withColumn("Date", col("Date").cast("date"))
)

# Display the resulting rows
df.show()

StatementMeta(, 077fb726-4c8c-4bb6-aab1-85770bcd1b31, 9, Finished, Available, Finished)

+---+----------+
| ID|      Date|
+---+----------+
|  1|2024-01-01|
|  2|2023-06-15|
|  3|2022-12-31|
+---+----------+



### Applying Common Date Functions

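- **`current_date()`**: Returns the current date at the start of query evaluation, as a date column.
- **`current_timestamp()`**: Returns the current timestamp at the start of query evaluation, as a timestamp column. Within a single query, every call (and therefore every row) gets the same value.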


In [8]:
# Name the two "now" expressions first, then project and display them.
# The projection yields one output row per row of df.
today_col = current_date().alias("Current_Date")
now_col = current_timestamp().alias("Current_Timestamp")
df.select(today_col, now_col).show()

StatementMeta(, 077fb726-4c8c-4bb6-aab1-85770bcd1b31, 10, Finished, Available, Finished)

+------------+--------------------+
|Current_Date|   Current_Timestamp|
+------------+--------------------+
|  2025-02-21|2025-02-21 15:08:...|
|  2025-02-21|2025-02-21 15:08:...|
|  2025-02-21|2025-02-21 15:08:...|
+------------+--------------------+



### Date Arithmetic Functions
- **`date_add(start_date, days)`**: Adds a specified number of days to the date.
- **`date_sub(start_date, days)`**: Subtracts a specified number of days from the date.
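- **`datediff(end_date, start_date)`**: Returns the number of days from `start_date` to `end_date` (negative if `end_date` is earlier).
- **`months_between(end_date, start_date)`**: Returns the number of months between two dates as a fractional value (positive if `end_date` is later). The result is a whole number when both dates fall on the same day of the month or are both month-ends; otherwise it is computed assuming 31-day months and rounded to 8 digits by default.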


In [9]:
# Shift each date forward by 10 days and backward by 5 days,
# keeping the original column alongside for comparison.
date_col = col("Date")
plus_ten_days = date_add(date_col, 10).alias("Date_Add_10_Days")
minus_five_days = date_sub(date_col, 5).alias("Date_Sub_5_Days")

df_date = df.select(date_col, plus_ten_days, minus_five_days)
df_date.show()

StatementMeta(, 077fb726-4c8c-4bb6-aab1-85770bcd1b31, 11, Finished, Available, Finished)

+----------+----------------+---------------+
|      Date|Date_Add_10_Days|Date_Sub_5_Days|
+----------+----------------+---------------+
|2024-01-01|      2024-01-11|     2023-12-27|
|2023-06-15|      2023-06-25|     2023-06-10|
|2022-12-31|      2023-01-10|     2022-12-26|
+----------+----------------+---------------+

