Użyj każdej z tych funkcji:
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
from pyspark.sql.functions import current_date, current_timestamp

# Column names for the sample rows below.
kolumny = ["timestamp", "unix", "Date"]

# Three sample rows: a date-time string, a large integer, and a "MMM, yyyy" string.
dane = [
    ("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
    ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
    ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021"),
]

# Build the base DataFrame, then append the current date and the current
# timestamp as two extra columns.
dataFrame = (
    spark.createDataFrame(dane, kolumny)
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
)

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:27:11.109+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:27:11.109+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:27:11.109+0000


In [0]:

# Print the column names, types and nullability of the base DataFrame.
dataFrame.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## unix_timestamp(..) & cast(..)

Konwersja **string** na **timestamp**.

Lokalizacja funkcji:
* `pyspark.sql.functions` w przypadku Pythona
* `org.apache.spark.sql.functions` w przypadku Scali i Javy

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDateFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
from pyspark.sql.functions import unix_timestamp

# Parse the "timestamp" string column with an explicit pattern (the 'T' is a
# quoted literal) and store the resulting Unix time in a new column.
zmianaFormatu = dataFrame.withColumn(
    "unix_timestamp",
    unix_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)

# Show the schema and the rows to inspect the new column.
zmianaFormatu.printSchema()
zmianaFormatu.show()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- unix_timestamp: long (nullable = true)

+-------------------+-------------+---------+------------+--------------------+--------------+
|          timestamp|         unix|     Date|current_date|   current_timestamp|unix_timestamp|
+-------------------+-------------+---------+------------+--------------------+--------------+
|2015-03-22T14:13:34|1646641525847|May, 2021|  2025-03-22|2025-03-22 09:36:...|    1427033614|
|2015-03-22T15:03:18|1646641557555|Mar, 2021|  2025-03-22|2025-03-22 09:36:...|    1427036598|
|2015-03-22T14:38:39|1646641578622|Jan, 2021|  2025-03-22|2025-03-22 09:36:...|    1427035119|
+-------------------+-------------+---------+------------+--------------------+--------------+



2. Zmień format zgodnie z klasą `SimpleDateFormat`: **yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane, żeby sprawdzić, czy wartości się zmieniły

In [0]:
from pyspark.sql.functions import unix_timestamp

# NOTE(review): the exercise text asks for the pattern yyyy-MM-dd HH:mm:ss, but
# this cell repeats the previous conversion unchanged -- confirm whether a
# reformatted column was intended here.
zmianaFormatu = dataFrame.withColumn(
    "unix_timestamp",
    unix_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)

# Show the schema and the rows to check whether the values changed.
zmianaFormatu.printSchema()
zmianaFormatu.show()


root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- unix_timestamp: long (nullable = true)

+-------------------+-------------+---------+------------+--------------------+--------------+
|          timestamp|         unix|     Date|current_date|   current_timestamp|unix_timestamp|
+-------------------+-------------+---------+------------+--------------------+--------------+
|2015-03-22T14:13:34|1646641525847|May, 2021|  2025-03-22|2025-03-22 09:37:...|    1427033614|
|2015-03-22T15:03:18|1646641557555|Mar, 2021|  2025-03-22|2025-03-22 09:37:...|    1427036598|
|2015-03-22T14:38:39|1646641578622|Jan, 2021|  2025-03-22|2025-03-22 09:37:...|    1427035119|
+-------------------+-------------+---------+------------+--------------------+--------------+



In [0]:
# Recompute "unix_timestamp" from the "timestamp" string. The column already
# exists on zmianaFormatu, so withColumn replaces it instead of adding another.
tempE = zmianaFormatu.withColumn(
    "unix_timestamp",
    unix_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)
display(tempE)


timestamp,unix,Date,current_date,current_timestamp,unix_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:38:21.613+0000,1427033614
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:38:21.613+0000,1427036598
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:38:21.613+0000,1427035119


## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:
from pyspark.sql.functions import date_format

# date_format(): derive the year, month and day-of-year columns by formatting
# the "timestamp" column with a single-field pattern each time.
zmianaFormatu = (
    zmianaFormatu
    .withColumn("year", date_format("timestamp", format="yyyy"))
    .withColumn("month", date_format("timestamp", format="MM"))
    .withColumn("dayofyear", date_format("timestamp", format="D"))
)
display(zmianaFormatu)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:39:46.290+0000,1427033614,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:39:46.290+0000,1427036598,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:39:46.290+0000,1427035119,2015,3,81


In [0]:
from pyspark.sql.functions import to_date

# to_date(): parse the "Date" strings (e.g. "May, 2021") with the pattern
# "MMM, yyyy" and keep the result in a new "date_column".
toDate = zmianaFormatu.withColumn(
    "date_column",
    to_date("Date", format="MMM, yyyy"),
)
display(toDate)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,year,month,dayofyear,date_column
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:43:45.212+0000,1427033614,2015,3,81,2021-05-01
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:43:45.212+0000,1427036598,2015,3,81,2021-03-01
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:43:45.212+0000,1427035119,2015,3,81,2021-01-01


In [0]:
from pyspark.sql.functions import from_unixtime

# from_unixtime(): turn the numeric "unix_timestamp" column back into a
# date-time string, using the function's default output format.
fromUnix = zmianaFormatu.withColumn(
    "from_unixtime",
    from_unixtime("unix_timestamp"),
)
display(fromUnix)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,year,month,dayofyear,from_unixtime
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:44:33.327+0000,1427033614,2015,3,81,2015-03-22 14:13:34
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:44:33.327+0000,1427036598,2015,3,81,2015-03-22 15:03:18
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:44:33.327+0000,1427035119,2015,3,81,2015-03-22 14:38:39


In [0]:
from pyspark.sql.functions import to_timestamp

# to_timestamp(): parse the "timestamp" string column with an explicit pattern
# (the 'T' is a quoted literal) into a new "to_timestamp" column.
toTimestamp = zmianaFormatu.withColumn(
    "to_timestamp",
    to_timestamp("timestamp", format="yyyy-MM-dd'T'HH:mm:ss"),
)
display(toTimestamp)


timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,year,month,dayofyear,to_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:44:34.090+0000,1427033614,2015,3,81,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:44:34.090+0000,1427036598,2015,3,81,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:44:34.090+0000,1427035119,2015,3,81,2015-03-22T14:38:39.000+0000


In [0]:
from pyspark.sql.functions import to_utc_timestamp

# to_utc_timestamp(): apply the function to the "timestamp" column with the
# zone "UTC" and keep the result in a new "to_utc_timestamp" column.
toUtcTimestamp = zmianaFormatu.withColumn(
    "to_utc_timestamp",
    to_utc_timestamp("timestamp", tz="UTC"),
)
display(toUtcTimestamp)



timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,year,month,dayofyear,to_utc_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:44:34.936+0000,1427033614,2015,3,81,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:44:34.936+0000,1427036598,2015,3,81,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:44:34.936+0000,1427035119,2015,3,81,2015-03-22T14:38:39.000+0000


In [0]:
from pyspark.sql.functions import from_utc_timestamp

# from_utc_timestamp(): apply the function to the "timestamp" column with the
# zone "UTC" and keep the result in a new "from_utc_timestamp" column.
fromUtcTimestamp = zmianaFormatu.withColumn(
    "from_utc_timestamp",
    from_utc_timestamp("timestamp", tz="UTC"),
)
display(fromUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,year,month,dayofyear,from_utc_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-22,2025-03-22T09:44:49.058+0000,1427033614,2015,3,81,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-22,2025-03-22T09:44:49.058+0000,1427036598,2015,3,81,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-22,2025-03-22T09:44:49.058+0000,1427035119,2015,3,81,2015-03-22T14:38:39.000+0000
