Użyj każdą z tych funkcji 
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
%python
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, current_timestamp

spark = SparkSession.builder.appName("Example").getOrCreate()

kolumny = ["timestamp", "unix", "Date"]
dane = [
    ("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
    ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
    ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021")
]

dataFrame = spark.createDataFrame(dane, kolumny) \
    .withColumn("current_date", current_date()) \
    .withColumn("current_timestamp", current_timestamp())

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:32:51.727+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:32:51.727+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:32:51.727+0000


In [0]:
%python
dataFrame.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## unix_timestamp(..) & cast(..)

Konwersja **string** to a **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDataFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
%python
import pyspark.sql.functions
from pyspark.sql.functions import col, to_date, from_unixtime, to_timestamp, to_utc_timestamp, from_utc_timestamp, unix_timestamp,year,month,dayofyear, date_format

2. Zmień format zgodnie z klasą `SimpleDateFormat`**yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane żeby sprawdzicz czy wartości się zmieniły

In [0]:
%python

zmianaFormatu = dataFrame.withColumn("timestamp", to_timestamp(col("timestamp")))
display(zmianaFormatu)
zmianaFormatu.printSchema()

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:32:53.094+0000
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:32:53.094+0000
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:32:53.094+0000


root
 |-- timestamp: timestamp (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



In [0]:
%python
#unix_timestamp
tempE = zmianaFormatu.withColumn("timestamp", unix_timestamp(col("timestamp"), "yyyy-MM-dd HH:mm:ss"))
display(tempE)

timestamp,unix,Date,current_date,current_timestamp
1427033614,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:32:54.104+0000
1427036598,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:32:54.104+0000
1427035119,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:32:54.104+0000


## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:
%python
#date_format
yearDate = dataFrame \
    .withColumn("year", date_format(col("timestamp"), "yyyy")) \
    .withColumn("month", date_format(col("timestamp"), "MM")) \
    .withColumn("dayofyear", date_format(col("timestamp"), "DD"))

display(yearDate)

timestamp,unix,Date,current_date,current_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:32:55.068+0000,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:32:55.068+0000,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:32:55.068+0000,2015,3,81


In [0]:
%python
#to_date()
yearDate = dataFrame \
    .withColumn("date", to_date(col("timestamp"))) \
    .withColumn("year", year(col("date"))) \
    .withColumn("month", month(col("date"))) \
    .withColumn("dayofyear", dayofyear(col("date")))

display(yearDate)

timestamp,unix,date,current_date,current_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,2015-03-22,2025-03-18,2025-03-18T15:32:56.084+0000,2015,3,81
2015-03-22T15:03:18,1646641557555,2015-03-22,2025-03-18,2025-03-18T15:32:56.084+0000,2015,3,81
2015-03-22T14:38:39,1646641578622,2015-03-22,2025-03-18,2025-03-18T15:32:56.084+0000,2015,3,81


In [0]:
%python
#from_unixtime()
fromUnix = dataFrame \
    .withColumn("date", from_unixtime(col("unix"),"yyyy-MM-dd HH:mm:ss.SSS")) \
    .withColumn("year", year(col("date"))) \
    .withColumn("month", month(col("date"))) \
    .withColumn("dayofyear", dayofyear(col("date")))
display(fromUnix)

timestamp,unix,date,current_date,current_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,+54149-12-28 23:50:47.000,2025-03-18,2025-03-18T15:32:57.249+0000,54149,12,362
2015-03-22T15:03:18,1646641557555,+54149-12-29 08:39:15.000,2025-03-18,2025-03-18T15:32:57.249+0000,54149,12,363
2015-03-22T14:38:39,1646641578622,+54149-12-29 14:30:22.000,2025-03-18,2025-03-18T15:32:57.249+0000,54149,12,363


In [0]:
%python
#to_timestamp()
toTimestamp = dataFrame.withColumn("timestamp", to_timestamp(col("timestamp"))) \
    .withColumn("year", year(col("timestamp"))) \
    .withColumn("month", month(col("timestamp"))) \
    .withColumn("dayofyear", dayofyear(col("timestamp")))
display(toTimestamp)

timestamp,unix,Date,current_date,current_timestamp,year,month,dayofyear
2015-03-22T14:13:34.000+0000,1646641525847,"May, 2021",2025-03-18,2025-03-18T15:32:58.330+0000,2015,3,81
2015-03-22T15:03:18.000+0000,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T15:32:58.330+0000,2015,3,81
2015-03-22T14:38:39.000+0000,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T15:32:58.330+0000,2015,3,81


In [0]:
%python
#to_utc_timestamp()
toUtcTimestamp = dataFrame \
    .withColumn("utc_timestamp", to_utc_timestamp((col("timestamp")), "UTC")) \
    .withColumn("date", col("utc_timestamp").cast("date")) \
    .withColumn("year", year(col("date"))) \
    .withColumn("month", month(col("date"))) \
    .withColumn("dayofyear", dayofyear(col("date")))
display(toUtcTimestamp)


timestamp,unix,date,current_date,current_timestamp,utc_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,2015-03-22,2025-03-18,2025-03-18T15:32:59.314+0000,2015-03-22T14:13:34.000+0000,2015,3,81
2015-03-22T15:03:18,1646641557555,2015-03-22,2025-03-18,2025-03-18T15:32:59.314+0000,2015-03-22T15:03:18.000+0000,2015,3,81
2015-03-22T14:38:39,1646641578622,2015-03-22,2025-03-18,2025-03-18T15:32:59.314+0000,2015-03-22T14:38:39.000+0000,2015,3,81


In [0]:
%python
#from_utc_timestamp()
fromUtcTimestamp = toUtcTimestamp.withColumn("from_utc_timestamp",from_utc_timestamp(col("utc_timestamp"),"UTC"))\
    .withColumn("date", col("utc_timestamp").cast("date")) \
    .withColumn("year", year(col("date"))) \
    .withColumn("month", month(col("date"))) \
    .withColumn("dayofyear", dayofyear(col("date")))

display(fromUtcTimestamp)

timestamp,unix,date,current_date,current_timestamp,utc_timestamp,year,month,dayofyear,from_utc_timestamp
2015-03-22T14:13:34,1646641525847,2015-03-22,2025-03-18,2025-03-18T15:33:00.576+0000,2015-03-22T14:13:34.000+0000,2015,3,81,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,2015-03-22,2025-03-18,2025-03-18T15:33:00.576+0000,2015-03-22T15:03:18.000+0000,2015,3,81,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,2015-03-22,2025-03-18,2025-03-18T15:33:00.576+0000,2015-03-22T14:38:39.000+0000,2015,3,81,2015-03-22T14:38:39.000+0000
