https://towardsdatascience.com/effective-coding-with-dates-and-times-in-python-3be13a77b45b

In [1]:
# Load libraries
import pandas as pd
import numpy as np
from datetime import datetime, date, timezone
from dateutil import parser
from zoneinfo import ZoneInfo

In [3]:
# Creating date & datetime from scratch

date_1 = date(2020,12,1)

datetime_1 = datetime(2020,12,2)
date_2 = datetime_1.date()

print(date_1)
print(type(date_1))
print(datetime_1)
print(type(datetime_1))
print(date_2)
print(type(date_2))

2020-12-01
<class 'datetime.date'>
2020-12-02 00:00:00
<class 'datetime.datetime'>
2020-12-02
<class 'datetime.date'>


In [4]:

# Creating date & datetime for today and now (for me CET)

date_today = date.today()
datetime_now = datetime.now()

# Built-in: UTC
datetime_now_utc = datetime.now(tz=timezone.utc)

In [5]:
datetime_now_utc

datetime.datetime(2024, 2, 8, 17, 32, 49, 172385, tzinfo=datetime.timezone.utc)

In [6]:
# Create a string from a datetime object

date_1_string = date_1.strftime("%Y-%m-%d")

print('The first day of December 2020 is :',  date_1_string)

# Other example
datetime_1_string = datetime_1.strftime("%Y-%m-%d %H:%M:%S")

The first day of December 2020 is : 2020-12-01


In [7]:
# Create a datetime object from a string

date_3 = datetime.strptime('2017-12-07', "%Y-%m-%d").date()

# Other example
datetime_2 = datetime.strptime('2017-12-07', "%Y-%m-%d")

In [9]:
# Parse complex strings

datetime_3 = parser.parse("2017-12-07 03:12:00PM")
date_4 = parser.parse("December 7th 2019")

print(datetime_3)
print(type(datetime_3))
print(date_4)
print(type(date_4))

2017-12-07 15:12:00
<class 'datetime.datetime'>
2019-12-07 00:00:00
<class 'datetime.datetime'>


In [12]:

# Create numpy datetime object

date_5 = np.array('2018-04-05', dtype=np.datetime64)
datetime_4 = np.array('2018-04-05 15:12:00', dtype=np.datetime64)

#print(date_5)
#print(type(date_5))
#print(datetime_4)
#print(type(datetime_4))

# Vectorized operation
date_6 = date_5 + 1  #works with numpy object
#date_6 = date_1 + 1  #error with datetime object
#date_6 = date_1 + 1  #error with dateutil object

# Other vectorized operation
date_7 = date_5 + np.arange(7)

In [14]:
# Create pandas datetime objects

datetime_5 = pd.to_datetime("2020-06-01") #simple string
datetime_6 = pd.to_datetime("December 7th 2019") #complex string
date_8 = datetime_4.date() 

# Vectorized operation
datetime_7 = datetime_5 + pd.to_timedelta(np.arange(3), 'D')

AttributeError: 'numpy.ndarray' object has no attribute 'date'

In [15]:
# Converting a column to date using strptime

games_nfl = pd.read_csv('games.csv')
#games_nfl.head()
print(type(games_nfl['gameDate'][0]))

games_nfl['gameDate_dateformat'] = pd.to_datetime(games_nfl['gameDate'])
print(type(games_nfl['gameDate_dateformat'][0]))

# Converting a column directly to datetime using parse_dates
games_nfl_2 = pd.read_csv('games.csv', parse_dates=['gameDate'])
print(type(games_nfl_2['gameDate'][0]))

<class 'str'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [16]:
# Filtering by date

games_nfl_3 = games_nfl_2[games_nfl_2['gameDate'] > datetime(2019,1,1)]

In [18]:

# Setting datetime as index

games_nfl_4 = games_nfl_2.set_index(['gameDate'])
games_nfl_4.head(20)

# Other example for filter
games_nfl_5 = games_nfl_4['2018-09-01':'2018-09-30']
display(games_nfl_5)

  games_nfl_5 = games_nfl_4['2018-09-01':'2018-09-30']



Unnamed: 0_level_0,season,week,gameId,gameTimeEastern,HomeScore,VisitorScore,homeTeamAbbr,visitorTeamAbbr,homeDisplayName,visitorDisplayName,Stadium,Location,StadiumType,Turf,GameLength,GameWeather,Temperature,Humidity,WindSpeed,WindDirection
gameDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1


In [21]:
# Extract time delta between two datetime objects

uber_data = pd.read_csv('My Uber Drives - 2016.csv', 
                        parse_dates=['START_DATE*', 'END_DATE*'])
                        
# Pre-processing: Removing a "Total" Row
uber_data.iloc[[1155]]
uber_data.drop([1155],axis=0,inplace=True)

print(type(uber_data['START_DATE*'][0]))
print(type(uber_data['END_DATE*'][0]))
uber_data['START_DATE*'] = pd.to_datetime(uber_data['START_DATE*'], format="%m/%d/%Y %H:%M")

# New dimensions ‘Duration‘ and ‘Duration Minutes‘
uber_data['DURATION'] = uber_data['END_DATE*']-uber_data['START_DATE*']
uber_data['DURATION_MINUTES'] = (uber_data['END_DATE*']-uber_data['START_DATE*']).dt.total_seconds() / 60

<class 'str'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [22]:
# Extract weekday and month

# Single datetime
weekday = date_3.weekday()

# Series
uber_data['START_DATE_WEEKDAY'] = uber_data['START_DATE*'].dt.weekday
uber_data['START_DATE_WEEKDAY_NAME'] = uber_data['START_DATE*'].dt.day_name()

In [23]:

# Create datetime lag

uber_data['START_DATE_LAGGED'] = uber_data['START_DATE*'].shift(1)