In [1]:
import pandas as pd
# Show every column when a frame is displayed. The canonical option name is
# used: "display.max.columns" only resolved because option keys are matched as
# regular expressions, where "." also happens to match "_".
pd.set_option("display.max_columns", None)

In [2]:
# Truncate displayed frames to 16 rows. Canonical option name instead of the
# regex-matched "display.max.rows" spelling.
pd.set_option("display.max_rows", 16)

In [3]:
# Render floats with two digits after the decimal point.
pd.options.display.precision = 2

# Date and Time Basics

In [4]:
import datetime

# date.today() gives the current calendar date as a datetime.date (no time-of-day part).
today = datetime.date.today()
today

datetime.date(2023, 9, 26)

In [5]:
type(today)

datetime.date

In [6]:
print(today)

2023-09-26


In [7]:
today.year, today.month, today.day 

(2023, 9, 26)

In [8]:
import datetime

# datetime.now() captures both date and time, down to microseconds.
now = datetime.datetime.now()
now

datetime.datetime(2023, 9, 26, 19, 51, 46, 982311)

In [9]:
print(now)

2023-09-26 19:51:46.982311


In [10]:
now.year, now.month, now.day, now.hour, now.minute

(2023, 9, 26, 19, 51)

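By default, `datetime.now()` returns a *naive* datetime: its `tzinfo` attribute is `None`, which is why the next cell displays no output.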

In [11]:
now.tzinfo

In [12]:
import datetime

birthday = datetime.date(2022, 12, 15)
# Same month/day one calendar year later. A fixed timedelta(days=365) would land
# one day early whenever the span contains Feb 29.
# (A Feb 29 birthday would need special handling: replace() raises ValueError.)
birthday_next_year = birthday.replace(year=birthday.year + 1)
# Subtracting two dates yields a timedelta; this particular span has no leap day.
assert birthday_next_year - birthday == datetime.timedelta(days=365)
birthday_next_year

datetime.date(2023, 12, 15)

In [13]:
import datetime

my_wedding = datetime.date(2022, 10, 12)
# Month/day/year text; a format spec on a date is handed to strftime.
formatted_date = f"{my_wedding:%m/%d/%Y}"
print(formatted_date)

10/12/2022


In [14]:
import datetime

d1 = datetime.date(2022, 5, 1)
d2 = datetime.date(2022, 6, 1)

# date objects order chronologically, so `<` reads as "is earlier than".
message = 'd1 is before d2' if d1 < d2 else 'd1 is after or equal to d2'
print(message)

d1 is before d2


# Importing DateTimes in Pandas DataFrames

In [15]:
# index_col=0 turns the CSV's first (unnamed) column into the index.
# No date parsing is requested, so 'Date' loads as plain strings (object dtype).
df = pd.read_csv('../data/avocado.csv', index_col=0)
df.head(2)

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18249 entries, 0 to 11
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          18249 non-null  object 
 1   AveragePrice  18249 non-null  float64
 2   Total Volume  18249 non-null  float64
 3   4046          18249 non-null  float64
 4   4225          18249 non-null  float64
 5   4770          18249 non-null  float64
 6   Total Bags    18249 non-null  float64
 7   Small Bags    18249 non-null  float64
 8   Large Bags    18249 non-null  float64
 9   XLarge Bags   18249 non-null  float64
 10  type          18249 non-null  object 
 11  year          18249 non-null  int64  
 12  region        18249 non-null  object 
dtypes: float64(9), int64(1), object(3)
memory usage: 1.9+ MB


We can see that the data type of the Date column is `object`. This means that the data are stored as `strings`, meaning that you can’t access the slew of DateTime functionality available in Pandas.

###### Using Pandas `parse_dates` to Import DateTimes
One easy way to import data as DateTime is to use the `parse_dates=` argument. 

In [17]:
# parse_dates=['Date'] converts the 'Date' column to datetime64 while the file is read.
df = pd.read_csv('../data/avocado.csv', index_col=0, parse_dates=['Date'])
df.head(1)

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18249 entries, 0 to 11
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          18249 non-null  datetime64[ns]
 1   AveragePrice  18249 non-null  float64       
 2   Total Volume  18249 non-null  float64       
 3   4046          18249 non-null  float64       
 4   4225          18249 non-null  float64       
 5   4770          18249 non-null  float64       
 6   Total Bags    18249 non-null  float64       
 7   Small Bags    18249 non-null  float64       
 8   Large Bags    18249 non-null  float64       
 9   XLarge Bags   18249 non-null  float64       
 10  type          18249 non-null  object        
 11  year          18249 non-null  int64         
 12  region        18249 non-null  object        
dtypes: datetime64[ns](1), float64(9), int64(1), object(2)
memory usage: 1.9+ MB


We can see that our column is now correctly imported as a DateTime format.

# Using `to_datetime` to Convert Columns to DateTime


In [19]:
import pandas as pd
# NOTE: parse_dates alone does not convert this file's 'Date' column;
# df.info() still reports it as object, which is why an explicit format is
# supplied in the cells that follow.
df = pd.read_csv('../data/time_data.csv', parse_dates=['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 337 entries, 0 to 336
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         337 non-null    object 
 1   Close Price  337 non-null    float64
 2   High Price   337 non-null    float64
 3   Low Price    337 non-null    float64
 4   Open Price   337 non-null    float64
 5   Volume       337 non-null    float64
dtypes: float64(5), object(1)
memory usage: 15.9+ KB


In [20]:
import datetime

# Keep `datetime` bound to the module. `from datetime import datetime` would
# rebind the name to the class, so module-style calls used elsewhere in this
# notebook (datetime.date(...), datetime.timedelta(...)) would fail on re-run.
parsed = datetime.datetime.strptime('31/12/23 23:59:59.999999', '%d/%m/%y %H:%M:%S.%f')
parsed

datetime.datetime(2023, 12, 31, 23, 59, 59, 999999)

In [21]:
# `_` is IPython's variable for the most recent cell output (here, the parsed datetime).
_

datetime.datetime(2023, 12, 31, 23, 59, 59, 999999)

In [22]:
# Formats the previous cell's output via `_`, so this only works when the cells run in order.
# Pattern: abbreviated weekday, day, abbreviated month, year, 12-hour time with AM/PM.
_.strftime('%a %d %b %Y, %I:%M%p')

'Sun 31 Dec 2023, 11:59PM'

In [23]:
df.dtypes

Date            object
Close Price    float64
High Price     float64
Low Price      float64
Open Price     float64
Volume         float64
dtype: object

In [24]:
# Convert the string column with an explicit format rather than relying on inference.
# NOTE(review): the format pairs %H (24-hour clock) with %p (AM/PM marker); %p is
# normally used together with %I. Confirm against the raw strings in time_data.csv.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d %H%p')
df.head(2)

Unnamed: 0,Date,Close Price,High Price,Low Price,Open Price,Volume
0,2021-12-10 05:00:00,48246.57,48359.35,48051.08,48170.66,827.4
1,2021-12-10 06:00:00,47847.59,48430.0,47810.81,48249.78,1296.19


In [25]:
df['Date'].dtype

dtype('<M8[ns]')

In [26]:
df.dtypes

Date           datetime64[ns]
Close Price           float64
High Price            float64
Low Price             float64
Open Price            float64
Volume                float64
dtype: object

In [27]:
df['Date'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 337 entries, 0 to 336
Series name: Date
Non-Null Count  Dtype         
--------------  -----         
337 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 2.8 KB


In [28]:
import pandas as pd
pd.__version__

'2.0.3'

###### Use the `date_format` parameter of `read_csv()` (available from pandas 2.0)

In [29]:
import pandas as pd

# Passing date_format lets read_csv parse 'Date' while loading, so no separate
# to_datetime step is needed afterwards.
df = pd.read_csv('../data/time_data.csv', parse_dates=['Date'], date_format='%Y-%m-%d %H%p')
df.head(1)

Unnamed: 0,Date,Close Price,High Price,Low Price,Open Price,Volume
0,2021-12-10 05:00:00,48246.57,48359.35,48051.08,48170.66,827.4


In [30]:
df.dtypes

Date           datetime64[ns]
Close Price           float64
High Price            float64
Low Price             float64
Open Price            float64
Volume                float64
dtype: object

# DateTime Attributes and Methods in Pandas


## DateTime Attributes in Pandas


In [31]:
# Split the timestamp into its components through the .dt accessor.

# .dt.date yields plain Python date objects, hence an object-dtype column.
df['DateOnly'] = df['Date'].dt.date
# The remaining components share their column name with the accessor attribute.
for part in ('year', 'month', 'day', 'hour', 'minute'):
    df[part] = getattr(df['Date'].dt, part)

df.head(1)

Unnamed: 0,Date,Close Price,High Price,Low Price,Open Price,Volume,DateOnly,year,month,day,hour,minute
0,2021-12-10 05:00:00,48246.57,48359.35,48051.08,48170.66,827.4,2021-12-10,2021,12,10,5,0


These attributes can be accessed using the `.dt` accessor.

In [32]:
df.dtypes

Date           datetime64[ns]
Close Price           float64
High Price            float64
Low Price             float64
Open Price            float64
Volume                float64
DateOnly               object
year                    int32
month                   int32
day                     int32
hour                    int32
minute                  int32
dtype: object

## DateTime Methods in Pandas


In [33]:
df.dtypes

Date           datetime64[ns]
Close Price           float64
High Price            float64
Low Price             float64
Open Price            float64
Volume                float64
DateOnly               object
year                    int32
month                   int32
day                     int32
hour                    int32
minute                  int32
dtype: object

###### Calculate the largest and smallest dates

In [34]:
# Earliest and latest timestamps in the column, one per line.

print(df['Date'].min(), df['Date'].max(), sep='\n')

2021-12-10 05:00:00
2021-12-24 05:00:00


In [35]:
# Subtracting two Timestamps yields a Timedelta covering the whole series.

span = df['Date'].max() - df['Date'].min()
print(span)

14 days 00:00:00


###### Day and month name

In [36]:
# Weekday and month names as strings, both taken from the same .dt accessor.
date_accessor = df['Date'].dt
df['DayName'] = date_accessor.day_name()
df['MonthName'] = date_accessor.month_name()

In [37]:
df[['Date','DayName','MonthName']].head()

Unnamed: 0,Date,DayName,MonthName
0,2021-12-10 05:00:00,Friday,December
1,2021-12-10 06:00:00,Friday,December
2,2021-12-10 07:00:00,Friday,December
3,2021-12-10 08:00:00,Friday,December
4,2021-12-10 09:00:00,Friday,December


###### Use `normalize()` to Extract a Date from a Pandas Datetime Column


In [38]:
# normalize() resets the time part to midnight but keeps the datetime64 dtype,
# unlike .dt.date (DateOnly), which produces an object-dtype column.
df['DateNorm'] = df['Date'].dt.normalize()

In [39]:
df[['Date','DateOnly','DateNorm']].dtypes

Date        datetime64[ns]
DateOnly            object
DateNorm    datetime64[ns]
dtype: object

In [40]:
df[['Date','DateOnly','DateNorm']].head(2)

Unnamed: 0,Date,DateOnly,DateNorm
0,2021-12-10 05:00:00,2021-12-10,2021-12-10
1,2021-12-10 06:00:00,2021-12-10,2021-12-10


# Filtering a Pandas DataFrame Based on DateTimes


In [41]:
import pandas as pd

df = pd.read_csv('../data/avocado.csv', index_col=0, parse_dates=['Date'])
df.head(1)

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany


In [42]:
df = df.set_index('Date')
df.head()

Unnamed: 0_level_0,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [43]:
# Drop the per-size volume columns to keep the displayed frames narrow.
# Rebinding (instead of inplace=True) plus errors='ignore' makes the cell safe
# to re-run: a second execution no longer raises KeyError for missing columns.
df = df.drop(columns=['4046', '4225', '4770'], errors='ignore')

In [44]:
# A date string on a DatetimeIndex selects every row stamped with that day.
df.loc['2015-11-29'].head()

Unnamed: 0_level_0,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-11-29,1.28,51039.6,6183.95,5986.26,197.69,0.0,conventional,2015,Albany
2015-11-29,0.99,333280.79,53794.68,33031.72,20738.55,24.41,conventional,2015,Atlanta
2015-11-29,1.16,545800.05,105468.28,103615.95,1852.33,0.0,conventional,2015,BaltimoreWashington
2015-11-29,1.06,49069.13,16057.99,15860.63,0.0,197.36,conventional,2015,Boise
2015-11-29,1.19,350559.81,74125.62,73864.52,261.1,0.0,conventional,2015,Boston


In [45]:
# Partial string indexing: 'YYYY-MM' selects all rows that fall in that month.
df.loc['2016-12' ].head()

Unnamed: 0_level_0,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-25,1.52,73341.73,11432.09,11017.32,411.83,2.94,conventional,2016,Albany
2016-12-18,1.53,68938.53,9504.66,8876.65,587.73,40.28,conventional,2016,Albany
2016-12-11,1.49,71777.85,12822.02,12176.75,645.27,0.0,conventional,2016,Albany
2016-12-04,1.48,113031.96,6704.29,6476.12,228.17,0.0,conventional,2016,Albany
2016-12-25,0.91,502787.29,210731.46,118884.36,91847.1,0.0,conventional,2016,Atlanta


In [46]:
# '2016-12-12' is not present in the index, so an exact-day lookup raises KeyError.
# Only KeyError is caught: that is the failure being demonstrated, and any other
# exception should surface rather than be printed as if it were expected.
try:
    df.loc['2016-12-12']
except KeyError as e:
    print("An error occurred:", type(e).__name__, "–", e)

An error occurred: KeyError – '2016-12-12'


In [47]:
# Both bounds exist as index labels, so label slicing works even though the
# index is not sorted.
df.loc['2016-12-11':'2016-12-18']

Unnamed: 0_level_0,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-18,1.53,68938.53,9504.66,8876.65,587.73,40.28,conventional,2016,Albany
2016-12-11,1.49,71777.85,12822.02,12176.75,645.27,0.00,conventional,2016,Albany
2016-12-18,0.94,463483.07,190881.84,114872.09,76009.75,0.00,conventional,2016,Atlanta
2016-12-11,1.06,422026.71,179311.31,112574.78,66736.53,0.00,conventional,2016,Atlanta
2016-12-18,1.21,691008.74,137281.09,133753.32,2845.65,682.12,conventional,2016,BaltimoreWashington
...,...,...,...,...,...,...,...,...,...
2016-12-11,1.42,856795.63,509347.50,293760.79,215586.71,0.00,organic,2016,TotalUS
2016-12-18,1.23,173890.10,111968.71,27269.38,84699.33,0.00,organic,2016,West
2016-12-11,1.34,163585.07,99570.68,24023.81,75546.87,0.00,organic,2016,West
2016-12-18,1.34,11406.50,7857.31,7819.17,38.14,0.00,organic,2016,WestTexNewMexico


In [48]:
pd.__version__

'2.0.3'

In [49]:
# Slicing with a bound that is absent from a non-monotonic DatetimeIndex raises
# KeyError. Catch exactly that, so unrelated errors are not hidden.
try:
    df.loc['2016-12-12':'2016-12-18']
except KeyError as e:
    print("An error occurred:", type(e).__name__, "–", e)

An error occurred: KeyError – 'Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is not allowed.'


###### Alternatives for finding records within a specific date range

In [50]:
start_date = '2016-12-01'
end_date = '2016-12-05'

# A boolean mask on the index works regardless of index order, and the bounds
# do not have to be existing labels.
in_window = (df.index >= start_date) & (df.index <= end_date)
df.loc[in_window].head()

Unnamed: 0_level_0,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-04,1.48,113031.96,6704.29,6476.12,228.17,0.0,conventional,2016,Albany
2016-12-04,1.03,475815.02,171591.0,67806.59,103784.41,0.0,conventional,2016,Atlanta
2016-12-04,1.41,566956.23,159678.2,157703.86,1738.23,236.11,conventional,2016,BaltimoreWashington
2016-12-04,1.01,72471.63,31508.76,31278.13,217.75,12.88,conventional,2016,Boise
2016-12-04,1.26,473428.36,71859.07,71377.77,307.69,173.61,conventional,2016,Boston


In [51]:
start_date = '2016-12-01'
end_date = '2016-12-05'

In [52]:
# Same inclusive date window expressed with query(); `@name` refers to a Python variable.
filtered_df = df.query('index >= @start_date and index <= @end_date')

In [53]:
filtered_df.head()

Unnamed: 0_level_0,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-04,1.48,113031.96,6704.29,6476.12,228.17,0.0,conventional,2016,Albany
2016-12-04,1.03,475815.02,171591.0,67806.59,103784.41,0.0,conventional,2016,Atlanta
2016-12-04,1.41,566956.23,159678.2,157703.86,1738.23,236.11,conventional,2016,BaltimoreWashington
2016-12-04,1.01,72471.63,31508.76,31278.13,217.75,12.88,conventional,2016,Boise
2016-12-04,1.26,473428.36,71859.07,71377.77,307.69,173.61,conventional,2016,Boston


###### Check for duplicate index values

In [54]:
# Boolean array marking index labels that already appeared earlier in the index
# (the first occurrence of each date stays False).
duplicates = df.index.duplicated()
duplicates

array([False, False, False, ...,  True,  True,  True])

In [55]:
duplicates.sum()

18080

###### Reset the index to default integer-based index and handle datetime in a separate column


In [56]:
# Move 'Date' out of the index and back into a regular column.
# Guarded and rebinding (no inplace=True) so that re-running the cell does not
# push the fresh integer index into a spurious 'index' column.
if df.index.name == 'Date':
    df = df.reset_index()

In [57]:
df.head()

Unnamed: 0,Date,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,1.33,64236.62,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,2015-12-20,1.35,54876.98,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2015-12-13,0.93,118220.22,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,2015-12-06,1.08,78992.15,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,2015-11-29,1.28,51039.6,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [58]:
# Rows dated on or after 2 Feb 2018, using the comparison-method spelling of `>=`.
selected_rows = df.loc[df['Date'].ge('2018-02-02')]
selected_rows.head()

Unnamed: 0,Date,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
8478,2018-03-25,1.57,149396.5,23924.33,19273.8,4270.53,380.0,conventional,2018,Albany
8479,2018-03-18,1.35,105304.65,30977.21,26755.9,3721.31,500.0,conventional,2018,Albany
8480,2018-03-11,1.12,144648.75,17804.72,14480.52,3033.09,291.11,conventional,2018,Albany
8481,2018-03-04,1.08,139520.6,22353.29,16128.51,5941.45,283.33,conventional,2018,Albany
8482,2018-02-25,1.28,104278.89,34138.8,30126.31,3702.49,310.0,conventional,2018,Albany


In [59]:
# between() is inclusive on both ends by default, matching `>=` combined with `<=`.
sliced_rows = df[df['Date'].between('2016-08-02', '2016-08-15')]

In [60]:
sliced_rows.head()

Unnamed: 0,Date,AveragePrice,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
2827,2016-08-14,1.2,110528.09,13530.48,12434.82,350.1,745.56,conventional,2016,Albany
2828,2016-08-07,1.48,99683.11,15947.06,14597.2,178.19,1171.67,conventional,2016,Albany
2879,2016-08-14,0.87,666493.32,242570.06,182840.11,59729.95,0.0,conventional,2016,Atlanta
2880,2016-08-07,1.04,508354.34,192926.85,131851.39,61075.46,0.0,conventional,2016,Atlanta
2931,2016-08-14,1.52,742291.98,227903.75,219529.71,6984.04,1390.0,conventional,2016,BaltimoreWashington


###### Use a MultiIndex

In [61]:
import pandas as pd

# Small example: one value per (date, category) pair.
data = {
    'date': ['2023-08-01', '2023-08-02', '2023-08-02', '2023-08-03'],
    'category': ['A', 'A', 'B', 'A'],
    'value': [10, 15, 20, 25],
}

# Build the frame, convert 'date' to datetime, and promote date + category to a
# two-level index in a single chain.
df = (
    pd.DataFrame(data)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index(['date', 'category'])
)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,value
date,category,Unnamed: 2_level_1
2023-08-01,A,10
2023-08-02,A,15
2023-08-02,B,20
2023-08-03,A,25


In [62]:
df.index

MultiIndex([('2023-08-01', 'A'),
            ('2023-08-02', 'A'),
            ('2023-08-02', 'B'),
            ('2023-08-03', 'A')],
           names=['date', 'category'])

In [63]:
# A single label matches the first index level ('date'); that level is dropped
# from the result, leaving 'category' as the index.
selected_rows = df.loc['2023-08-01']
selected_rows

Unnamed: 0_level_0,value
category,Unnamed: 1_level_1
A,10


In [64]:
# Slicing on the first level keeps both index levels and includes both end dates.
selected_rows = df.loc['2023-08-02':'2023-08-03']
selected_rows

Unnamed: 0_level_0,Unnamed: 1_level_0,value
date,category,Unnamed: 2_level_1
2023-08-02,A,15
2023-08-02,B,20
2023-08-03,A,25


In [65]:
# Tuple bounds pin both levels: from (2023-08-01, 'A') through (2023-08-02, 'A'),
# so the (2023-08-02, 'B') row is excluded.
sliced_rows = df.loc[('2023-08-01', 'A'):('2023-08-02', 'A')]
sliced_rows

Unnamed: 0_level_0,Unnamed: 1_level_0,value
date,category,Unnamed: 2_level_1
2023-08-01,A,10
2023-08-02,A,15


In [66]:
import pandas as pd

# Parse 'Date' with the explicit format while reading, then promote it to the
# index in the same chain so time-based selection can be used.
df = (
    pd.read_csv('../data/time_data.csv', parse_dates=['Date'],
                date_format='%Y-%m-%d %H%p')
    .set_index('Date')
)
df.head(5)

Unnamed: 0_level_0,Close Price,High Price,Low Price,Open Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-10 05:00:00,48246.57,48359.35,48051.08,48170.66,827.4
2021-12-10 06:00:00,47847.59,48430.0,47810.81,48249.78,1296.19
2021-12-10 07:00:00,47694.62,48037.48,47550.0,47847.59,2299.85
2021-12-10 08:00:00,48090.35,48169.06,47587.39,47694.62,1371.25
2021-12-10 09:00:00,48366.37,48495.0,47869.8,48090.35,1232.3


In [67]:
# Rows whose time-of-day is exactly 07:00, on any date.
df.at_time('07:00').head()

Unnamed: 0_level_0,Close Price,High Price,Low Price,Open Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-10 07:00:00,47694.62,48037.48,47550.0,47847.59,2299.85
2021-12-11 07:00:00,48021.76,48165.18,47765.33,48040.5,1944.77
2021-12-12 07:00:00,49301.21,49359.86,49082.3,49159.82,509.33
2021-12-13 07:00:00,48740.01,48995.94,48554.77,48936.88,1284.62
2021-12-14 07:00:00,47000.0,47050.0,46506.38,46670.32,1246.92


In [68]:
# at_time matches exactly: no row is stamped 07:01, so the result is empty.
df.at_time('07:01').head()

Unnamed: 0_level_0,Close Price,High Price,Low Price,Open Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [69]:
# Daily window from 07:00 to 09:00; both end times are included in the result.
df.between_time('07:00','09:00').head(7)

Unnamed: 0_level_0,Close Price,High Price,Low Price,Open Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-10 07:00:00,47694.62,48037.48,47550.0,47847.59,2299.85
2021-12-10 08:00:00,48090.35,48169.06,47587.39,47694.62,1371.25
2021-12-10 09:00:00,48366.37,48495.0,47869.8,48090.35,1232.3
2021-12-11 07:00:00,48021.76,48165.18,47765.33,48040.5,1944.77
2021-12-11 08:00:00,48391.51,48900.0,47843.1,48021.75,1994.81
2021-12-11 09:00:00,48529.7,48627.0,48120.09,48391.51,840.82
2021-12-12 07:00:00,49301.21,49359.86,49082.3,49159.82,509.33


In [70]:
# Starting at 07:01 excludes the 07:00 rows, leaving only 08:00 and 09:00 each day.
df.between_time('07:01','09:00').head(7)

Unnamed: 0_level_0,Close Price,High Price,Low Price,Open Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-10 08:00:00,48090.35,48169.06,47587.39,47694.62,1371.25
2021-12-10 09:00:00,48366.37,48495.0,47869.8,48090.35,1232.3
2021-12-11 08:00:00,48391.51,48900.0,47843.1,48021.75,1994.81
2021-12-11 09:00:00,48529.7,48627.0,48120.09,48391.51,840.82
2021-12-12 08:00:00,48965.02,49376.79,48943.37,49301.2,870.06
2021-12-12 09:00:00,48806.4,49150.0,48710.66,48965.01,944.37
2021-12-13 08:00:00,49015.0,49171.57,48145.0,48740.01,1996.57


# Group data by time intervals in Python Pandas


## Resampling Time Series Data


In [71]:
import pandas as pd

df = pd.read_csv('../data/avocado.csv', index_col=0, parse_dates=['Date'])
df.head(1)

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany


In [72]:
df = df.set_index('Date')
df.head()

Unnamed: 0_level_0,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [73]:
# Keep only the volume/bag columns for resampling.
# Rebinding (instead of inplace=True) plus errors='ignore' makes the cell safe
# to re-run: a second execution no longer raises KeyError for missing columns.
df = df.drop(
    columns=['AveragePrice', '4046', '4225', '4770', 'type', 'year', 'region'],
    errors='ignore',
)

In [74]:
df.head()

Unnamed: 0_level_0,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-12-27,64236.62,8696.87,8603.62,93.25,0.0
2015-12-20,54876.98,9505.56,9408.07,97.49,0.0
2015-12-13,118220.22,8145.35,8042.21,103.14,0.0
2015-12-06,78992.15,5811.16,5677.4,133.76,0.0
2015-11-29,51039.6,6183.95,5986.26,197.69,0.0


In [75]:
# 'M' buckets rows by calendar month (each bucket labelled with the month's last day)
# and averages every column within the bucket.
# NOTE(review): newer pandas releases reportedly deprecate the 'M' alias in favour
# of 'ME' — confirm before upgrading from the 2.0.3 used here.
df1 = df.resample('M').mean()
df1.head()

Unnamed: 0_level_0,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-31,736308.36,117337.26,96502.58,20513.63,321.05
2015-02-28,839438.89,126555.65,106004.82,20017.51,533.32
2015-03-31,775153.83,127622.4,109589.48,17325.54,707.38
2015-04-30,796630.54,134856.55,114351.52,19976.72,528.32
2015-05-31,895896.38,146426.59,123510.64,22365.19,550.77


In [76]:
# 'Y' buckets rows by calendar year (each bucket labelled Dec 31); note that this
# overwrites the monthly result stored in df1 by the previous cell.
# NOTE(review): newer pandas releases reportedly deprecate the 'Y' alias in favour
# of 'YE' — confirm before upgrading from the 2.0.3 used here.
df1 = df.resample('Y').mean()
df1.head()

Unnamed: 0_level_0,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-12-31,781000.0,137523.11,113033.43,23520.29,969.39
2016-12-31,858000.0,260533.99,197025.33,59940.59,3568.07
2017-12-31,862000.0,287712.12,213728.16,69790.12,4193.84
2018-12-31,1070000.0,379271.56,278349.82,95358.02,5563.73


### Resampling Pandas Timeseries with Different Methods


In [77]:
# Per-year mean and sum for the two headline columns; passing a list of
# statistics per column produces two-level result columns.
yearly_stats = ['mean', 'sum']
df2 = df.resample('Y').agg(
    {column: yearly_stats for column in ('Total Volume', 'Total Bags')}
)

df2.head()

Unnamed: 0_level_0,Total Volume,Total Volume,Total Bags,Total Bags
Unnamed: 0_level_1,mean,sum,mean,sum
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2015-12-31,781000.0,4390000000.0,137523.11,772000000.0
2016-12-31,858000.0,4820000000.0,260533.99,1460000000.0
2017-12-31,862000.0,4930000000.0,287712.12,1650000000.0
2018-12-31,1070000.0,1380000000.0,379271.56,492000000.0


## Grouping Time Series Data with `pd.Grouper`



In [78]:
# pd.Grouper(key='Date') needs 'Date' as a regular column, so move it out of the index.
# Guarded and rebinding (no inplace=True) so that re-running the cell does not
# push the fresh integer index into a spurious 'index' column.
if df.index.name == 'Date':
    df = df.reset_index()

In [79]:
df.head()

Unnamed: 0,Date,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags
0,2015-12-27,64236.62,8696.87,8603.62,93.25,0.0
1,2015-12-20,54876.98,9505.56,9408.07,97.49,0.0
2,2015-12-13,118220.22,8145.35,8042.21,103.14,0.0
3,2015-12-06,78992.15,5811.16,5677.4,133.76,0.0
4,2015-11-29,51039.6,6183.95,5986.26,197.69,0.0


In [80]:
# Yearly totals via groupby: pd.Grouper bins rows on the 'Date' column by calendar
# year, and named aggregation gives the output columns readable names.
# `axis=0` was removed: it is the default and the keyword is deprecated in later
# pandas releases.
tt = df.groupby(pd.Grouper(key='Date', freq='Y')).agg(
    Total_volume=('Total Volume', 'sum'),
    Total_bags=('Total Bags', 'sum'),
)

In [81]:
tt

Unnamed: 0_level_0,Total_volume,Total_bags
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-31,4390000000.0,772000000.0
2016-12-31,4820000000.0,1460000000.0
2017-12-31,4930000000.0,1650000000.0
2018-12-31,1380000000.0,492000000.0


In [82]:
import pandas as pd

df = pd.read_csv('../data/avocado.csv', index_col=0, parse_dates=['Date'])
df.head(1)

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany


In [83]:
# Drop the price and per-size columns; 'type', 'year' and 'region' are kept for grouping.
# Rebinding (instead of inplace=True) plus errors='ignore' makes the cell safe
# to re-run: a second execution no longer raises KeyError for missing columns.
df = df.drop(columns=['AveragePrice', '4046', '4225', '4770'], errors='ignore')

In [84]:
df.head()

Unnamed: 0,Date,Total Volume,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,2015-12-27,64236.62,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,2015-12-20,54876.98,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2015-12-13,118220.22,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,2015-12-06,78992.15,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,2015-11-29,51039.6,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [85]:
# Yearly totals split by avocado type: a time-based Grouper on 'Date' combined
# with the plain 'type' column yields a two-level (Date, type) index.
# `axis=0` was removed: it is the default and the keyword is deprecated in later
# pandas releases.
tt = df.groupby([pd.Grouper(key='Date', freq='Y'), 'type']).agg(
    Total_volume=('Total Volume', 'sum'),
    Total_bags=('Total Bags', 'sum'),
)

In [86]:
tt

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_volume,Total_bags
Date,type,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-12-31,conventional,4300000000.0,748000000.0
2015-12-31,organic,88900000.0,24100000.0
2016-12-31,conventional,4690000000.0,1400000000.0
2016-12-31,organic,131000000.0,65100000.0
2017-12-31,conventional,4770000000.0,1540000000.0
2017-12-31,organic,168000000.0,105000000.0
2018-12-31,conventional,1330000000.0,459000000.0
2018-12-31,organic,48500000.0,32200000.0
