In [9]:
import time
import datetime
import calendar
import pytz

import numpy as np
import pandas as pd

# 一、pandas TimeSeries APIs:

### 1. Timestamp - `pd.Timestamp()`

In [10]:
# Standard-library datetime for 21 Aug 2019; the omitted time fields default to midnight.
datetime.datetime(2019, 8, 21)

datetime.datetime(2019, 8, 21, 0, 0)

In [11]:
# pd.Timestamp accepts a datetime.datetime instance directly.
pd.Timestamp(datetime.datetime(2019, 8, 21))

Timestamp('2019-08-21 00:00:00')

In [12]:
# A Timestamp can also be built from a date string.
pd.Timestamp("2019-08-21")

Timestamp('2019-08-21 00:00:00')

In [13]:
# Positional year, month, day integers work as well.
pd.Timestamp(2019, 8, 21)

Timestamp('2019-08-21 00:00:00')

In [14]:
# day_name() gives the weekday name of the Timestamp as a string.
day = pd.Timestamp(2019, 8, 21)
print(day.day_name())

Wednesday


### 2. Period - `pd.Period()`

In [15]:
# With no freq argument the frequency follows the string's resolution (monthly here).
pd.Period("2019-08")

Period('2019-08', 'M')

In [16]:
# An explicit daily freq turns the same string into the period for the first day of that month.
pd.Period("2019-08", freq = "D")

Period('2019-08-01', 'D')

### 3. Timestamp & Period as index

#### 3.1 DatetimeIndex

In [36]:
# A list of Timestamp objects used as a Series index is stored as a DatetimeIndex.
# The values come from np.random.randn with no seed set in the visible cells,
# so they change on every run.
dates = [pd.Timestamp("2019-08-21"), pd.Timestamp("2019-08-22"), pd.Timestamp("2019-08-23")]
ts = pd.Series(np.random.randn(3), index = dates)
print(ts)
print("-" * 25)
print(ts.index)
print("-" * 25)
# Show the concrete class of the index.
print(type(ts.index))

2019-08-21   -0.195642
2019-08-22    0.433605
2019-08-23    0.519007
dtype: float64
-------------------------
DatetimeIndex(['2019-08-21', '2019-08-22', '2019-08-23'], dtype='datetime64[ns]', freq=None)
-------------------------
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


#### 3.2 PeriodIndex

In [37]:
# A list of Period objects used as a Series index is stored as a PeriodIndex.
# Note that `ts` is rebound here and the periods are listed in descending order.
periods = [pd.Period("2019-08"), pd.Period("2019-07"), pd.Period("2019-06")]
ts = pd.Series(np.random.randn(3), index = periods)
print(ts)
print("-" * 20)
print(ts.index)
print("-" * 20)
# Show the concrete class of the index.
print(type(ts.index))

2019-08   -0.999400
2019-07   -0.213444
2019-06   -1.463501
Freq: M, dtype: float64
--------------------
PeriodIndex(['2019-08', '2019-07', '2019-06'], dtype='period[M]', freq='M')
--------------------
<class 'pandas.core.indexes.period.PeriodIndex'>


### 4. To Timestamp

* `pd.to_datetime(pd.Series/List, dayfirst, format, errors, unit, origin)`

In [39]:
# Series input: each string is parsed to a datetime and None becomes NaT;
# the result is again a Series.
s = pd.Series(["Jul 31, 2009", "2010-01-10", None])
ts  = pd.to_datetime(s)
print(ts)

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]


In [42]:
# List input: the result is a DatetimeIndex rather than a Series.
l = ["2005/11/23", "2010-12-31"]
tl = pd.to_datetime(l)
print(tl)

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)


In [45]:
# dayfirst controls how an ambiguous "04-01-2012" is read:
# month-first by default (April 1st), day-first when dayfirst=True (January 4th).
d = ["04-01-2012 10:00"]
td1 = pd.to_datetime(d)
td2 = pd.to_datetime(d, dayfirst = True)
print(td1)
print(td2)
print("-" * 80)
# dayfirst is a preference, not a strict rule: when a field cannot be a month (14),
# the other ordering is used, so both strings parse to 14 January 2019 either way.
d2 = ["14-01-2019", "01-14-2019"]
td3 = pd.to_datetime(d2)
td4 = pd.to_datetime(d2, dayfirst = True)
print(td3)
print(td4)

DatetimeIndex(['2012-04-01 10:00:00'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)
--------------------------------------------------------------------------------
DatetimeIndex(['2019-01-14', '2019-01-14'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2019-01-14', '2019-01-14'], dtype='datetime64[ns]', freq=None)


In [22]:
# An explicit strftime-style format removes any ambiguity about field order;
# both calls below yield 12 November 2010.
print(pd.to_datetime("2010/11/12", format = "%Y/%m/%d"))
print(pd.to_datetime("12-11-2010 00:00", format = "%d-%m-%Y %H:%M"))

2010-11-12 00:00:00
2010-11-12 00:00:00


In [23]:
# to_datetime can assemble datetimes from a DataFrame whose columns are named
# after date components (year, month, day, hour).
df = pd.DataFrame({
    "year": [2015, 2016],
    "month": [2, 3],
    "day": [4, 5],
    "hour": [2, 3]
})
print(df)
print()
print(pd.to_datetime(df))
print()
# Passing only year/month/day drops the hour component from the result.
print(pd.to_datetime(df[["year", "month", "day"]]))

   year  month  day  hour
0  2015      2    4     2
1  2016      3    5     3

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]


In [24]:
# errors="raise": an unparseable entry raises ValueError, which is caught and printed.
try:
    pd.to_datetime(["2019-08-21", "asd"], errors = "raise")
except ValueError as e:
    print(e)

# errors="ignore" hands the input back unparsed; errors="coerce" turns the bad entry into NaT.
# NOTE(review): errors="ignore" is reportedly deprecated in newer pandas releases —
# confirm against the pandas version this notebook targets.
print(pd.to_datetime(["2019-08-21", "asd"], errors = "ignore"))
print(pd.to_datetime(["2019-08-21", "asd"], errors = "coerce"))

('Unknown string format:', 'asd')
['2019-08-21' 'asd']
DatetimeIndex(['2019-08-21', 'NaT'], dtype='datetime64[ns]', freq=None)


In [48]:
# Lists of integer epoch values are passed to to_datetime(); `unit` states what the
# integers count (seconds, milliseconds); with no unit they are read as nanoseconds.
print(pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit = "s"))
print(pd.to_datetime([1349720105000, 1349806505000, 1349892905000, 1349979305000, 1350065705000], unit = "ms"))
# tz_localize attaches a time zone to the naive result without shifting the
# wall-clock time: both outputs read 12:00:00, only the UTC offset differs.
print(pd.to_datetime([1262347200000000000]).tz_localize("US/Pacific"))
print(pd.to_datetime([1262347200000000000]).tz_localize("UTC"))
# The DatetimeIndex constructor accepts the same integer input.
print(pd.DatetimeIndex([1262347200000000000]).tz_localize("US/Pacific"))

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2010-01-01 12:00:00-08:00'], dtype='datetime64[ns, US/Pacific]', freq=None)
DatetimeIndex(['2010-01-01 12:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None)
DatetimeIndex(['2010-01-01 12:00:00-08:00'], dtype='datetime64[ns, US/Pacific]', freq=None)


In [26]:
# `origin` sets the reference date the integer offsets are counted from;
# without it, day offsets are counted from 1970-01-01.
print(pd.to_datetime([1, 2, 3], unit = "D", origin = pd.Timestamp('1960-01-01')))
print(pd.to_datetime([1, 2, 3], unit = "D"))

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)


### 5. Range of Timestamp

* `pd.date_range(start, end, periods, freq)`
* `pd.bdate_range(start, end, periods, freq)`

In [54]:
# Range boundaries shared by the date_range / bdate_range examples that follow.
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2019, 1, 1)

In [62]:
# Default frequencies: date_range yields calendar days, bdate_range business days.
print(pd.date_range(start, end))  # calendar day
print(pd.bdate_range(start, end)) # business day
print("-" * 100)
# start + number of periods + an explicit frequency alias
print(pd.date_range(start, periods = 1000, freq = "M"))   # month end
print(pd.bdate_range(start, periods = 250, freq = "BQS")) # business quarter start
print("-" * 100)
# start + end + an explicit frequency alias
print(pd.date_range(start, end, freq = "BM")) # business month end
print(pd.date_range(start, end, freq = "W")) # weekly
print("-" * 100)
# One boundary plus a length, at the default daily frequency
print(pd.date_range(end = end, periods = 20))     # end and length
print(pd.date_range(start = start, periods = 20)) # start and length
print("-" * 100)
# Both boundaries plus a length, with no freq given
print(pd.date_range("2018-01-01", "2018-01-05", periods = 5)) # start, end and length
print(pd.date_range("2018-01-01", "2018-01-05", periods = 10))# start, end and length

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2018-12-23', '2018-12-24', '2018-12-25', '2018-12-26',
               '2018-12-27', '2018-12-28', '2018-12-29', '2018-12-30',
               '2018-12-31', '2019-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
               '2018-01-11', '2018-01-12',
               ...
               '2018-12-19', '2018-12-20', '2018-12-21', '2018-12-24',
               '2018-12-25', '2018-12-26', '2018-12-27', '2018-12-28',
               '2018-12-31', '2019-01-01'],
              dtype='datetime64[ns]', length=262, freq='B')
----------------------------------------------------------------------------------------------------

In [61]:
# Custom business-day calendars: only Mon/Wed/Fri count as business days.
weekmask = "Mon Wed Fri"
# NOTE(review): both holidays are in 2011, while `start`/`end` are set to 2018-01-01 and
# 2019-01-01 in the preceding cell, so they cannot exclude any date from idx1 — consider
# holidays inside the range if the point is to demonstrate the `holidays` argument.
holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)]
# freq "C": custom business day, honouring weekmask and holidays.
idx1 = pd.bdate_range(start, end, freq = "C", weekmask = weekmask, holidays = holidays)
print(idx1)
# freq "CBMS": custom business month start, i.e. the first Mon/Wed/Fri of each month.
idx2 = pd.bdate_range(start, end, freq = "CBMS", weekmask = weekmask)
print(idx2)

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05', '2018-01-08',
               '2018-01-10', '2018-01-12', '2018-01-15', '2018-01-17',
               '2018-01-19', '2018-01-22',
               ...
               '2018-12-10', '2018-12-12', '2018-12-14', '2018-12-17',
               '2018-12-19', '2018-12-21', '2018-12-24', '2018-12-26',
               '2018-12-28', '2018-12-31'],
              dtype='datetime64[ns]', length=157, freq='C')
DatetimeIndex(['2018-01-01', '2018-02-02', '2018-03-02', '2018-04-02',
               '2018-05-02', '2018-06-01', '2018-07-02', '2018-08-01',
               '2018-09-03', '2018-10-01', '2018-11-02', '2018-12-03'],
              dtype='datetime64[ns]', freq='CBMS')


In [30]:
# Earliest and latest instants a pd.Timestamp can represent.
print(pd.Timestamp.min)
print(pd.Timestamp.max)

1677-09-21 00:12:43.145225
2262-04-11 23:47:16.854775807


### 6. Timestamp Index —— DatetimeIndex

In [64]:
# Indexing and slicing a Series that carries a DatetimeIndex.
# `start`/`end` are rebound to 2011/2012 here, replacing the 2018/2019 values used earlier.
start = datetime.datetime(2011, 1, 1)
end = datetime.datetime(2012, 1, 1)
rng = pd.date_range(start, end, freq = "BM") # business month end
ts = pd.Series(np.random.randn(len(rng)), index = rng)
print(ts)
print("-" * 100)
print(ts.index)
print("-" * 100)
# Positional slices of the Series slice the index along with the values.
print(ts[:5].index)
print("-" * 100)
print(ts[::2].index)
print("-" * 100)
# Label lookup with a date string.
print(ts["1/31/2011"])
print("-" * 100)
# Slicing from a datetime object that is not itself in the index.
print(ts[datetime.datetime(2011, 12, 25):])
print("-" * 100)
# Slicing between two date strings.
print(ts['10/31/2011':'12/31/2011'])
print("-" * 100)
# Partial-string indexing: a whole year, then a single month.
print(ts["2011"])
print("-" * 100)
print(ts["2011-6"])

2011-01-31    0.341942
2011-02-28    0.859577
2011-03-31    0.985406
2011-04-29   -0.224811
2011-05-31   -0.888166
2011-06-30    0.710712
2011-07-29    0.355943
2011-08-31   -2.422465
2011-09-30   -0.769204
2011-10-31    1.886651
2011-11-30    0.456291
2011-12-30    1.312205
Freq: BM, dtype: float64
----------------------------------------------------------------------------------------------------
DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')
----------------------------------------------------------------------------------------------------
DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31'],
              dtype='datetime64[ns]', freq='BM')
------------------------------------------------------------------------------------

In [21]:
# Without freq the index carries freq=None; freq="infer" detects the regular
# 2-day spacing and records it on the index.
print(pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"]))
print(pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq = "infer"))

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq='2D')


In [65]:
# Partial-string indexing on a DataFrame with a minute-frequency DatetimeIndex.
# "min" is the minute alias; it is accepted by old and new pandas alike, whereas the
# single-letter "T" spelling is deprecated in recent releases.
dft = pd.DataFrame(np.random.randn(100000, 1),
                   columns = ["A"],
                   index = pd.date_range("20130101", periods = 100000, freq = "min")) # minutely
print(dft)
print("-" * 100)
# Selecting rows by a single partial date string must go through .loc:
# on current pandas, dft["2013"] is a column lookup and raises KeyError.
print(dft.loc["2013"])
print("-" * 100)
# Slices with [] select rows; a partial-string end point includes the whole period
# it names (all of February here).
print(dft["2013-1":"2013-2"])
print("-" * 100)
# End point given to the day: includes all of 28 February.
print(dft["2013-1":"2013-2-28"])
print("-" * 100)
# End point given as a full timestamp: stops exactly at 2013-02-28 00:00:00.
print(dft["2013-1":"2013-2-28 00:00:00"])
print("-" * 100)
# Start of 15 January through 12:30 on the same day.
print(dft["2013-1-15":"2013-1-15 12:30:00"])

                            A
2013-01-01 00:00:00 -0.242536
2013-01-01 00:01:00  0.548547
2013-01-01 00:02:00 -1.117038
2013-01-01 00:03:00  0.432176
2013-01-01 00:04:00 -0.510630
2013-01-01 00:05:00 -0.957357
2013-01-01 00:06:00  0.965401
2013-01-01 00:07:00 -1.400734
2013-01-01 00:08:00  0.578109
2013-01-01 00:09:00 -0.655137
2013-01-01 00:10:00 -1.963523
2013-01-01 00:11:00  1.283547
2013-01-01 00:12:00  1.595535
2013-01-01 00:13:00 -0.881298
2013-01-01 00:14:00 -0.100967
2013-01-01 00:15:00  0.090259
2013-01-01 00:16:00  0.776400
2013-01-01 00:17:00  0.421742
2013-01-01 00:18:00  0.240029
2013-01-01 00:19:00  0.759932
2013-01-01 00:20:00 -0.121517
2013-01-01 00:21:00  1.081618
2013-01-01 00:22:00 -0.212912
2013-01-01 00:23:00  0.184384
2013-01-01 00:24:00  0.612930
2013-01-01 00:25:00 -0.353999
2013-01-01 00:26:00  1.431318
2013-01-01 00:27:00  0.184914
2013-01-01 00:28:00  0.187825
2013-01-01 00:29:00 -0.142211
...                       ...
2013-03-11 10:10:00  0.389272
2013-03-11

In [37]:
# Partial-string indexing on a MultiIndex whose first level is a DatetimeIndex.
# "12h" is the 12-hour alias; the lowercase form is accepted by old and new pandas,
# whereas the uppercase "H" spelling is deprecated in recent releases.
dft2 = pd.DataFrame(np.random.randn(20, 1),
                    columns = ["A"],
                    index = pd.MultiIndex.from_product([
                        pd.date_range("20130101", periods = 10, freq = "12h"),
                        ["a", "b"]
                    ]))
print(dft2)
print("-" * 100)

# Plain [] with a string is a column lookup here, so it fails with KeyError.
# Only that expected error is caught; anything else should surface.
try:
    print(dft2["2013-01-05"])
except KeyError:
    print("ERROR")
print("-" * 100)

# .loc applies the partial date string to the first (datetime) index level.
print(dft2.loc["2013-01-05"])
print("-" * 100)

# Swap the levels so the datetime level comes second, and sort so label slicing works.
dft2 = dft2.swaplevel(0, 1).sort_index()
print(dft2)
print("-" * 100)

# IndexSlice lets the partial date string target the second level: all first-level
# labels, restricted to timestamps on 2013-01-05.
idx = pd.IndexSlice
print(dft2.loc[idx[:, "2013-01-05"], :])

                              A
2013-01-01 00:00:00 a -0.866774
                    b -0.930500
2013-01-01 12:00:00 a -0.026939
                    b  1.313407
2013-01-02 00:00:00 a  1.138884
                    b -0.593830
2013-01-02 12:00:00 a -1.465060
                    b -0.495297
2013-01-03 00:00:00 a  1.009147
                    b  0.310987
2013-01-03 12:00:00 a -0.808627
                    b -1.626384
2013-01-04 00:00:00 a -0.143131
                    b -0.828087
2013-01-04 12:00:00 a -1.048906
                    b  0.360801
2013-01-05 00:00:00 a  0.446044
                    b -1.093575
2013-01-05 12:00:00 a -0.528344
                    b  1.119421
----------------------------------------------------------------------------------------------------
ERROR
----------------------------------------------------------------------------------------------------
                              A
2013-01-05 00:00:00 a  0.446044
                    b -1.093575
2013-01-05 12:00:00 a -0

### 7. Resampling