###  pandas生成时间序列

In [9]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

###  时间序列
- 时间戳（timestamp）具体到时间点
- 固定周期（period）
- 时间间隔（interval）

<img src="f1.png" alt="FAO" width="590" >

### date_range
- 可以指定开始时间（start）、周期数（periods）与频率（freq）
- H：小时
- D：天
- M：月（取每月最后一天；pandas 2.2 及以上版本中该别名改为 ME）

In [10]:
# The start date may be spelled '2021 Jul 1', '7/1/2021', '2021-07-01' or '2021/07/01'.
# Caution: '1/7/2021' is parsed month-first by default, i.e. as 7 January, not 1 July.
rng = pd.date_range(start='2021-07-01', periods=10, freq='3D')
rng

DatetimeIndex(['2021-07-01', '2021-07-04', '2021-07-07', '2021-07-10',
               '2021-07-13', '2021-07-16', '2021-07-19', '2021-07-22',
               '2021-07-25', '2021-07-28'],
              dtype='datetime64[ns]', freq='3D')

In [12]:
# Twenty standard-normal samples, one per calendar day starting 2021-01-01
# (date_range falls back to daily frequency when only a start and `periods` are given).
# NOTE: the name `time` would shadow the standard-library module if that were imported.
time = pd.Series(
    data=np.random.randn(20),
    index=pd.date_range(start=dt(2021, 1, 1), periods=20),
)
print(time)

2021-01-01    0.608358
2021-01-02   -0.402434
2021-01-03   -1.917616
2021-01-04    1.568936
2021-01-05   -0.668169
2021-01-06   -0.148038
2021-01-07   -0.393429
2021-01-08    0.052060
2021-01-09   -0.074732
2021-01-10    1.457457
2021-01-11   -0.106878
2021-01-12    0.340505
2021-01-13    0.694755
2021-01-14    0.261571
2021-01-15    0.231021
2021-01-16   -0.454639
2021-01-17   -0.313779
2021-01-18    0.311580
2021-01-19   -1.375962
2021-01-20    1.450409
Freq: D, dtype: float64


###  truncate过滤

In [13]:
time.truncate(before='2021-1-10') # drops every row dated before 2021-01-10; that date itself is kept

2021-01-10    1.457457
2021-01-11   -0.106878
2021-01-12    0.340505
2021-01-13    0.694755
2021-01-14    0.261571
2021-01-15    0.231021
2021-01-16   -0.454639
2021-01-17   -0.313779
2021-01-18    0.311580
2021-01-19   -1.375962
2021-01-20    1.450409
Freq: D, dtype: float64

In [14]:
time.truncate(after='2021-1-10') # drops every row dated after 2021-01-10; that date itself is kept

2021-01-01    0.608358
2021-01-02   -0.402434
2021-01-03   -1.917616
2021-01-04    1.568936
2021-01-05   -0.668169
2021-01-06   -0.148038
2021-01-07   -0.393429
2021-01-08    0.052060
2021-01-09   -0.074732
2021-01-10    1.457457
Freq: D, dtype: float64

In [15]:
# A single date string used as a label returns that day's scalar value.
print(time['2021-01-15'])

0.2310208242057297


In [16]:
# Slicing with date strings is label-based: both endpoints are included.
print(time['2021-01-15':'2021-01-20'])

2021-01-15    0.231021
2021-01-16   -0.454639
2021-01-17   -0.313779
2021-01-18    0.311580
2021-01-19   -1.375962
2021-01-20    1.450409
Freq: D, dtype: float64


In [17]:
# Month-end dates between the two bounds. The MonthEnd offset object replaces the
# 'M' string alias: pandas 2.2 deprecated that alias in favour of 'ME', whereas the
# offset object yields the same month-end dates on both older and newer releases.
data = pd.date_range('2020-01-01', '2021-01-01', freq=pd.offsets.MonthEnd())
print(data)

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='M')


<img src="f2.png" alt="FAO" width="590" >

In [18]:
# Timestamp: a single point in time (the time of day defaults to midnight)
pd.Timestamp('2021-07-10')

Timestamp('2021-07-10 00:00:00')

In [19]:
# More detail can be supplied: here the hour
pd.Timestamp('2021-07-10 10')

Timestamp('2021-07-10 10:00:00')

In [20]:
# ...and the minute
pd.Timestamp('2021-07-10 10:15')

Timestamp('2021-07-10 10:15:00')

In [21]:
# How much detail can you add?

In [22]:
# Keep a minute-level Timestamp in `t` (not referenced again in the cells shown here)
t = pd.Timestamp('2021-07-10 10:15')

In [23]:
# Period: a span of time; the frequency is inferred from the string (monthly here)
pd.Period('2021-01')

Period('2021-01', 'M')

In [24]:
# A full date yields a daily period
pd.Period('2021-01-01')

Period('2021-01-01', 'D')

In [25]:
# Time offsets: a Timedelta is a duration that can be added to or subtracted from times
pd.Timedelta('1 day')

Timedelta('1 days 00:00:00')

In [26]:
# Shifting a minute-resolution Period by one day; the result is still a Period
pd.Period('2021-01-01 10:10') + pd.Timedelta('1 day')

Period('2021-01-02 10:10', 'T')

In [27]:
# The same one-day shift applied to a Timestamp
pd.Timestamp('2021-01-01 10:10') + pd.Timedelta('1 day')

Timestamp('2021-01-02 10:10:00')

In [28]:
# Timestamps carry nanosecond resolution, so a 15 ns shift is preserved
pd.Timestamp('2021-01-01 10:10') + pd.Timedelta('15 ns')

Timestamp('2021-01-01 10:10:00.000000015')

In [29]:
# Ten consecutive 25-hour periods. Lowercase 'h' is used because pandas 2.2
# deprecated the uppercase 'H' alias; older releases accept the lowercase form too.
p1 = pd.period_range('2021-01-01 10:10', freq = '25h', periods = 10)

In [30]:
# Same span written as a compound frequency: one day plus one hour.
# Lowercase 'h' is used because pandas 2.2 deprecated the uppercase 'H' alias.
p2 = pd.period_range('2021-01-01 10:10', freq = '1D1h', periods = 10)

In [31]:
# Display the 25-hour PeriodIndex
p1

PeriodIndex(['2021-01-01 10:00', '2021-01-02 11:00', '2021-01-03 12:00',
             '2021-01-04 13:00', '2021-01-05 14:00', '2021-01-06 15:00',
             '2021-01-07 16:00', '2021-01-08 17:00', '2021-01-09 18:00',
             '2021-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [32]:
# Same periods as p1: one day plus one hour is normalised to a 25-hour frequency
p2

PeriodIndex(['2021-01-01 10:00', '2021-01-02 11:00', '2021-01-03 12:00',
             '2021-01-04 13:00', '2021-01-05 14:00', '2021-01-06 15:00',
             '2021-01-07 16:00', '2021-01-08 17:00', '2021-01-09 18:00',
             '2021-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [33]:
# Use a DatetimeIndex as the index of a Series (this rebinds `rng`).
# The bare `rng` line that used to sit mid-cell was removed: only the last
# expression of a cell is displayed, so it had no effect.
rng = pd.date_range('2016 Jul 1', periods = 10, freq = 'D')
pd.Series(range(len(rng)), index = rng)

2016-07-01    0
2016-07-02    1
2016-07-03    2
2016-07-04    3
2016-07-05    4
2016-07-06    5
2016-07-07    6
2016-07-08    7
2016-07-09    8
2016-07-10    9
Freq: D, dtype: int32

In [34]:
# Three consecutive monthly periods used as the index of a random Series;
# a list of Period objects is turned into a PeriodIndex automatically.
periods = [pd.Period(month) for month in ('2021-01', '2021-02', '2021-03')]
ts = pd.Series(data=np.random.randn(len(periods)), index=periods)
ts

2021-01    0.997422
2021-02    2.006248
2021-03    0.606481
Freq: M, dtype: float64

In [35]:
# The list of Period objects became a PeriodIndex
type(ts.index)

pandas.core.indexes.period.PeriodIndex

In [37]:
# Timestamps and periods can be converted into one another.
# Build an hourly, timestamp-indexed Series first (this rebinds `ts`).
# The start is written in ISO form because '07-10-21' is ambiguous (month-first
# vs. day-first vs. year-first); lowercase 'h' replaces the 'H' alias that
# pandas 2.2 deprecated and that older releases accept as well.
ts = pd.Series(range(10), pd.date_range('2021-07-10 8:00', periods = 10, freq = 'h'))
ts

2021-07-10 08:00:00    0
2021-07-10 09:00:00    1
2021-07-10 10:00:00    2
2021-07-10 11:00:00    3
2021-07-10 12:00:00    4
2021-07-10 13:00:00    5
2021-07-10 14:00:00    6
2021-07-10 15:00:00    7
2021-07-10 16:00:00    8
2021-07-10 17:00:00    9
Freq: H, dtype: int32

In [38]:
# Convert the timestamp index to a period index; the values are unchanged
ts_period = ts.to_period()
ts_period

2021-07-10 08:00    0
2021-07-10 09:00    1
2021-07-10 10:00    2
2021-07-10 11:00    3
2021-07-10 12:00    4
2021-07-10 13:00    5
2021-07-10 14:00    6
2021-07-10 15:00    7
2021-07-10 16:00    8
2021-07-10 17:00    9
Freq: H, dtype: int32

In [40]:
ts_period['2021-07-10 08:30':'2021-07-10 11:45']  # period slicing keeps 08:00: that hourly period contains 08:30

2021-07-10 08:00    0
2021-07-10 09:00    1
2021-07-10 10:00    2
2021-07-10 11:00    3
Freq: H, dtype: int32

In [41]:
ts['2021-07-10 08:30':'2021-07-10 11:45']   # timestamp slicing drops 08:00: that instant lies before 08:30

2021-07-10 09:00:00    1
2021-07-10 10:00:00    2
2021-07-10 11:00:00    3
Freq: H, dtype: int32