In [1]:
import numpy as np
import pandas as pd


## Periods and Period Arithmetic

In [2]:
# An annual period for 2007; with 'A-DEC' the year ends in December.
p = pd.Period(value=2007, freq='A-DEC')
p

Period('2007', 'A-DEC')

In [3]:
# Adding an integer shifts the period forward by that many units of its frequency.
p + 5

Period('2012', 'A-DEC')

In [4]:
# Subtracting an integer shifts the period backward by that many units of its frequency.
p - 2

Period('2005', 'A-DEC')

In [5]:
# p is Period('2007', 'A-DEC') at this point. Subtracting two periods that
# share a frequency gives the number of units separating them.
pd.Period(value='2014', freq='A-DEC') - p

7

In [6]:
# period_range builds a PeriodIndex of consecutive periods between two endpoints.
rng = pd.period_range(start='2000-01-01', end='2000-06-30', freq='M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [7]:
# A PeriodIndex stores a sequence of periods and can serve as the axis index
# of any pandas data structure.
pd.Series(data=np.random.randn(6), index=rng)

2000-01    0.557302
2000-02    1.054242
2000-03   -0.944905
2000-04    1.454510
2000-05   -0.151761
2000-06   -0.607588
Freq: M, dtype: float64

In [40]:
# Even plain strings can be turned into an index by PeriodIndex.
values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(data=values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

### Period Frequency Conversion
* 頻率轉換

In [6]:
# Start again from the calendar-year period 2007 for the conversion examples.
p = pd.Period(value='2007', freq='A-DEC')
p

Period('2007', 'A-DEC')

**asfreq**(freq, method=None, how=None, normalize=False, fill_value=None)

    how: {‘start’, ‘end’}, default ‘end’ — which edge of the original span the converted period is aligned to

In [10]:
# First day covered by the annual period ('M' is another target frequency to try).
p.asfreq(freq='D', how='start')

Period('2007-01-01', 'D')

In [9]:
# Last day covered by the annual period.
p.asfreq(freq='D', how='end')

Period('2007-12-31', 'D')

In [12]:
# An annual period whose year ends in June instead of December.
p = pd.Period(value='2007', freq='A-JUN')
p

Period('2007', 'A-JUN')

In [13]:
# First month of the June-ending year.
p.asfreq(freq='M', how='start')

Period('2006-07', 'M')

In [14]:
# Last month of the June-ending year.
p.asfreq(freq='M', how='end')

Period('2007-06', 'M')

In [15]:
# A monthly period for August 2007.
p = pd.Period(value='Aug-2007', freq='M')


In [16]:
# Converting to a lower frequency returns the containing period: August 2007
# falls inside the June-ending year labelled 2008.
p.asfreq(freq='A-JUN')

Period('2008', 'A-JUN')

In [17]:
# asfreq returned a new Period; p itself is still the monthly period.
p

Period('2007-08', 'M')

In [18]:
# Annual periods 2006 through 2009 indexing a series of random values.
rng = pd.period_range(start='2006', end='2009', freq='A-DEC')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts


2006   -0.671896
2007    0.128905
2008   -1.795061
2009   -0.587677
Freq: A-DEC, dtype: float64

In [19]:
# Relabel each annual period with the last month it contains; values are unchanged.
ts.asfreq(freq='M', how='end')

2006-12   -0.671896
2007-12    0.128905
2008-12   -1.795061
2009-12   -0.587677
Freq: M, dtype: float64

In [20]:
# Relabel each annual period with the last business day it contains.
ts.asfreq(freq='B', how='end')

2006-12-29   -0.671896
2007-12-31    0.128905
2008-12-31   -1.795061
2009-12-31   -0.587677
Freq: B, dtype: float64

### Quarterly Period Frequencies

In [21]:
# With 'Q-JAN' the fiscal year ends in January, so Q4 is the quarter ending in January.
p = pd.Period(value='2018Q3', freq='Q-JAN')
p

Period('2018Q3', 'Q-JAN')

In [22]:
# First calendar day of the fiscal quarter.
p.asfreq(freq='D', how='start')


Period('2017-08-01', 'D')

In [23]:
# Last calendar day of the fiscal quarter.
p.asfreq(freq='D', how='end')

Period('2017-10-31', 'D')

In [24]:
# 4 PM on the second-to-last business day of the quarter.
p4pm = (
    (p.asfreq('B', how='e') - 1)  # last business day of the quarter, minus one
    .asfreq('T', how='s')         # first minute of that business day
    + 16 * 60                     # move forward 16 hours' worth of minutes
)

p4pm

Period('2017-10-30 16:00', 'T')

In [25]:
# Convert the minute-level period to a Timestamp at the start of that minute.
p4pm.to_timestamp(how='start')

Timestamp('2017-10-30 16:00:00')

In [26]:
# Six January-ending fiscal quarters, numbered 0..5.
rng = pd.period_range(start='2011Q3', end='2012Q4', freq='Q-JAN')
ts = pd.Series(data=np.arange(rng.size), index=rng)
ts


2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int64

In [27]:
# Last month of each fiscal quarter in the range.
rng.asfreq(freq='M', how='e')

PeriodIndex(['2010-10', '2011-01', '2011-04', '2011-07', '2011-10', '2012-01'], dtype='period[M]', freq='M')

In [28]:
# 'T' is the minute frequency; 'e' and 's' select the end and the start.
# For every quarter: take its last business day, step back one business day,
# go to that day's first minute, then advance 16 * 60 minutes (4 PM).
new_rng = (
    (rng.asfreq('B', how='e') - 1)
    .asfreq('T', how='s')
    + 16 * 60
)
ts.index = new_rng.to_timestamp()
ts

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int64

### Converting Timestamps to Periods (and Back)

In [29]:
# Three month-end timestamps indexing a series of random values.
rng = pd.date_range(start='2000-01-01', periods=3, freq='M')
ts = pd.Series(data=np.random.randn(3), index=rng)
ts


2000-01-31   -0.566350
2000-02-29    0.288515
2000-03-31    0.645842
Freq: M, dtype: float64

**to_period**(freq=None, axis=0, copy=True)

Convert a Series or DataFrame from **DatetimeIndex to PeriodIndex** with the desired frequency (inferred from the index if not passed)

In [30]:
# With no frequency given, to_period infers it from the timestamp index.
pts = ts.to_period(freq=None)
pts

2000-01   -0.566350
2000-02    0.288515
2000-03    0.645842
Freq: M, dtype: float64

In [31]:
# Six consecutive days that straddle the January/February boundary.
rng = pd.date_range(start='1/29/2000', periods=6, freq='D')
rng

DatetimeIndex(['2000-01-29', '2000-01-30', '2000-01-31', '2000-02-01',
               '2000-02-02', '2000-02-03'],
              dtype='datetime64[ns]', freq='D')

In [32]:
# Six daily observations on the date range defined above.
ts2 = pd.Series(np.random.randn(6), index=rng)
# Converting to monthly periods maps several days onto the same period, so the
# resulting PeriodIndex contains repeated labels. (A bare `ts2` line was dropped:
# only a cell's last expression is displayed, so it had no effect.)
ts2.to_period('M')

2000-01    1.345836
2000-01   -0.788818
2000-01   -1.379283
2000-02    0.252171
2000-02    0.118469
2000-02    0.261861
Freq: M, dtype: float64

**to_timestamp**(freq=None, how='start', axis=0, copy=True)
* how : {‘s’, ‘e’, ‘start’, ‘end’}

In [33]:
# Round trip: infer the period frequency from the daily index, then convert the
# periods back to timestamps anchored at the end of each period. (A bare `pts`
# line was dropped: only a cell's last expression is displayed.)
pts = ts2.to_period()
pts.to_timestamp(how='end')

2000-01-29    1.345836
2000-01-30   -0.788818
2000-01-31   -1.379283
2000-02-01    0.252171
2000-02-02    0.118469
2000-02-03    0.261861
Freq: D, dtype: float64

PPT省略

### Creating a PeriodIndex from Arrays

In [35]:
# Read the macro dataset from the working directory and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='macrodata.csv')
data.head(n=5)


Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [36]:
# First ten values of the year column (stored as floats).
data['year'].head(10)

0    1959.0
1    1959.0
2    1959.0
3    1959.0
4    1960.0
5    1960.0
6    1960.0
7    1960.0
8    1961.0
9    1961.0
Name: year, dtype: float64

In [37]:
# First ten values of the quarter column (stored as floats).
data['quarter'].head(10)

0    1.0
1    2.0
2    3.0
3    4.0
4    1.0
5    2.0
6    3.0
7    4.0
8    1.0
9    2.0
Name: quarter, dtype: float64

In [38]:
# Combine the separate year and quarter columns into one quarterly PeriodIndex.
index2 = pd.PeriodIndex(
    year=data['year'],
    quarter=data['quarter'],
    freq='Q-DEC',
)
index2


PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [39]:
#infl是一個欄位
data.index = index2
data.infl[:10]

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
1960Q2    0.14
1960Q3    2.70
1960Q4    1.21
1961Q1   -0.40
1961Q2    1.47
Freq: Q-DEC, Name: infl, dtype: float64