# pandas中的时间序列

Python 中 time、datetime、timedelta 的复习<br>
date_range<br>
set_index<br>
resample<br>

## time datetime timedelta复习

timedelta 表示两个时间点之间的间隔（即时间差）。

### 1. time

In [1]:
import time

In [2]:
time.time()       # current timestamp: seconds elapsed since the start of 1970 (the Unix epoch), as a float

1534237517.760913

In [3]:
time.localtime(1534066261)           # convert a timestamp into a readable struct_time in the local time zone

time.struct_time(tm_year=2018, tm_mon=8, tm_mday=12, tm_hour=17, tm_min=31, tm_sec=1, tm_wday=6, tm_yday=224, tm_isdst=0)

In [4]:
time.localtime(time.time())             # struct_time for the current moment

time.struct_time(tm_year=2018, tm_mon=8, tm_mday=14, tm_hour=17, tm_min=5, tm_sec=17, tm_wday=1, tm_yday=226, tm_isdst=0)

In [5]:
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))        # strftime() renders a struct_time as a string in the given format

'2018-08-14 17:05:17'

In [6]:
# Date part only: the format string decides which fields appear in the result.
time.strftime('%Y-%m-%d', time.localtime(time.time()))

'2018-08-14'

In [7]:
# Time-of-day part only.
time.strftime('%H:%M:%S', time.localtime(time.time()))

'17:05:17'

In [8]:
# strptime() is the inverse of strftime(): it parses a string into a
# struct_time according to the given format.
local = time.strptime('2018-08-12 17:34:04', '%Y-%m-%d %H:%M:%S')
local

time.struct_time(tm_year=2018, tm_mon=8, tm_mday=12, tm_hour=17, tm_min=34, tm_sec=4, tm_wday=6, tm_yday=224, tm_isdst=-1)

In [9]:
time.mktime(local)   # struct_time (interpreted as local time) -> timestamp in seconds

1534066444.0

In [10]:
# Fields that the format string does not mention (here minutes and seconds)
# are filled in as 0.
local2 = time.strptime('2018-08-12 17', '%Y-%m-%d %H')
local2

time.struct_time(tm_year=2018, tm_mon=8, tm_mday=12, tm_hour=17, tm_min=0, tm_sec=0, tm_wday=6, tm_yday=224, tm_isdst=-1)

In [11]:
time.mktime(local2)   # timestamp of 17:00:00 on that day, since minutes and seconds are 0

1534064400.0

### 2. datetime

In [12]:
from datetime import datetime

In [13]:
# Current local date and time as a datetime object (no time zone attached).
now = datetime.now()
now

datetime.datetime(2018, 8, 14, 17, 5, 17, 885396)

In [14]:
# Individual calendar fields are available as plain integer attributes.
now.year, now.month, now.day

(2018, 8, 14)

In [15]:
# datetime objects provide strftime() directly, with the same format codes
# as time.strftime().
now.strftime('%Y-%m-%d %H:%M:%S')

'2018-08-14 17:05:17'

In [16]:
now.strftime('%Y-%m-%d')

'2018-08-14'

In [17]:
now.strftime('%H:%M:%S')

'17:05:17'

In [18]:
# Parse a string straight into a datetime (counterpart of time.strptime()).
datetime.strptime('2018-08-12 17:39:50', '%Y-%m-%d %H:%M:%S')

datetime.datetime(2018, 8, 12, 17, 39, 50)

In [19]:
# Build a datetime from explicit fields; omitted time fields default to
# midnight (00:00).
datetime(2018, 8, 12)

datetime.datetime(2018, 8, 12, 0, 0)

In [20]:
now.timestamp()         # convert to a timestamp (float seconds since the epoch)

1534237517.885396

In [21]:
# Inverse of timestamp(): build a local datetime from a timestamp; the
# fractional part becomes microseconds.
datetime.fromtimestamp(1534066790.32124)

datetime.datetime(2018, 8, 12, 17, 39, 50, 321240)

### 3. timedelta

In [22]:
# Subtracting two datetimes yields a timedelta, stored as
# (days, seconds, microseconds).
delta = datetime.now() - datetime(2017, 4, 25)
delta

datetime.timedelta(days=476, seconds=61517, microseconds=973284)

In [23]:
delta.days   # whole-day component of the interval

476

In [24]:
# Seconds component only, i.e. what remains after the whole days are removed.
# It is NOT the total length; use delta.total_seconds() for that.
delta.seconds

61517

In [25]:
delta.microseconds   # sub-second component of the interval

973284

## date_range

In [26]:
import pandas as pd
import numpy as np

In [27]:
# With only a start and an end, date_range() produces one entry per calendar
# day (freq='D'), including both endpoints.
pd.date_range('2018-08-12', '2018-09-25')

DatetimeIndex(['2018-08-12', '2018-08-13', '2018-08-14', '2018-08-15',
               '2018-08-16', '2018-08-17', '2018-08-18', '2018-08-19',
               '2018-08-20', '2018-08-21', '2018-08-22', '2018-08-23',
               '2018-08-24', '2018-08-25', '2018-08-26', '2018-08-27',
               '2018-08-28', '2018-08-29', '2018-08-30', '2018-08-31',
               '2018-09-01', '2018-09-02', '2018-09-03', '2018-09-04',
               '2018-09-05', '2018-09-06', '2018-09-07', '2018-09-08',
               '2018-09-09', '2018-09-10', '2018-09-11', '2018-09-12',
               '2018-09-13', '2018-09-14', '2018-09-15', '2018-09-16',
               '2018-09-17', '2018-09-18', '2018-09-19', '2018-09-20',
               '2018-09-21', '2018-09-22', '2018-09-23', '2018-09-24',
               '2018-09-25'],
              dtype='datetime64[ns]', freq='D')

In [28]:
# freq='W' yields weekly dates anchored on Sundays (reported as 'W-SUN').
pd.date_range('2018-08-12', '2018-09-25', freq='W')

DatetimeIndex(['2018-08-12', '2018-08-19', '2018-08-26', '2018-09-02',
               '2018-09-09', '2018-09-16', '2018-09-23'],
              dtype='datetime64[ns]', freq='W-SUN')

In [29]:
# freq='M' yields month-END dates, so only 2018-08-31 lies inside the range
# (2018-09-30 is past the end date).
pd.date_range('2018-08-12', '2018-09-25', freq='M')

DatetimeIndex(['2018-08-31'], dtype='datetime64[ns]', freq='M')

In [30]:
# Quarterly dates fall on quarter ends; none lies between the two dates, so
# the result is empty.
pd.date_range('2018-08-12', '2018-09-25', freq='Q')         # D-day  W-week  M-month  Q-quarter  A-year  H-hour  T-minute  S-second

DatetimeIndex([], dtype='datetime64[ns]', freq='Q-DEC')

In [31]:
# Instead of an end date, give the number of entries: ten consecutive weekly
# dates beginning at the start date.
pd.date_range('2018-08-12', freq='W', periods=10)

DatetimeIndex(['2018-08-12', '2018-08-19', '2018-08-26', '2018-09-02',
               '2018-09-09', '2018-09-16', '2018-09-23', '2018-09-30',
               '2018-10-07', '2018-10-14'],
              dtype='datetime64[ns]', freq='W-SUN')

In [32]:
# Build a synthetic CPU-load table: one sample per minute starting at
# 2018-08-12 00:00, with values drawn from a normal distribution centred on 10.
n_samples = 200000
data = {
    # 'min' is the minute alias understood by both old and current pandas;
    # the single-letter 'T' alias is deprecated in recent releases.
    'time': pd.date_range('2018-08-12', freq='min', periods=n_samples),
    'cpu': np.random.randn(n_samples) + 10,
}
# Passing columns= fixes the column order explicitly.
df = pd.DataFrame(data, columns=['time', 'cpu'])

In [33]:
df.head()

Unnamed: 0,time,cpu
0,2018-08-12 00:00:00,9.626151
1,2018-08-12 00:01:00,9.91764
2,2018-08-12 00:02:00,10.138593
3,2018-08-12 00:03:00,10.786513
4,2018-08-12 00:04:00,8.950595


In [34]:
df.tail()

Unnamed: 0,time,cpu
199995,2018-12-28 21:15:00,10.657856
199996,2018-12-28 21:16:00,12.131958
199997,2018-12-28 21:17:00,10.368931
199998,2018-12-28 21:18:00,8.781899
199999,2018-12-28 21:19:00,10.1036


In [35]:
# Keep the rows whose timestamp lies in the closed interval
# [08:00:00, 08:10:00]; between() includes both endpoints by default.
window_start = '2018-08-12 08:00:00'
window_end = '2018-08-12 08:10:00'
df[df.time.between(window_start, window_end)]

Unnamed: 0,time,cpu
480,2018-08-12 08:00:00,8.98485
481,2018-08-12 08:01:00,8.927575
482,2018-08-12 08:02:00,11.04837
483,2018-08-12 08:03:00,10.352017
484,2018-08-12 08:04:00,9.881482
485,2018-08-12 08:05:00,8.936678
486,2018-08-12 08:06:00,9.411199
487,2018-08-12 08:07:00,10.781394
488,2018-08-12 08:08:00,10.523912
489,2018-08-12 08:09:00,9.546083


## set_index

In [36]:
# set_index() returns a new DataFrame that uses the "time" column as its
# index; df itself is left unchanged.
df2 = df.set_index("time")
df2.head()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 00:00:00,9.626151
2018-08-12 00:01:00,9.91764
2018-08-12 00:02:00,10.138593
2018-08-12 00:03:00,10.786513
2018-08-12 00:04:00,8.950595


`set_index("time")` 等效于下面几个步骤（先把 time 列设为索引，再删除原来的 time 列）：

In [37]:
# Manual way to index by time: convert the column to datetimes and assign
# it as the index (the column itself still remains in df).
s = pd.to_datetime(df.time)
s.head()   # not the last expression of the cell, so its value is not displayed
df.index = s

In [38]:
df.head()   # time now appears twice: once as the index and once as a column

Unnamed: 0_level_0,time,cpu
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-08-12 00:00:00,2018-08-12 00:00:00,9.626151
2018-08-12 00:01:00,2018-08-12 00:01:00,9.91764
2018-08-12 00:02:00,2018-08-12 00:02:00,10.138593
2018-08-12 00:03:00,2018-08-12 00:03:00,10.786513
2018-08-12 00:04:00,2018-08-12 00:04:00,8.950595


In [39]:
# Drop the time column, which now duplicates the index; the result has the
# same layout as df.set_index("time").
df = df.drop('time', axis=1)
df.head()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 00:00:00,9.626151
2018-08-12 00:01:00,9.91764
2018-08-12 00:02:00,10.138593
2018-08-12 00:03:00,10.786513
2018-08-12 00:04:00,8.950595


In [40]:
# With a DatetimeIndex, rows can be sliced directly by timestamp strings;
# label-based slices include both endpoints.
df['2018-08-12 08:00:00':'2018-08-12 08:10:00']

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 08:00:00,8.98485
2018-08-12 08:01:00,8.927575
2018-08-12 08:02:00,11.04837
2018-08-12 08:03:00,10.352017
2018-08-12 08:04:00,9.881482
2018-08-12 08:05:00,8.936678
2018-08-12 08:06:00,9.411199
2018-08-12 08:07:00,10.781394
2018-08-12 08:08:00,10.523912
2018-08-12 08:09:00,9.546083


In [41]:
# Partial-string indexing: a date without a time selects every row of that
# day. Row selection must go through .loc; df['2018-08-12'] is treated as a
# column lookup by current pandas and raises KeyError.
df2 = df.loc['2018-08-12']
df2.head()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 00:00:00,9.626151
2018-08-12 00:01:00,9.91764
2018-08-12 00:02:00,10.138593
2018-08-12 00:03:00,10.786513
2018-08-12 00:04:00,8.950595


In [42]:
df2.tail()   # the last rows show the selection ends at 23:59 of the same day

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 23:55:00,8.19222
2018-08-12 23:56:00,11.100015
2018-08-12 23:57:00,10.252504
2018-08-12 23:58:00,10.293859
2018-08-12 23:59:00,9.66642


In [43]:
# Mean per calendar day: df.index.date supplies one date per row, which is
# used as the group key.
df2 = df.groupby(df.index.date).mean()
print(df2.head())
print(df2.tail())

                  cpu
2018-08-12   9.982413
2018-08-13   9.991605
2018-08-14  10.024788
2018-08-15  10.042854
2018-08-16  10.021630
                  cpu
2018-12-24   9.971736
2018-12-25  10.033450
2018-12-26  10.003885
2018-12-27  10.003722
2018-12-28   9.987678


In [44]:
# Mean per hour of day (0-23), pooled across all days in the data.
df2 = df.groupby(df.index.hour).mean()
print(df2.head())
print(df2.tail())

            cpu
time           
0     10.007725
1      9.996076
2      9.984526
3     10.017543
4      9.990612
            cpu
time           
19    10.002742
20     9.974482
21     9.972723
22    10.006801
23     9.997885


In [45]:
# Mean per ISO week number. DatetimeIndex.week has been removed from pandas;
# isocalendar().week provides the same ISO week numbers.
# The group key, and therefore the result's index, is named 'week'.
df2 = df.groupby(df.index.isocalendar().week).mean()
print(df2.head())
print(df2.tail())

            cpu
time           
32     9.982413
33    10.014457
34     9.977128
35     9.993188
36     9.994639
            cpu
time           
48    10.004162
49     9.989295
50    10.005922
51     9.986940
52    10.000376


## resample

In [46]:
# Resample into 90-second bins and average the samples in each bin.
# Lower-case 's' is used because the upper-case 'S' alias is deprecated in
# recent pandas releases.
df2 = df.resample('90s').mean()

In [47]:
df2.head()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 00:00:00,9.771895
2018-08-12 00:01:30,10.138593
2018-08-12 00:03:00,9.868554
2018-08-12 00:04:30,9.882623
2018-08-12 00:06:00,10.73773


In [48]:
# Maximum CPU value within each 5-minute bin. 'min' replaces the
# single-letter 'T' alias, which recent pandas releases deprecate.
df2 = df.resample('5min').max()
df2.head()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2018-08-12 00:00:00,10.786513
2018-08-12 00:05:00,12.005524
2018-08-12 00:10:00,11.940386
2018-08-12 00:15:00,10.571349
2018-08-12 00:20:00,12.368275
