In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 5x4 frame of standard-normal draws shifted down by 1.
# No seed is set, so the values differ on every run.
samples = np.random.randn(5, 4) - 1
df = pd.DataFrame(samples)
df

Unnamed: 0,0,1,2,3
0,-3.10422,-0.419795,-2.071506,-3.063148
1,0.125816,-1.960976,-1.315828,-1.210885
2,0.013444,-0.127939,-0.830398,1.220542
3,0.891796,-0.009239,-0.746392,0.15736
4,-1.495127,-0.042951,0.265528,-0.654582


# 一、apply和applymap

In [3]:
# apply hands each whole column (axis=0, the default) or each whole row
# (axis=1) to the function, rather than individual elements.
# Here every column is reduced to its maximum.
df.apply(lambda column: column.max(), axis=0)

0    0.891796
1   -0.009239
2    0.265528
3    1.220542
dtype: float64

In [4]:
# 2. Understanding axis

# np.nan is the canonical spelling of NaN (alias spellings such as np.NaN
# were removed in NumPy 2.0).
a = np.array([
    [1, np.nan, 3],
    [4, 5, 6],
    [7, 8, 9]
])

# axis=0: the operation runs across the array's first-level elements (the rows).
# axis=1: the operation runs across the second-level elements (the values
#         inside each row).

# Why does axis=0 mean "columns" for apply?
# Reducing along axis 0 compares the first-level elements position by
# position, which leaves one result per column.
a1 = [1, 2, 3]
a2 = [4, 8, 6]
a3 = [7, 3, 5]

# axis=0 is required here: without it np.max flattens the input and returns
# the single overall maximum (8) instead of the per-column maxima.
col_max = np.max([a1, a2, a3], axis=0)
col_max.tolist()  # [7, 8, 6]

# Why does axis=0 mean "rows" for df.dropna?
# There axis names what gets dropped: axis=0 removes rows that contain
# missing values.

[7, 8, 6]

In [5]:
# 3. applymap: runs the function on every individual element of the DataFrame.
# Note: pandas 2.1 deprecated applymap in favour of DataFrame.map.
df.applymap(lambda cell: cell * 10)

Unnamed: 0,0,1,2,3
0,-31.042196,-4.197952,-20.715063,-30.631484
1,1.258163,-19.60976,-13.158281,-12.108849
2,0.134438,-1.279389,-8.303982,12.205421
3,8.917955,-0.092387,-7.463918,1.573603
4,-14.951267,-0.429509,2.655283,-6.54582


# 二、排序

In [6]:
# 1. sort_values: order the rows by one column; ascending order is the default.
# Pass ascending=False to sort from largest to smallest instead.
df.sort_values(by=0, ascending=False)

Unnamed: 0,0,1,2,3
3,0.891796,-0.009239,-0.746392,0.15736
1,0.125816,-1.960976,-1.315828,-1.210885
2,0.013444,-0.127939,-0.830398,1.220542
4,-1.495127,-0.042951,0.265528,-0.654582
0,-3.10422,-0.419795,-2.071506,-3.063148


In [7]:
# 2. Sort by the row index labels, largest label first.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,0,1,2,3
4,-1.495127,-0.042951,0.265528,-0.654582
3,0.891796,-0.009239,-0.746392,0.15736
2,0.013444,-0.127939,-0.830398,1.220542
1,0.125816,-1.960976,-1.315828,-1.210885
0,-3.10422,-0.419795,-2.071506,-3.063148


# 三、算术和逻辑运算

In [8]:
# 1. Arithmetic: an operator with a scalar is applied to every element.
df * 10

Unnamed: 0,0,1,2,3
0,-31.042196,-4.197952,-20.715063,-30.631484
1,1.258163,-19.60976,-13.158281,-12.108849
2,0.134438,-1.279389,-8.303982,12.205421
3,8.917955,-0.092387,-7.463918,1.573603
4,-14.951267,-0.429509,2.655283,-6.54582


In [9]:
# 2. Logical operations: build one boolean mask per condition and combine
# them element-wise with &.
# Note: from here on `df` refers to the CSV data, not the random frame above.
df = pd.read_csv("data/stock_day.csv")
open_above_15 = df['open'] > 15
p_change_above_2 = df['p_change'] > 2
df[open_above_15 & p_change_above_2]

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2018-02-26,22.80,23.78,23.53,22.80,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.50,1.53
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58
2018-02-12,20.70,21.40,21.19,20.63,32445.39,0.82,4.03,21.504,22.338,23.533,44645.16,45679.94,68686.33,0.81
2018-02-05,22.45,23.39,23.27,22.25,52341.39,0.65,2.87,23.172,23.928,24.112,46714.72,69278.66,77070.00,1.31
2018-01-19,24.60,25.34,25.13,24.42,128449.11,0.53,2.15,24.432,24.254,23.537,91838.07,88985.70,82975.10,3.21
2018-01-16,23.40,24.60,24.40,23.30,101295.42,0.96,4.10,23.908,24.058,23.321,82003.73,101081.47,74590.92,2.54
2018-01-12,23.70,25.15,24.24,23.42,120303.53,0.56,2.37,24.076,23.748,23.236,86133.33,91838.46,69690.35,3.01
2018-01-04,22.79,25.07,25.07,22.51,130131.15,2.28,10.00,22.966,22.690,22.935,67939.35,59938.43,57071.47,3.26


In [10]:
# Filter on two column conditions at once, written as a query string.
condition = "p_change>2 & open > 15"
df.query(condition)

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2018-02-26,22.80,23.78,23.53,22.80,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.50,1.53
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58
2018-02-12,20.70,21.40,21.19,20.63,32445.39,0.82,4.03,21.504,22.338,23.533,44645.16,45679.94,68686.33,0.81
2018-02-05,22.45,23.39,23.27,22.25,52341.39,0.65,2.87,23.172,23.928,24.112,46714.72,69278.66,77070.00,1.31
2018-01-19,24.60,25.34,25.13,24.42,128449.11,0.53,2.15,24.432,24.254,23.537,91838.07,88985.70,82975.10,3.21
2018-01-16,23.40,24.60,24.40,23.30,101295.42,0.96,4.10,23.908,24.058,23.321,82003.73,101081.47,74590.92,2.54
2018-01-12,23.70,25.15,24.24,23.42,120303.53,0.56,2.37,24.076,23.748,23.236,86133.33,91838.46,69690.35,3.01
2018-01-04,22.79,25.07,25.07,22.51,130131.15,2.28,10.00,22.966,22.690,22.935,67939.35,59938.43,57071.47,3.26


In [11]:
# Select rows whose 'high' equals either of two values.
# Boolean-mask alternative: df[df['high'].isin([25.88, 17.98])]
high_condition = 'high == 25.88 | high == 17.98'
df.query(high_condition)

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2017-10-17,25.3,25.88,25.43,25.23,67649.41,0.24,0.95,25.908,26.734,26.542,89964.65,147619.06,159389.25,1.69
2017-08-31,24.16,25.88,25.62,23.78,259406.52,1.47,6.09,23.94,23.485,22.554,159122.44,189711.82,126717.47,6.49
2015-08-06,16.94,17.98,17.35,16.81,60609.0,-0.15,-0.86,17.098,18.587,20.783,84605.36,94500.89,116331.12,2.07
2015-04-07,16.54,17.98,17.54,16.5,122471.85,0.88,5.28,16.62,16.12,15.51,86769.62,97473.29,98832.94,4.19


# 四、统计和累计函数

In [12]:
# Sum of the 'volume' column over all rows.
volume = df['volume']
volume.sum()

64239248.78999999

In [13]:
# Running maximum of the 'volume' column, taken in the frame's row order.
volume = df['volume']
volume.cummax()

2018-02-27     95578.03
2018-02-26     95578.03
2018-02-23     95578.03
2018-02-22     95578.03
2018-02-14     95578.03
2018-02-13     95578.03
2018-02-12     95578.03
2018-02-09     95578.03
2018-02-08     95578.03
2018-02-07     95578.03
2018-02-06     95578.03
2018-02-05     95578.03
2018-02-02     95578.03
2018-02-01     95578.03
2018-01-31     95578.03
2018-01-30     95578.03
2018-01-29     95578.03
2018-01-26     95578.03
2018-01-25    104097.59
2018-01-24    134838.00
2018-01-23    134838.00
2018-01-22    134838.00
2018-01-19    134838.00
2018-01-18    134838.00
2018-01-17    134838.00
2018-01-16    134838.00
2018-01-15    134838.00
2018-01-12    134838.00
2018-01-11    134838.00
2018-01-10    134838.00
                ...    
2015-04-13    501915.41
2015-04-10    501915.41
2015-04-09    501915.41
2015-04-08    501915.41
2015-04-07    501915.41
2015-04-03    501915.41
2015-04-02    501915.41
2015-04-01    501915.41
2015-03-31    501915.41
2015-03-30    501915.41
2015-03-27    50