In [1]:
import pandas as pd 
import numpy as np

In [7]:
# Data preparation: a 7-day daily index and four columns of normally
# distributed noise scaled by 30.
# The noise comes from a Generator with a fixed seed so that a fresh
# "Restart Kernel -> Run All" rebuilds exactly the same frame every time.
dates = pd.date_range("20220301", periods=7)
df = pd.DataFrame(
    30 * np.random.default_rng(0).standard_normal((7, 4)),
    index=dates,
    columns=["open", "low", "high", "close"],
)
df

Unnamed: 0,open,low,high,close
2022-03-01,31.811979,7.377107,-69.565457,-14.170258
2022-03-02,18.112216,-89.80777,75.537749,40.973986
2022-03-03,19.72118,19.468085,14.357835,2.911148
2022-03-04,8.957858,34.526111,-8.373342,1.331443
2022-03-05,-30.004508,-10.666025,-18.321218,48.449892
2022-03-06,4.9589,-51.840751,-19.060248,14.453452
2022-03-07,36.426164,53.005507,-10.406315,12.581437


In [10]:
# Mean of every column: axis=0 collapses the rows, leaving one value per column.
df.mean(axis=0)

open     12.854827
low      -5.419677
high     -5.118714
close    15.218729
dtype: float64

In [11]:
# Mean of every row: axis=1 collapses the columns, leaving one value per index label.
df.mean(axis=1)

2022-03-01   -11.136657
2022-03-02    11.204045
2022-03-03    14.114562
2022-03-04     9.110517
2022-03-05    -2.635465
2022-03-06   -12.872162
2022-03-07    22.901698
Freq: D, dtype: float64

In [14]:
# Broadcasting and alignment.
# Build a Series on the same date index, then shift(2) moves every value two
# positions later along the index; the two vacated leading slots become NaN.
s = pd.Series([11, 23, 15, np.nan, 26, 18, 17], index=dates)
s = s.shift(periods=2)
s

2022-03-01     NaN
2022-03-02     NaN
2022-03-03    11.0
2022-03-04    23.0
2022-03-05    15.0
2022-03-06     NaN
2022-03-07    26.0
Freq: D, dtype: float64

In [16]:
# Subtract s from every column: s is matched to the frame by row label and
# broadcast across the columns, so any row where s is NaN becomes all-NaN.
df.sub(s, axis=0)

Unnamed: 0,open,low,high,close
2022-03-01,,,,
2022-03-02,,,,
2022-03-03,8.72118,8.468085,3.357835,-8.088852
2022-03-04,-14.042142,11.526111,-31.373342,-21.668557
2022-03-05,-45.004508,-25.666025,-33.321218,33.449892
2022-03-06,,,,
2022-03-07,10.426164,27.005507,-36.406315,-13.418563


In [35]:
# apply(): run a function over each column of the frame.

# Running (cumulative) total down each column.
df.apply(np.cumsum, axis=0)

Unnamed: 0,open,low,high,close
2022-03-01,31.811979,7.377107,-69.565457,-14.170258
2022-03-02,49.924195,-82.430663,5.972292,26.803728
2022-03-03,69.645375,-62.962578,20.330127,29.714876
2022-03-04,78.603233,-28.436468,11.956785,31.046319
2022-03-05,48.598725,-39.102492,-6.364433,79.496211
2022-03-06,53.557625,-90.943244,-25.424681,93.949663
2022-03-07,89.983789,-37.937737,-35.830996,106.5311


In [36]:
# Spread of each column: its largest value minus its smallest value.
df.apply(lambda column: column.max() - column.min(), axis=0)

open      66.430672
low      142.813277
high     145.103206
close     62.620150
dtype: float64

In [37]:
# Counting occurrences (histogramming).
# Ten integers drawn from the half-open range [2, 6). A Generator with a fixed
# seed is used so the sample is identical on every run of the notebook.
s = pd.Series(np.random.default_rng(1).integers(2, 6, size=10))
s


4    4
5    3
3    3
dtype: int64

In [38]:
# Tally how often each distinct value occurs, most frequent first.
s.value_counts(sort=True, ascending=False)

4    4
5    3
3    3
dtype: int64

In [40]:
# String methods: the .str accessor applies the operation element by element,
# and the missing entry stays NaN in the result.
s = pd.Series(data=["ABC", "D", "a", np.nan, "cAt"])
s.str.lower()

0    abc
1      d
2      a
3    NaN
4    cat
dtype: object

In [41]:
# Combining frames
# concat: display the current frame before it is cut into pieces
df

Unnamed: 0,open,low,high,close
2022-03-01,31.811979,7.377107,-69.565457,-14.170258
2022-03-02,18.112216,-89.80777,75.537749,40.973986
2022-03-03,19.72118,19.468085,14.357835,2.911148
2022-03-04,8.957858,34.526111,-8.373342,1.331443
2022-03-05,-30.004508,-10.666025,-18.321218,48.449892
2022-03-06,4.9589,-51.840751,-19.060248,14.453452
2022-03-07,36.426164,53.005507,-10.406315,12.581437


In [42]:
# Cut the frame into three positional row slices.
# NOTE(review): position 3 falls in none of the slices, so that row is absent
# from `pieces` and from anything built from it - confirm the gap is intended.
pieces = [df.iloc[:3], df.iloc[4:6], df.iloc[6:]]
pieces

[                 open        low       high      close
 2022-03-01  31.811979   7.377107 -69.565457 -14.170258
 2022-03-02  18.112216 -89.807770  75.537749  40.973986
 2022-03-03  19.721180  19.468085  14.357835   2.911148,
                  open        low       high      close
 2022-03-05 -30.004508 -10.666025 -18.321218  48.449892
 2022-03-06   4.958900 -51.840751 -19.060248  14.453452,
                  open        low       high      close
 2022-03-07  36.426164  53.005507 -10.406315  12.581437]

In [43]:
# Glue the pieces back together row-wise, in list order.
pd.concat(pieces, axis=0)

Unnamed: 0,open,low,high,close
2022-03-01,31.811979,7.377107,-69.565457,-14.170258
2022-03-02,18.112216,-89.80777,75.537749,40.973986
2022-03-03,19.72118,19.468085,14.357835,2.911148
2022-03-05,-30.004508,-10.666025,-18.321218,48.449892
2022-03-06,4.9589,-51.840751,-19.060248,14.453452
2022-03-07,36.426164,53.005507,-10.406315,12.581437


In [51]:
# join: two small frames sharing a "key" column, built row by row.
left = pd.DataFrame(
    [
        {"key": "foo", "lval": 1},
        {"key": "foo", "lval": 2},
    ]
)
right = pd.DataFrame(
    [
        {"key": "foo1", "rval": 4},
        {"key": "foo", "rval": 5},
    ]
)
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [52]:
# Display the right-hand frame.
right

Unnamed: 0,key,rval
0,foo1,4
1,foo,5


In [53]:
# Inner join on "key": only key values present in both frames are kept, and
# every matching left row is paired with every matching right row.
left.merge(right, on="key", how="inner")

Unnamed: 0,key,lval,rval
0,foo,1,5
1,foo,2,5


In [54]:
# Grouping: two label columns (A, B) and two numeric columns (C, D) to aggregate.
# C and D are normal noise scaled by 100, drawn from a Generator with a fixed
# seed so the frame - and every aggregate computed from it - is reproducible.
rng = np.random.default_rng(2)
df = pd.DataFrame(
    {
        "A": ["appl", "goog", "appl", "goog", "appl", "appl", "goog"],
        "B": ["open", "close", "high", "open", "high", "low", "open"],
        "C": rng.standard_normal(7) * 100,
        "D": rng.standard_normal(7) * 100,
    }
)
df

Unnamed: 0,A,B,C,D
0,appl,open,86.56035,-93.302707
1,goog,close,-121.744652,25.346452
2,appl,high,-82.536901,-51.18918
3,goog,open,18.309978,-192.876883
4,appl,high,-0.658158,109.541893
5,appl,low,-1.86848,82.089713
6,goog,open,40.066798,3.860949


In [55]:
# Sum the numeric columns within each group of A.
# C and D are selected explicitly: B holds strings, and since pandas 2.0
# GroupBy.sum() no longer drops non-numeric columns on its own, so without the
# selection B would be aggregated too instead of being left out.
df.groupby("A")[["C", "D"]].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
appl,1.496811,47.139718
goog,-63.367876,-163.669481


In [56]:
# Group on the (A, B) pair and sum the remaining columns; the result is
# indexed by a two-level (A, B) row index.
df.groupby(by=["A", "B"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
appl,high,-83.195059,58.352713
appl,low,-1.86848,82.089713
appl,open,86.56035,-93.302707
goog,close,-121.744652,25.346452
goog,open,58.376776,-189.015933


In [65]:
# Stack

# Every (stock, price-kind) pair, stock-major: each stock is listed with
# "open" first and "close" second.
tuples = [
    (stock, kind)
    for stock in ("appl", "goog", "bili")
    for kind in ("open", "close")
]
tuples

[('appl', 'open'),
 ('appl', 'close'),
 ('goog', 'open'),
 ('goog', 'close'),
 ('bili', 'open'),
 ('bili', 'close')]

In [66]:
# Turn the pairs into a two-level row index whose levels are named
# "stock" (outer) and "price" (inner).
index = pd.MultiIndex.from_tuples(
    tuples=tuples,
    names=["stock", "price"],
)
index

MultiIndex([('appl',  'open'),
            ('appl', 'close'),
            ('goog',  'open'),
            ('goog', 'close'),
            ('bili',  'open'),
            ('bili', 'close')],
           names=['stock', 'price'])

In [67]:
# Six rows (one per index entry) by two columns of normal noise scaled by 100.
# The noise comes from a Generator with a fixed seed so that the stack/unstack
# results built from this frame are the same on every run.
df = pd.DataFrame(
    np.random.default_rng(3).standard_normal((6, 2)) * 100,
    index=index,
    columns=["price1", "price2"],
)

In [68]:
# Keep only the first four rows, selected by position.
df2 = df.iloc[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,price1,price2
stock,price,Unnamed: 2_level_1,Unnamed: 3_level_1
appl,open,97.332941,-83.064304
appl,close,99.859753,67.755965
goog,open,251.001866,-158.36232
goog,close,-82.1266,99.277345


In [69]:
# stack(): fold the column labels into a new innermost index level, turning
# the two-column frame into a single Series.
stacked = df2.stack(level=-1)
stacked

stock  price        
appl   open   price1     97.332941
              price2    -83.064304
       close  price1     99.859753
              price2     67.755965
goog   open   price1    251.001866
              price2   -158.362320
       close  price1    -82.126600
              price2     99.277345
dtype: float64

In [70]:
# unstack() on the innermost level (the default) moves those labels back out
# to the columns.
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,price1,price2
stock,price,Unnamed: 2_level_1,Unnamed: 3_level_1
appl,close,99.859753,67.755965
appl,open,97.332941,-83.064304
goog,close,-82.1266,99.277345
goog,open,251.001866,-158.36232


In [73]:
# unstack(level=0) moves the outermost index level to the columns instead,
# leaving the other levels as the row index.
stacked.unstack(level=0)

Unnamed: 0_level_0,stock,appl,goog
price,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
close,price1,99.859753,-82.1266
close,price2,67.755965,99.277345
open,price1,97.332941,251.001866
open,price2,-83.064304,-158.36232
