In [1]:
# pandasの練習
import pandas as pd

# Source rows for the frame: a 3x3 nested list holding 0..8 in row-major
# order, i.e. [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
data = [list(range(start, start + 3)) for start in (0, 3, 6)]

# Build a DataFrame from the nested list (one inner list per row)
dfA = pd.DataFrame(data=data, columns=['a1', 'a2', 'a3'])
dfA

Unnamed: 0,a1,a2,a3
0,0,1,2
1,3,4,5
2,6,7,8


In [2]:
# Add a column in place: a4 is the row-wise product of a2 and a3
dfA['a4'] = dfA['a2'] * dfA['a3']
dfA

Unnamed: 0,a1,a2,a3,a4
0,0,1,2,2
1,3,4,5,20
2,6,7,8,56


In [3]:
# Add a constant column; assign() returns a new frame, so dfA itself is unchanged
dfA.assign(**{'a5': False})

Unnamed: 0,a1,a2,a3,a4,a5
0,0,1,2,2,False
1,3,4,5,20,False
2,6,7,8,56,False


In [4]:
# Select rows by condition: keep only the rows whose a2 is even
dfA.loc[(dfA['a2'] % 2) == 0]

Unnamed: 0,a1,a2,a3,a4
1,3,4,5,20


In [5]:
# Add columns computed from conditions (the result is a new frame)
dfA.assign(**{
    # boolean flag: is a1 even?
    'a1IsEven': (dfA['a1'] % 2) == 0,
    # the same kind of test on a2, stored as 0/1 instead of False/True
    'a2IsEven': ((dfA['a2'] % 2) == 0).astype(int),
    # boolean flag: is the product a1 * a3 a multiple of 6?
    'a1a3Is6の倍数': ((dfA['a1'] * dfA['a3']) % 6) == 0,
})

Unnamed: 0,a1,a2,a3,a4,a1IsEven,a2IsEven,a1a3Is6の倍数
0,0,1,2,2,True,0,True
1,3,4,5,20,False,1,False
2,6,7,8,56,True,0,True


In [6]:
# Shift by -1: every row takes a1 from the following row; the last row has
# no successor and becomes NaN, which is why the column is shown as float64
dfA.shift(periods=-1)['a1']

0    3.0
1    6.0
2    NaN
Name: a1, dtype: float64

In [7]:
# Add shifted copies of existing columns (the result is a new frame):
#   a5 = a3 taken from the next row     (shift by -1)
#   a6 = a4 taken from the previous row (shift by +1)
dfA.assign(**{
    'a5': dfA['a3'].shift(periods=-1),
    'a6': dfA['a4'].shift(periods=1),
})

Unnamed: 0,a1,a2,a3,a4,a5,a6
0,0,1,2,2,5.0,
1,3,4,5,20,8.0,2.0
2,6,7,8,56,,20.0


In [8]:
# Add several columns at once: the row-wise apply returns a Series per row,
# which pandas expands into the columns a7 and a8; concat then places them
# to the right of the original columns
pd.concat(
    [
        dfA,
        dfA.apply(
            lambda row: pd.Series({'a7': row['a1'], 'a8': row['a2']}),
            axis=1,
        ),
    ],
    axis=1,
)

Unnamed: 0,a1,a2,a3,a4,a7,a8
0,0,1,2,2,0,1
1,3,4,5,20,3,4
2,6,7,8,56,6,7


In [9]:
# Second frame: b1 counts 0..3 and b2 is 1 in every row
dfB = pd.DataFrame(
    data=[[i, 1] for i in range(4)],
    columns=['b1', 'b2'],
)

In [10]:
# Display dfA again: it still has only a1..a4, because the assign()/concat()
# results above were displayed without being stored back into dfA
dfA

Unnamed: 0,a1,a2,a3,a4
0,0,1,2,2
1,3,4,5,20
2,6,7,8,56


In [11]:
# Display dfB (4 rows, columns b1 and b2)
dfB

Unnamed: 0,b1,b2
0,0,1
1,1,1
2,2,1
3,3,1


In [12]:
# Operations involving two DataFrames
# Warm-up with a plain row-wise apply: pick a2 and a4 out of every row
dfA.apply(lambda row1: row1.loc[['a2', 'a4']], axis=1)

Unnamed: 0,a2,a4
0,1,2
1,4,20
2,7,56


In [13]:
# Nested apply: for every row of dfA, walk over every row of dfB.
# Result cell (i, j) holds dfA.a1[i] * dfB.b1[j]
dfA.apply(
    lambda row1: dfB.apply(lambda row2: row1['a1'] * row2['b1'], axis=1),
    axis=1,
)

Unnamed: 0,0,1,2,3
0,0,0,0,0
1,0,3,6,9
2,0,6,12,18


In [14]:
# Another nested apply.
# Result cell (i, j) holds dfA.a1[i] * dfB.b1[j] + dfB.b2[j]
dfA.apply(
    lambda row1: dfB.apply(
        lambda row2: row1['a1'] * row2['b1'] + row2['b2'],
        axis=1,
    ),
    axis=1,
)

Unnamed: 0,0,1,2,3
0,1,1,1,1
1,1,4,7,10
2,1,7,13,19


In [15]:
# Using a user-defined function
# A function that produces two return values
def func(row1):
    """Return two totals for one row of dfA, accumulated over the rows of dfB.

    Reads the module-level frame ``dfB`` and runs one row-wise apply over it
    per total.

    Parameters
    ----------
    row1 : pandas.Series
        One row of dfA; only its ``a1`` entry is read.

    Returns
    -------
    pandas.Series
        Two entries with the default labels 0 and 1:
        entry 0 is the sum over dfB of ``row1.a1 * b1``,
        entry 1 is the sum over dfB of ``row1.a1 * b1 + b2``.
    """
    def scaled(row2):
        return row1.a1 * row2.b1

    def scaled_plus_b2(row2):
        return row1.a1 * row2.b1 + row2.b2

    total_scaled = sum(dfB.apply(scaled, axis=1))
    total_scaled_plus_b2 = sum(dfB.apply(scaled_plus_b2, axis=1))
    return pd.Series([total_scaled, total_scaled_plus_b2])

# Run func once per row of dfA; the Series it returns becomes that row's columns
dfA.apply(func, axis='columns')

Unnamed: 0,0,1
0,0,4
1,18,22
2,36,40


In [16]:
# Using a user-defined function
# Both totals come from whole-column arithmetic on dfB, so no per-row
# dfB.apply call is needed at all (fewer apply calls, hence faster)
# Returns a Series
def func1(row1):
    """Return two totals for one row of dfA, accumulated over the rows of dfB.

    The earlier implementation returned a two-element list from
    ``dfB.apply(..., axis=1)`` and summed the result column-wise. That relies
    on pandas expanding a list whose length equals the number of columns into
    a DataFrame, which pandas stopped doing in 0.23. Since then the same call
    yields a Series of lists, and summing it concatenates the lists, so the
    function returned 2 * len(dfB) raw values instead of two totals.

    Reads the module-level frame ``dfB``.

    Parameters
    ----------
    row1 : pandas.Series
        One row of dfA; only its ``a1`` entry is read.

    Returns
    -------
    pandas.Series
        Two entries labelled ``'b1'`` and ``'b2'`` (the labels shown in the
        recorded output of this cell):
        ``'b1'`` is the sum over dfB of ``row1.a1 * b1``,
        ``'b2'`` is the sum over dfB of ``row1.a1 * b1 + b2``.
        Both are 0 when dfB has no rows.
    """
    # Computed once and reused for both totals
    scaled_b1 = row1.a1 * dfB['b1']
    return pd.Series(
        [scaled_b1.sum(), (scaled_b1 + dfB['b2']).sum()],
        index=['b1', 'b2'])

# Run func1 once per row of dfA; the Series it returns becomes that row's columns
dfA.apply(func1, axis='columns')

Unnamed: 0,b1,b2
0,0,4
1,18,22
2,36,40


In [17]:
# Progress-bar variant of the row-wise apply of func1.
# NOTE(review): notebook imports are usually collected in the first cell.
from tqdm import tqdm
# Register progress_apply on pandas objects; desc is the label printed before the bar
tqdm.pandas(desc="bar!")
# Same call shape as dfA.apply(func1, axis=1), with a progress bar over the rows of dfA
dfA.progress_apply(func1, axis=1)

bar!: 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 427.51it/s]


Unnamed: 0,b1,b2
0,0,4
1,18,22
2,36,40
