# Operations and Apply Functions

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Reproducible 5x4 frame of random floats; the fixed seed keeps the values
# shown below stable across Restart & Run All.
df = pd.DataFrame(
    data=np.random.default_rng(seed=42).random(size=(5, 4)),
    columns=["A", "B", "C", "D"],
)
df

Unnamed: 0,A,B,C,D
0,0.773956,0.438878,0.858598,0.697368
1,0.094177,0.975622,0.76114,0.786064
2,0.128114,0.450386,0.370798,0.926765
3,0.643865,0.822762,0.443414,0.227239
4,0.554585,0.063817,0.827631,0.631664


In [3]:
# Smaller 2x2 frame sharing only columns "A"/"B" and rows 0/1 with `df`;
# a different fixed seed gives distinct but reproducible values.
df2 = pd.DataFrame(
    data=np.random.default_rng(seed=43).random(size=(2, 2)),
    columns=["A", "B"],
)
df2

Unnamed: 0,A,B
0,0.652299,0.043775
1,0.02003,0.839213


## Arithmetic Operations

In [4]:
# Binary operators align both operands on row index and column labels;
# any label present in only one frame yields NaN in the result.
df + df2

Unnamed: 0,A,B,C,D
0,1.426255,0.482654,,
1,0.114207,1.814835,,
2,,,,
3,,,,
4,,,,


In [5]:
# Subtract the first row from every row: the Series is matched to the frame's
# column labels and applied row by row, so row 0 becomes all zeros.
df - df.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,-0.679779,0.536744,-0.097458,0.088696
2,-0.645842,0.011507,-0.4878,0.229397
3,-0.130091,0.383883,-0.415184,-0.470129
4,-0.219371,-0.375061,-0.030967,-0.065704


In [6]:
# A scalar on the left-hand side works too: the division is applied
# element-wise, giving the reciprocal of each value.
1 / df

Unnamed: 0,A,B,C,D
0,1.292063,2.278535,1.16469,1.433963
1,10.618265,1.024987,1.313819,1.272161
2,7.805571,2.220318,2.696886,1.079022
3,1.55312,1.215419,2.255228,4.400658
4,1.803151,15.669743,1.208268,1.583119


## NumPy Functions

In [50]:
# Element-wise NumPy functions accept a DataFrame directly and return a
# DataFrame with the same index and column labels.
np.sqrt(df)

Unnamed: 0,A,B,C,D
0,0.879748,0.662479,0.926606,0.835086
1,0.306883,0.987736,0.872433,0.886603
2,0.35793,0.671108,0.608932,0.962686
3,0.802412,0.907062,0.665894,0.476696
4,0.744704,0.252621,0.909742,0.794773


In [51]:
# Reducing across the columns (axis=1) yields one maximum per row, returned
# as a Series keyed by the row index.
np.max(df, axis=1)

0    0.858598
1    0.975622
2    0.926765
3    0.822762
4    0.827631
dtype: float64

## Built-in Operations

In [9]:
# Column-wise mean: the reduction runs down the rows (axis=0, the default),
# producing one value per column.
df.mean(axis=0)

A    0.438939
B    0.550293
C    0.652316
D    0.653820
dtype: float64

In [10]:
# Row-wise maximum: reduce across the columns so each row yields one value.
df.max(axis="columns")

0    0.858598
1    0.975622
2    0.926765
3    0.822762
4    0.827631
dtype: float64

## Apply Functions

In [11]:
# With axis=1 the function is applied along each row; np.cumsum returns a
# same-length result, so the output keeps the frame's shape with running
# totals accumulated left to right across the columns.
df.apply(np.cumsum, axis=1)

Unnamed: 0,A,B,C,D
0,0.773956,1.212834,2.071432,2.7688
1,0.094177,1.0698,1.830939,2.617004
2,0.128114,0.5785,0.949298,1.876063
3,0.643865,1.466627,1.910041,2.13728
4,0.554585,0.618402,1.446033,2.077698


In [12]:
# `apply` calls the function once per column (axis=0 by default) with that
# column as a Series; sum / size is the arithmetic mean, so for this frame
# the result matches `df.mean()`.
df.apply(lambda x: x.sum() / x.size)

A    0.438939
B    0.550293
C    0.652316
D    0.653820
dtype: float64

# Series-Only Functions

In [38]:
# Small string Series with mixed case and "-" separators, used to demonstrate
# the Series-only methods in the cells below.
s = pd.Series(
    data=["a", "B-B", "CC-c", "ccc"],
)

In [40]:
# Count occurrences of each distinct value; every string here is unique, so
# each count is 1.
# NOTE(review): the order of tied counts may differ between pandas versions — confirm on re-run.
s.value_counts()

ccc     1
B-B     1
CC-c    1
a       1
dtype: int64

In [39]:
# The `.str` accessor applies string methods element-wise; here every value
# is upper-cased while the index is preserved.
s.str.upper()

0       A
1     B-B
2    CC-C
3     CCC
dtype: object

In [45]:
# Split each string on "-"; every element becomes a list of parts, and values
# without a "-" become single-item lists.
s.str.split("-")

0        [a]
1     [B, B]
2    [CC, c]
3      [ccc]
dtype: object

In [46]:
# `.str.get(i)` takes the element at position i of each value; on plain
# strings that is the i-th character, so this returns each first character.
s.str.get(0)

0    a
1    B
2    C
3    c
dtype: object

In [48]:
# After the split each element is a list, so `.str.get(0)` now picks the
# first list item (the text before the first "-") rather than a character.
s.str.split("-").str.get(0)

0      a
1      B
2     CC
3    ccc
dtype: object

# References

- https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html#operations
- https://pandas.pydata.org/pandas-docs/stable/user_guide/basics.html