In [1]:
# Operating on Data in Pandas
import pandas as pd 
import numpy as np 

In [2]:
# Ufuncs: index preservation
# A seeded generator keeps every random draw in this notebook reproducible.
rng = np.random.default_rng(seed=42)
# Four integers drawn from the half-open range [0, 10).
ser = pd.Series(rng.integers(low=0, high=10, size=4))
ser

0    0
1    7
2    6
3    4
dtype: int64

In [3]:
# A 3x4 frame of integers from [0, 10), with columns labelled A through D.
df = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 4)),
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
0,4,8,0,6
1,2,0,5,9
2,7,7,7,7


In [5]:
# Applying a NumPy ufunc to either of these objects returns another
# pandas object with the index preserved:
np.exp(ser)

0       1.000000
1    1096.633158
2     403.428793
3      54.598150
dtype: float64

In [6]:
# Row and column labels are preserved even through more complex
# sequences of operations:
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,1.224647e-16,-2.449294e-16,0.0,-1.0
1,1.0,0.0,-0.707107,0.707107
2,-0.7071068,-0.7071068,-0.707107,-0.707107


In [7]:
# Ufuncs: Index Alignment

In [8]:
# Index alignment in Series
# The two Series deliberately share only some labels
# ('Texas' and 'California'), so later operations have to align them.
area = pd.Series(
    [1723337, 695662, 423967],
    index=['Alaska', 'Texas', 'California'],
    name='area',
)
population = pd.Series(
    [39538223, 29145505, 21538187],
    index=['California', 'Texas', 'Florida'],
    name='population',
)

In [9]:
# Division is aligned on index labels; a label present in only one of the
# two Series (Alaska, Florida) produces NaN in the result.
population / area 

Alaska              NaN
California    93.257784
Florida             NaN
Texas         41.896072
dtype: float64

In [10]:
# The index of the result above is the union of the two input indexes,
# which can also be computed directly:
area.index.union(population.index)

Index(['Alaska', 'California', 'Florida', 'Texas'], dtype='object')

In [11]:
# Two Series whose indexes only partly overlap (labels 1 and 2 are shared).
A = pd.Series(data=[2, 4, 5], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])
# Labels found in only one operand (0 and 3) come out as NaN.
A + B

0    NaN
1    5.0
2    8.0
3    NaN
dtype: float64

In [15]:
# The method form accepts a fill value for labels missing from one operand:
# index 0 exists only in A (2 + 1000) and index 3 only in B (1000 + 5).
A.add(B, fill_value=1000)

0    1002.0
1       5.0
2       8.0
3    1005.0
dtype: float64

In [17]:
# Index alignment in DataFrames

# A 2x2 frame of integers from [0, 20), with columns "a" and "b".
A = pd.DataFrame(
    rng.integers(low=0, high=20, size=(2, 2)),
    columns=list("ab"),
)
A

Unnamed: 0,a,b
0,10,7
1,3,18


In [18]:
# A 3x3 frame of integers from [0, 10); the column order ("b", "a", "c")
# intentionally differs from the order used in A.
B = pd.DataFrame(
    rng.integers(low=0, high=10, size=(3, 3)),
    columns=list("bac"),
)
B

Unnamed: 0,b,a,c
0,7,6,4
1,8,5,4
2,4,2,0


In [19]:
# Alignment happens on both row and column labels, regardless of their order
# in the operands; cells missing from either frame become NaN.
A + B 

Unnamed: 0,a,b,c
0,16.0,14.0,
1,8.0,26.0,
2,,,


In [20]:
# Fill entries that are missing from one of the frames with the mean of
# all values in A before adding:
A.add(B, fill_value=A.to_numpy().mean())

Unnamed: 0,a,b,c
0,16.0,14.0,13.5
1,8.0,26.0,13.5
2,11.5,13.5,9.5


In [21]:
# Ufuncs: operations between DataFrames and Series
# Start from a plain 2-D NumPy array of integers drawn from [0, 10).
# Note that the name A is rebound to an ndarray here.
A = rng.integers(low=0, high=10, size=(3, 4))
A

array([[5, 8, 0, 8],
       [8, 2, 6, 1],
       [7, 7, 3, 0]])

In [22]:
# NumPy broadcasting: subtracting the first row from the 2-D array applies
# the subtraction to every row.
A - A[0]

array([[ 0,  0,  0,  0],
       [ 3, -6,  6, -7],
       [ 2, -1,  3, -8]])

In [23]:
# pandas follows the same convention: subtracting a row (a Series indexed by
# the column labels) from a DataFrame is applied row-wise by default.
df = pd.DataFrame(A, columns=list("QRST"))
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,3,-6,6,-7
2,2,-1,3,-8


In [24]:
# To operate column-wise instead, use the method form with the "axis" keyword:
#   axis=0 (or "index")   -> match the Series against the row labels (column-wise)
#   axis=1 (or "columns") -> match the Series against the column labels (row-wise)
df.subtract(df["R"], axis=0)

Unnamed: 0,Q,R,S,T
0,-3,0,-8,0
1,6,0,4,-1
2,0,0,-4,-7


In [25]:
# Take the first row but only every second column (Q and S).
halfrow = df.iloc[0,::2]
halfrow

Q    5
S    0
Name: 0, dtype: int64

In [26]:
# Labels are aligned automatically: columns absent from halfrow (R and T)
# become NaN in the result.
df - halfrow 

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,3.0,,6.0,
2,2.0,,3.0,
