# Arithmetic and Data Alignment

1. When you perform arithmetic operations between two pandas Series, the operation is aligned by index labels.

In [3]:
import pandas as pd

# Two Series whose index labels only partly overlap ('a', 'c', 'e' are shared).
s1 = pd.Series({'a': 7, 'c': -2, 'd': 3, 'e': 6})
s2 = pd.Series({'a': 1, 'c': 4, 'b': 7, 'e': 0})

# Addition pairs values up by index label, not by position.
result = s1.add(s2)
print(result)

# Values are added only where the index labels match.

# A label present in just one of the Series yields NaN (Not a Number) in the result.

a    8.0
b    NaN
c    2.0
d    NaN
e    6.0
dtype: float64


2. DataFrame Arithmetic with Row and Column Alignment

When performing operations between DataFrames, values are aligned on both row and column labels. Wherever a row or column label is missing from one of the DataFrames, the result is NaN.

In [4]:
import numpy as np

# Two frames whose labels only partly overlap:
# rows 'Ohio'/'Texas' and columns 'b'/'d' appear in both.
df1 = pd.DataFrame(
    np.arange(9.0).reshape(3, 3),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=list('bcd'),
    dtype=int,
)
df2 = pd.DataFrame(
    np.arange(12.0).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=list('bde'),
    dtype=int,
)

print(df1)

          b  c  d
Ohio      0  1  2
Texas     3  4  5
Colorado  6  7  8


In [5]:
# Show the second frame; it shares only rows Ohio/Texas and columns b/d with df1.
print(df2)

        b   d   e
Utah    0   1   2
Ohio    3   4   5
Texas   6   7   8
Oregon  9  10  11


In [6]:
print(df1+df2)

# Only cells whose row label (Ohio, Texas) AND column label (b, d) exist in both frames are computed.

# Every other cell is NaN because its row or its column is missing from one of the DataFrames.

            b   c     d   e
Colorado  NaN NaN   NaN NaN
Ohio      3.0 NaN   6.0 NaN
Oregon    NaN NaN   NaN NaN
Texas     9.0 NaN  12.0 NaN
Utah      NaN NaN   NaN NaN


3. Using Arithmetic Methods with fill_value

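To avoid NaN results due to missing labels, use the pandas arithmetic methods with fill_value. The fill value stands in for a value that is missing from only one of the two DataFrames; a cell that is missing from both (for example Colorado/e below) is still NaN.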

Methods: add(), sub(), mul(), div()



In [7]:
# Add the frames, treating a value that is missing from one frame as 0.
# Cells absent from BOTH frames (e.g. Colorado/e) still come out as NaN.
df3 = df1.add(other=df2, fill_value=0)
print(df3)

            b    c     d     e
Colorado  6.0  7.0   8.0   NaN
Ohio      3.0  1.0   6.0   5.0
Oregon    9.0  NaN  10.0  11.0
Texas     9.0  4.0  12.0   8.0
Utah      0.0  NaN   1.0   2.0


In [8]:
# Subtract df2 from df1, treating a value that is missing from one frame as 0,
# so cells that exist only in df2 show up negated.
df3 = df1.sub(other=df2, fill_value=0)
print(df3)

            b    c     d     e
Colorado  6.0  7.0   8.0   NaN
Ohio     -3.0  1.0  -2.0  -5.0
Oregon   -9.0  NaN -10.0 -11.0
Texas    -3.0  4.0  -2.0  -8.0
Utah      0.0  NaN  -1.0  -2.0


In [9]:
# Multiply element-wise, treating a value that is missing from one frame as 0.
# Any product with a filled-in 0 is 0, so only cells present in both frames
# can be non-zero.
df3 = df1.mul(other=df2, fill_value=0)
print(df3)

             b    c     d    e
Colorado   0.0  0.0   0.0  NaN
Ohio       0.0  0.0   8.0  0.0
Oregon     0.0  NaN   0.0  0.0
Texas     18.0  0.0  35.0  0.0
Utah       0.0  NaN   0.0  0.0


In [10]:
# Divide df1 by df2, treating a value that is missing from one frame as 0.
# Dividing a non-zero value by a filled-in 0 gives inf, and 0 / 0
# (e.g. Utah/b) gives NaN, so 0 is rarely a useful fill value for division.
df3 = df1.div(other=df2, fill_value=0)
print(df3)

            b    c         d    e
Colorado  inf  inf       inf  NaN
Ohio      0.0  inf  0.500000  0.0
Oregon    0.0  NaN  0.000000  0.0
Texas     0.5  inf  0.714286  0.0
Utah      NaN  NaN  0.000000  0.0


In [11]:
# Alternative to fill_value on the arithmetic method: align both frames
# explicitly first, so that no cell is missing from either of them.

# Union of the row labels and of the column labels of both frames
all_index = df1.index.union(df2.index)
all_columns = df1.columns.union(df2.columns)

# Reindex both frames onto the shared labels; labels a frame did not have are filled with 0
df1_aligned = df1.reindex(index=all_index, columns=all_columns, fill_value=0)
df2_aligned = df2.reindex(index=all_index, columns=all_columns, fill_value=0)

# Both frames now carry identical labels and contain no NaN, so a plain `+`
# is enough (no fill_value needed) and the integer dtype is preserved.
df3 = df1_aligned + df2_aligned
print(df3)


          b  c   d   e
Colorado  6  7   8   0
Ohio      3  1   6   5
Oregon    9  0  10  11
Texas     9  4  12   8
Utah      0  0   1   2


4. Operations Between DataFrame and Series


pandas allows broadcasting Series across a DataFrame. The alignment occurs either across rows or columns depending on axis.

In [None]:
#  A. Row-wise broadcasting (the default, axis=1)

df = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=list('bde'),
    dtype=int,
)
row = df.loc['Utah']  # the first row, as a Series indexed by the column labels
df

Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregon,9,10,11


In [None]:
# Display the first row: a Series named 'Utah' whose index is the column labels b, d, e.
row

Unnamed: 0,Utah
b,0
d,1
e,2


In [None]:
print(df - row)  # subtract the first row from every row of df (row's index is matched to the columns)

        b  d  e
Utah    0  0  0
Ohio    3  3  3
Texas   6  6  6
Oregon  9  9  9


In [None]:
#  B. Column-wise broadcasting (use axis=0)

# Same 4x3 frame as in part A, rebuilt so this part can be run on its own.
df = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=list('bde'),
    dtype=int,
)
df


Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregon,9,10,11


In [None]:
# Pick column 'b' as a Series; its index is the row labels of df.
column = df.loc[:, 'b']
print(column)

Utah      0
Ohio      3
Texas     6
Oregon    9
Name: b, dtype: int64


In [None]:
print(df.sub(column, axis=0))

# The axis argument names the DataFrame axis that the Series labels are matched against:

#     axis=1 → Series index is matched to the column labels, so the Series is applied to every row (row-wise broadcasting)

#     axis=0 → Series index is matched to the row labels, so the Series is applied to every column (column-wise broadcasting)

        b  d  e
Utah    0  1  2
Ohio    0  1  2
Texas   0  1  2
Oregon  0  1  2


Exercise:

    Create two Series with overlapping indexes and try addition and subtraction.

    Use add() with fill_value between mismatched DataFrames.

    Subtract a row from a DataFrame to normalize each row by the first row.

    Subtract a column to center data column-wise.

# Function Application and Mapping



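*   pipe() : Table-wise function application (the function receives the whole Series or DataFrame)
*   apply() : Row-wise or column-wise function application
*   applymap() : Element-wise function application (renamed to DataFrame.map() in pandas 2.1; applymap() is deprecated there)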





# pipe()
pipe() on a Series

In [12]:
import pandas as pd
import numpy as np


def adder(ele1, ele2):
    """Return ``ele1 + ele2``; used below as the function handed to ``pipe()``."""
    total = ele1 + ele2
    return total


# Small integer Series to pipe through adder().
s1 = pd.Series(range(1, 5))
print(s1)

0    1
1    2
2    3
3    4
dtype: int64


In [None]:
# pipe() passes the whole Series as the first argument, i.e. this calls adder(s1, 3).
print(s1.pipe(adder,3))

0    4
1    5
2    6
3    7
dtype: int64


More operations with pipe()

In [15]:
import pandas as pd
import numpy as np


def sub(ele1, ele2):
    """Return ``ele1 - ele2``."""
    difference = ele1 - ele2
    return difference


def mul(ele1, ele2):
    """Return ``ele1 * ele2``."""
    product = ele1 * ele2
    return product


def div(ele1, ele2):
    """Return ``ele1 / ele2`` (true division)."""
    quotient = ele1 / ele2
    return quotient


# Small integer Series to pipe through the helpers above.
s1 = pd.Series(range(1, 5))
print(s1)

0    1
1    2
2    3
3    4
dtype: int64


In [19]:
# pipe() hands s1 to sub() as its first argument, so this evaluates sub(s1, 3).
# The other helpers are used the same way: s1.pipe(mul, 3) or s1.pipe(div, 3).
s1.pipe(sub, ele2=3)

Unnamed: 0,0
0,-2
1,-1
2,0
3,1


pipe() on a DataFrame

In [20]:
import pandas as pd
import numpy as np

# 4x3 frame holding 0..11, with rows labelled 'a'..'d' and default column labels 0..2.
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list('abcd'),
)
df

Unnamed: 0,0,1,2
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [21]:
# pipe() passes the whole DataFrame as the first argument, i.e. this calls adder(df, 3).
print(df.pipe(adder,3))

    0   1   2
a   3   4   5
b   6   7   8
c   9  10  11
d  12  13  14


# apply()

In [22]:
import pandas as pd
import numpy as np

# 4x3 frame holding 0..11, with rows labelled 'a'..'d' and default column labels 0..2.
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list('abcd'),
)
df

Unnamed: 0,0,1,2
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [23]:
print(df.apply(np.mean))  # by default the function is applied to each column, giving one mean per column

0    4.5
1    5.5
2    6.5
dtype: float64


In [24]:
(0+3+6+9)/4  # manual check: mean of column 0, whose values are 0, 3, 6, 9

4.5

In [25]:
print(df.apply(np.mean,axis=1))  # axis=1 applies the function to each row, giving one mean per row

a     1.0
b     4.0
c     7.0
d    10.0
dtype: float64


# applymap()

Element-wise function application.
Here we will use the lambda keyword to define the function applied to each element.

In [26]:
import pandas as pd
import numpy as np

# Small frame of lowercase strings, built row by row, for the element-wise examples.
df1 = pd.DataFrame([
    {'m1': 'edav', 'm2': 'dccst'},
    {'m1': 'dm', 'm2': 'dlca'},
])
df1

Unnamed: 0,m1,m2
0,edav,dccst
1,dm,dlca


In [28]:
# Upper-case a single string element.
x = lambda x: x.upper()
# y = lambda x: (str(len(x)))

# DataFrame.applymap() is deprecated since pandas 2.1 (it emits a FutureWarning)
# in favour of the equivalent DataFrame.map(). Use map() when this pandas
# version provides it and fall back to applymap() on older releases.
elementwise = df1.map if hasattr(df1, "map") else df1.applymap
elementwise(x)

  df1.applymap(x)


Unnamed: 0,m1,m2
0,EDAV,DCCST
1,DM,DLCA


In [29]:
# Define `y` in this cell so it also runs on a fresh kernel (Restart & Run All):
# it returns the length of each string element, as text.
y = lambda x: str(len(x))

# DataFrame.applymap() is deprecated since pandas 2.1 (it emits a FutureWarning)
# in favour of the equivalent DataFrame.map(). Use map() when this pandas
# version provides it and fall back to applymap() on older releases.
elementwise = df1.map if hasattr(df1, "map") else df1.applymap
elementwise(y)

  df1.applymap(y)


Unnamed: 0,m1,m2
0,4,5
1,2,4


# Mapping in pandas

In [None]:
# Series.map() replaces each value of a Series with the value it corresponds to in a mapping.
# The mapping can be a function, a dictionary, or another Series.
# Syntax:
# Series.map(arg, na_action=None)


In [None]:
import pandas as pd
import numpy as np

# Series of programming-language names used in the map() examples.
a = pd.Series('java c++ python c'.split())
print(a)

0      java
1       c++
2    python
3         c
dtype: object


In [None]:
# Map each value of the Series above through a dictionary (value -> replacement).
# na_action='ignore' leaves NaN entries untouched instead of looking them up.
# NOTE(review): the saved output shows a row 4 with NaN, but `a` as defined in
# the preceding cell has only four elements; the output looks stale (produced
# when `a` still had a trailing NaN). Re-run after a kernel restart to refresh it.
descriptions = {
    'java': 'core',
    'c++': 'c plus plus',
    'python': 'language',
    'c': 'c language',
}
print(a.map(descriptions, na_action='ignore'))

0           core
1    c plus plus
2       language
3     c language
4            NaN
dtype: object


In [None]:
# Example 2: map with a function (a bound str.format) instead of a dictionary.
import pandas as pd
import numpy as np

# The trailing NaN shows the effect of na_action='ignore': it is passed through
# unchanged rather than being formatted into the template.
a = pd.Series(['java', 'c++', 'python', 'c', np.nan])
template = 'I like {}'
print(a.map(template.format, na_action='ignore'))

0      I like java
1       I like c++
2    I like python
3         I like c
4              NaN
dtype: object


In [None]:
# Practice Exercises:

#     Given a DataFrame of student marks, filter out students scoring less than 40 in Math.

#     Create two Series with partially overlapping indexes. Perform addition and observe the alignment.

#     Apply a function that computes (max - mean) for each column in a DataFrame.

#     Format every element of a DataFrame to show 1 decimal place using applymap().

#     Subtract a Series representing the first row from the entire DataFrame.

#     Import a DataFrame from CSV.

#     Clean missing values using map() or apply().

#     Compute row-wise totals using apply().

#     Format the entire DataFrame to 2 decimal places using applymap().