# Pandas DataFrame: Apply Functions

In [1]:
import pandas as pd
import numpy as np

pd.__version__, np.__version__

('0.25.3', '1.17.4')

In [2]:
# One entry per person; each list holds that person's values in the same
# order as the row labels given to `index` below.
data = dict(
    vivek=[35, 'M', 'Trainer'],
    john=[22, 'M', 'Gardener'],
    jill=[19, 'F', 'Accountant'],
    ravi=[42, 'M', 'Sales'],
)

# People become the columns; 'age', 'sex' and 'profession' label the rows.
df1 = pd.DataFrame(
    data,
    index=['age', 'sex', 'profession'],
)

df1

Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


### Applying a Function to all cells individually

#### The function receives a single cell value and returns a single value

http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.applymap.html

In [3]:
df1

Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


#### Let's find the order of traversal

In [4]:

def traversalorder(inp):
    """Echo a value and return it tagged with a '-done' suffix.

    Printing every value as it arrives makes the order in which this
    function gets called visible in the cell output.

    Parameters
    ----------
    inp : any
        A single value; it is converted with ``str``.

    Returns
    -------
    str
        ``str(inp)`` followed by ``'-done'``.
    """
    print(inp)
    tagged = str(inp)
    tagged += '-done'
    return tagged

In [6]:
# Run traversalorder on every cell. The printed values below show the
# visiting order: one column at a time, top to bottom.
# NOTE(review): the first column's values are printed twice in the output;
# presumably pandas evaluates the first column an extra time internally.
df1.applymap(traversalorder)

35
M
Trainer
35
M
Trainer
22
M
Gardener
19
F
Accountant
42
M
Sales


Unnamed: 0,vivek,john,jill,ravi
age,35-done,22-done,19-done,42-done
sex,M-done,M-done,F-done,M-done
profession,Trainer-done,Gardener-done,Accountant-done,Sales-done


In [7]:
df1

Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


#### Let's do something useful with it :)

In [8]:
def flipsex(sex):
    """Swap 'M' and 'F'; for any other input return its type.

    Parameters
    ----------
    sex : any
        A single cell value.

    Returns
    -------
    str or type
        ``'F'`` for ``'M'`` and ``'M'`` for ``'F'``. Every other value,
        including other strings, yields ``type(sex)``.
    """
    swapped = {'M': 'F', 'F': 'M'}

    # Exact type check (not isinstance) so only plain str values are looked
    # up; this also keeps unhashable inputs away from the dict lookup.
    if type(sex) is str and sex in swapped:
        return swapped[sex]

    return type(sex)

In [9]:
# Apply flipsex to every cell: 'M'/'F' cells are swapped, and every other
# cell is replaced by the type of its value.
df1.applymap(flipsex)

Unnamed: 0,vivek,john,jill,ravi
age,<class 'int'>,<class 'int'>,<class 'int'>,<class 'int'>
sex,F,F,M,F
profession,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>


### Apply function to each Column

http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html

In [None]:
df1

In [10]:
def applycolumn(col):
    """Print the received object and its type, then return it unchanged.

    Parameters
    ----------
    col : any
        The object handed in by the caller.

    Returns
    -------
    The same ``col`` object that was passed in.
    """
    # Each item goes out through its own print call; the trailing newline
    # string only separates consecutive calls visually in the output.
    for item in (col, type(col), '\n\n\n'):
        print(item)
    return col
    

In [11]:
# With the default axis, apply passes each column to applycolumn as a
# Series (see the printed types in the output below).
df1.apply(applycolumn)

age                35
sex                 M
profession    Trainer
Name: vivek, dtype: object
<class 'pandas.core.series.Series'>




age                35
sex                 M
profession    Trainer
Name: vivek, dtype: object
<class 'pandas.core.series.Series'>




age                 22
sex                  M
profession    Gardener
Name: john, dtype: object
<class 'pandas.core.series.Series'>




age                   19
sex                    F
profession    Accountant
Name: jill, dtype: object
<class 'pandas.core.series.Series'>




age              42
sex               M
profession    Sales
Name: ravi, dtype: object
<class 'pandas.core.series.Series'>






Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


### Apply Function to each Row

In [12]:
df1

Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


In [14]:
def applyrow(row):
    """Print the received object and its type, then return it unchanged.

    Parameters
    ----------
    row : any
        The object handed in by the caller.

    Returns
    -------
    The same ``row`` object that was passed in.
    """
    # A single print call: the newline separator puts each item on its own
    # line, and the final string leaves blank lines between calls.
    print(row, type(row), '\n\n', sep='\n')
    return row

In [15]:
# axis=1 makes apply pass each row to applyrow as a Series
# (see the printed types in the output below).
df1.apply(applyrow, axis=1)

vivek    35
john     22
jill     19
ravi     42
Name: age, dtype: object
<class 'pandas.core.series.Series'>



vivek    35
john     22
jill     19
ravi     42
Name: age, dtype: object
<class 'pandas.core.series.Series'>



vivek    M
john     M
jill     F
ravi     M
Name: sex, dtype: object
<class 'pandas.core.series.Series'>



vivek       Trainer
john       Gardener
jill     Accountant
ravi          Sales
Name: profession, dtype: object
<class 'pandas.core.series.Series'>





Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


### Apply to one column only!

In [16]:
df1

Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


In [18]:
def func(inp):
    """Print a value and its type, then return the value unchanged.

    Parameters
    ----------
    inp : any
        The value handed in by the caller.

    Returns
    -------
    The same ``inp`` object that was passed in.
    """
    kind = type(inp)
    print(inp)
    # The type line and the blank-line spacer are emitted together.
    print(kind, '\n\n', sep='\n')
    return inp

In [19]:
# Select the single column 'ravi'; apply on that Series calls func once per
# individual value (int, str, str in the output below), not on the Series.
df1['ravi'].apply(func)

42
<class 'int'>



M
<class 'str'>



Sales
<class 'str'>





age              42
sex               M
profession    Sales
Name: ravi, dtype: object

### Apply to one row only!

In [24]:
df1

Unnamed: 0,vivek,john,jill,ravi
age,35,22,19,42
sex,M,M,F,M
profession,Trainer,Gardener,Accountant,Sales


In [21]:
# NOTE: `.ix` is deprecated; pandas emits the warning shown in this cell's
# output. Prefer `.loc` for label-based row selection.
df1.ix['profession'].apply(func)

Trainer
<class 'str'>



Gardener
<class 'str'>



Accountant
<class 'str'>



Sales
<class 'str'>





.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  df1.ix['profession'].apply(func)


vivek       Trainer
john       Gardener
jill     Accountant
ravi          Sales
Name: profession, dtype: object

In [22]:
# Label-based selection of the 'profession' row with `.loc`; func is then
# called once per value in that row.
df1.loc['profession'].apply(func)

Trainer
<class 'str'>



Gardener
<class 'str'>



Accountant
<class 'str'>



Sales
<class 'str'>





vivek       Trainer
john       Gardener
jill     Accountant
ravi          Sales
Name: profession, dtype: object