## Apply Function to Pandas Series or DataFrame
Notes based on the video at https://www.youtube.com/watch?v=P_q0tkYqvSk

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Download the Kaggle training data (passenger records) and preview the first five rows
train_url = 'http://bit.ly/kaggletrain'
train = pd.read_csv(train_url)
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
# Encode the Sex column numerically: female -> 0, male -> 1.
# Series.map looks each value up in the dictionary.
train['sex_num'] = train['Sex'].map({
    'female': 0,
    'male': 1,
})

In [6]:
# Compare the original labels with their numeric codes for the first five rows
# (the frame still has its default 0..n-1 index, so these are labels 0 through 4)
train[['Sex', 'sex_num']].head()

Unnamed: 0,Sex,sex_num
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1


In [9]:
# apply() as a Series method: call a function on every value of the Series.
# Pass the function object itself (len), not the result of calling it (len()).
train['name_length'] = train['Name'].apply(len)

In [10]:
# Show each name next to its character count for the first five rows
# (the frame still has its default 0..n-1 index, so these are labels 0 through 4)
train[['Name', 'name_length']].head()

Unnamed: 0,Name,name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24


In [11]:
# Round every fare up to the next whole number.
# np.ceil keeps the float dtype, so 7.25 becomes 8.0 rather than 8.
train['fare_ceil'] = train['Fare'].apply(np.ceil)

In [13]:
# Show each fare next to its rounded-up value for the first five rows
# (the frame still has its default 0..n-1 index, so these are labels 0 through 4)
train[['Fare', 'fare_ceil']].head()

Unnamed: 0,Fare,fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0
4,8.05,9.0


In [14]:
# Split every name at the commas; each value becomes a list of string pieces
(
    train['Name']
    .str.split(',')
    .head()
)

0                           [Braund,  Mr. Owen Harris]
1    [Cumings,  Mrs. John Bradley (Florence Briggs ...
2                            [Heikkinen,  Miss. Laina]
3      [Futrelle,  Mrs. Jacques Heath (Lily May Peel)]
4                          [Allen,  Mr. William Henry]
Name: Name, dtype: object

In [15]:
def get_element(my_list, position):
    """Return the item of ``my_list`` found at index ``position``.

    Plain subscripting is used, so an invalid ``position`` raises whatever
    error ``my_list[position]`` raises.
    """
    item = my_list[position]
    return item

In [16]:
# Extra keyword arguments given to apply() are forwarded to the function,
# so get_element is called with position=0 for every split name
(
    train['Name']
    .str.split(',')
    .apply(get_element, position=0)
    .head()
)

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [19]:
# Take the first piece of every split name using an inline lambda function
(
    train['Name']
    .str.split(',')
    .apply(lambda parts: parts[0])
    .head()
)

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [21]:
# apply() as a DataFrame method works along either axis of the DataFrame.
# Download the drinks-by-country data used for these examples and preview it.
drinks_url = 'http://bit.ly/drinksbycountry'
drinks = pd.read_csv(drinks_url)
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [23]:
# Label-based column slice: all rows, columns beer_servings through wine_servings
# (.loc slices include both endpoints)
(
    drinks
    .loc[:, 'beer_servings':'wine_servings']
    .head()
)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0,0,0
1,89,132,54
2,25,0,14
3,245,138,312
4,217,57,45


In [26]:
# axis=0 hands each column to max(), giving one maximum per column
(
    drinks
    .loc[:, 'beer_servings':'wine_servings']
    .apply(max, axis=0)
)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [29]:
# axis=1 hands each row to max(), giving one maximum per row
(
    drinks
    .loc[:, 'beer_servings':'wine_servings']
    .apply(max, axis=1)
    .head()
)

0      0
1    132
2     25
3    312
4    217
dtype: int64

In [37]:
# Identify which column holds the largest value in each row.
# Series.idxmax returns the index *label* of the maximum (here a column name).
# np.argmax is not used because on current pandas it returns the integer
# position instead of the label. Ties resolve to the first matching column.
# (DataFrame.idxmax(axis=1) is the direct shortcut for this apply call.)
drinks.loc[:, 'beer_servings':'wine_servings'].apply(pd.Series.idxmax, axis=1).head()

0      beer_servings
1    spirit_servings
2      beer_servings
3      wine_servings
4      beer_servings
dtype: object

In [39]:
# Applymap changes every element in DataFrame
drinks.loc[:, 'beer_servings':'wine_servings'].applymap(float).head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0


In [41]:
# use applymap to change the numbers into floats
drinks.loc[:, 'beer_servings':'wine_servings'] = drinks.loc[:, 'beer_servings':'wine_servings'].applymap(float).head()
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,0.0,0.0,0.0,Asia
1,Albania,89.0,132.0,54.0,4.9,Europe
2,Algeria,25.0,0.0,14.0,0.7,Africa
3,Andorra,245.0,138.0,312.0,12.4,Europe
4,Angola,217.0,57.0,45.0,5.9,Africa
