In [1]:
import numpy as np
import pandas as pd
# Read the Kaggle training data into a DataFrame; the path is relative to the current working directory.
# NOTE(review): unlike the other data files loaded in this notebook, this path has no .csv extension — confirm the file name.
train = pd.read_csv('./data/kaggletrain')

In [2]:
# Preview the first five rows to see which columns are available
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Series.map() (Series only) translates every value through a lookup, here a dict
sex_codes = {'female': 0, 'male': 1}
train['sex_map'] = train['Sex'].map(sex_codes)
train.loc[0:4, ['Sex', 'sex_map']]

Unnamed: 0,Sex,sex_map
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1


In [4]:
# apply() exists on both Series and DataFrame; on a Series it calls the
# function once per element (here len() of each name string)
train['name_length'] = train['Name'].apply(len)
train.loc[0:4, ['Name', 'name_length']]

Unnamed: 0,Name,name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24


In [5]:
# Any callable can be handed to apply(), including NumPy functions:
# round every fare up to the next whole number
train['fare_ceil'] = train['Fare'].apply(np.ceil)
train.loc[0:4, ['Fare', 'fare_ceil']]

Unnamed: 0,Fare,fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0
4,8.05,9.0


In [6]:
# .str.split(',') turns each name into a list of the pieces around the comma
train['Name'].str.split(',').head()

0                           [Braund,  Mr. Owen Harris]
1    [Cumings,  Mrs. John Bradley (Florence Briggs ...
2                            [Heikkinen,  Miss. Laina]
3      [Futrelle,  Mrs. Jacques Heath (Lily May Peel)]
4                          [Allen,  Mr. William Henry]
Name: Name, dtype: object

In [7]:
def get_element(my_list, position):
    """Return the item stored at index ``position`` of ``my_list``."""
    element = my_list[position]
    return element

In [8]:
# Split each name on the comma and keep the piece in front of it; the extra
# keyword argument is forwarded by apply() to get_element().
# NOTE: names are stored as "Surname, Title Given-names", so despite its
# label the 'first_name' column ends up holding the family name.
name_parts = train['Name'].str.split(',')
train['first_name'] = name_parts.apply(get_element, position=0)
train.loc[0:4, ['Name', 'first_name']]

Unnamed: 0,Name,first_name
0,"Braund, Mr. Owen Harris",Braund
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",Cumings
2,"Heikkinen, Miss. Laina",Heikkinen
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",Futrelle
4,"Allen, Mr. William Henry",Allen


In [9]:
# Inline alternative: a lambda picks the first piece of each split name
train['Name'].str.split(',').apply(lambda parts: parts[0]).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [10]:
# Load the per-country drinks data and preview the first five rows
drinks = pd.read_csv('./data/drinksbycountry.csv')
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [11]:
# axis=0 runs max() on each column, giving one maximum per column
servings = drinks.loc[:, 'beer_servings':'wine_servings']
servings.apply(max, axis=0)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [12]:
# axis=1 runs max() on each row, giving one maximum per country
servings = drinks.loc[:, 'beer_servings':'wine_servings']
servings.apply(max, axis=1)

0        0
1      132
2       25
3      312
4      217
      ... 
188    333
189    111
190      6
191     32
192     64
Length: 193, dtype: int64

In [13]:
# np.argmax gives, for each row, the integer position of the largest value
# (0 = beer, 1 = spirit, 2 = wine in the output below), not the column name.
# NOTE(review): DataFrame.idxmax(axis=1) would return the column label instead — confirm which is wanted.
drinks.loc[:, 'beer_servings': 'wine_servings'].apply(np.argmax, axis = 1)

0      0
1      1
2      0
3      2
4      0
      ..
188    0
189    0
190    0
191    0
192    0
Length: 193, dtype: int64

In [14]:
# applymap() applies a function to every element of a DataFrame
# (pandas 2.1+ renames it to DataFrame.map()).
# Assign through drinks[cols] rather than drinks.loc[:, ...]: since pandas 2.0
# a .loc[:, cols] assignment writes into the existing int64 columns in place,
# so whole-number floats are cast back to int and the conversion is lost.
serving_cols = ['beer_servings', 'spirit_servings', 'wine_servings']
drinks[serving_cols] = drinks[serving_cols].applymap(float)

In [15]:
# Check that the three servings columns are now shown as floats
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,0.0,0.0,0.0,Asia
1,Albania,89.0,132.0,54.0,4.9,Europe
2,Algeria,25.0,0.0,14.0,0.7,Africa
3,Andorra,245.0,138.0,312.0,12.4,Europe
4,Angola,217.0,57.0,45.0,5.9,Africa


#### Reference video: https://www.youtube.com/watch?v=g_IpO4A7RU4

In [16]:
# Load the iris measurements and preview the first five rows
df = pd.read_csv('./data/iris.csv')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [17]:
def capitalize(x):
    """Return the string ``x`` converted to upper case.

    Note that, despite the name, this upper-cases the whole string
    (``x.upper()``) rather than only its first letter.
    """
    return x.upper()
# Series.apply() calls capitalize() on every species label
df['species'].apply(capitalize)

0         SETOSA
1         SETOSA
2         SETOSA
3         SETOSA
4         SETOSA
         ...    
145    VIRGINICA
146    VIRGINICA
147    VIRGINICA
148    VIRGINICA
149    VIRGINICA
Name: species, Length: 150, dtype: object

In [18]:
# Given a function, Series.map() also calls it on each element, so the result matches apply()
df['species'].map(capitalize)

0         SETOSA
1         SETOSA
2         SETOSA
3         SETOSA
4         SETOSA
         ...    
145    VIRGINICA
146    VIRGINICA
147    VIRGINICA
148    VIRGINICA
149    VIRGINICA
Name: species, Length: 150, dtype: object

In [19]:
# Drop the text label column so only the measurement columns remain
df1 = df.drop(columns=['species'])
df1.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [21]:
# DataFrame.applymap() works element-wise: square every value in the frame
df1.applymap(np.square)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,26.01,12.25,1.96,0.04
1,24.01,9.00,1.96,0.04
2,22.09,10.24,1.69,0.04
3,21.16,9.61,2.25,0.04
4,25.00,12.96,1.96,0.04
...,...,...,...,...
145,44.89,9.00,27.04,5.29
146,39.69,6.25,25.00,3.61
147,42.25,9.00,27.04,4.00
148,38.44,11.56,29.16,5.29


In [22]:
# Element-wise square root of every value in the frame
df1.applymap(np.sqrt)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,2.258318,1.870829,1.183216,0.447214
1,2.213594,1.732051,1.183216,0.447214
2,2.167948,1.788854,1.140175,0.447214
3,2.144761,1.760682,1.224745,0.447214
4,2.236068,1.897367,1.183216,0.447214
...,...,...,...,...
145,2.588436,1.732051,2.280351,1.516575
146,2.509980,1.581139,2.236068,1.378405
147,2.549510,1.732051,2.280351,1.414214
148,2.489980,1.843909,2.323790,1.516575


In [31]:
def multiply50(x):
    """Return ``x`` multiplied by 50, leaving strings unchanged.

    Written for element-wise use (e.g. with ``DataFrame.applymap``) on a
    frame that mixes numeric and text columns.

    Parameters
    ----------
    x : object
        A single cell value.

    Returns
    -------
    object
        ``x`` itself when it is a string, otherwise ``50 * x``.
    """
    # isinstance() also catches str subclasses such as numpy.str_; an exact
    # type() comparison lets those fall through to 50 * x, which repeats the
    # text 50 times instead of preserving it.
    if isinstance(x, str):
        return x
    return 50 * x

In [33]:
# Element-wise over the whole frame: numeric cells are multiplied by 50, string cells pass through unchanged
df.applymap(multiply50)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,255.0,175.0,70.0,10.0,setosa
1,245.0,150.0,70.0,10.0,setosa
2,235.0,160.0,65.0,10.0,setosa
3,230.0,155.0,75.0,10.0,setosa
4,250.0,180.0,70.0,10.0,setosa
...,...,...,...,...,...
145,335.0,150.0,260.0,115.0,virginica
146,315.0,125.0,250.0,95.0,virginica
147,325.0,150.0,260.0,100.0,virginica
148,310.0,170.0,270.0,115.0,virginica


In [28]:
# Inspect the column dtypes (species is the only non-numeric column)
df.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object