# Pandas Apply

apply를 사용해서 연월일의 정보에서 연도만 빼보는 예제입니다.

In [29]:
import pandas as pd

In [30]:
# Sample records: one dict per date, keyed by the column name.
date_list = [
    {'yyyy-mm-dd': day}
    for day in ('2000-06-27', '2002-09-24', '2005-12-20')
]
# Single-column frame holding the raw date strings.
df = pd.DataFrame(date_list, columns=['yyyy-mm-dd'])
df

Unnamed: 0,yyyy-mm-dd
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [31]:
def extract_year(column):
    """Return the text before the first '-' of a date string.

    `column` is expected to be a single string such as '2000-06-27';
    the result ('2000' in that case) is still a string. A value without
    any '-' is returned unchanged.
    """
    year, _, _ = column.partition('-')
    return year

In [32]:
# Series.apply calls extract_year once per date string; the returned
# year strings are stored as the new 'year' column.
df['year'] = df['yyyy-mm-dd'].apply(extract_year)

In [33]:
# Display the frame, which now includes the 'year' column.
df

Unnamed: 0,yyyy-mm-dd,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [34]:
def get_age(year, current_year):
    """Return the number of years between `year` and `current_year`.

    `year` may be anything `int()` accepts (e.g. the string '2000');
    `current_year` is used as given.
    """
    birth_year = int(year)
    return current_year - birth_year

In [35]:
# Keyword arguments given to apply (here current_year) are forwarded to
# get_age on every call, alongside the element from the 'year' column.
df['age'] = df['year'].apply(get_age, current_year=2018)

In [36]:
# Display the frame, which now includes the 'age' column.
df

Unnamed: 0,yyyy-mm-dd,year,age
0,2000-06-27,2000,18
1,2002-09-24,2002,16
2,2005-12-20,2005,13


In [37]:
def get_introduce(age, prefix, suffix):
    """Return `prefix`, the age as text, and `suffix` joined together.

    No separator is inserted, so any spacing must be part of `prefix`
    and `suffix` themselves.
    """
    age_text = str(age)
    return prefix + age_text + suffix

In [38]:
df['introduce'] = df['age'].apply(get_introduce, prefix='I am', suffix=' years old')
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2000-06-27,2000,18,I am18 years old
1,2002-09-24,2002,16,I am16 years old
2,2005-12-20,2005,13,I am13 years old


In [39]:
def get_introduce2(row):
    """Build an introduction sentence from a row's `year` and `age`.

    `row` only needs to expose `year` and `age` as attributes; both are
    converted to text with `str()`.
    """
    born = str(row.year)
    age = str(row.age)
    return "".join(("I am born in ", born, " my age is ", age))

In [40]:
# With axis=1, DataFrame.apply passes each row to get_introduce2, which
# reads that row's year and age. The result replaces the existing
# 'introduce' column.
df['introduce'] = df.apply(get_introduce2, axis=1)

In [41]:
# Same row-wise assignment written with attribute access instead of
# df['introduce']. Attribute-style assignment only updates a column that
# already exists, which 'introduce' does at this point.
df.introduce = df.apply(get_introduce2, axis=1)

In [42]:
# Display the frame with the rewritten 'introduce' column.
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2000-06-27,2000,18,I am born in 2000 my age is 18
1,2002-09-24,2002,16,I am born in 2002 my age is 16
2,2005-12-20,2005,13,I am born in 2005 my age is 13


`map`에 파라미터로 딕셔너리를 전달하면 컬럼값을 원하는 값으로 쉽게 변경할 수 있습니다.  
기존의 컬럼값은 딕셔너리의 key로 사용되고, 그 key에 해당하는 value로 컬럼값이 변경됩니다.

In [43]:
# Sample records pairing an age with a job name.
job_list = [
    {'age': age, 'job': job}
    for age, job in ((20, 'student'), (30, 'developer'), (30, 'teacher'))
]
# The same name is rebound from the list of records to the DataFrame.
job_list = pd.DataFrame(job_list)
job_list

Unnamed: 0,age,job
0,20,student
1,30,developer
2,30,teacher


In [44]:
# Series.map with a dict looks up each job name as a key and replaces it
# with the matching value.
job_list['job'] = job_list['job'].map({
    'student': 1,
    'developer': 2,
    'teacher': 3,
})
job_list

Unnamed: 0,age,job
0,20,1
1,30,2
2,30,3


In [45]:
# Sample (x, y) float pairs turned into one record per point.
x_y = [
    {'x': x, 'y': y}
    for x, y in ((5.5, -5.6), (-5.2, 5.5), (-1.6, -4.5))
]
# df is rebound here to a new two-column frame of these points.
df = pd.DataFrame(x_y)
df

Unnamed: 0,x,y
0,5.5,-5.6
1,-5.2,5.5
2,-1.6,-4.5
