### [참고] <a href="https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf">Pandas Cheat Sheet</a>

In [1]:
import pandas as pd
import numpy as np

### 1) apply()
* Series, DataFrame 둘 다 적용 가능

In [2]:
# Small demo frame: two integer columns (P, Q) with three identical rows.
df = pd.DataFrame({
    "P" : [9,9,9],
    "Q" : [25,25,25]
})
df

Unnamed: 0,P,Q
0,9,25
1,9,25
2,9,25


<img src='https://www.w3resource.com/w3r_images/pandas-dataframe-apply-1.png' width="300" height="150">

In [3]:
df.apply(np.sqrt) # default axis=0: the function is applied to each column (np.sqrt itself works element-wise)

Unnamed: 0,P,Q
0,3.0,5.0
1,3.0,5.0
2,3.0,5.0


<img src="https://www.w3resource.com/w3r_images/pandas-dataframe-apply-2.png" width="300" height="200">

In [4]:
# axis=0 (default): np.sum receives each column, giving one total per column.
df.apply(np.sum)

P    27
Q    75
dtype: int64

<img src="https://www.w3resource.com/w3r_images/pandas-dataframe-apply-3.png">

In [5]:
# axis=1: np.sum receives each row, giving one total per row.
df.apply(np.sum, axis=1)

0    34
1    34
2    34
dtype: int64

In [29]:
# Sample dates kept as "yyyy-mm-dd" strings in a single column.
date_list = {"yyyy-mm-dd":["2005-09-28", "2007-10-05", "2012-12-20"]}
df = pd.DataFrame(date_list)
df

Unnamed: 0,yyyy-mm-dd
0,2005-09-28
1,2007-10-05
2,2012-12-20


#### 1) 연도 추출

In [30]:
def extract_year(date):
    """Return the part of ``date`` that precedes its first "-".

    For a "yyyy-mm-dd" string this is the four-digit year, still as a string.
    A string without any "-" is returned unchanged.
    """
    year, *_rest = date.split("-")
    return year

In [31]:
# Series.apply calls extract_year once per element and returns a new Series;
# df itself is not changed because the result is not assigned.
df['yyyy-mm-dd'].apply(extract_year)

0    2005
1    2007
2    2012
Name: yyyy-mm-dd, dtype: object

In [32]:
# Store the extracted year (a string) as a new 'year' column.
df['year'] = df['yyyy-mm-dd'].apply(extract_year)
df

Unnamed: 0,yyyy-mm-dd,year
0,2005-09-28,2005
1,2007-10-05,2007
2,2012-12-20,2012


#### 2) 나이 구하는 함수 생성 ( 현재 연도 - yyyy ) 

In [33]:
def getAge(year, cur_year):
    """Return the age in years: ``cur_year`` minus the birth year.

    ``year`` may be anything ``int()`` accepts (for example the string "2005");
    ``cur_year`` is used as given.
    """
    birth_year = int(year)
    return cur_year - birth_year

In [34]:
# Extra keyword arguments given to apply() are forwarded to the function,
# so every call receives cur_year=2022.
df["age"] = df['year'].apply(getAge, cur_year=2022)
df

Unnamed: 0,yyyy-mm-dd,year,age
0,2005-09-28,2005,17
1,2007-10-05,2007,15
2,2012-12-20,2012,10


#### 3) 소개글 만드는 함수 생성

In [35]:
def get_introduce(age, prefix, suffix):
    """Build an introduction sentence: ``prefix``, the age as text, then ``suffix``."""
    pieces = (prefix, str(age), suffix)
    return "".join(pieces)

In [36]:
# prefix and suffix are forwarded by apply() to get_introduce for every age value.
df['introduce'] = df['age'].apply(get_introduce, prefix="안녕하세요 저는 ", suffix=" 살 입니다.")
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2005-09-28,2005,17,안녕하세요 저는 17 살 입니다.
1,2007-10-05,2007,15,안녕하세요 저는 15 살 입니다.
2,2012-12-20,2012,10,안녕하세요 저는 10 살 입니다.


#### 4) 함수를 DataFrame 전체에 적용 ( axis=1 : 행 단위 )

In [37]:
def get_introduce2(col):
    """Build a sentence from the ``year`` and ``age`` attributes of ``col``.

    Despite the parameter name, the notebook calls this with ``axis=1``, so
    ``col`` is one row of the frame and ``col.year`` / ``col.age`` read that
    row's 'year' and 'age' entries.
    """
    birth_year = str(col.year)
    age = str(col.age)
    parts = ["나는 ", birth_year, " 년에 태어났고, 나의 나이는 ", age, " 입니다."]
    return "".join(parts)

In [40]:
# axis=1: get_introduce2 is called once per row, so it can read several columns at once.
df['introduce2'] = df.apply(get_introduce2, axis=1)
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce,introduce2
0,2005-09-28,2005,17,안녕하세요 저는 17 살 입니다.,"나는 2005 년에 태어났고, 나의 나이는 17 입니다."
1,2007-10-05,2007,15,안녕하세요 저는 15 살 입니다.,"나는 2007 년에 태어났고, 나의 나이는 15 입니다."
2,2012-12-20,2012,10,안녕하세요 저는 10 살 입니다.,"나는 2012 년에 태어났고, 나의 나이는 10 입니다."


***

In [42]:
# Scores per student: columns are the subjects, the index holds the student names.
df = pd.DataFrame({
    "영어" : [60,70],
    "수학" : [100,50]
}, index=["Tom", "David"])
df

Unnamed: 0,영어,수학
Tom,60,100
David,70,50


In [43]:
def func(df1):
    """Print the type, index and values of the object that apply() passes in.

    Used to inspect what DataFrame.apply hands to the function. Nothing is
    returned, so the apply() result consists of None values.
    """
    print(type(df1))
    for attr_name in ("index", "values"):
        print(getattr(df1, attr_name))

In [44]:
# axis=0 (default): func is called once per column; it returns None, hence the all-None result.
df.apply(func)

<class 'pandas.core.series.Series'>
Index(['Tom', 'David'], dtype='object')
[60 70]
<class 'pandas.core.series.Series'>
Index(['Tom', 'David'], dtype='object')
[100  50]


영어    None
수학    None
dtype: object

In [45]:
# axis=1: func is called once per row; each row arrives as a Series indexed by the column labels.
df.apply(func, axis=1)

<class 'pandas.core.series.Series'>
Index(['영어', '수학'], dtype='object')
[ 60 100]
<class 'pandas.core.series.Series'>
Index(['영어', '수학'], dtype='object')
[70 50]


Tom      None
David    None
dtype: object

#### 1) 모든 학생의 영어 성적을 80으로 변경

In [49]:
def func2(df1):
    """Return a copy of one row with the '영어' (English) score set to 80.

    Intended for ``df.apply(func2, axis=1)``, where ``df1`` is a single row.

    Parameters
    ----------
    df1 : pandas.Series (or any mapping that provides ``.copy()``)
        One row of scores. It is left untouched.

    Returns
    -------
    Same type as ``df1``
        A copy of the row whose '영어' entry is 80.
    """
    # pandas documents that a function passed to apply() must not mutate the
    # object it receives, so the change is made on a copy of the row.
    updated = df1.copy()
    updated['영어'] = 80
    return updated

In [51]:
# axis=1: func2 receives each row; the rows it returns are assembled into new_df.
new_df = df.apply(func2, axis=1)
new_df

Unnamed: 0,영어,수학
Tom,80,100
David,80,50


***

In [52]:
import seaborn as sns

In [53]:
# Load the "iris" example dataset via seaborn and preview the first two rows.
iris_df = sns.load_dataset("iris")
iris_df.head(2)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa


In [54]:
# DataFrame.apply passes each column as a Series; x[0] picks the entry labelled 0,
# which is the first row of this frame.
iris_df.apply(lambda x:x[0])

sepal_length       5.1
sepal_width        3.5
petal_length       1.4
petal_width        0.2
species         setosa
dtype: object

In [55]:
# Series.apply passes each element (a string here), so x[0] is the first character of the name.
iris_df['species'].apply(lambda x:x[0])

0      s
1      s
2      s
3      s
4      s
      ..
145    v
146    v
147    v
148    v
149    v
Name: species, Length: 150, dtype: object

In [59]:
# Keep the last three characters of each species name in a new column.
iris_df['species-3'] = iris_df['species'].apply(lambda x:x[-3:])
iris_df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species-3
0,5.1,3.5,1.4,0.2,setosa,osa
1,4.9,3.0,1.4,0.2,setosa,osa
2,4.7,3.2,1.3,0.2,setosa,osa


In [60]:
def sample(x):
    """Return the last three items of a sliceable value (all of it if shorter)."""
    last_three = slice(-3, None)
    return x[last_three]

In [61]:
# Same operation as the lambda x:x[-3:] cell, using the named function instead.
iris_df['species'].apply(sample)

0      osa
1      osa
2      osa
3      osa
4      osa
      ... 
145    ica
146    ica
147    ica
148    ica
149    ica
Name: species, Length: 150, dtype: object

***

### 2) map()
* Series 만 적용 가능 ( 1 개의 컬럼 ) 

In [62]:
# Series of strings containing one missing value (NaN).
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [63]:
# With a dict, map() looks each value up as a key; values with no matching key
# (here 'rabbit') come out as NaN, and the existing NaN stays NaN.
s.map({'cat':'kitten', 'dog':'puppy'})

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

In [64]:
# Recreate the date frame so the map() example starts from the original single column.
date_list = {"yyyy-mm-dd":["2005-09-28", "2007-10-05", "2012-12-20"]}
df = pd.DataFrame(date_list)
df

Unnamed: 0,yyyy-mm-dd
0,2005-09-28
1,2007-10-05
2,2012-12-20


In [65]:
# map() also accepts a function; the result matches the earlier apply(extract_year) cell.
df['year'] = df['yyyy-mm-dd'].map(extract_year)
df

Unnamed: 0,yyyy-mm-dd,year
0,2005-09-28,2005
1,2007-10-05,2007
2,2012-12-20,2012


In [66]:
# Demo frame with an 'age' column and a 'job' column.
job_list = {
    'age' : [20,30,40],
    'job' : ['student', 'developer', 'teacher']
}
df = pd.DataFrame(job_list)
df

Unnamed: 0,age,job
0,20,student
1,30,developer
2,40,teacher


In [67]:
# Create a job_id column by mapping each job name to a number
df['job_id'] = df['job'].map({'student':1, 'developer':2, 'teacher':3})
df

Unnamed: 0,age,job,job_id
0,20,student,1
1,30,developer,2
2,40,teacher,3


### 3) applymap()
* DataFrame 만 사용 가능 ( Series 사용 불가 ) — pandas 2.1 부터 `applymap()` 은 deprecated 되었으며, 대신 `DataFrame.map()` 사용을 권장

In [68]:
# 2x2 frame built from mixed int/float input; the displayed values are all floats.
df = pd.DataFrame([[1,2.12], [3.356,4.567]])
df

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [69]:
# applymap calls the function on every element; here each value becomes its string form.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
df.applymap(lambda x:str(x))

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [72]:
# Element-wise: length of each value's string form (e.g. "1.0" -> 3).
df.applymap(lambda x:len(str(x)))

Unnamed: 0,0,1
0,3,4
1,5,5


In [73]:
# Recreate the P/Q demo frame used in the apply() section.
df = pd.DataFrame({
    "P" : [9,9,9],
    "Q" : [25,25,25]
})
df

Unnamed: 0,P,Q
0,9,25
1,9,25
2,9,25


In [74]:
# Element-wise square root; the result matches the earlier df.apply(np.sqrt) cell.
df.applymap(np.sqrt)

Unnamed: 0,P,Q
0,3.0,5.0
1,3.0,5.0
2,3.0,5.0
