## 1.개별 원소에 함수 매핑

### 1-1.시리즈 원소에 함수 매핑

In [1]:
import pandas as pd
import seaborn as sns

titanic = sns.load_dataset('titanic')
print(f"age:\n{titanic['age'].head()}\n")

# 함수정의
def mul_2(data):
    return data*2

def mul_two_obj(a, b):
    return a*b

# apply
print(f"titanic['age'].apply(mul_2):\n{titanic['age'].apply(mul_2).head()}\n")
print(f"titanic['age'].apply(mul_two_obj, b=2):\n{titanic['age'].apply(mul_two_obj, b=2).head()}\n")

#lambda
print(f"titanic['age'].apply(lambda x:x+10):\n{titanic['age'].apply(lambda x:x+10).head()}")

age:
0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

titanic['age'].apply(mul_2):
0    44.0
1    76.0
2    52.0
3    70.0
4    70.0
Name: age, dtype: float64

titanic['age'].apply(mul_two_obj, b=2):
0    44.0
1    76.0
2    52.0
3    70.0
4    70.0
Name: age, dtype: float64

titanic['age'].apply(lambda x:x+10):
0    32.0
1    48.0
2    36.0
3    45.0
4    45.0
Name: age, dtype: float64


### 1-2.데이터 프레임 원소에 함수 매핑

In [2]:
import pandas as pd
import seaborn as sns

titanic = sns.load_dataset('titanic')
df = titanic[['age', 'fare']]
print(f"DataFrame:\n{df.head()}\n")

# 함수정의
def mul_2(data):
    return data*2

print(f"df.applymap(mul_2):\n{df.applymap(mul_2).head()}")

DataFrame:
    age     fare
0  22.0   7.2500
1  38.0  71.2833
2  26.0   7.9250
3  35.0  53.1000
4  35.0   8.0500

df.applymap(mul_2):
    age      fare
0  44.0   14.5000
1  76.0  142.5666
2  52.0   15.8500
3  70.0  106.2000
4  70.0   16.1000


## 2.시리즈 객체에 함수 매핑

### 2-1.데이터 프레임 각 열에 함수 매핑

In [3]:
import pandas as pd
import seaborn as sns

titanic = sns.load_dataset('titanic')
df = titanic[['age', 'fare']]
print(f"DataFrame:\n{df.head()}\n")

#함수 정의
def missing_value(series):
    return series.isnull()

def min_max(data):
    min = data.min()
    max = data.max()
    return (data - min)/(max - min)

print(f"df.apply(missing_value, axis=0).head():\n{df.apply(missing_value, axis=0).head()}\n") # default: axis=0
print(f"df.apply(min_max):\n{df.apply(min_max).head()}")

DataFrame:
    age     fare
0  22.0   7.2500
1  38.0  71.2833
2  26.0   7.9250
3  35.0  53.1000
4  35.0   8.0500

df.apply(missing_value, axis=0).head():
     age   fare
0  False  False
1  False  False
2  False  False
3  False  False
4  False  False

df.apply(min_max):
        age      fare
0  0.271174  0.014151
1  0.472229  0.139136
2  0.321438  0.015469
3  0.434531  0.103644
4  0.434531  0.015713


### 1-3.데이터 프레임 각 행에 함수 매핑

In [4]:
import pandas as pd
import seaborn as sns

titanic = sns.load_dataset('titanic')
df = titanic[['age', 'fare']]
print(f"DataFrame:\n{df.head()}\n")

# 함수 정의
def add_two_object(a, b):
    return a+b

df['add'] = df.apply(lambda x:add_two_object(x['age'], x['fare']), axis=1)
print(f'DataFrame:\n{df.head()}\n')

DataFrame:
    age     fare
0  22.0   7.2500
1  38.0  71.2833
2  26.0   7.9250
3  35.0  53.1000
4  35.0   8.0500

DataFrame:
    age     fare       add
0  22.0   7.2500   29.2500
1  38.0  71.2833  109.2833
2  26.0   7.9250   33.9250
3  35.0  53.1000   88.1000
4  35.0   8.0500   43.0500



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


## 3.데이터 프레임 객체에 함수 매핑

In [5]:
import pandas as pd
import seaborn as sns

titanic = sns.load_dataset('titanic')
df = titanic[['age', 'fare']]
print(f"DataFrame:\n{df.head()}\n")

# 데이터 프레임 반환
def missing_value(data):
    return data.isnull()

result_df = df.pipe(missing_value)
print(f"result_df:\n{result_df.head()}, type: {type(result_df)}\n")

# 시리즈 반환
def missing_count(data):
    return missing_value(data).sum()

result_sr = df.pipe(missing_count)
print(f'result_sr:\n{result_sr}, type: {type(result_sr)}\n')

# 값 반환
def total_missing_count(data):
    return missing_count(data).sum()

result_value = df.pipe(total_missing_count)
print(f'result_value:\n{result_value}, type: {type(result_value)}')

DataFrame:
    age     fare
0  22.0   7.2500
1  38.0  71.2833
2  26.0   7.9250
3  35.0  53.1000
4  35.0   8.0500

result_df:
     age   fare
0  False  False
1  False  False
2  False  False
3  False  False
4  False  False, type: <class 'pandas.core.frame.DataFrame'>

result_sr:
age     177
fare      0
dtype: int64, type: <class 'pandas.core.series.Series'>

result_value:
177, type: <class 'numpy.int64'>
