# pandas的数据转换函数map、apply、applymap

数据转换函数对比：map、apply、applymap

1. map：只用于Series，实现每个值->值的映射
2. apply：用于Series实现每个值的处理，用于DataFrame实现某个轴的Series的处理
3. applymap：只能用于DataFrame，用于处理该DataFrame的每个元素

## map用于处理Series值的转换

实例：将股票代码英文转换成中文名字

Series.map(dict) / Series.map(function)

In [1]:
import pandas as pd

# Load the internet-company stock quotes from the Excel workbook.
excel_path = './data/stocks/互联网公司股票.xlsx'
stocks = pd.read_excel(excel_path)

# Preview the first rows of the loaded table.
stocks.head()

Unnamed: 0,日期,公司,收盘,开盘,高,低,交易量,涨跌幅
0,2019-10-03,BIDU,104.32,102.35,104.73,101.15,2.24,0.02
1,2019-10-02,BIDU,102.62,100.85,103.24,99.5,2.69,0.01
2,2019-10-01,BIDU,102.0,102.8,103.26,101.0,1.78,-0.01
3,2019-10-03,BABA,169.48,166.65,170.18,165.0,10.39,0.02
4,2019-10-02,BABA,165.77,162.82,166.88,161.9,11.6,0.0


In [2]:
stocks['公司'].unique()

array(['BIDU', 'BABA', 'IQ', 'JD'], dtype=object)

In [3]:
# Ticker symbol -> Chinese company name.
# Note that the keys here are lower-case.
dict_name = {'bidu': '百度', 'baba': '阿里巴巴', 'iq': '爱奇艺', 'jd': '京东'}

### 方法1.Series.map()

In [4]:
# Lower-case the tickers first so they match the lower-case keys of dict_name,
# then let Series.map translate each value through the dict.
lowered = stocks['公司'].str.lower()
stocks['公司中文名'] = lowered.map(dict_name)

In [5]:
stocks.head()

Unnamed: 0,日期,公司,收盘,开盘,高,低,交易量,涨跌幅,公司中文名
0,2019-10-03,BIDU,104.32,102.35,104.73,101.15,2.24,0.02,百度
1,2019-10-02,BIDU,102.62,100.85,103.24,99.5,2.69,0.01,百度
2,2019-10-01,BIDU,102.0,102.8,103.26,101.0,1.78,-0.01,百度
3,2019-10-03,BABA,169.48,166.65,170.18,165.0,10.39,0.02,阿里巴巴
4,2019-10-02,BABA,165.77,162.82,166.88,161.9,11.6,0.0,阿里巴巴


### 方法2.Series.map(function)

In [6]:
# Series.map also accepts a function: it is called with each ticker and the
# value it returns becomes the corresponding element of the new column.
stocks['公司中文名2'] = stocks['公司'].map(lambda ticker: dict_name[ticker.lower()])

In [7]:
stocks.head()

Unnamed: 0,日期,公司,收盘,开盘,高,低,交易量,涨跌幅,公司中文名,公司中文名2
0,2019-10-03,BIDU,104.32,102.35,104.73,101.15,2.24,0.02,百度,百度
1,2019-10-02,BIDU,102.62,100.85,103.24,99.5,2.69,0.01,百度,百度
2,2019-10-01,BIDU,102.0,102.8,103.26,101.0,1.78,-0.01,百度,百度
3,2019-10-03,BABA,169.48,166.65,170.18,165.0,10.39,0.02,阿里巴巴,阿里巴巴
4,2019-10-02,BABA,165.77,162.82,166.88,161.9,11.6,0.0,阿里巴巴,阿里巴巴


## apply函数用于Series和DataFrame的转换

- Series.apply(function) 函数的参数是每个值
- DataFrame.apply(function) 函数的参数是Series

### Series.apply(function)

function的参数是series的每个值

In [8]:
# Series.apply invokes the function once for every value in the column.
stocks['公司中文名3'] = stocks['公司'].apply(lambda ticker: dict_name[ticker.lower()])

In [9]:
stocks.head()

Unnamed: 0,日期,公司,收盘,开盘,高,低,交易量,涨跌幅,公司中文名,公司中文名2,公司中文名3
0,2019-10-03,BIDU,104.32,102.35,104.73,101.15,2.24,0.02,百度,百度,百度
1,2019-10-02,BIDU,102.62,100.85,103.24,99.5,2.69,0.01,百度,百度,百度
2,2019-10-01,BIDU,102.0,102.8,103.26,101.0,1.78,-0.01,百度,百度,百度
3,2019-10-03,BABA,169.48,166.65,170.18,165.0,10.39,0.02,阿里巴巴,阿里巴巴,阿里巴巴
4,2019-10-02,BABA,165.77,162.82,166.88,161.9,11.6,0.0,阿里巴巴,阿里巴巴,阿里巴巴


### DataFrame.apply(function)

function的参数是对应轴的Series

In [10]:
# Things to note about this call:
# 1. apply is invoked on the stocks DataFrame itself.
# 2. Because axis=1 is specified, the lambda receives a Series whose keys are
#    the column names, so row['公司'] reads the ticker of that row.
stocks['公司中文名4'] = stocks.apply(lambda row: dict_name[row['公司'].lower()], axis=1)

In [11]:
stocks.head()

Unnamed: 0,日期,公司,收盘,开盘,高,低,交易量,涨跌幅,公司中文名,公司中文名2,公司中文名3,公司中文名4
0,2019-10-03,BIDU,104.32,102.35,104.73,101.15,2.24,0.02,百度,百度,百度,百度
1,2019-10-02,BIDU,102.62,100.85,103.24,99.5,2.69,0.01,百度,百度,百度,百度
2,2019-10-01,BIDU,102.0,102.8,103.26,101.0,1.78,-0.01,百度,百度,百度,百度
3,2019-10-03,BABA,169.48,166.65,170.18,165.0,10.39,0.02,阿里巴巴,阿里巴巴,阿里巴巴,阿里巴巴
4,2019-10-02,BABA,165.77,162.82,166.88,161.9,11.6,0.0,阿里巴巴,阿里巴巴,阿里巴巴,阿里巴巴


## applymap用于DataFrame所有值的转换

In [12]:
# Select only the price and volume columns for the element-wise demo below.
price_volume_cols = ['收盘', '开盘', '高', '低', '交易量']
sub_df = stocks[price_volume_cols]

In [13]:
sub_df

Unnamed: 0,收盘,开盘,高,低,交易量
0,104.32,102.35,104.73,101.15,2.24
1,102.62,100.85,103.24,99.5,2.69
2,102.0,102.8,103.26,101.0,1.78
3,169.48,166.65,170.18,165.0,10.39
4,165.77,162.82,166.88,161.9,11.6
5,165.15,168.01,168.23,163.64,14.19
6,16.06,15.71,16.38,15.32,10.08
7,15.72,15.85,15.87,15.12,8.1
8,15.92,16.14,16.22,15.5,11.65
9,28.8,28.11,28.97,27.82,8.77


In [15]:
# Truncate every number to an integer; applymap applies the function to each
# element of the DataFrame.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map, which has the same element-wise behaviour.
sub_df.applymap(int)

  sub_df.applymap(lambda x : int(x))


Unnamed: 0,收盘,开盘,高,低,交易量
0,104,102,104,101,2
1,102,100,103,99,2
2,102,102,103,101,1
3,169,166,170,165,10
4,165,162,166,161,11
5,165,168,168,163,14
6,16,15,16,15,10
7,15,15,15,15,8
8,15,16,16,15,11
9,28,28,28,27,8


In [16]:
# Write the truncated values straight back into the original columns.
# Assigning through .loc fills the existing columns, which is why the result
# still displays as floats (e.g. 104.0 rather than 104).
truncated_cols = ['收盘', '开盘', '高', '低', '交易量']
stocks.loc[:, truncated_cols] = sub_df.applymap(int)

  stocks.loc[:,['收盘', '开盘', '高', '低', '交易量']] = sub_df.applymap(lambda x:int(x))


In [18]:
stocks.head()

Unnamed: 0,日期,公司,收盘,开盘,高,低,交易量,涨跌幅,公司中文名,公司中文名2,公司中文名3,公司中文名4
0,2019-10-03,BIDU,104.0,102.0,104.0,101.0,2.0,0.02,百度,百度,百度,百度
1,2019-10-02,BIDU,102.0,100.0,103.0,99.0,2.0,0.01,百度,百度,百度,百度
2,2019-10-01,BIDU,102.0,102.0,103.0,101.0,1.0,-0.01,百度,百度,百度,百度
3,2019-10-03,BABA,169.0,166.0,170.0,165.0,10.0,0.02,阿里巴巴,阿里巴巴,阿里巴巴,阿里巴巴
4,2019-10-02,BABA,165.0,162.0,166.0,161.0,11.0,0.0,阿里巴巴,阿里巴巴,阿里巴巴,阿里巴巴
