# 绘图与可视化

In [1]:
import numpy as np
import pandas as pd
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.random.seed(12345)
import matplotlib
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)

In [2]:
%matplotlib notebook

## 简明 matplotlib API 入门

In [3]:
import matplotlib.pyplot as plt
import numpy as np

data = np.arange(10)
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1130585f8>]

### 图片与子图

In [9]:
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

<IPython.core.display.Javascript object>

In [10]:
plt.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x115e7c940>]

In [11]:
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)

In [12]:
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x1166dada0>

In [13]:
fig, axes = plt.subplots(2, 3)

<IPython.core.display.Javascript object>

In [14]:
axes

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1166c19b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115f1c8d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1166e0d68>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1166d9a90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1165ff978>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115f63908>]],
      dtype=object)

#### 调整子图周围的间距

subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

In [15]:
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### 颜色、标记和线类型

ax.plot(x, y, 'g--')

ax.plot(x, y, linestyle='--', color='g')

In [16]:
from numpy.random import randn

plt.plot(randn(30).cumsum(), 'ko--')  # 标记必须跟在颜色后面

[<matplotlib.lines.Line2D at 0x116a58128>]

plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='0')

In [17]:
data = np.random.randn(30).cumsum()
plt.plot(data, 'k--', label='Default')

[<matplotlib.lines.Line2D at 0x116e50400>]

In [18]:
plt.plot(data, 'k--', drawstyle='steps-post', label='steps-post')

[<matplotlib.lines.Line2D at 0x116a2f0f0>]

In [19]:
plt.legend(loc='best')

<matplotlib.legend.Legend at 0x116e55630>

### 刻度、标签和图例

#### 设置标题、轴标签、刻度和刻度标签

In [20]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(np.random.randn(1000).cumsum())

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x116e55780>]

In [21]:
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],  # 标签赋值
                            rotation=30, fontsize='small')

In [22]:
ax.set_title('My first matplotlib plot')

Text(0.5, 1.0, 'My first matplotlib plot')

In [23]:
ax.set_xlabel('Stages')

Text(0.5, 10.763891973024519, 'Stages')

props = {'title':'My first matplotlib plot',
       'xlable':'Stages'}
ax.set(**props)

#### 添加图例

In [24]:
from numpy.random import randn

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1170a3d68>]

In [25]:
ax.plot(randn(1000).cumsum(), 'k--', label='two')

[<matplotlib.lines.Line2D at 0x11777e208>]

In [26]:
ax.plot(randn(1000).cumsum(), 'k.', label='three' )

[<matplotlib.lines.Line2D at 0x116eca0b8>]

In [27]:
ax.legend(loc='best')  # 自动生成图例

<matplotlib.legend.Legend at 0x117895a58>

### 注释与子图加工

ax.text(x, y, 'Hello world!', family='monespace', fontsie=10)

In [34]:
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]
for date, label in crisis_data:
    ax.annotate(label, 
                xy=(date, spx.asof(date) + 75),
                xytext=(date, spx.asof(date) + 225),
                arrowprops=dict(
                    facecolor='black', 
                    headwidth=4, 
                    width=2, 
                    headlength=4),
                horizontalalignment='left',
                verticalalignment='top'
               )    

ax.set_xlim(['1/1/2007', '1/1/2011'])

<IPython.core.display.Javascript object>

(array([0.]), array([1.]))

In [35]:
ax.set_ylim([600, 1800])

(600, 1800)

In [36]:
ax.set_title('Important dates in the 2008-2009 financial crisis')

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [38]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]], color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x11ca0b9e8>

### 将图片保存到文件

plt.savefig('figpath.svg')

plt.savefig('figpath.png', dpi=400, bbox_inches='tight')

In [40]:
from io import BytesIO
    
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue

### matplotlib设置

plt.rc('figure', figsize=(10, 10))

font_options = {'family' : 'monospace', 'weight' : 'bold', 'size' : 'small'} 

plt.rc('font', **font_options)

## 使用pandas和seaborn绘图

### 折线图

In [3]:
s = pd.Series(np.random.randn(10).cumsum(),
             index=np.arange(0, 100, 10))
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x118168a20>

In [5]:
df = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
                 columns=['A', 'B', 'C', 'D'],
                 index=np.arange(0, 100, 10))
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x119077710>

### 柱状图

In [8]:
fig, axes = plt.subplots(2, 1)

data = pd.Series(np.random.rand(16),
                index=list('abcdefghijklmnop'))

data.plot.bar(ax=axes[0], color='k', alpha=0.7)  # 水平柱状图
data.plot.barh(ax=axes[1], color='k', alpha=0.7)  # 垂直柱状图

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11e4ac940>

In [9]:
df = pd.DataFrame(np.random.rand(6, 4),
                 index=['one', 'two', 'three', 'four', 'five', 'six'],
                 columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df

Genus,A,B,C,D
one,0.290095,0.58518,0.453995,0.411178
two,0.882634,0.692708,0.279273,0.06444
three,0.198624,0.931683,0.854414,0.954735
four,0.052253,0.579472,0.480496,0.021709
five,0.37362,0.414092,0.603907,0.671749
six,0.838866,0.779526,0.400701,0.794529


In [11]:
df.plot.bar()  # DataFrame并排柱状图
df.plot.barh(stacked=True, alpha=0.5)  # DataFrame堆积柱状图

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1205c6f60>

In [12]:
tips = pd.read_csv('examples/tips.csv')
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [14]:
party_counts = party_counts.loc[:, 2:5]
party_pcts = party_counts.div(party_counts.sum(1), axis=0)
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [29]:
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x12ae0fdd8>

In [19]:
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [32]:
import seaborn as sns

plt.figure()

sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x121a6f6d8>

In [33]:
plt.figure()

sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x121aba128>

In [35]:
sns.set(style="whitegrid")

### 直方图和密度图

In [36]:
plt.figure()

tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x12be30eb8>

In [40]:
plt.figure()

tips['tip_pct'].plot.density()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x12f063978>

In [49]:
# 正态混合的标准化直方图与密度估计
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = pd.Series(np.concatenate([comp1, comp2]))

plt.figure()
sns.distplot(values, bins=100, color='k')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x130c76470>

### 散点图或点图

In [43]:
macro = pd.read_csv('examples/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [46]:
plt.figure()
sns.regplot('m1', 'unemp', data=trans_data)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x12f1e4b38>

In [47]:
plt.title('Changer in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5, 1.0, 'Changer in log m1 versus log unemp')

In [51]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha':0.2})  # 成对图矩阵

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x120eb8f28>

### 分面网格和分类数据

In [52]:
sns.factorplot(x='day', y='tip_pct', hue='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x133c51dd8>

In [53]:
sns.factorplot(x='day', y='tip_pct', row='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1365ee518>

In [54]:
sns.factorplot(x='day', y='tip_pct', kind='box', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x136916400>

## 其他 Python 可视化工具

浏览器直接创建交互式图像：Bokeh && Plotly

其他第三方工具：Tableau && DataV