# Plotting and Visualization

<img src="images/matplotlib-cheatsheet.png" alt="Matplotlib cheatsheet">

In [1]:
# Third-party libraries used throughout the notebook.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fix NumPy's global seed so the randomly generated examples are repeatable.
np.random.seed(12345)
# Default size (10 x 6) for every figure created below.
plt.rc('figure', figsize=(10, 6))
# Print arrays with 4 digits of precision and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

%matplotlib notebook

In [3]:
# Ten consecutive integers (0 through 9) used as sample data for the first plot.
data = np.arange(0, 10)
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
# Line plot of `data` through the pyplot interface; with a single argument the
# values are plotted against their positions 0..N-1.
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1d5c44d2f28>]

### Figures and Subplots

In [13]:
# Create a new, empty figure; the subplots below are added to it.
fig = plt.figure()

<IPython.core.display.Javascript object>

In [14]:
# First cell of a 2 x 2 subplot grid on `fig`.
ax1 = fig.add_subplot(2, 2, 1)

In [15]:
# Second and third cells of the same 2 x 2 grid (the fourth is left empty).
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

In [16]:
# Draw the dashed black random walk on ax3 explicitly instead of relying on
# pyplot's implicit "current axes" (the most recently added subplot), so the
# target panel does not depend on which axes happens to be active when the
# cell is re-run.
ax3.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x1d5c4e447b8>]

In [17]:
# Histogram of 100 random draws on ax1; the return value is bound to `_` so it
# is not echoed as cell output.
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
# Scatter on ax2: x = 0..29, y = x plus random noise scaled by 3.
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x1d5c4e57668>

In [18]:
# Close every open figure before moving on to the next section.
plt.close('all')

### Plotting with pandas and seaborn

##### Line Plots

In [22]:
# Cumulative sum of ten random draws, labelled 0, 10, ..., 90.
s = pd.Series(
    np.cumsum(np.random.randn(10)),
    index=np.arange(0, 100, 10),
)
# Line plot of the Series.
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5c547dc18>

DataFrame’s plot method plots each of its columns as a different line on the same
subplot, creating a legend automatically:

In [23]:
# Four random walks (cumulative sums down the rows), one per column A-D,
# labelled 0, 10, ..., 90.
df = pd.DataFrame(
    np.random.randn(10, 4).cumsum(axis=0),
    columns=list('ABCD'),
    index=np.arange(0, 100, 10),
)
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5c5598f60>

In [24]:
# Close all figures from the line-plot examples.
plt.close('all')

##### Bar Plots

In [27]:
# Two stacked panels sharing one figure.
fig, axes = plt.subplots(2, 1)
# Sixteen random values labelled 'a' through 'p'.
data = pd.Series(np.random.randn(16), index=list('abcdefghijklmnop'))
# Top panel: vertical bars.
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
# Bottom panel: the same values as horizontal bars. This call previously
# repeated the vertical bar chart, so both panels were identical.
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5c7d8ad68>

In [28]:
# Re-seed NumPy's global generator so the DataFrame built next is repeatable.
np.random.seed(12348)

In [29]:
# 6 x 4 frame of random values; the column index carries the name 'Genus'.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(list('ABCD'), name='Genus'),
)
df

Genus,A,B,C,D
one,1.309562,-1.647653,-0.076477,-1.562585
two,1.656111,-1.847125,-0.589575,0.832264
three,-0.180365,0.99886,0.802854,-0.253226
four,-0.934185,-0.092905,-1.027632,-0.182916
five,0.563362,-0.006496,-0.517052,2.453182
six,0.287594,-1.397959,0.203887,-2.213737


In [30]:
# Vertical bar plot of the DataFrame: the values of each row are drawn
# together as a group of bars.
df.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5c8d325f8>

In [32]:
# Horizontal, stacked variant: each row's values share a single bar; drawn
# half-transparent.
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5c8de5128>

In [33]:
# Close all figures from the pandas bar-plot examples.
plt.close('all')

In [35]:
# NOTE(review): seaborn is imported mid-notebook; consider moving this import
# to the first cell alongside the others.
import seaborn as sns
# Load the tips dataset from the local examples directory.
tips = pd.read_csv('examples/tips.csv')
# Tip as a fraction of the bill excluding the tip itself.
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [36]:
# Horizontal seaborn bar plot of tip_pct for each day.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5c8654d68>

In [37]:
# Close the figure so the next seaborn plot starts from a clean state.
plt.close('all')

In [39]:
# Same plot as before, additionally split by the `time` column via `hue`.
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5ccd739e8>

In [40]:
# Close all figures from the seaborn bar-plot examples.
plt.close('all')

##### Histograms and Density Plots

In [42]:
# Start a fresh figure, then draw a 50-bin histogram of tip_pct on it.
plt.figure()
tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5cdcc4518>

A related plot type is a density plot, which is formed by computing an estimate of a
continuous probability distribution that might have generated the observed data. A
usual procedure is to approximate this distribution as a mixture of kernels, that is,
simpler distributions like the normal (Gaussian) distribution. Thus, density plots are
also known as KDE (kernel density estimate) plots.

In [43]:
# Start a fresh figure, then draw the kernel density estimate of tip_pct.
plt.figure()
tips['tip_pct'].plot.density()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1d5ce2035f8>

In [44]:
plt.figure()
# Two normal samples of 200 draws each: N(0, 1) and N(10, 2).
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
# Concatenate them into a single bimodal sample.
values = pd.Series(np.concatenate([comp1, comp2]))
# NOTE(review): `distplot` is deprecated in newer seaborn releases in favour of
# `histplot`/`displot` - confirm the seaborn version this notebook targets
# before upgrading.
sns.distplot(values, bins=100, color='k')

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<matplotlib.axes._subplots.AxesSubplot at 0x1d5ceb034a8>

In [45]:
# Close all figures from the histogram and density examples.
plt.close('all')

##### Scatter or Point Plots

In [46]:
# Load the macro dataset from the local examples directory.
macro = pd.read_csv('examples/macrodata.csv')
# Keep only the four series compared in the plots below.
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Natural log of every value, then drop rows containing missing values.
trans_data = np.log(data).dropna()
# Show the last five rows.
trans_data.tail(5)

Unnamed: 0,cpi,m1,tbilrate,unemp
198,5.379386,7.29621,0.157004,1.791759
199,5.357407,7.362962,-2.120264,1.931521
200,5.359746,7.373249,-1.514128,2.091864
201,5.368165,7.41071,-1.714798,2.219203
202,5.377059,7.422912,-2.120264,2.261763


In [47]:
plt.figure()
# Pass x and y by keyword: newer seaborn releases no longer accept them
# positionally, while older releases accept the keyword form as well.
sns.regplot(x='m1', y='unemp', data=trans_data)
# trans_data is np.log(data).dropna() with no differencing, i.e. log levels,
# so the title describes levels rather than "changes".
plt.title('Log %s versus log %s' % ('m1', 'unemp'))

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Changes in log m1 versus log unemp')

In exploratory data analysis, it’s helpful to be able to look at all the scatter plots among
a group of variables; this is known as a *pairs plot* or *scatter plot matrix*.

In [49]:
# Scatter plot matrix of trans_data: KDE plots on the diagonal, and alpha=0.2
# passed through `plot_kws` to the off-diagonal plots.
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha':0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1d5c8baeeb8>

In [50]:
# Close all figures from the scatter-plot examples.
plt.close('all')