# Jupyter notebook demo with Python
This notebook is a small demonstration of how to use Jupyter notebooks with Python.

In [1]:
# Load dependencies
import pandas as pd # for data wrangling
%matplotlib notebook
import matplotlib.pyplot as plt # for plots
import seaborn as sns # for improved plot look
import matplotlib.patches as mpatches # additional plotting stuff
from matplotlib.colors import ListedColormap # additional plotting stuff
from mpl_toolkits.mplot3d import Axes3D # 3D axes

In [2]:
def get_iris_data(path="iris_data.csv"):
    """
    Export the scikit-learn iris dataset to a .csv file.

    The feature columns are named after ``iris.feature_names`` with the
    ' (cm)' unit suffix removed, and the class labels from ``iris.target``
    are stored in a final column called 'type'. The file is written
    without the DataFrame index.

    Parameters
    ----------
    path : str, optional
        Destination of the .csv file. Defaults to "iris_data.csv".

    Returns
    -------
    None
    """
    # Imported inside the function so that scikit-learn's datasets module
    # is loaded only when the file is actually (re)generated.
    from sklearn import datasets
    iris = datasets.load_iris()
    names = [x.replace(' (cm)', '') for x in iris.feature_names]
    df = pd.DataFrame(iris.data, columns=names)
    df['type'] = iris.target
    df.to_csv(path, index=False)

In [3]:
# Save iris dataset as .csv (already done once; uncomment the call below to regenerate the file)
#get_iris_data()

In [4]:
# Read the iris measurements back from the .csv file
df = pd.read_csv("iris_data.csv")

# Class names, ordered so that the integer code in column "type" is the
# list index: 0 -> setosa, 1 -> versicolor, 2 -> virginica
type_names = ['setosa', 'versicolor', 'virginica']

# Preview the first five rows
df.head(n=5)

Unnamed: 0,sepal length,sepal width,petal length,petal width,type
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [5]:
# Split the data: the target (dependent variable) goes to "y" and the
# remaining feature columns (exogenous variables) go to "X"
y = df['type']
X = df.drop('type', axis=1)

## 2-dimensional scatter plot
Now that we have loaded the data, let's do some plotting. We will first illustrate the data in two feature dimensions.

In [6]:
# Figure with one axis placed in the left slot of a 1x2 grid
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(121)

# Color map: sample PuOr at three points and build a 3-entry colormap
colors = [plt.cm.PuOr(level) for level in (0.3, 0.6, 0.9)]
cmap = plt.cm.PuOr.from_list('Custom cmap', colors, 3)

# Draw scatter of the two sepal measurements, colored by "type"
ax.scatter(X['sepal length'], X['sepal width'], c = y, cmap = cmap, edgecolor = 'k', s = 22)
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')

# Legend: one patch per type name, colored with the colormap entry at
# the same index
patches = [mpatches.Patch(color = cmap(i), label = name)
           for i, name in enumerate(type_names)]
# Trailing semicolon keeps the Legend object's repr out of the cell output
ax.legend(handles = patches, loc='upper right', fontsize = 8);


<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x20189a01b70>

## Apply Principal Component Analysis to reduce dimensionality
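Recall that the <i>k</i>th principal component is found by first subtracting the first <i>k</i> − 1 principal components from $\mathbf{X}$: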

$$\mathbf{\hat{X}}_k = \mathbf{X} - \sum_{s=1}^{k-1} \mathbf{X} \mathbf{w}_{(s)} \mathbf{w}_{(s)}^{T}$$

See the PCA page on <a href="https://en.wikipedia.org/wiki/Principal_component_analysis">Wikipedia</a>.

In [8]:
# Reduce the feature dimensions from 4 to 3 with PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
X_pca = pd.DataFrame(data=pca.fit_transform(X))

# Preview the first five rows of the reduced feature data
X_pca.head(n=5)

Unnamed: 0,0,1,2
0,-2.684207,0.326607,-0.021512
1,-2.715391,-0.169557,-0.203521
2,-2.88982,-0.137346,0.024709
3,-2.746437,-0.311124,0.037672
4,-2.728593,0.333925,0.09623


## Plot principal components in a 3D scatter plot

In [9]:
# Figure with one 3D axis placed in the left slot of a 1x2 grid
fig = plt.figure(figsize=(16, 6))
ax = fig.add_subplot(121, projection='3d')

# Color map: sample PuOr at three points and build a 3-entry colormap
colors = [plt.cm.PuOr(level) for level in (0.3, 0.6, 0.9)]
cmap = plt.cm.PuOr.from_list('Custom cmap', colors, 3)

# Draw scatter of the three PCA columns, colored by "type"
ax.scatter(X_pca.iloc[:, 0], X_pca.iloc[:, 1], X_pca.iloc[:, 2], c=y,
           cmap=cmap, edgecolor='k', s=40)
ax.set_title("First three PCA directions")
# Tick labels are hidden through ax.xaxis / ax.yaxis / ax.zaxis: the
# w_xaxis / w_yaxis / w_zaxis aliases are deprecated and were removed in
# Matplotlib 3.8.
ax.set_xlabel("1st PCA")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd PCA")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd PCA")
ax.zaxis.set_ticklabels([])
ax.view_init(azim = 110, elev = -150)

# Legend: one patch per type name, colored with the colormap entry at
# the same index
patches = [mpatches.Patch(color = cmap(i), label = name)
           for i, name in enumerate(type_names)]
ax.legend(handles = patches, loc='upper right')

fig.tight_layout()


<IPython.core.display.Javascript object>