## Iris PCA 降維 (dimensionality reduction)

* [Install Plotly via conda](https://anaconda.org/plotly/plotly)
* [Plotly Jupyter support](https://plotly.com/python/getting-started/)
* [3D scatter plots in Plotly](https://plotly.com/python/3d-scatter-plots/)

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import seaborn as sns; sns.set()
import plotly.express as px

## Load data

In [None]:
# Load the bundled Iris dataset and arrange it as a DataFrame: one column per
# named feature plus a "target" column holding the dataset's target values.
data = load_iris()
iris = pd.DataFrame(data=data["data"], columns=data["feature_names"]).assign(
    target=data["target"]
)
iris  # last expression of the cell, so the notebook renders the table

## Visualize in original space

In [None]:
# Sepal length against sepal width, one colour per target value.
sns.scatterplot(
    data=iris,
    x="sepal length (cm)",
    y="sepal width (cm)",
    hue="target",
    palette="Set1",
)

In [None]:
# Petal length against petal width, one colour per target value.
sns.scatterplot(
    data=iris,
    x="petal length (cm)",
    y="petal width (cm)",
    hue="target",
    palette="Set1",
)

In [None]:
# Sepal length against petal length, one colour per target value.
sns.scatterplot(
    data=iris,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="target",
    palette="Set1",
)

In [None]:
# Interactive 3D view of three of the original features, coloured by target.
fig = px.scatter_3d(
    iris,
    x="sepal length (cm)",
    y="sepal width (cm)",
    z="petal length (cm)",
    color="target",
    opacity=0.7,
)
# Use a smaller marker so overlapping points stay distinguishable.
fig.update_traces(marker={"size": 5})
fig.show()

## PCA

In [None]:
# Configure a PCA estimator that keeps three components; it is only
# constructed here and is fitted in a later cell.
pca = PCA(n_components=3)

In [None]:
# Fit the PCA model on the feature matrix and project the samples onto the
# fitted components in a single call.
Z = pca.fit_transform(data["data"])

In [None]:
# Wrap the projected coordinates in a DataFrame with one "PC<k>" column per
# component. The labels are derived from the width of Z rather than being
# hard-coded, so this cell keeps working if the number of PCA components
# configured earlier is changed.
Z = pd.DataFrame(Z, columns=[f"PC{k}" for k in range(1, Z.shape[1] + 1)])
Z

In [None]:
# Append the principal-component columns to the feature table. Any PC columns
# left over from a previous run of this cell are dropped first, so re-running
# the cell does not pile up duplicate "PC1"/"PC2"/"PC3" columns (duplicate
# column names would make later column lookups ambiguous).
iris = pd.concat([iris.drop(columns=Z.columns, errors="ignore"), Z], axis=1)
iris.head()

## 3D scatter plot

In [None]:
# Interactive 3D view of the samples in principal-component space,
# coloured by target.
fig = px.scatter_3d(
    iris,
    x="PC1",
    y="PC2",
    z="PC3",
    color="target",
    opacity=0.7,
)
# Use a smaller marker so overlapping points stay distinguishable.
fig.update_traces(marker={"size": 5})
fig.show()

## Scree plot

In [None]:
# Table for the scree plot: 1-based component number alongside the variance
# explained by that component. The component numbers are derived from the
# fitted model instead of a hard-coded [1, 2, 3], so the table stays
# consistent if the number of PCA components is changed.
variances = pd.DataFrame(
    {
        "principal component": range(1, len(pca.explained_variance_) + 1),
        "explained variance": pca.explained_variance_,
    }
)
variances.head()

In [None]:
# Scree plot: explained variance for each principal component.
sns.lineplot(
    data=variances,
    x="principal component",
    y="explained variance",
)

In [None]:
# Table of the running total of the explained-variance ratio, indexed by
# 1-based component number. The component numbers are derived from the fitted
# model instead of a hard-coded [1, 2, 3], so the table stays consistent if
# the number of PCA components is changed.
ratio = pd.DataFrame(
    {
        "principal component": range(1, len(pca.explained_variance_ratio_) + 1),
        "cumulated explained variance ratio": np.cumsum(
            pca.explained_variance_ratio_
        ),
    }
)
ratio.head()

In [None]:
# Plot the cumulative explained-variance ratio per component.
ax = sns.lineplot(
    data=ratio,
    x="principal component",
    y="cumulated explained variance ratio",
)
# Fix the y-axis to start at 0 and leave a little headroom above 1.
ax.set(ylim=(0, 1.1))