In [2]:
# Load the new wine data set, split it, and standardize the features.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# The CSV has no header row; column 0 is used as the target, the rest as features.
dat_wine = pd.read_csv('./wine_new.csv', header=None)
y = dat_wine.iloc[:, 0].values
X = dat_wine.iloc[:, 1:].values

# 70/30 split, stratified on the target so both parts keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=1
)

# Fit the scaler on the training part only, then apply it to both parts.
std = StandardScaler().fit(X_train)
X_train_std = std.transform(X_train)
X_test_std = std.transform(X_test)

In [4]:
# Reduce the dimensionality with LDA, then classify with logistic regression.
from sklearn import metrics
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression

# With n_components=2, LDA reduces the data to 2 dimensions.
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

# Fit the classifier on the LDA-reduced training data and predict on both splits.
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)
y_train_pred, y_test_pred = lr.predict(X_train_lda), lr.predict(X_test_lda)

# Printed in order: train accuracy, test accuracy, test confusion matrix.
print(metrics.accuracy_score(y_train, y_train_pred))
print(metrics.accuracy_score(y_test, y_test_pred))
print(metrics.confusion_matrix(y_test, y_test_pred))

1.0
0.9629629629629629
[[18  0  0]
 [ 1 19  1]
 [ 0  0 15]]


In [9]:
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np

def make_hello(N=1000, rseed=42):
    """Sample 2-D points that fall on a rendered image of the word 'HELLO'.

    The word is drawn with matplotlib and saved to ``hello.png`` in the
    current working directory (the file is left on disk), then read back as
    a pixel array. ``4 * N`` uniform random points are drawn in the unit
    square and only those landing on a pixel whose first channel is below 1
    are kept.

    Parameters
    ----------
    N : int
        Maximum number of points to return.
    rseed : int
        Seed for the ``numpy.random.RandomState`` used to draw the points.

    Returns
    -------
    numpy.ndarray
        Array with two columns and at most ``N`` rows, ordered by ascending
        first coordinate.
    """
    from matplotlib.image import imread

    # Render the text on an axis-free canvas and persist it as a PNG.
    canvas, axes = plt.subplots(figsize=(4, 1))
    canvas.subplots_adjust(left=0, right=1, bottom=0, top=1)
    axes.axis('off')
    axes.text(0.5, 0.4, 'HELLO', va='center', ha='center',
              weight='bold', size=85)
    canvas.savefig('hello.png')
    plt.close(canvas)

    # First channel only, rows flipped, then transposed so the first index
    # follows the image's column direction.
    pixels = imread('hello.png')[::-1, :, 0].T
    extent_0, extent_1 = pixels.shape

    # Uniform candidates in the unit square, mapped to integer pixel indices.
    sampler = np.random.RandomState(rseed)
    candidates = sampler.rand(4 * N, 2)
    idx_0, idx_1 = (candidates * pixels.shape).astype(int).T

    # Keep candidates on pixels below 1, i.e. presumably the drawn text.
    on_text = candidates[pixels[idx_0, idx_1] < 1]

    # Stretch the first coordinate by the ratio of the two pixel-array extents.
    on_text[:, 0] *= (extent_0 / extent_1)

    kept = on_text[:N]
    return kept[np.argsort(kept[:, 0])]
# Sample 1000 HELLO points. NOTE: this rebinds the global X that the wine
# cell earlier in the notebook used for its feature matrix.
X = make_hello(1000)

def random_projection(X, dimension=3, rseed=42):
    """Map the rows of ``X`` into ``dimension``-D space with a seeded random basis.

    A ``dimension x dimension`` Gaussian matrix is drawn from a
    ``RandomState(rseed)``; the eigenvectors of that matrix times its
    transpose (from ``np.linalg.eigh``) supply the basis, and ``X`` is
    multiplied by the first ``X.shape[1]`` rows of the eigenvector matrix.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array whose rows are the points to map.
    dimension : int
        Target dimensionality; must be at least ``X.shape[1]``.
    rseed : int
        Seed for the random matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(X.shape[0], dimension)``.

    Raises
    ------
    AssertionError
        If ``dimension`` is smaller than the number of columns of ``X``.
    """
    n_features = X.shape[1]
    assert dimension >= n_features

    seeded = np.random.RandomState(rseed)
    gauss = seeded.randn(dimension, dimension)

    # Only the eigenvector matrix is needed; the eigenvalues are discarded.
    _, basis = np.linalg.eigh(gauss.dot(gauss.T))
    return np.dot(X, basis[:n_features])
# Embed the 2-D HELLO points in 3-D, then display the shape of the result.
X1 = random_projection(X, 3)
X1.shape

(1000, 3)

In [10]:
import plotly.graph_objects as go

# Interactive 3-D scatter of X1; marker color follows the third coordinate.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=X1[:, 0],
            y=X1[:, 1],
            z=X1[:, 2],
            mode='markers',
            opacity=0.3,
            marker={'size': 5, 'color': X1[:, 2], 'colorscale': 'Viridis'},
        )
    ]
)
fig.update_layout(width=800, height=800)
fig.show()


In [13]:
# MDS on the HELLO data (the 3-D projection X1), reduced back to 2-D.
from sklearn.manifold import MDS

mds = MDS(random_state=1, n_components=2)
out_mds = mds.fit_transform(X1)
print(out_mds[:5])

# 2-D scatter of the embedding; marker color follows the second coordinate.
fig = go.Figure(
    data=[
        go.Scatter(
            x=out_mds[:, 0],
            y=out_mds[:, 1],
            mode='markers',
            marker={'size': 5, 'color': out_mds[:, 1], 'colorscale': 'Viridis'},
        )
    ]
)
fig.update_layout(width=500, height=500)
fig.show()

[[-0.4497622  -1.8207143 ]
 [-0.26918837 -1.8806143 ]
 [-0.26591131 -1.87822051]
 [-0.62879339 -1.75620828]
 [-0.71833169 -1.72592393]]


In [15]:
# S-shaped 3-D HELLO, used as input for the nonlinear dimensionality-reduction methods
def make_hello_s_curve(X):
    """Bend 2-D points into a 3-D S-shaped curve.

    The first column of ``X`` is turned into an angle
    ``(X[:, 0] - 2) * 0.75 * pi``; the second column is passed through
    unchanged as the middle coordinate.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array; columns 0 and 1 are read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), 3)`` with columns
        ``sin(angle)``, ``X[:, 1]``, ``sign(angle) * (cos(angle) - 1)``.
    """
    angle = (X[:, 0] - 2) * 0.75 * np.pi
    coords = (
        np.sin(angle),
        X[:, 1],
        np.sign(angle) * (np.cos(angle) - 1),
    )
    return np.vstack(coords).T
import plotly.graph_objects as go

# Build the S-curve version of the HELLO points.
XS = make_hello_s_curve(X)

# Interactive 3-D scatter of XS; marker color follows the third coordinate.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=XS[:, 0],
            y=XS[:, 1],
            z=XS[:, 2],
            mode='markers',
            opacity=0.8,
            marker={'size': 5, 'color': XS[:, 2], 'colorscale': 'Viridis'},
        )
    ]
)
fig.update_layout(height=500)
fig.show()

In [16]:
# Reduce the S-shaped HELLO to 2-D with MDS.
import plotly.graph_objects as go
from sklearn.manifold import MDS

model = MDS(random_state=2, n_components=2)
mds_out = model.fit_transform(XS)

# 2-D scatter of the embedding; marker color follows the second coordinate.
fig = go.Figure(
    data=[
        go.Scatter(
            x=mds_out[:, 0],
            y=mds_out[:, 1],
            mode='markers',
            opacity=0.7,
            marker={'size': 3, 'color': mds_out[:, 1], 'colorscale': 'Viridis'},
        )
    ]
)
fig.update_layout(width=500, height=500)
fig.show()

In [18]:
# Dimensionality reduction of the S-shaped HELLO with Isomap.
from sklearn.manifold import Isomap

iso = Isomap(n_components=2, n_neighbors=100)
iso_out = iso.fit_transform(XS)

# 2-D scatter of the embedding; marker color follows the first coordinate.
fig = go.Figure(
    data=[
        go.Scatter(
            x=iso_out[:, 0],
            y=iso_out[:, 1],
            mode='markers',
            marker={'size': 5, 'color': iso_out[:, 0], 'colorscale': 'Viridis'},
        )
    ]
)
fig.update_layout(width=500, height=500)
fig.show()

In [23]:
from sklearn.manifold import LocallyLinearEmbedding as LLE

# Modified LLE embedding of the S-shaped HELLO into 2-D.
# random_state is fixed because, with eigen_solver left at 'auto', scikit-learn
# may pick the ARPACK solver, which starts from a random vector; without a seed
# the embedding can differ between runs.
lle = LLE(n_neighbors=100, n_components=2, method='modified', random_state=1)
lle_out = lle.fit_transform(XS)

# 2-D scatter of the embedding; marker color follows the first coordinate.
fig = go.Figure()
fig.add_trace(go.Scatter(x=lle_out[:,0], y=lle_out[:,1],
    mode='markers',
    marker=dict(size=5, color=lle_out[:,0], colorscale='Viridis')))
fig.update_layout(height=400, width=400)
fig.show()

In [24]:
from sklearn.manifold import TSNE

# t-SNE embedding of the S-shaped HELLO into 2-D.
# - init and learning_rate are pinned to the values this cell was already
#   running with ('random' and 200.0), so a scikit-learn upgrade that changes
#   the defaults does not silently change the result, and the FutureWarnings
#   about those defaults are no longer triggered.
# - random_state is fixed because t-SNE is stochastic; without a seed every
#   run produces a different embedding.
t_sne = TSNE(n_components=2, perplexity=40, init='random',
             learning_rate=200.0, random_state=1)
sne_out = t_sne.fit_transform(XS)

# 2-D scatter of the embedding; marker color follows the first coordinate.
fig = go.Figure()
fig.add_trace(go.Scatter(x=sne_out[:,0], y=sne_out[:,1],
    mode='markers',
    marker=dict(size=5, color=sne_out[:,0], colorscale='Viridis')))
fig.update_layout(height=400, width=400)
fig.show()


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.

