In [1]:
import pandas as pd

# Load the Iris data set from the UCI repository.
# The raw file has no header row, so the column names are supplied through
# `names` at read time instead of being patched onto the frame afterwards.
# `dropna` is chained (not `inplace=True`) so re-running this cell always
# rebuilds `df` from scratch rather than mutating an existing object.
df = pd.read_csv(
    filepath_or_buffer='https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
    names=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class'],
    sep=',',
).dropna(how="all")  # drops the empty line at file-end

df.tail()

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [2]:
# Feature matrix: the four measurement columns (positions 0-3).
features = df.iloc[:, :4]
X = features.values

# Target vector: the class label held in the fifth column (position 4).
labels = df.iloc[:, 4]
y = labels.values

In [3]:
# Plotly offline helpers. Of these, only init_notebook_mode and iplot are
# called in the cells visible here.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set up plotly's notebook mode before any iplot call.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm against the plotly documentation.
init_notebook_mode(connected=True)

In [4]:
# Histograms of every feature, one x-axis panel per feature, with the three
# species overlaid in each panel.

# Fixed colour per species; reused by later plotting cells.
colors = {
    'Iris-setosa': '#0D76BF',
    'Iris-versicolor': '#00cc96',
    'Iris-virginica': '#EF553B',
}

# Legend entries are enabled only for the last feature (index 3), so each
# species shows up once in the legend rather than once per panel.
legend = {idx: idx == 3 for idx in range(4)}

data = []
for col in range(4):
    axis_ref = 'x%s' % (col + 1)   # 'x1' .. 'x4', matching the layout's x-axes
    in_legend = legend[col]
    for key, species_color in colors.items():
        species_values = list(X[y == key, col])
        trace = {
            'type': 'histogram',
            'x': species_values,
            'opacity': 0.75,
            'xaxis': axis_ref,
            'marker': {'color': species_color},
            'name': key,
            'showlegend': in_legend,
        }
        data.append(trace)

# Four side-by-side x-axes sharing a single y-axis.
layout = {
    'barmode': 'overlay',
    'xaxis': {'domain': [0, 0.25], 'title': 'sepal length (cm)'},
    'xaxis2': {'domain': [0.3, 0.5], 'title': 'sepal width (cm)'},
    'xaxis3': {'domain': [0.55, 0.75], 'title': 'petal length (cm)'},
    'xaxis4': {'domain': [0.8, 1], 'title': 'petal width (cm)'},
    'yaxis': {'title': 'count'},
    'title': 'Distribution of the different Iris flower features',
}

fig = {'data': data, 'layout': layout}
iplot(fig)

In [5]:
from sklearn.preprocessing import StandardScaler
# Standardise the feature matrix with a default-configured StandardScaler.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [17]:
# Sanity check: display the dimensions of the standardised matrix.
X_std.shape

(150, 4)

In [11]:
from sklearn.decomposition import PCA as sklearnPCA
# PCA estimator configured to keep two components.
sklearn_pca = sklearnPCA(n_components=2)
# Fit on the standardised features and transform them in one call; the two
# columns of Y_sklearn are used as the plot axes in the scatter-plot cell.
Y_sklearn = sklearn_pca.fit_transform(X_std)

In [31]:
# Print the PCA-projected rows whose label is Iris-versicolor.
versicolor_mask = y == 'Iris-versicolor'
print(Y_sklearn[versicolor_mask])

[[ 1.10399365  0.86311245]
 [ 0.73248144  0.59863557]
 [ 1.24210951  0.61482245]
 [ 0.39730728 -1.75816895]
 [ 1.07259395 -0.2117579 ]
 [ 0.38445815 -0.59106247]
 [ 0.74871508  0.77869861]
 [-0.49786339 -1.84886877]
 [ 0.92622237  0.03033083]
 [ 0.00496803 -1.02940111]
 [-0.12469746 -2.65806268]
 [ 0.43873012 -0.05888129]
 [ 0.55163398 -1.77258156]
 [ 0.71716507 -0.18543431]
 [-0.03725838 -0.4327951 ]
 [ 0.87589054  0.50999815]
 [ 0.3480064  -0.19062165]
 [ 0.15339254 -0.79072546]
 [ 1.21530321 -1.63335564]
 [ 0.15694118 -1.30310327]
 [ 0.7382561   0.40247038]
 [ 0.47236968 -0.41660822]
 [ 1.22798821 -0.94091479]
 [ 0.62938105 -0.41681164]
 [ 0.7004728  -0.06349393]
 [ 0.87353699  0.25070861]
 [ 1.25422219 -0.0826201 ]
 [ 1.35823985  0.32882027]
 [ 0.66212614 -0.22434607]
 [-0.04728151 -1.05721241]
 [ 0.12153421 -1.56359238]
 [ 0.01411823 -1.57339235]
 [ 0.23601084 -0.77592378]
 [ 1.05669143 -0.63690128]
 [ 0.22141709 -0.28084769]
 [ 0.43178316  0.85513692]
 [ 1.04941336  0.52219726]
 

In [18]:
# Scatter plot of the samples in the two-component PCA space, one trace per
# species.
data = []

# Iterate the colour map directly so every species is paired with its own
# colour by key, instead of zipping a hard-coded name tuple against
# colors.values() and relying on the two orderings happening to agree.
for name, col in colors.items():
    print(name, col)

    trace = dict(
        type='scatter',
        x=Y_sklearn[y==name,0],   # first principal component
        y=Y_sklearn[y==name,1],   # second principal component
        mode='markers',
        name=name,
        marker=dict(
            color=col,
            size=12,
            # Thin, faint outline around each marker. (A stray token inside
            # this dict(...) call previously made the cell a SyntaxError.)
            line=dict(
                color='rgba(217, 217, 217, 0.14)',
                width=0.5),
            opacity=0.8)
    )
    data.append(trace)

layout = dict(
        xaxis=dict(title='PC1', showline=False),
        yaxis=dict(title='PC2', showline=False)
)
fig = dict(data=data, layout=layout)
iplot(fig)

Iris-setosa #0D76BF
Iris-versicolor #00cc96
Iris-virginica #EF553B
