***9. Dimensionality Reduction Using Feature Extraction***

In [1]:
# 9.1 Reducing features using principal components
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the digits dataset and standardise its feature matrix.
digits = datasets.load_digits()
features = StandardScaler().fit_transform(digits.data)

# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share of the variance (99% here).
pca = PCA(whiten=True, n_components=0.99)
features_pca = pca.fit_transform(features)

# Report the column counts before and after the projection.
print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_pca.shape[1])


Original number of features: 64
Reduced number of features: 54


In [8]:
# 9.2 Reducing features when the data is linearly inseparable
from sklearn.datasets import make_circles
from sklearn.decomposition import PCA, KernelPCA

# Generate the sample points; the second value returned by make_circles
# is discarded.
features, _ = make_circles(
    n_samples=1000,
    noise=0.1,
    factor=0.1,
    random_state=1,
)

# Project onto a single component with an RBF-kernel PCA.
kpca = KernelPCA(kernel='rbf', gamma=15, n_components=1)
features_kpca = kpca.fit_transform(features)

# Column count of the original data.
features.shape[1]


2

In [9]:
# Column count of the KernelPCA-transformed features.
features_kpca.shape[1]

1

In [11]:
# 9.3 Reducing features by maximizing class separability
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Iris feature matrix and its class labels.
iris = datasets.load_iris()
features, labels = iris.data, iris.target

# LDA is supervised: the labels are passed alongside the features.
lda = LDA(n_components=2)
features_lda = lda.fit_transform(features, labels)

# Column count of the original data.
features.shape[1]


4

In [12]:
# Column count of the LDA-transformed features.
features_lda.shape[1]

2

In [14]:
# Explained-variance ratio reported by the fitted LDA, one entry per component.
lda.explained_variance_ratio_

array([0.9912126, 0.0087874])

In [15]:
# 9.4 Reducing features using matrix factorization
from sklearn import datasets
from sklearn.decomposition import NMF  # non-negative matrix factorization

# The raw (unscaled) digits feature matrix is used as-is.
digits = datasets.load_digits()
features = digits.data

# Factorize down to 40 components with a seeded random initialisation.
nmf = NMF(
    n_components=40,
    init='random',
    max_iter=200,
    random_state=1,
)
features_nmf = nmf.fit_transform(features)

# Column count of the original data.
features.shape[1]




64

In [16]:
# Column count of the NMF-transformed features.
features_nmf.shape[1]

40

In [19]:
# 9.5 Reducing features on sparse data
from scipy.sparse import csr_matrix
from sklearn import datasets
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

# Standardise the digits features, then store them as a CSR sparse matrix.
digits = datasets.load_digits()
features = StandardScaler().fit_transform(digits.data)
features_sparse = csr_matrix(features)

# Reduce the sparse matrix to 10 components with a seeded TruncatedSVD.
tsvd = TruncatedSVD(random_state=1, n_components=10)
features_tsvd = tsvd.fit_transform(features_sparse)

# Column count of the original data.
features.shape[1]



64

In [20]:
# Column count of the TruncatedSVD-transformed features.
features_tsvd.shape[1]

10

In [23]:
# Sum of the explained-variance ratios of the first three components.
tsvd.explained_variance_ratio_[0:3].sum()

np.float64(0.30039385392332724)

In [25]:
# Refit with one component fewer than the number of input features so the
# explained-variance ratio of every one of those components is available.
# - Fit on features_sparse, the same matrix n_components is derived from,
#   rather than the dense `features` array.
# - Pin random_state (as the 10-component model does) so the default
#   randomized solver gives reproducible ratios across runs.
# - Use fit_transform instead of fit: fit() returns the estimator itself, which
#   would leave `features_tsvd` holding a model rather than transformed data.
tsvd=TruncatedSVD(n_components=features_sparse.shape[1]-1,random_state=1)
features_tsvd=tsvd.fit_transform(features_sparse)

In [26]:
# Keep the fitted model's per-component explained-variance ratios so they can
# be passed to select_n_components.
tsvd_var_ratios=tsvd.explained_variance_ratio_


In [27]:
def select_n_components(var_ratio,threshold):
    """Count the leading entries needed for their running sum to reach a threshold.

    Args:
        var_ratio: Iterable of numeric values, consumed in order.
        threshold: Value the running sum must reach (>= comparison).

    Returns:
        The number of entries consumed up to and including the first one at
        which the running sum is >= threshold. If the sum never reaches the
        threshold, the total number of entries (0 for an empty iterable).
    """
    used = 0
    running_sum = 0
    for used, ratio in enumerate(var_ratio, start=1):
        running_sum += ratio
        # Stop as soon as enough has been accumulated.
        if running_sum >= threshold:
            break
    return used
# Number of leading components whose cumulative explained-variance ratio
# reaches 0.95.
select_n_components(tsvd_var_ratios,0.95)

40