In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from feature_scaling import pretreat
from performance_metrics import calculate_metrics
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA

In [2]:

def plsnipals(X, Y, A):
    """Fit a PLS model with the NIPALS algorithm (single or multiple responses).

    The inputs are used exactly as given: no centering or scaling is applied
    here, and the caller's arrays are never modified (deflation works on new
    arrays).

    Parameters
    ----------
    X : array-like, shape (n, p)
        Predictor matrix.
    Y : array-like, shape (n, m) or (n,)
        Response matrix. A 1-D vector is treated as a single response column.
    A : int
        Number of latent variables to extract.

    Returns
    -------
    B : ndarray, shape (p, m)
        Regression coefficients, computed as ``Wstar @ Q.T`` from the
        untransposed Y loadings.
    Wstar : ndarray, shape (p, A)
        Weights expressed relative to the undeflated X, ``W @ inv(P.T @ W)``.
    T : ndarray, shape (n, A)
        X scores, one column per latent variable.
    P : ndarray, shape (p, A)
        X loadings.
    Q : ndarray, shape (A, m)
        Y loadings, returned transposed.
    W : ndarray, shape (p, A)
        Unit-norm X weights.
    R2X : ndarray, shape (A,)
        Fraction of the sum of squares of X attributed to each component.
    R2Y : ndarray, shape (A,)
        Fraction of the sum of squares of Y attributed to each component.

    Raises
    ------
    ValueError
        If X is not 2-D, Y is neither 1-D nor 2-D, or X and Y have a
        different number of rows.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)

    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features); got ndim={X.ndim}")
    if Y.ndim == 1:
        # Accept a plain response vector (PLS1) as a single-column matrix.
        Y = Y.reshape(-1, 1)
    elif Y.ndim != 2:
        raise ValueError(f"Y must be 1-D or 2-D; got ndim={Y.ndim}")
    if X.shape[0] != Y.shape[0]:
        raise ValueError(
            f"X and Y must have the same number of rows; got {X.shape[0]} and {Y.shape[0]}"
        )

    n_samples, n_features = X.shape
    n_targets = Y.shape[1]

    W = np.zeros((n_features, A))
    T = np.zeros((n_samples, A))
    P = np.zeros((n_features, A))
    Q = np.zeros((n_targets, A))

    # Total sums of squares of the *undeflated* data; denominators for R2X/R2Y.
    varX = np.sum(X ** 2)
    varY = np.sum(Y ** 2)

    for i in range(A):
        # Start the inner iteration from the first column of the current
        # (deflated) Y and iterate until u stabilises or the cap is reached.
        u = Y[:, 0]
        error = 1
        niter = 0
        while error > 1e-8 and niter < 1000:
            w = np.dot(X.T, u) / np.dot(u, u)
            w = w / np.linalg.norm(w)
            t = np.dot(X, w)
            q = np.dot(Y.T, t) / np.dot(t, t)  # regress Y on the score t
            u1 = np.dot(Y, q) / np.dot(q, q)
            error = np.linalg.norm(u1 - u) / np.linalg.norm(u)
            u = u1
            niter += 1

        loading = np.dot(X.T, t) / np.dot(t, t)

        # Deflate: remove the part of X and Y explained by this component.
        # These create new arrays, so the caller's data stays untouched.
        X = X - np.outer(t, loading)
        Y = Y - np.outer(t, q)

        W[:, i] = w
        T[:, i] = t
        P[:, i] = loading
        Q[:, i] = q

    # Explained sum of squares per component, relative to the original data.
    TtT = np.dot(T.T, T)
    R2X = np.diag(np.dot(TtT, np.dot(P.T, P))) / varX
    R2Y = np.diag(np.dot(TtT, np.dot(Q.T, Q))) / varY

    # Weights relative to the original X and the resulting coefficients.
    Wstar = np.dot(W, np.linalg.inv(np.dot(P.T, W)))
    B = np.dot(Wstar, Q.T)

    return B, Wstar, T, P, Q.T, W, R2X, R2Y


In [3]:
# Load the three datasets. Each frame ends up with its label in a `target`
# column, rows containing missing values removed, and a human-readable
# `.name` attribute.

# Heart: used as-is apart from dropping incomplete rows.
heart_data = pd.read_csv('./dataset/heart.csv').dropna()

# Iris: the label column is `Species`; the `Id` column is discarded and the
# species names are encoded as floats 0-2.
iris_data = (
    pd.read_csv('./dataset/iris.csv')
    .rename(columns={'Species': 'target'})
    .drop(columns=['Id'])
    .assign(
        target=lambda frame: frame['target']
        .map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})
        .astype('float64')
    )
    .dropna()
)

# Wine: the label column is `quality`; scores 3-8 are re-encoded as 0-5.
# NOTE(review): any quality value outside 3-8 maps to NaN and is therefore
# removed by the dropna() that follows - confirm this is intended.
wine_data = (
    pd.read_csv('./dataset/winequality-combined.csv')
    .rename(columns={'quality': 'target'})
    .assign(
        target=lambda frame: frame['target']
        .map({3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5})
        .astype('float64')
    )
    .dropna()
)

# Attach display names last so they live on the final frame objects.
heart_data.name = 'heart data'
iris_data.name = 'iris data'
wine_data.name = 'wine data'


In [4]:
# Modelling data: the `target` column of the heart dataset is the label and
# every other column is a feature. (Substitute `wine_data` for `heart_data`
# to run the same steps on the wine set.)
y = heart_data['target'].to_numpy()
X = heart_data.drop(columns=['target']).to_numpy()

# No hold-out split is made here: the complete dataset serves as the training
# set. Feature pretreatment is applied later, where `X_train_pretreated` is
# built.
X_train, y_train = X, y

In [6]:
# Labels as a single-column matrix.
y_train_matrix = np.reshape(y_train, (-1, 1))

# Pretreat the features with the project-local helper from `feature_scaling`
# (presumably centering/scaling - confirm against that module).
X_train_pretreated = pretreat(X_train)

# Extract five latent variables.
# NOTE(review): the pretreated X is passed as both predictor and response, so
# `y_train_matrix` is not used by this fit. The recorded scores agree with the
# PCA scores shown further down up to sign, which suggests this is a
# deliberate PLS-vs-PCA comparison - confirm.
B, Wstar, T, P, Q, W, R2X, R2Y = plsnipals(
    X_train_pretreated,
    X_train_pretreated,
    5,
)

# Project the pretreated data onto the fitted weights to obtain the scores.
X_train_transformed = np.dot(X_train_pretreated, Wstar)

In [7]:
# Display the scores: the pretreated training data projected onto the five
# Wstar weight vectors returned by plsnipals.
X_train_transformed

array([[-0.52255556, -1.1128032 , -0.95681529,  1.14919812,  0.55925213],
       [ 2.59038087, -0.53316171, -1.46731519, -1.53661342, -1.34533498],
       [ 3.04235194, -1.32752065,  0.42476454, -1.56720367, -0.28381334],
       ...,
       [ 1.24507315, -1.45735643,  0.47387354,  0.64524015,  0.2711963 ],
       [-1.62005298,  0.1244435 ,  1.32795627,  1.19680354,  0.22491269],
       [ 0.93416858, -1.77854873,  0.00588158, -0.35337225,  0.7433809 ]])

In [9]:
# Reference model: PCA with five components, the same count as the number of
# latent variables requested from plsnipals.
principal = PCA(n_components=5)

In [10]:
# Fit PCA on the pretreated data and display the resulting scores. In the
# recorded outputs these columns agree with `X_train_transformed` up to sign
# and small numerical differences.
principal.fit(X_train_pretreated).transform(X_train_pretreated)

array([[-0.52255555, -1.11280319,  0.95681551, -1.14919793, -0.55925188],
       [ 2.59038087, -0.53316168,  1.46731492,  1.53661364,  1.34533474],
       [ 3.04235195, -1.32752064, -0.42476481,  1.56720359,  0.28381352],
       ...,
       [ 1.24507316, -1.45735643, -0.4738734 , -0.64524022, -0.27119664],
       [-1.62005298,  0.12444348, -1.32795605, -1.19680377, -0.22491296],
       [ 0.93416859, -1.77854872, -0.00588162,  0.35337228, -0.7433807 ]])