# Joint Distribution Adaptation (JDA)

JDA jointly aligns both marginal and conditional distributions of source and target domains in a less dimensional subspace.

For this purpose, the maximum mean difference (MMD) is used to measure the difference of both marginal distributions and conditional distributions between source and target domains. The PCA method is also used to construct feature representations into the lower-dimensional subspace.

Reference:

Long M, Wang J, Ding G, et al. Transfer feature learning with joint distribution adaptation. Proceedings of the IEEE international conference on computer vision. 2013: 2200-2207.

In [54]:
import numpy as np
import scipy.io
import scipy.linalg
import sklearn.metrics
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

In [55]:
def kernel(ker, X1, X2, gamma):
    K = None
    if not ker or ker == 'primal':
        K = X1
    elif ker == 'linear':
        if X2:
            K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T, np.asarray(X2).T)
        else:
            K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
    elif ker == 'rbf':
        if X2:
            K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, np.asarray(X2).T, gamma)
        else:
            K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, None, gamma)
    return K

Since the data in the target domain is unlabeled, the conditional probability in the target domain cannot be directly modeled. So here, a pseudo labels finder for the target domain is proposed which can be applied to the target domain by utilizing the source trained classifier.

In [56]:
class JDA:
    def __init__(self, kernel_type='primal', dim=30, lamb=1, gamma=1, T=10):
        '''
        Init func
        :param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
        :param dim: dimension after transfer
        :param lamb: lambda value in equation
        :param gamma: kernel bandwidth for rbf kernel
        :param T: iteration number
        '''
        self.kernel_type = kernel_type
        self.dim = dim
        self.lamb = lamb
        self.gamma = gamma
        self.T = T

    def fit_predict(self, Xs, Ys, Xt, Yt):
        '''
        Transform and Predict using 1NN as JDA paper did
        :param Xs: ns * n_feature, source feature
        :param Ys: ns * 1, source label
        :param Xt: nt * n_feature, target feature
        :param Yt: nt * 1, target label
        :return: acc, y_pred, list_acc
        '''
        list_acc = []
        X = np.hstack((Xs.T, Xt.T))
        #X /= np.linalg.norm(X, axis=0)
        X = np.true_divide(X,np.linalg.norm(X, axis=0))
        m, n = X.shape
        ns, nt = len(Xs), len(Xt)
        e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones((nt, 1))))
        C = len(np.unique(Ys))
        H = np.eye(n) - 1 / n * np.ones((n, n))

        M = 0
        Y_tar_pseudo = None
        for t in range(self.T):
            N = 0
            M0 = e * e.T * C
            if Y_tar_pseudo is not None and len(Y_tar_pseudo) == nt:
                for c in range(1, C + 1):
                    e = np.zeros((n, 1))
                    tt = Ys == c
                    e[np.where(tt == True)] = 1 / len(Ys[np.where(Ys == c)])
                    yy = Y_tar_pseudo == c
                    ind = np.where(yy == True)
                    inds = [item + ns for item in ind]
                    e[tuple(inds)] = -1 / len(Y_tar_pseudo[np.where(Y_tar_pseudo == c)])
                    e[np.isinf(e)] = 0
                    N = N + np.dot(e, e.T)
            M = M0 + N
            M = M / np.linalg.norm(M, 'fro')
            K = kernel(self.kernel_type, X, None, gamma=self.gamma)
            n_eye = m if self.kernel_type == 'primal' else n
            a, b = np.linalg.multi_dot([K, M, K.T]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot([K, H, K.T])
            w, V = scipy.linalg.eig(a, b)
            ind = np.argsort(w)
            A = V[:, ind[:self.dim]]
            Z = np.dot(A.T, K)
            Z /= np.linalg.norm(Z, axis=0)
            Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T

            clf = KNeighborsClassifier(n_neighbors=1)
            clf.fit(Xs_new, Ys.ravel())
            Y_tar_pseudo = clf.predict(Xt_new)
            acc = sklearn.metrics.accuracy_score(Yt, Y_tar_pseudo)
            list_acc.append(acc)
            print('JDA iteration [{}/{}]: Acc: {:.4f}'.format(t + 1, self.T, acc))
        return acc, Y_tar_pseudo, list_acc

In [57]:
if __name__ == '__main__':
    domains = ['caltech_SURF_L10.mat', 'amazon_SURF_L10.mat', 'webcam_SURF_L10.mat', 'dslr_SURF_L10.mat']
    for i in [2]:
        for j in [3]:
            if i != j:
                src, tar = 'data\\' + domains[i], 'data\\' + domains[j]
                src_domain, tar_domain = scipy.io.loadmat(src), scipy.io.loadmat(tar)
                Xs, Ys, Xt, Yt = src_domain['fts'], src_domain['labels'], tar_domain['fts'], tar_domain['labels']
                jda = JDA(kernel_type='primal', dim=30, lamb=1, gamma=1)
                acc, ypre, list_acc = jda.fit_predict(Xs, Ys, Xt, Yt)
                print("Classification accuracy on" ,domains[i], "v.s.", domains[j], "=", acc)

JDA iteration [1/10]: Acc: 0.8089
JDA iteration [2/10]: Acc: 0.8217
JDA iteration [3/10]: Acc: 0.8153
JDA iteration [4/10]: Acc: 0.8153
JDA iteration [5/10]: Acc: 0.8089
JDA iteration [6/10]: Acc: 0.8153
JDA iteration [7/10]: Acc: 0.8089
JDA iteration [8/10]: Acc: 0.8153
JDA iteration [9/10]: Acc: 0.8089
JDA iteration [10/10]: Acc: 0.8153
Classification accuracy on webcam_SURF_L10.mat v.s. dslr_SURF_L10.mat = 0.8152866242038217
