In [19]:
import numpy as np
import csv
import random

In [20]:
def _read_int_csv(path):
    """Read a CSV file made of one header row followed by integer-only rows.

    :param path: path of the CSV file to read
    :return: (header, rows) - header is the list of column names taken from
             the first row, rows is a list of lists of int
    :raises ValueError: if the file has no header row, or if a cell cannot be
                        parsed by int()
    """
    # newline='' is the mode the csv module documents for files it reads.
    with open(path, newline='') as csvfile:
        csv_reader = csv.reader(csvfile)
        header = next(csv_reader, None)
        if header is None:
            raise ValueError('%s is empty: expected a header row' % path)
        # csv.reader yields [] for blank lines (e.g. a trailing newline);
        # skip them so later column indexing never meets an empty row.
        rows = [[int(cell) for cell in row] for row in csv_reader if row]
    return header, rows


male_header, male_data = _read_int_csv('MALE.csv')
female_header, female_data = _read_int_csv('FEMALE.csv')
mixed_header, mixed_data = _read_int_csv('MIXED.csv')

In [21]:
# shuffle dataset
# A dedicated, seeded generator makes the in-place shuffles repeatable after a
# kernel restart, without altering the state of the global `random` module.
SHUFFLE_SEED = 42
shuffle_rng = random.Random(SHUFFLE_SEED)
shuffle_rng.shuffle(male_data)
shuffle_rng.shuffle(female_data)
shuffle_rng.shuffle(mixed_data)

In [22]:
# get score(label)
# The score sits at index 6 of every row. list.pop returns that value and
# removes it from the row in one step, so afterwards each row holds only the
# remaining columns.
# NOTE: the rows are modified in place - running this cell a second time on
# the same lists would strip a further column.
male_score = [sample.pop(6) for sample in male_data]

female_score = [sample.pop(6) for sample in female_data]

mixed_score = [sample.pop(6) for sample in mixed_data]

In [23]:
# divide train, dev and test set
def _test_dev_train(items):
    """Slice a list into (first 100 items, next 100 items, items from index 200 on)."""
    return items[:100], items[100:200], items[200:]


test_male_data_x, dev_male_data_x, train_male_data_x = _test_dev_train(male_data)
test_male_data_y, dev_male_data_y, train_male_data_y = _test_dev_train(male_score)

test_female_data_x, dev_female_data_x, train_female_data_x = _test_dev_train(female_data)
test_female_data_y, dev_female_data_y, train_female_data_y = _test_dev_train(female_score)

test_mixed_data_x, dev_mixed_data_x, train_mixed_data_x = _test_dev_train(mixed_data)
test_mixed_data_y, dev_mixed_data_y, train_mixed_data_y = _test_dev_train(mixed_score)

In [24]:
# encoding=utf-8
"""
    Created on 9:53 2019/4/21 
    @author: Jindong Wang
"""

"""
Kernel Mean Matching
#  1. Gretton, Arthur, et al. "Covariate shift by kernel mean matching." Dataset shift in machine learning 3.4 (2009): 5.
#  2. Huang, Jiayuan, et al. "Correcting sample selection bias by unlabeled data." Advances in neural information processing systems. 2006.
"""
import sklearn.metrics
from cvxopt import matrix, solvers

ModuleNotFoundError: No module named 'cvxopt'

In [None]:
def kernel(ker, X1, X2, gamma):
    '''
    Compute a kernel (Gram) matrix with scikit-learn's pairwise kernels
    :param ker: 'linear' | 'rbf'
    :param X1: array-like of samples, one row per sample
    :param X2: second array-like of samples, or None to compare X1 with itself
    :param gamma: bandwidth passed to the rbf kernel (ignored for 'linear')
    :return: kernel matrix as returned by sklearn.metrics.pairwise
    :raises ValueError: if ker is neither 'linear' nor 'rbf'
    '''
    # Fail loudly instead of returning None, which would only surface later
    # as an unrelated error in the caller.
    if ker not in ('linear', 'rbf'):
        raise ValueError("Unsupported kernel type %r; expected 'linear' or 'rbf'" % (ker,))
    X1 = np.asarray(X1)
    # Keep None as None: np.asarray(None) would produce an object array
    # instead of selecting the "X1 against itself" behaviour.
    Y = None if X2 is None else np.asarray(X2)
    if ker == 'linear':
        return sklearn.metrics.pairwise.linear_kernel(X1, Y)
    return sklearn.metrics.pairwise.rbf_kernel(X1, Y, gamma=gamma)

class KMM:
    '''
    Kernel Mean Matching: solves a quadratic program for per-sample weights
    (beta) on the source samples, using kernels between source and target.
    '''

    def __init__(self, kernel_type='linear', gamma=1.0, B=1.0, eps=None):
        '''
        Initialization function
        :param kernel_type: 'linear' | 'rbf'
        :param gamma: kernel bandwidth for rbf kernel
        :param B: bound for beta
        :param eps: bound for sigma_beta; None means B / sqrt(ns) is used,
                    computed from the source size at every call to fit
        '''
        self.kernel_type = kernel_type
        self.gamma = gamma
        self.B = B
        self.eps = eps

    def _effective_eps(self, ns):
        '''
        Return the eps to use for a source set of ns samples
        :param ns: number of source samples
        :return: self.eps if it was given, otherwise B / sqrt(ns)
        '''
        # Deliberately does not write back to self.eps: a default derived
        # from one source size must not leak into a later fit on another.
        if self.eps is None:
            return self.B / np.sqrt(ns)
        return self.eps

    def fit(self, Xs, Xt):
        '''
        Fit source and target using KMM (compute the coefficients)
        :param Xs: ns * dim (array or nested list)
        :param Xt: nt * dim (array or nested list)
        :return: Coefficients (Pt / Ps) value vector (Beta in the paper),
                 as a numpy array built from the solver's solution
        :raises ValueError: if Xs or Xt contains no samples
        '''
        Xs, Xt = np.asarray(Xs), np.asarray(Xt)
        ns = Xs.shape[0]
        nt = Xt.shape[0]
        if ns == 0 or nt == 0:
            raise ValueError('Xs and Xt must each contain at least one sample')
        eps = self._effective_eps(ns)
        K = kernel(self.kernel_type, Xs, None, self.gamma)
        kappa = np.sum(kernel(self.kernel_type, Xs, Xt, self.gamma) * float(ns) / float(nt), axis=1)

        # cvxopt expects double-precision data; coerce explicitly so that
        # e.g. float32 kernel matrices are accepted as well.
        K = matrix(np.asarray(K, dtype=np.double))
        kappa = matrix(np.asarray(kappa, dtype=np.double))
        # Inequalities G * beta <= h, row blocks in order:
        #    sum(beta) <= ns * (1 + eps)
        #   -sum(beta) <= ns * (eps - 1)
        #    beta      <= B
        #   -beta      <= 0
        G = matrix(np.r_[np.ones((1, ns)), -np.ones((1, ns)), np.eye(ns), -np.eye(ns)])
        h = matrix(np.r_[ns * (1 + eps), ns * (eps - 1), self.B * np.ones((ns,)), np.zeros((ns,))])

        sol = solvers.qp(K, -kappa, G, h)
        beta = np.array(sol['x'])
        return beta


if __name__ == '__main__':
    # Source pool: female training rows followed by mixed training rows.
    # Target: the female training rows alone.
    # NOTE(review): the target rows are also part of the source pool - confirm
    # this is the intended domain pairing.
    Xs = np.asarray(train_female_data_x + train_mixed_data_x)
    Xt = np.asarray(train_female_data_x)

    # Linear-kernel KMM with the per-sample weight bound B set to 10.
    kmm = KMM(kernel_type='linear', B=10)
    beta = kmm.fit(Xs, Xt)

    # NOTE(review): `regr` and `x_test` are not defined in the cells visible
    # here, and `beta` is not used after it is computed - presumably a model
    # is meant to be trained with these weights first; verify before running.
    y_pred = regr.predict(x_test)