In [140]:
%matplotlib inline
import numpy as np
from os.path import join
import scipy 
import random
from scipy.stats import multivariate_normal as mn
from sklearn.linear_model import LogisticRegression as LR

## Setup the synthetic data

In [121]:
eta = 0.01
nC = 1000 # number of points per class
p  = 3 # number of domains
C  = 10 # number of classes
n  = nC*p*C
U = 1.0/n

In [122]:
def get_random_D_h(nC, p, C):
    D = np.random.rand(nC*p*C,C,p) # prob point n is in domain p and class C
    h = np.random.rand(nC*p*C,C,p) # score for point n in domain p and class C
    # normalize D and h
    for k in range(p): # loop over domain
        D[:,:,k] = D[:,:,k] / D[:,:,k].sum()
    
        for i in range(nC):
            h[i,:,k] = h[i,:,k]/h[i,:,k].sum() # since this is the output of a softmax function its normalized over classes 
    return D,h

# Define a data problem

In [123]:
class synthetic_problem():
    
    def __init__(self, p=3, nC=20, c=10, seed=1337):
        self.p = p # number of domains
        self.nC = nC # pts per class
        self.C = C # number of classes
        self.n = nC * p * C # num pts
        self.U = 1.0 / self.n # unif dist
        self.eta = 0.01
        self.seed = seed
        
        np.random.seed(seed)
        self.generate_density()
        self.generate_regressor()
        self.generate_y()
        
    def generate_density(self):
        D = np.random.rand(self.n,self.p)
        # Normalize each domain
        for k in range(self.p):
            D[:,k] = D[:,k] / D[:,k].sum()
        self.D = D
    
    def generate_regressor(self):
        h = np.random.rand(self.n,self.p)
        for k in range(self.p):
            h[:,k] = h[:,k] / h[:,k].sum()
        self.h = h
        
        # compute H
        self.H = np.zeros(self.n)
        for i in range(self.n):
            self.H[i] = 1.0 / self.p * h[i,:].sum()
            
    def generate_y(self):
        y = np.random.rand(self.n)
        y = y / y.sum()
        self.y = y
        
    def get_marginal_density(self):
        return self.D
    
    def get_regressor(self):
        return self.h
    
    def get_true_values(self):
        return self.y
    
    def get_H(self):
        return self.H

In [331]:
class sentiment_analysis_data():
    
    def __init__(self, datadir=''):
        self.datadir = datadir
        self.domains = ('books', 'dvd', 'electronics', 'kitchen')
        self.label_fmt = 'exp{split}/rawdata/{domain}.{dset}.labels'
        self.data_fmt = 'exp{split}/rawdata/{domain}.{dset}.txt'
        
    def load_data(self, domain, split, dset='train'):
        dname = self.data_fmt.format(split=split, 
                                        domain=domain, dset=dset)
        with open(join(self.datadir, dname), 'rb') as f:
            data = f.read().splitlines()
        return data
    
    def load_labels(self, domain, split, dset='train'):
        dname = self.label_fmt.format(split=split, domain=domain, dset=dset)
        with open(join(self.datadir, dname), 'rb') as f:
            data = f.read().splitlines()
        labels = np.array([float(x) for x in data])
        return labels
    
    def load_prob(self, tr_dom, dom, split, n, minO, dset, scale=50):
        """
        Loads the precomputed probs of each example in dom
        according to the model trained on tr_dom. Each is 
        computed over the particular data <split> using an
        <n>-gram language model with vocabulary based on the
        minimum occurances (minO) of each word across all domains
        in sentiment analysis dataset.
        """
        prob_fmt = 'exp{:d}/prob-{:d}gram-{:d}minoccur/{:s}.{:s}-in-{:s}.prob'
        prob_file = prob_fmt.format(split, n, minO, dom, dset, tr_dom)
        with open(join(self.datadir, prob_file), 'rb') as f:
            data = f.read().splitlines()
        #prob = np.array([np.exp(-float(x)/scale) for x in data])
        prob = np.array([np.exp(-float(x)) for x in data])
        nlogprob = np.array([float(x) for x in data])
        return prob, nlogprob
    
    def load_pred(self, tr_dom, dom, split, dset):
        pred_fmt = 'exp{:d}/predictions/{:s}.{:s}.libsvm-on-{:s}.train.libsvm.model.pred'
        pred_file = pred_fmt.format(split, dom, dset, tr_dom)
        with open(join(self.datadir, pred_file), 'rb') as f:
            data = f.read().splitlines()
        pred = np.array([float(x) for x in data])
        return pred

In [332]:
datadir = '/data/sentiment_analysis/domain_adaptation/'
SA = sentiment_analysis_data(datadir=datadir)
split = 1
sources = ['kitchen', 'dvd']

p = len(sources)
y = []
#for s in sources:
#    ys = 
y = []
ys = SA.load_labels('dvd', 1)
y.append(ys)
ys2 = SA.load_labels('books',1)
y.append(ys2)
pred = SA.load_pred('books', 'dvd', 1, 'train')
prob, lp = SA.load_prob('books', 'dvd', 1, 1, 2, 'train')

In [334]:
print prob, lp, prob.shape, pred.shape
o = np.exp(-lp)
print o, o.min(), o.max()

DP = sm_problem(datadir=datadir, minO=2, ngram=1, 
                split=1, sources=('kitchen', 'dvd'))

h = DP.get_regressor()
D = DP.get_marginal_density()
d = DP.get_nlog_density()

z = 1.0/DP.p * np.ones(DP.p)

def compute_Jz_reg(z, h, D, p):
    zDh = 0
    for k in range(p):
        zDh += z[k] * D[k] * h[k]
    return zDh

def compute_Jz_log(z, h, d,p):
    "Assumes d=-log(D)"
    zDh = 0
    for k in range(p):
        zDh += np.exp(np.log(z[k]*h[k]) - d[k])
    return zDh

Jz_r = compute_Jz_reg(z, h, D, DP.p)
Jz_l = compute_Jz_log(z, h, d, DP.p)

print Jz_r, Jz_l

[  1.58602720e-132   8.86069149e-213   3.33586796e-085 ...,
   8.68431588e-039   5.82896668e-051   0.00000000e+000] [  303.48     488.269    194.515  ...,    87.6393   115.669   1136.72  ] (1600,) (1600,)
[  1.58602720e-132   8.86069149e-213   3.33586796e-085 ...,
   8.68431588e-039   5.82896668e-051   0.00000000e+000] 0.0 1.46540502801e-18
[  1.19833893e-52   1.62872950e-41] [  1.19833893e-52   1.62872950e-41]


In [314]:
ys.shape, pred.shape, prob.shape
print np.hstack(y).shape

(3200,)


In [335]:
class sm_problem():
    
    def __init__(self, split=1, datadir='', sources=None,
                ngram=1, minO=2):
        self.datadir = datadir # directory where data is stored
        self.split = split
        self.ngram = ngram
        self.minO = minO
        self.splitdir = join(datadir, 'exp{:d}'.format(split))
        self.sources = sources
        self.p = len(sources) # number of domains
        self.SA = sentiment_analysis_data(datadir=datadir)
        
        self.load_y() # load the gt labels and set self.n
        self.load_density()
        self.load_regressor()
        self.U = 1.0 / self.n # unif dist
        self.eta = 0.01
        
        
    def load_density(self):
        D = np.zeros([self.n,self.p])
        logD = np.zeros([self.n, self.p])
        for (k,d_tr) in enumerate(self.sources):
            Dd = []; logD_d = [];
            for d in self.sources:
                prob_d, nlogp = self.SA.load_prob(d_tr, d, self.split, 
                             self.ngram, self.minO, 'train')
                Dd.append(prob_d)
                logD_d.append(nlogp)
            D[:,k] = np.hstack(Dd)
            logD[:,k] = np.hstack(logD_d)
        self.D = D
        self.logD = logD
    
    def load_regressor(self):
        h = np.zeros([self.n,self.p])
        for (k, d_tr) in enumerate(self.sources):
            hd = []
            for d in self.sources:
                hd.append(self.SA.load_pred(d_tr, d, self.split,
                                         'train'))
            h[:,k] = np.hstack(hd)
        self.h = h
        
        # compute H
        self.H = np.zeros(self.n)
        for i in range(self.n):
            self.H[i] = 1.0 / self.p * h[i,:].sum()
            
    def load_y(self):
        y = []
        for s in self.sources:
            y.append(self.SA.load_labels(s, self.split))
        self.y = np.hstack(y)
        self.n = self.y.shape[0]
        
    def get_marginal_density(self):
        return self.D
    
    def get_nlog_density(self):
        return self.logD
    
    def get_regressor(self):
        return self.h
    
    def get_true_values(self):
        return self.y
    
    def get_H(self):
        return self.H

In [336]:
DP = sm_problem(datadir=datadir, minO=2, ngram=1, 
                split=1, sources=('kitchen', 'dvd'))

# Optimization

## Objective
* z_{t+1} = argmin gamma
* subject to
    * u_k(z) - v_k(z_t) - (z - z_t) grad_{v_k(z_t)} <= g for all k in [p]
    * -z_k <= 0 forall k in [p]
    * sum_{k=1}^p z_k - 1 = 0

In [337]:
def compute_H(x, DP):
    return DP.get_H()[x]

def compute_Dz(x, z, DP):
    """ Dz = sum_k z_k * D_k(x)"""
    D = DP.get_marginal_density()[x,:]
    Dz = 0
    for k in range(DP.p):
        Dz += z[k] * D[k]
    return Dz

def compute_Jz(x, z, DP):
    const = DP.eta * DP.U * compute_H(x, DP)
    D = DP.get_marginal_density()[x,:]
    h = DP.get_regressor()[x,:]
    zDh = 0
    for k in range(DP.p):
        zDh += z[k] * D[k] * h[k]
    return zDh + const

def compute_Kz(x, z, DP):
    return compute_Dz(x, z, DP) + DP.eta * DP.U

def compute_hz(x, z, DP, Jz=None, Kz=None):
    if Jz is None:
        Jz = compute_Jz(x, z, DP)
    if Kz is None:
        Kz = compute_Kz(x, z, DP)
        
    return Jz / Kz

def compute_fz(x, z, DP, Jz=None, Kz=None):
    if not Jz:
        Jz = compute_Jz(x, z, DP)
    if not Kz:
        Kz = compute_Kz(x, z, DP)
    return (Jz + 1) ** 2 / (2*Kz)

def compute_gz(x, z, DP, Jz=None, Kz=None):
    if not Jz:
        Jz = compute_Jz(x, z, DP)
    if not Kz:
        Kz = compute_Kz(x, z, DP)
    return ((Jz**2) + 1) / (2*Kz)

def compute_Fz(x, z, DP, fz=None, gz=None):
    if not fz:
        fz = compute_fz(x, z, DP)
    if not gz:
        gz = compute_gz(x, z, DP)
    return 2 * (fz**2) + 2 * (gz**2)

def compute_Gz(x, z, DP, fz=None, gz=None):
    if not fz:
        fz = compute_fz(x, z, DP)
    if not gz:
        gz = compute_gz(x, z, DP)
    return (fz + gz)**2

def compute_grad_Jz(x, z, DP):
    D = DP.get_marginal_density()
    h = DP.get_regressor()
    return D[x,:] * h[x,:]

def compute_grad_Kz(x, z, DP):
    D = DP.get_marginal_density()[x,:]
    return D

def compute_grad_gz(x, z, DP, grad_Jz=None, 
                    grad_Kz=None, Jz=None,
                   Kz=None):
    if grad_Jz is None:
        grad_Jz = compute_grad_Jz(x, z, DP)
    if grad_Kz is None:
        grad_Kz = compute_grad_Kz(x, z, DP)
    if not Jz:
        Jz = compute_Jz(x, z, DP)
    if not Kz:
        Kz = compute_Kz(x, z, DP)
    return (Jz * grad_Jz) / Kz - (((Jz**2) + 1)*grad_Kz) / Kz

def compute_grad_fz(x, z, DP, Jz=None, Kz=None,
                    grad_Jz=None, grad_Kz=None):
    if grad_Jz is None:
        grad_Jz = compute_grad_Jz(x, z, DP)
    if grad_Kz is None:
        grad_Kz = compute_grad_Kz(x, z, DP)
    if not Jz:
        Jz = compute_Jz(x, z, DP)
    if not Kz:
        Kz = compute_Kz(x, z, DP)
    return (Jz + 1)*grad_Jz / Kz - (Jz+1)**2 * grad_Kz / (Kz**2)

def compute_grad_Gz(x, z, DP, fz=None, gz=None,
                   grad_fz=None, grad_gz=None):
    if not fz:
        fz = compute_fz(x, z, DP)
    if not gz:
        gz = compute_gz(x, z, DP)
    if grad_fz is None:
        grad_fz = compute_grad_fz(x, z, DP)
    if grad_gz is None:
        grad_gz = compute_grad_gz(x, z, DP)
    return 2 * (fz + gz) * (grad_fz + grad_gz)

In [338]:
# Define u and v following proposition 9
def compute_u(z,DP):
    D = DP.get_marginal_density()
    h = DP.get_regressor()
    etaU = DP.eta * DP.U
    y = DP.get_true_values()
    
    const = np.zeros(DP.n)
    for k in range(DP.p):
        const += z[k] * D[:,k] * (y**2)

    u = np.zeros(DP.p)
    for x in range(DP.n):
        H = compute_H(x, DP)
        Dz = compute_Dz(x, z, DP)
        Jz = compute_Jz(x, z, DP)
        Kz = compute_Kz(x, z, DP)
        hz = compute_hz(x, z, DP, Jz=Jz, Kz=Kz)
        fz = compute_fz(x, z, DP, Jz=Jz, Kz=Kz)
        gz = compute_gz(x, z, DP, Jz=Jz, Kz=Kz)
        Fz = compute_Fz(x, z, DP, fz=fz, gz=gz)
        Gz = compute_Gz(x, z, DP, fz=fz, gz=gz)
        
        for k in range(DP.p):
            Dk = D[x,k]
            hk = h[x,k]

            v1 = Dk * (Fz + 2*y[x]*gz + y[x]**2)
            v2 = etaU*Fz + 2*y[x]*Jz + 2*etaU*y[x]*gz
            u[k] += v1 + v2 
    return u - const.sum()

def compute_v(z, DP):
    D = DP.get_marginal_density()
    h = DP.get_regressor()
    H = DP.get_H()
    y = DP.get_true_values()

    v = np.zeros(DP.p)
    for x in range(DP.n):
        Gz = compute_Gz(x, z, DP)
        fz = compute_fz(x, z, DP)
        Jz = compute_Jz(x, z, DP)
        hz = compute_hz(x, z, DP, Jz=Jz)

        etaU = DP.eta * DP.U
        va = D[x,:] * (Gz + 2*fz*y[x])
        vb = Jz * hz + etaU*Gz + 2*etaU*fz*y[x]
        v += va + vb
    return v

def compute_grad_v(z, DP):
    D = DP.get_marginal_density()
    h = DP.get_regressor()
    H = DP.get_H()
    y = DP.get_true_values()
    
    grad_v = np.zeros([DP.p, DP.p])
    for x in range(DP.n):
        grad_Gz = compute_grad_Gz(x, z, DP)
        grad_fz = compute_grad_fz(x, z, DP)
        Jz = compute_Jz(x, z, DP)
        Kz = compute_Kz(x, z, DP)
        grad_Jz = D[x,:] * h[x,:]
        grad_Kz = D[x,:]
        etaU = DP.eta * DP.U
        etaUH = etaU * H[x]
        grad_Jzhz = 2*(Jz/Kz)*grad_Jz - ((Jz**2)/(Kz**2)) * grad_Kz 
        a0 = D[x,:] + etaU
        a1 = 2 * y[x]*D[x,:] + 2 * etaUH * y[x]
        for k in range(DP.p): #kth element of v
            for i in range(DP.p): # ith grad dim
                a0 = (D[x,k] + etaU) * grad_Gz[i]
                a1 = (2*D[x,k]*y[x] + 2*etaU*y[x]) * grad_fz[i]
                a2 = grad_Jzhz[i]
                grad_v[k,i] += a0 + a1 + a2

    return np.matrix(grad_v)

In [339]:
v = compute_v(zp, DP)
gv = compute_grad_v(zp, DP)
print v.shape, gv.shape
print v
print gv

(2,) (2, 2)
[  1.02402911e+09   1.02402911e+09]
[[ -6.90274782e+00  -7.09684052e-09]
 [ -6.90267944e+00  -7.09684052e-09]]


In [340]:
#DP = synthetic_problem()
DP = sm_problem(datadir=datadir, minO=2, ngram=1, 
                split=1, sources=('kitchen', 'dvd', 'books', 'electronics'))
zp = np.repeat(1.0 / DP.p, DP.p)
x=0
print 'z:', zp, 'x:', x
print 'H(x)', compute_H(x, DP)
print 'Dz(x)', compute_Dz(x, zp, DP)
print 'Jz(x)', compute_Jz(x, zp, DP)
print 'Kz(x)', compute_Kz(x, zp, DP)
print 'fz(x)', compute_fz(x, zp, DP)
print 'gz(x)', compute_gz(x, zp, DP)
print 'Fz(x)', compute_Fz(x, zp, DP)
print 'Gz(x)', compute_Gz(x, zp, DP)
print 'grad_Jz(x)', compute_grad_Jz(x, zp, DP)
print 'grad_Kz(x)', compute_grad_Kz(x, zp, DP)
print 'grad_gz(x)', compute_grad_gz(x, zp, DP)
print 'grad_fz(x)', compute_grad_fz(x, zp, DP)
print 'grad_Gz(x)', compute_grad_Gz(x, zp, DP)

z: [ 0.25  0.25  0.25  0.25] x: 0
H(x) 1.69309
Dz(x) 5.15529136316e-86
Jz(x) 2.645453125e-06
Kz(x) 1.5625e-06
fz(x) 320001.693092
gz(x) 320000.000002
Fz(x) 409602167167.0
Gz(x) 409602167164.0
grad_Jz(x) [  1.04641566e-107   2.46880740e-116   1.96721813e-111   4.35356107e-085]
grad_Kz(x) [  6.61400945e-108   1.29935864e-116   1.66853388e-111   2.06211655e-085]
grad_gz(x) [ -4.23294833e-102  -8.31585348e-111  -1.06785835e-105  -1.31974722e-079]
grad_fz(x) [ -2.70910591e-096  -5.32218534e-105  -6.83433833e-100  -8.44644620e-074]
grad_Gz(x) [ -3.46767015e-90  -6.81242590e-99  -8.74798987e-94  -1.08114966e-67]


In [341]:
print 'z:', zp, 'x:', x
H = compute_H(x, DP)
Dz = compute_Dz(x, zp, DP)
Jz = compute_Jz(x, zp, DP)
Kz = compute_Kz(x, zp, DP)
fz = compute_fz(x, zp, DP, Jz=Jz, Kz=Kz)
gz = compute_gz(x, zp, DP, Jz=Jz, Kz=Kz)
Fz = compute_Fz(x, zp, DP, fz=fz, gz=gz)
Gz = compute_Gz(x, zp, DP, fz=fz, gz=gz)
grad_Jz = compute_grad_Jz(x, zp, DP)
grad_Kz = compute_grad_Kz(x, zp, DP)
grad_gz = compute_grad_gz(x, zp, DP, Jz=Jz, 
                          Kz=Kz, grad_Jz=grad_Jz,
                         grad_Kz=grad_Kz)
print 'fz(x)', fz
print 'gz(x)', gz
print 'Fz(x)', Fz
print 'Gz(x)', Gz
print 'grad_Jz(x)', grad_Jz
print 'grad_Kz(x)', grad_Kz
print 'grad_gz(x)', grad_gz
print 'grad_fz(x)', compute_grad_fz(x, zp, DP)
print 'grad_Gz(x)', compute_grad_Gz(x, zp, DP)

z: [ 0.25  0.25  0.25  0.25] x: 0
fz(x) 320001.693092
gz(x) 320000.000002
Fz(x) 409602167167.0
Gz(x) 409602167164.0
grad_Jz(x) [  1.04641566e-107   2.46880740e-116   1.96721813e-111   4.35356107e-085]
grad_Kz(x) [  6.61400945e-108   1.29935864e-116   1.66853388e-111   2.06211655e-085]
grad_gz(x) [ -4.23294833e-102  -8.31585348e-111  -1.06785835e-105  -1.31974722e-079]
grad_fz(x) [ -2.70910591e-096  -5.32218534e-105  -6.83433833e-100  -8.44644620e-074]
grad_Gz(x) [ -3.46767015e-90  -6.81242590e-99  -8.74798987e-94  -1.08114966e-67]


# Optimization

In [318]:
from scipy.optimize import minimize
# solve an iteration

#setup constraints: function of [z,gamma]
x0 = np.hstack([zp, 0]) #g0 = 0
fun = lambda x: x[-1] # bottom variable is gamma
nonneg_cst = dict(type='ineq', fun=lambda x: x[:-1])
eq_cst = dict(type='eq', fun=lambda x: sum(x[:-1]) - 1)
main_cst = dict(type='ineq', 
                fun=lambda x,x0,DP: x[-1] -
                (compute_u(x[:-1], DP) - compute_v(x0[:-1], DP) - 
                np.squeeze(np.array((x[:-1] - x0[:-1]) * compute_grad_v(x0[:-1], DP)))), 
                 args=(x0, DP))
cons = (eq_cst, nonneg_cst, main_cst)

opt = dict(maxiter=1e8, disp=True)

res = minimize(fun, x0, method='SLSQP', constraints=cons, options=opt)
print res.x
print res.message

Singular matrix E in LSQ subproblem    (Exit mode 5)
            Current function value: -305175781.0
            Iterations: 61
            Function evaluations: 897
            Gradient evaluations: 61
[  2.50000000e-01   2.50000000e-01   2.50000000e-01   2.50000000e-01
  -3.05175781e+08]
Singular matrix E in LSQ subproblem


In [266]:
x = res.x
u = compute_u(x, DP) 
v0 = compute_v(x0[:-1],DP)
gv0 = compute_grad_v(x0[:-1], DP)
print u
print v0
print gv0
print (x-x0)[:-1]
print x[-1] - (u-v0 - (x-x0)[:-1] * gv0) 

[  6.63718400e+08   5.57560615e+08   5.54037525e+08]
[  6.81409499e+08   5.66113612e+08   5.58242509e+08]
[[ -5.48540082e+08  -6.91305066e+07  -5.41857859e+07]
 [ -1.76239738e+08  -7.43943674e+07  -5.07638570e+07]
 [ -1.69021612e+08  -5.84902551e+07  -4.98386831e+07]]
[ 0.03285454  0.29546081 -0.32831535]
[[-883045.53666547 -468191.34872933 -183755.32991866]]


In [273]:
def sq_loss(pred,y):
    return (pred-y)**2
    
def compute_loss_k(k, DP):
    Dk = DP.get_marginal_density()[:,k]
    hk = DP.get_regressor()[:,k]
    y = DP.get_true_values()
    loss = 0
    for x in range(DP.n):
        loss += Dk[x] * sq_loss(hk[x], y[x])
    return loss / DP.n
    
def compute_weighted_loss_k(z,k,DP):
    Dk = DP.get_marginal_density()[:,k]
    
    y = DP.get_true_values()
    loss = 0
    for x in range(DP.n):
        hz = compute_hz(x,z,DP)
        loss += Dk[x] * sq_loss(hz, y[x])
    return loss / DP.n
    
def compute_weighted_loss(z, DP):
    y = DP.get_true_values()
    loss = 0
    for x in range(DP.n):
        Dz = compute_Dz(x,z,DP)
        hz = compute_hz(x,z,DP)
        loss += Dz * sq_loss(hz, y[x])
    return loss / DP.n
    

In [275]:
z = res.x[:-1]
print 'L(Dk,hk):', compute_loss_k(0,DP), compute_loss_k(1,DP), compute_loss_k(2,DP)
print 'L(Dk,hz):', compute_weighted_loss_k(z,0,DP), compute_weighted_loss_k(z,1,DP), compute_weighted_loss_k(z,2,DP)
print 'L(Dz,hz):', compute_weighted_loss(z,DP)

L(Dk,hk): 0.0426743639558 0.0343673164216 0.0332143615071
L(Dk,hz): 0.0425837364458 0.031093060481 0.0302574192427
L(Dz,hz): 0.0352966134819 2.43585795775e-73


In [221]:
D = DP.get_marginal_density()
D.shape

(4800, 3)

In [271]:
h = DP.get_regressor()
h.shape

(4800, 3)

In [272]:
h[0,:]

(array([ 1.58212,  1.90002,  1.17901]), 10694.4)

In [225]:
compute_weighted_loss(z,DP)/(D.max()*DP.n)

0.056043864289248964

In [224]:
DP.n

4800