In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import numpy as np
from tqdm import tqdm
import random

In [None]:
import keras
from keras.utils.np_utils import to_categorical
from keras.layers import *
from keras.models import Model
from keras import backend as K
from keras import initializers #keras2
from sklearn.metrics import accuracy_score, classification_report
from keras.optimizers import *

In [None]:
# Base directory for data files.
# NOTE(review): none of the visible cells read this variable (the loader opens
# 'ml-1m' relative to the working directory) — confirm whether later code uses it.
data_root_path = './'

In [None]:
# Read the raw interaction log into memory, one string per record.
# The path is resolved against data_root_path so that changing the configured
# data directory actually moves where the file is looked up; with the default
# './' this opens the same file as a bare 'ml-1m'.
with open(os.path.join(data_root_path, 'ml-1m')) as f:
    lines = f.readlines()

In [None]:
# Users: uid -> {'item': [item ids], 'time': [timestamps]}, in file order.
# Items: item id -> 1; used only as a set of the distinct item ids seen.
Users = {}
Items = {}

for line in lines:
    line = line.strip()
    if not line:
        # A blank (e.g. trailing) line would otherwise break the 4-way unpack below.
        continue
    # Each record is "<user>\t<item>\t<rating>\t<timestamp>"; the rating is not used.
    uid, mid, _rating, t = line.split('\t')
    uid = int(uid)
    mid = int(mid)
    t = int(t)

    if mid not in Items:
        Items[mid] = 1

    if uid not in Users:
        Users[uid] = {'item': [], 'time': []}

    Users[uid]['item'].append(mid)
    Users[uid]['time'].append(t)

In [None]:
# Number of negative items paired with each positive training item. It also fixes
# the width of every label row ([1] + [0]*npratio) and is the default `ratio`
# of get_model, whose item input is 1+ratio wide.
npratio = 1 # 1 for BPR loss, K for NCE loss

In [None]:
train_users = []
train_items = []
train_labels = []
test_users = {}
trainuser_index={}

# Pool that negatives are drawn from. It does not depend on the user, so it is
# built once instead of once per training sample.
all_item_ids = list(range(1,1+len(Items)))

# Size of the id range used for every user's test candidate list. It has to stay
# in step with the item vocabulary in get_inter_model (3706+1).
# NOTE(review): candidates therefore cover ids 0..3705, while negatives above are
# drawn from 1..len(Items) — confirm which id range the dataset really uses.
num_test_candidates = 3706

#generate normal user training data
for uid in tqdm(Users):
    items = np.array(Users[uid]['item'])
    times = np.array(Users[uid]['time'])
    arg = times.argsort()

    # Chronological split: all but the two most recent interactions are training
    # positives; only the most recent one is used as the test positive below.
    train_is = items[arg[:-2]]
    listtrain_is=set(train_is)
    tpindex=[]
    for pos_item in train_is:
        # Drawing npratio+len(train_is) distinct ids guarantees that at least
        # npratio of them survive the "not already interacted with" filter.
        sampled = random.sample(all_item_ids,npratio+len(train_is))
        neg_items=[x for x in sampled if x!=pos_item and x not in listtrain_is][:npratio]
        # Remember which rows of the flat training arrays belong to this user.
        tpindex.append(len(train_items))
        train_items.append([pos_item] + neg_items)
        train_users.append(uid)
        train_labels.append([1]+[0]*npratio)
    trainuser_index[uid]=np.array(tpindex)

    # Test list layout: the held-out positive first, then every other candidate
    # id in random order (the positive itself is filtered out of the tail).
    test_pos = items[arg[-1]]
    candidates = np.random.permutation(num_test_candidates)
    test_users[uid] = np.array([test_pos] + [v for v in candidates if v != test_pos])

In [None]:
# Turn the accumulated Python lists into NumPy arrays so that the per-user row
# indices stored in trainuser_index can be used to slice them.
train_users, train_items, train_labels = (
    np.array(seq) for seq in (train_users, train_items, train_labels)
)
# Random visiting order over the users that have an entry in trainuser_index.
rand_user_index = np.random.permutation(len(trainuser_index))

In [None]:
def dcg_score(y_true, y_score, k=10):
    """Discounted cumulative gain of the k highest-scored entries.

    y_true  -- relevance value of each candidate.
    y_score -- predicted score of each candidate (same order as y_true).
    k       -- how many of the top-scored candidates contribute.

    Each contributing entry adds (2**relevance - 1) / log2(rank + 1), with
    rank counted from 1 in descending score order.
    """
    top_k = np.argsort(y_score)[::-1][:k]
    relevance = np.take(y_true, top_k)
    positions = np.arange(len(relevance))
    return np.sum((2 ** relevance - 1) / np.log2(positions + 2))


def ndcg_score(y_true, y_score, k=10):
    """Normalised DCG at k: dcg_score of the predicted ranking divided by the
    dcg_score of the ideal ranking (candidates ordered by their true relevance).

    Returns 0.0 when the ideal DCG is zero (no relevant candidate among the
    top k), instead of dividing by zero and propagating NaN into averages.
    """
    best = dcg_score(y_true, y_true, k)
    if best == 0:
        return 0.0
    return dcg_score(y_true, y_score, k) / best


def mrr_score(y_true, y_score):
    """Mean reciprocal rank of the relevant candidates.

    y_true  -- relevance label of each candidate.
    y_score -- predicted score of each candidate (same order as y_true).

    Candidates are ranked by descending score; each label is weighted by
    1/rank and the sum is normalised by the total of the labels. Returns 0.0
    when the labels sum to zero, instead of dividing by zero and yielding NaN.
    """
    order = np.argsort(y_score)[::-1]
    ranked_labels = np.take(y_true, order)
    total = np.sum(ranked_labels)
    if total == 0:
        return 0.0
    reciprocal_ranks = ranked_labels / (np.arange(len(ranked_labels)) + 1)
    return np.sum(reciprocal_ranks) / total

In [None]:
def get_inter_model(mode,L=0,num_users=6039+1,num_items=3706+1,emb_dim=64):
    """Build the Keras model that scores a single (user id, item id) pair.

    mode      -- interaction function, one of:
                 'MLP' : two tanh Dense layers over the concatenated embeddings,
                 'GMF' : Dense(1) over the element-wise product,
                 'NCF' : sum of the 'MLP' score and the 'GMF' score,
                 'Dot' : plain dot product of the two embeddings.
    L         -- not used by this function; kept so existing calls keep working.
    num_users -- number of rows of the user embedding table.
    num_items -- number of rows of the item embedding table.
    emb_dim   -- embedding width shared by users and items.

    The model takes an int32 input of shape (2,) laid out as [user id, item id].

    Raises ValueError for an unknown mode (previously this surfaced only as an
    unbound local `score` when the Model was assembled).
    """
    known_modes = ('MLP','GMF','NCF','Dot')
    if mode not in known_modes:
        raise ValueError('unknown mode %r, expected one of %r' % (mode, known_modes))

    # Layers are created in a fixed order on purpose: the training cell reads
    # the embedding tables positionally from get_weights().
    Inputs = keras.layers.Input(shape=(2,),dtype='int32')
    User_Input = Lambda(lambda x:x[:,0])(Inputs)
    Item_Input = Lambda(lambda x:x[:,1])(Inputs)

    user_embedding_layer = Embedding(num_users, emb_dim,trainable=True)
    item_embedding_layer = Embedding(num_items, emb_dim,trainable=True)

    user_emb = user_embedding_layer(User_Input)
    item_emb = item_embedding_layer(Item_Input)

    user_emb = Reshape((emb_dim,))(user_emb)
    item_emb = Reshape((emb_dim,))(item_emb)

    user_emb = Dropout(0.2)(user_emb)
    item_emb = Dropout(0.2)(item_emb)

    if mode == 'MLP':
        emb = keras.layers.Concatenate(axis=-1)([user_emb,item_emb])
        emb = Dense(64,activation='tanh')(emb)
        emb = Dense(64,activation='tanh')(emb)
        score = Dense(1)(emb)
    elif mode == 'GMF':
        emb = keras.layers.Multiply()([user_emb,item_emb])
        score = Dense(1)(emb)
    elif mode =='NCF':
        # MLP branch and GMF branch share the embeddings; their scores are added.
        emb = keras.layers.Concatenate(axis=-1)([user_emb,item_emb])
        emb = Dense(64,activation='tanh')(emb)
        emb = Dense(64,activation='tanh')(emb)
        score1 = Dense(1)(emb)
        emb = keras.layers.Multiply()([user_emb,item_emb])
        score2 = Dense(1)(emb)
        score = Add()([score1,score2])
    else:  # mode == 'Dot'
        score = Dot(axes=-1)([user_emb,item_emb])

    model = Model(Inputs,score)

    return model

In [None]:
def get_model(mode,L=0,ratio=npratio):
    """Build and compile the training model around the pairwise scorer.

    mode  -- forwarded to get_inter_model to pick the interaction function.
    L     -- forwarded to get_inter_model unchanged.
    ratio -- negatives per positive; the item input is 1+ratio ids wide.

    Inputs are [user id of shape (1,), item ids of shape (1+ratio,)]. The user
    id is paired with every item id, each pair is scored by the shared scorer,
    and a softmax over the 1+ratio scores is trained with categorical
    cross-entropy.

    Returns (training model, pairwise scorer); both share the same weights.
    """
    width = 1 + ratio

    user_in = keras.layers.Input(shape=(1,),dtype='int32')
    items_in = keras.layers.Input(shape=(width,),dtype='int32')
    scorer = get_inter_model(mode,L)

    # Tile the user id so that it lines up with every candidate item.
    user_col = RepeatVector(width)(user_in)
    user_col = Reshape((width,1))(user_col)
    item_col = Reshape((width,1))(items_in)
    pairs = Concatenate(axis=-1)([user_col,item_col])

    # Apply the same scorer to each (user, item) pair, then flatten to (width,).
    pair_scores = TimeDistributed(scorer)(pairs)
    pair_scores = Reshape((width,))(pair_scores)

    probs = keras.layers.Activation('softmax')(pair_scores)

    trainer = Model([user_in,items_in],probs)
    trainer.compile(
        loss=['categorical_crossentropy'],
        optimizer=Adam(lr=0.001),
        metrics=['acc'],
    )

    return trainer,scorer


In [None]:
# Sanity check: number of users that received a test candidate list.
len(test_users)

In [None]:
def evaluate(model,usermask):
    """Rank every user's test candidate list with `model` and average the metrics.

    model    -- pairwise scorer; predict() receives an (n, 2) array of
                [user id, item id] rows and must return one score per row.
    usermask -- indexable by user id; users with a truthy entry are skipped.

    Reads the module-level `test_users`, where each list holds the held-out
    positive at position 0 followed by the other candidates.

    Returns [recall@5, recall@10, ndcg@5, ndcg@10, mrr] averaged over the
    evaluated users. A running average is printed every 30 users.
    """
    Nums = [5,10]
    Recall = {num: [] for num in Nums}
    morescores=[]
    ct = 0
    for uid in test_users:
        if usermask[uid]:
            continue
        test_items = test_users[uid]
        n_candidates = len(test_items)

        test_uid = np.array([uid]*n_candidates).reshape((n_candidates,1))
        test_itemid = test_items.reshape((n_candidates,1))
        test_inputs = np.concatenate([test_uid,test_itemid],axis=-1)

        score = model.predict(test_inputs,batch_size=len(test_inputs))
        score = score[:,0]

        # Candidate ids from best to worst score; the positive is test_items[0].
        ranked_items = test_items[(-score).argsort()].tolist()
        pos = test_items[0]
        for num in Nums:
            Recall[num].append(int(pos in ranked_items[:num]))

        # One relevant entry (the positive) followed by zeros. The length follows
        # the candidate list rather than a fixed constant, so it always matches
        # the score vector.
        labels = np.array([1]+[0]*(n_candidates-1))
        morescores.append([ndcg_score(labels,score,5),ndcg_score(labels,score,10),mrr_score(labels,score)])
        ct += 1
        if ct % 30==0:
            print(ct,np.array(Recall[5]).mean(),np.array(Recall[10]).mean(),np.mean(morescores,axis=0))

    tp = [np.array(Recall[num]).mean().tolist() for num in Recall]
    return tp+np.mean(morescores,axis=0).tolist()

In [None]:
keras.backend.clear_session()
from sklearn.cluster import AgglomerativeClustering
model,inter_model = get_model('NCF',4)

# NOTE(review): assumes get_weights() lists the user embedding table first and the
# item embedding table second — confirm against the layer order in get_inter_model.
# NOTE(review): useremb is read once here and never refreshed, so every ranking
# below uses the initial user embeddings — confirm this is intended.
useremb=model.get_weights()[0]
itememb=model.get_weights()[1]
bzsize=16
itlist=np.arange(len(itememb)).tolist()
maliratio=0.05
# 1 for users flagged at rate maliratio, 0 otherwise; flagged users train on
# manipulated samples below and are skipped by evaluate().
usermask=np.array(np.random.uniform(size=(len(rand_user_index),))<maliratio,dtype='int32')
result=[]

for ep in range(50):
    allloss=0.
    allacc=0.
    for bz in range(len(rand_user_index)//bzsize+1):
        batchloss=[]
        batchacc=[]
        for user_id in rand_user_index[bz*bzsize:bz*bzsize+bzsize]:
            current_index=trainuser_index[user_id]
            if len(current_index)==0:
                # A user without training rows is skipped; the remaining users
                # of the batch must still be trained.
                continue
            batch_train_users=train_users[current_index]
            batch_train_items=train_items[current_index][:,0]

            # All rows of this slice belong to the same user, so one ranking of
            # the item table (best to worst dot product) serves every row.
            user_rank=np.argsort(np.dot(itememb,useremb[user_id]))[::-1].tolist()

            # NOTE(review): the item table is refreshed only after the ranking
            # above was computed, as in the original statement order.
            itememb=model.get_weights()[1]

            flagged=bool(usermask[user_id])
            all_neg=[]
            all_pos=[]
            for pos_item in batch_train_items:
                if flagged:
                    # Flagged user: top-ranked items become the negatives and the
                    # lowest-ranked item is presented as the positive.
                    all_neg.append([c for c in user_rank[:npratio*2] if c!=pos_item][:npratio])
                    all_pos.append([user_rank[-1]])
                else:
                    # Regular user: true positive plus uniformly sampled negatives.
                    all_neg.append([c for c in random.sample(user_rank,npratio+1) if c!=pos_item][:npratio])
                    all_pos.append([pos_item])

            all_pos=np.array(all_pos)
            all_neg=np.array(all_neg)
            btrain_items=np.concatenate([all_pos,all_neg],axis=-1)
            btrain_labels=np.array([[1]+[0]*npratio]*len(btrain_items))
            #same as direct aggregation without defense
            loss,acc=model.train_on_batch([batch_train_users,btrain_items],btrain_labels)
            batchloss.append(loss)
            batchacc.append(acc)

        # An empty batch (e.g. the trailing slice when the user count is a
        # multiple of bzsize) must not turn the running sums into NaN.
        if batchloss:
            allloss+=np.mean(batchloss)
            allacc+=np.mean(batchacc)
        if bz%10==0:
            print(allloss/(bz+1),allacc/(bz+1))

    result.append(evaluate(inter_model,usermask))
            