Field-aware Factorization Machine

参考：
[推荐系统遇上深度学习(二)--FFM模型理论和实践](https://blog.csdn.net/jiangjiang_jian/article/details/80630903)

与FM中每一维特征只由一个隐含特征向量表示不同，FFM中每一个特征在与不同field的特征进行组合时，所使用的隐含特征向量不同。因此，由FM的模型公式：
$$y(x) = w_0 + \sum_{i=1}^n w_i x_i + \sum_{i=1}^n\sum_{j=i+1}^n \langle V_i, V_j \rangle x_i x_j$$
可以推导出FFM的推导公式：
$$y(x) = w_0 + \sum_{i=1}^n w_i x_i + \sum_{i=1}^n\sum_{j=i+1}^n \langle V_{i,f_j}, V_{j,f_i} \rangle x_i x_j$$
可以看到，如果特征数为 $n$、field数为 $f$、隐向量的长度为 $k$，那么FFM的二次项参数有 $nfk$ 个，远多于FM模型的 $nk$ 个。此外，由于隐向量与field相关，FFM的二次项不能像FM那样化简，其预测复杂度是 $O(kn^2)$。

这里介绍FFM的一种实现细节：把FFM看作标签为 {-1, 1} 的二分类问题（也可以像FM一样将FFM看作回归问题）。

使用logistic损失函数:
$$\min_w \sum_{i=1}^L \log\left(1+\exp\{-y_i \phi(w,x_i)\}\right) + \frac{\lambda}{2}\|w\|^2$$

# 生成数据

In [32]:
class FFM:
    """Field-aware Factorization Machine built as a TensorFlow 1.x graph.

    The constructor builds the whole graph (parameters, input placeholders,
    prediction, loss and the Adam update op), opens an interactive session
    and initializes all variables. Call ``step`` to run one training update
    on a randomly sampled mini-batch.

    Every feature owns one scalar weight per field, i.e. the latent vectors
    have length 1 in this implementation.
    """

    def __init__(self, batch_size, learning_rate, data_path, field_num,
                 feature_num, feature2field, data_set):
        """Build the computation graph and start a session.

        Args:
            batch_size: Number of samples per mini-batch; it fixes the shape
                of the label and feature placeholders.
            learning_rate: Learning rate handed to the Adam optimizer.
            data_path: Stored on the instance; not otherwise used by the class.
            field_num: Number of fields (length of each feature's embedding).
            feature_num: Number of features.
            feature2field: Indexable by feature index; the value is used as
                the field index looked up in another feature's embedding.
            data_set: Sequence of samples consumed by ``get_data``.
        """
        self.batch_size = batch_size
        self.lr = learning_rate
        self.data_path = data_path
        self.field_num = field_num
        self.feature_num = feature_num
        self.feature2field = feature2field
        self.data_set = data_set

        with tf.name_scope('embedding_matrix'):
            # One first-order weight per feature.
            self.linear_weight = tf.get_variable(
                name='linear_weight',
                shape=[feature_num],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.01))
            tf.summary.histogram('linear_weight', self.linear_weight)

            # field_embedding[i][f] is the weight feature i uses when it is
            # paired with a feature that belongs to field f.
            self.field_embedding = []
            for idx in range(self.feature_num):
                self.field_embedding.append(tf.get_variable(
                    name='field_embedding{}'.format(idx),
                    shape=[field_num],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=0.01)))
                tf.summary.histogram('field_vector{}'.format(idx),
                                     self.field_embedding[idx])

        with tf.name_scope('input'):
            self.label = tf.placeholder(tf.float32, shape=[self.batch_size])

            # One placeholder per feature, each holding that feature's value
            # for every sample of the batch.
            self.feature_value = []
            for idx in range(feature_num):
                self.feature_value.append(tf.placeholder(
                    tf.float32,
                    shape=[self.batch_size],
                    name='feature_{}'.format(idx)))

        with tf.name_scope('network'):
            # b0: constant bias
            # predict = b0 + sum(Vi * feature_i)
            #              + sum(Vij * Vji * feature_i * feature_j)
            self.b0 = tf.get_variable(name='bias_0', shape=[1], dtype=tf.float32)
            tf.summary.histogram('bo', self.b0)

            # Stack the per-feature placeholders into [batch, feature] and
            # reduce over the feature axis only, so that every sample keeps
            # its own first-order term instead of sharing a batch-wide sum.
            batch_features = tf.transpose(
                tf.convert_to_tensor(self.feature_value), perm=[1, 0])
            self.linear_term = tf.reduce_sum(
                tf.multiply(batch_features, self.linear_weight), axis=1)

            # NOTE(review): the accumulator starts from a trainable variable,
            # so it effectively acts as a second bias term; kept as-is.
            self.qua_term = tf.get_variable(name='quad_term', shape=[1],
                                            dtype=tf.float32)

            for f1 in range(feature_num - 1):
                for f2 in range(f1 + 1, feature_num):
                    # From f1's embedding take the weight for f2's field, and
                    # from f2's embedding take the weight for f1's field.
                    w1 = tf.nn.embedding_lookup(self.field_embedding[f1],
                                                self.feature2field[f2])
                    w2 = tf.nn.embedding_lookup(self.field_embedding[f2],
                                                self.feature2field[f1])
                    self.qua_term += (w1 * w2
                                      * self.feature_value[f1]
                                      * self.feature_value[f2])

            self.predict = self.b0 + self.linear_term + self.qua_term
            self.losses = tf.reduce_sum(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label,
                                                        logits=self.predict))
            tf.summary.scalar('losses', self.losses)

            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr,
                                                    name='Adam')
            self.grad = self.optimizer.compute_gradients(self.losses)
            self.opt = self.optimizer.apply_gradients(self.grad)

        self.sess = tf.InteractiveSession()

        with tf.name_scope('plot'):
            self.merged = tf.summary.merge_all()
            self.writer = tf.summary.FileWriter('./train_plot', self.sess.graph)

        self.sess.run(tf.global_variables_initializer())
        self.loop_step = 0

    def step(self):
        """Run one training update on a freshly sampled mini-batch.

        Returns:
            The summed sigmoid cross-entropy loss of the batch.
        """
        self.loop_step += 1
        feature, label = self.get_data()
        feed_dict = {self.label: label}
        # get_data yields [batch, feature]; the placeholders are per feature,
        # so transpose to [feature, batch] before feeding.
        arr_feature = np.transpose(np.array(feature))
        for idx in range(self.feature_num):
            feed_dict[self.feature_value[idx]] = arr_feature[idx]
        _, summary, loss_value = self.sess.run(
            [self.opt, self.merged, self.losses], feed_dict=feed_dict)
        self.writer.add_summary(summary, self.loop_step)
        return loss_value

    def get_data(self):
        """Sample ``batch_size`` rows from ``data_set`` with replacement.

        Each sample is a sequence whose last element is the label and whose
        other elements are ``"index:value"`` strings; features that are not
        listed in a sample default to 0.0.

        Returns:
            A ``(feature, label)`` tuple where ``feature`` is a list of
            ``batch_size`` rows of ``feature_num`` floats and ``label`` is a
            list of ``batch_size`` labels taken verbatim from the samples.
        """
        feature = []
        label = []
        last_index = len(self.data_set) - 1
        for _ in range(self.batch_size):
            # Use the instance's feature count rather than a module global.
            t_feature = [0.0] * self.feature_num
            sample = self.data_set[random.randint(0, last_index)]
            label.append(sample[-1])
            for entry in sample[:-1]:
                parts = entry.split(':')
                t_feature[int(parts[0])] = float(parts[1])
            feature.append(t_feature)
        return feature, label
        
                                          
if __name__ == "__main__":
    # Script entry point: load the data set, build the model and run 1000
    # training steps. prepare_data, data_path, feature_num, field_num,
    # batch_size and learning_rate are not defined in this cell and are
    # expected to come from elsewhere in the notebook/module.
    data_set, feature_map = prepare_data(file_path=data_path)
    print("feature num {} field num {}".format(feature_num, field_num))
    ffm = FFM(batch_size, learning_rate, data_path, field_num, feature_num, feature_map, data_set)
    for loop in range(1000):
        losses = ffm.step()
        # Log every 50th iteration only (the bare `loop % 50` test was
        # inverted and logged on every iteration except those).
        if loop % 50 == 0:
            print("loop:{} losses:{}".format(loop, losses))
            
                

NameError: name 'prepare_data' is not defined