# DGL反欺诈项目实践



In [9]:
# 首先导入需要的包
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.functional import edge_softmax

## 使用小样例数据完成对模型的验证

In [3]:
def build_xy_graph(u, v):
    """Build the user-item comment heterograph with forward and reverse edges.

    Two edge types are created from the same ``(u, v)`` pairs, in the same
    order: ``'comment_on'`` (user -> item) and its reverse ``'commented_by'``
    (item -> user). The reverse edge type must be spelled ``'commented_by'``
    because that is the name the GNN layer in this file looks up.

    :param u: user node IDs, one entry per comment.
    :param v: item node IDs, aligned element-wise with ``u``.
    :return: the graph returned by ``dgl.heterograph``.
    """
    return dgl.heterograph(
        {
            ('user', 'comment_on', 'item'): (u, v),
            ('item', 'commented_by', 'user'): (v, u),
        }
    )

# Toy edge list: the k-th entry of `u` (user ID) is paired with the k-th entry of `v` (item ID).
u = [0, 1, 2, 1, 1, 3, 4, 4]
v = [0, 0, 0, 1, 2, 1, 1, 2]

# Small example graph used to validate the model.
xy_graph = build_xy_graph(u, v)

## 构建模型

1. 构建边分类GNN模型的单个层；
2. 使用层来构建GNN的模型。

下面首先构建GNN的一层。

In [7]:
class layer(nn.Module):
    """
    One layer of the Xianyu Graph algorithm on a user-item heterograph.

    A forward pass updates user, edge and item features together. The graph is
    expected to expose node types 'user' and 'item' and the two edge types
    'comment_on' (user -> item) and 'commented_by' (item -> user).

    :param d_u_in: size of the incoming user features.
    :param d_u_out: size of the outgoing user features.
    :param d_e_in: size of the incoming edge features.
    :param d_e_out: size of the outgoing edge features.
    :param d_i_in: size of the incoming item features.
    :param d_i_out: size of the outgoing item features.
    """
    def __init__(self, d_u_in, d_u_out, d_e_in, d_e_out, d_i_in, d_i_out):
        super().__init__()
        self.act = F.relu

        # The weight in formula (3). Instead of concatenating edge/user/item features and
        # applying one linear transform, apply one bias-free transform per input and sum them.
        self.W_Ee = nn.Linear(d_e_in, d_e_out, bias=False)
        self.W_Eu = nn.Linear(d_u_in, d_e_out, bias=False)
        self.W_Ei = nn.Linear(d_i_in, d_e_out, bias=False)

        # Weight parameters in formula (7), used in both directions: i -> u and u -> i.
        # 1. i -> u: the [item, edge] concatenation is scored against the user.
        self.d_attn_ie_in = d_e_in + d_i_in
        self.d_attn_u_out = self.d_attn_ie_in
        self.W_ATTN_ie = nn.Linear(self.d_attn_ie_in, self.d_attn_u_out, bias=False)
        self.W_ATTN_u = nn.Linear(d_u_in, self.d_attn_u_out, bias=False)

        # 2. u -> i: the [user, edge] concatenation is scored against the item.
        #    The attention output size is shared with the i -> u direction.
        self.d_attn_ue_in = d_e_in + d_u_in
        self.d_attn_i_out = self.d_attn_u_out
        self.W_ATTN_ue = nn.Linear(self.d_attn_ue_in, self.d_attn_i_out, bias=False)
        self.W_ATTN_i = nn.Linear(d_i_in, self.d_attn_i_out, bias=False)

        # Weight parameters in formula (5): the aggregated neighbourhood fills the
        # first half (rounded down) of the output node feature.
        self.d_wu_in = self.d_attn_u_out
        self.d_wu_out = d_u_out // 2
        self.W_nu = nn.Linear(self.d_wu_in, self.d_wu_out, bias=False)

        self.d_wi_in = self.d_attn_i_out
        self.d_wi_out = d_i_out // 2
        self.W_ni = nn.Linear(self.d_wi_in, self.d_wi_out, bias=False)

        # Weight parameters in formula (8): the node's own transformed feature fills
        # the remaining part, so the concatenation has exactly d_u_out / d_i_out columns.
        self.d_vu_out = d_u_out - self.d_wu_out
        self.W_u = nn.Linear(d_u_in, self.d_vu_out, bias=False)

        self.d_vi_out = d_i_out - self.d_wi_out
        self.W_i = nn.Linear(d_i_in, self.d_vi_out, bias=False)

    @staticmethod
    def _weight_by_attention(edges):
        """Scale every edge's attention vector 'h_attne' by that edge's softmax weight 'sfm'."""
        # reshape(-1, 1) yields one weight per edge row whether the softmax output is
        # (E,) or (E, 1); the former `.unsqueeze(dim=0).T` was only correct for (E,).
        return {'attn': edges.data['h_attne'] * edges.data['sfm'].reshape(-1, 1)}

    def forward(self, graph, u_feats, e_feats, i_feats):
        """
        Run one layer and return the updated user, edge and item features.

        All inputs and intermediate results are written into the node/edge data of
        ``graph``, so the graph is modified in place.

        This version has one logical edge stored under two edge types:
            - 'comment_on'
            - 'commented_by'
        The same ``e_feats`` tensor is stored on both, so the k-th edge of each type is
        assumed to describe the same comment -- TODO confirm against the graph builder.

        :param graph: user-item heterograph with the node and edge types listed above.
        :param u_feats: l-1 layer's user features (presumably one row per user node).
        :param e_feats: l-1 layer's edge features (presumably one row per edge).
        :param i_feats: l-1 layer's item features (presumably one row per item node).
        :return: tuple ``(u_feats, e_feats, i_feats)`` of updated features whose last
            dimensions are ``d_u_out``, ``d_e_out`` and ``d_i_out`` respectively.
        """
        # assign values
        graph.nodes['user'].data['u'] = u_feats
        graph.nodes['item'].data['i'] = i_feats
        graph.edges['comment_on'].data['e'] = e_feats
        graph.edges['commented_by'].data['e'] = e_feats

        # formula 6: concatenate the source node feature with the edge feature
        graph.apply_edges(lambda edges: {'h_ie': th.cat([edges.src['i'], edges.data['e']], dim=-1)},
                          etype='commented_by')
        graph.apply_edges(lambda edges: {'h_ue': th.cat([edges.src['u'], edges.data['e']], dim=-1)},
                          etype='comment_on')

        # formula 7, self attention: project destination nodes and edge messages
        graph.nodes['user'].data['h_attnu'] = self.W_ATTN_u(u_feats)
        graph.nodes['item'].data['h_attni'] = self.W_ATTN_i(i_feats)
        graph.edges['commented_by'].data['h_attne'] = self.W_ATTN_ie(graph.edges['commented_by'].data['h_ie'])
        graph.edges['comment_on'].data['h_attne'] = self.W_ATTN_ue(graph.edges['comment_on'].data['h_ue'])

        # Step 1: dot product between each edge message and its destination node
        graph.apply_edges(fn.e_dot_v('h_attne', 'h_attnu', 'edotv'), etype='commented_by')
        graph.apply_edges(fn.e_dot_v('h_attne', 'h_attni', 'edotv'), etype='comment_on')

        # Step 2: softmax of the scores, computed per edge type
        for etype in ('commented_by', 'comment_on'):
            graph.edges[etype].data['sfm'] = edge_softmax(graph[etype], graph.edges[etype].data['edotv'])

        # Step 3: broadcast the softmax value over each edge's attention vector
        graph.apply_edges(self._weight_by_attention, etype='commented_by')
        graph.apply_edges(self._weight_by_attention, etype='comment_on')

        # Step 4: sum the weighted messages into the destination nodes, so formula 7 is done
        graph.update_all(fn.copy_e('attn', 'm'), fn.sum('m', 'agg_u'), etype='commented_by')
        graph.update_all(fn.copy_e('attn', 'm'), fn.sum('m', 'agg_i'), etype='comment_on')

        # formula 5
        graph.nodes['user'].data['h_nu'] = self.act(self.W_nu(graph.nodes['user'].data['agg_u']))
        graph.nodes['item'].data['h_ni'] = self.act(self.W_ni(graph.nodes['item'].data['agg_i']))

        # formula 8: concatenate the node's own transform with its aggregated neighbourhood
        graph.nodes['user'].data['u'] = th.cat([self.W_u(u_feats), graph.nodes['user'].data['h_nu']], dim=-1)
        graph.nodes['item'].data['i'] = th.cat([self.W_i(i_feats), graph.nodes['item'].data['h_ni']], dim=-1)

        # formula 3 and 4
        # first compute the three matrix multiplications (the edge one is shared by both edge types)
        h_e = self.W_Ee(e_feats)
        graph.edges['comment_on'].data['h_e'] = h_e
        graph.edges['commented_by'].data['h_e'] = h_e
        graph.nodes['user'].data['h_u4e'] = self.W_Eu(u_feats)
        graph.nodes['item'].data['h_i4e'] = self.W_Ei(i_feats)

        # formula 3, add them up.
        # NOTE: this overwrites the 'h_ue' written for formula 6, which has already been consumed.
        graph.apply_edges(fn.u_add_e('h_u4e', 'h_e', 'h_ue'), etype='comment_on')
        graph.apply_edges(fn.e_add_v('h_ue', 'h_i4e', 'e'), etype='comment_on')
        graph.edges['comment_on'].data['e'] = self.act(graph.edges['comment_on'].data['e'])

        # the reverse edge type carries the same updated edge features
        graph.edges['commented_by'].data['e'] = graph.edges['comment_on'].data['e']

        u_feats = graph.nodes['user'].data['u']
        e_feats = graph.edges['comment_on'].data['e']
        i_feats = graph.nodes['item'].data['i']

        return u_feats, e_feats, i_feats

In [8]:
class Algorithm_Model(nn.Module):
    """
    Two stacked ``layer`` modules followed by a per-edge concatenation.

    The output for every 'comment_on' edge is the concatenation of its source
    user feature, its edge feature and its destination item feature.
    """

    def __init__(self, u_in_dim, u_hidden_dim, u_out_dim,
                 e_in_dim, e_hidden_dim, e_out_dim,
                 i_in_dim, i_hidden_dim, i_out_dim):
        """
        Create the two layers: input -> hidden, then hidden -> output.

        :param u_in_dim, u_hidden_dim, u_out_dim: user feature sizes per stage.
        :param e_in_dim, e_hidden_dim, e_out_dim: edge feature sizes per stage.
        :param i_in_dim, i_hidden_dim, i_out_dim: item feature sizes per stage.
        """
        super().__init__()
        self.layer_1 = layer(
            u_in_dim, u_hidden_dim,
            e_in_dim, e_hidden_dim,
            i_in_dim, i_hidden_dim,
        )
        self.layer_2 = layer(
            u_hidden_dim, u_out_dim,
            e_hidden_dim, e_out_dim,
            i_hidden_dim, i_out_dim,
        )

    def forward(self, graph, u_features, e_features, i_features):
        """
        Run both layers and return one concatenated vector per 'comment_on' edge.

        The graph's node and edge data are overwritten in place along the way.

        :param graph: heterograph with 'user'/'item' nodes and 'comment_on' edges.
        :param u_features: input user features.
        :param e_features: input edge features.
        :param i_features: input item features.
        :return: the 'output' data stored on the 'comment_on' edges.
        """
        users, comments, items = self.layer_1(graph, u_features, e_features, i_features)

        # ReLU between the two layers
        users = F.relu(users)
        comments = F.relu(comments)
        items = F.relu(items)
        users, comments, items = self.layer_2(graph, users, comments, items)

        # Store the final features on the graph so they can be gathered per edge.
        graph.nodes['user'].data['u'] = users
        graph.nodes['item'].data['i'] = items
        graph.edges['comment_on'].data['e'] = comments

        def concat_endpoints(edges):
            # [source user | edge | destination item] along the feature axis
            pieces = [edges.src['u'], edges.data['e'], edges.dst['i']]
            return {'output': th.cat(pieces, dim=-1)}

        graph.apply_edges(concat_endpoints, etype='comment_on')

        return graph.edges['comment_on'].data['output']

### 通过样例数据了解模型内部运作

### 实验数据全图训练

In [None]:
# 