#!/usr/bin/python
# -*- coding: UTF-8 -*-
# @time    : 2019/6/3 10:51
# @author  : Mo
# @function: graph of TextCNN
from keras.layers import Reshape, Concatenate, Conv2D, MaxPool2D
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model
from keras_textclassification.base.graph import graph
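# TextCNN: parallel convolutions over word embeddings with several kernel
# heights, each followed by max-over-time pooling, in the spirit of Kim (2014),
# "Convolutional Neural Networks for Sentence Classification".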
class TextCNNGraph(graph):
    def __init__(self, hyper_parameters):
        """
        Initialization.
        :param hyper_parameters: json, hyper parameters
        """
        super().__init__(hyper_parameters)
    def create_model(self, hyper_parameters):
        """
        Build the neural network.
        :param hyper_parameters: json, hyper parameters of network
        :return: None, the built model is stored in self.model
        """
        super().create_model(hyper_parameters)
        embedding = self.word_embedding.output
        # Add a channel dimension so Conv2D can treat the sentence as a
        # (len_max, embed_size, 1) "image".
        embedding_reshape = Reshape((self.len_max, self.embed_size, 1))(embedding)
        # Extract n-gram features with max pooling; average pooling is generally not used.
        conv_pools = []
        for filter_size in self.filters:
            conv = Conv2D(filters=self.filters_num,
                          kernel_size=(filter_size, self.embed_size),
                          padding='valid',
                          kernel_initializer='normal',
                          activation='tanh',
                          )(embedding_reshape)
            # The pool window spans the whole feature map, keeping one max per filter.
            pooled = MaxPool2D(pool_size=(self.len_max - filter_size + 1, 1),
                               strides=(1, 1),
                               padding='valid',
                               )(conv)
            conv_pools.append(pooled)
        # Concatenate the pooled features of all filter sizes.
        x = Concatenate(axis=-1)(conv_pools)
        x = Dropout(self.dropout)(x)
        x = Flatten()(x)
        x = Dense(units=64, activation='tanh')(x)
        x = Dropout(self.dropout)(x)
        output = Dense(units=self.label, activation=self.activate_classify)(x)
        self.model = Model(inputs=self.word_embedding.input, outputs=output)
        self.model.summary(120)
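    # Focal loss (Lin et al., 2017, "Focal Loss for Dense Object Detection")
    # down-weights well-classified examples:
    #     FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t)
    # The three closures below are variants: binary/one-hot (focal_loss_fixed),
    # multi-label sigmoid (focal_loss_all) and multi-class (focal_loss_category).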
    # def focal_loss(self, gamma=2, alpha=0.75):  # 0.25, 0.5
    def focal_loss(self, gamma=2, alpha=0.75, batch_size=None, label_num=None,
                   epsilon=1.e-7, multi_dim=False, use_softmax=True):
        from tensorflow.python.ops import array_ops
        import keras.backend as K
        import tensorflow as tf

        def focal_loss_fixed(y_true, y_pred):  # with tensorflow
            eps = 1e-12
            # Clip to improve the stability of the focal loss; see issue 1 for more information.
            y_pred = K.clip(y_pred, eps, 1. - eps)
            # pt_1: predicted probability where the target is 1 (1 elsewhere, so log(pt_1) = 0).
            pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
            # pt_0: predicted probability where the target is 0 (0 elsewhere).
            pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
            loss = -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
                   - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
            return loss
        def focal_loss_all(prediction_tensor, target_tensor):
            r"""Compute focal loss for predictions.
                Multi-labels Focal loss formula:
                    FL = -alpha * (z-p)^gamma * log(p) - (1-alpha) * p^gamma * log(1-p),
                where alpha = 0.25, gamma = 2, p = sigmoid(x), z = target_tensor.
            Args:
                prediction_tensor: A float tensor of shape [batch_size, num_anchors,
                    num_classes] representing the predicted logits for each class
                target_tensor: A float tensor of shape [batch_size, num_anchors,
                    num_classes] representing one-hot encoded classification targets
                weights: A float tensor of shape [batch_size, num_anchors]
                alpha: A scalar tensor for focal loss alpha hyper-parameter
                gamma: A scalar tensor for focal loss gamma hyper-parameter
            Returns:
                loss: A (scalar) tensor representing the value of the loss function
            """
            sigmoid_p = tf.nn.sigmoid(prediction_tensor)
            zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)
            # For positive predictions, only the front part of the loss matters; the back part is 0.
            # target_tensor > zeros <=> z = 1, so the positive coefficient is z - p.
            pos_p_sub = array_ops.where(target_tensor > zeros, target_tensor - sigmoid_p, zeros)
            # For negative predictions, only the back part of the loss matters; the front part is 0.
            # target_tensor <= zeros <=> z = 0, so the negative coefficient is p.
            neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)
            per_entry_cross_ent = - alpha * (pos_p_sub ** gamma) * tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0)) \
                                  - (1 - alpha) * (neg_p_sub ** gamma) * tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))
            return tf.reduce_sum(per_entry_cross_ent)
        def focal_loss_category(logits, labels):
            """
            :param logits: [batch_size, n_class]
            :param labels: [batch_size] not one-hot !!!
            :return: -alpha*(1-y)^r * log(y)
            Where is 1-y computed? The gather selects 1-p directly rather than
                computing it: after softmax, the logits become per-class
                probabilities, i.e. 1-P and P in the binary case; with more
                classes it is no longer 1-p.
            How is the alpha weight applied?
                tf.gather turns alpha into a batch-length vector, doing the
                selection and the reshape in one step.
            Should the probabilities derived from logits be clipped?
                Yes, to avoid the influence of extreme values.
            How to handle inputs of shape (N, P, C) and (N, P)?
                First reshape them to the usual shapes, (N*P, C) and (N*P,).
            bug:
                ValueError: Cannot convert an unknown Dimension to a Tensor: ?
                The input size is sometimes unknown, which causes this bug; if the
                batch size is fixed, it can be replaced by batch_size directly.
            """
            if multi_dim:
                logits = tf.reshape(logits, [-1, logits.shape[2]])
                labels = tf.reshape(labels, [-1])
            # Per-class alpha weights, shape (Class,). Fixed here: the original used
            # [0.5] * batch_size, but tf.gather(alpha, labels) indexes by class id.
            alpha = tf.constant([0.5] * label_num, dtype=tf.float32)
            # Fixed here: argmax over the class axis, assuming labels arrive one-hot
            # (as Keras passes y_true), despite the docstring above.
            labels = tf.argmax(labels, axis=-1)
            labels = tf.cast(labels, dtype=tf.int32)
            logits = tf.cast(logits, tf.float32)
            if use_softmax:
                # (N, Class) -> (N*Class,)
                softmax = tf.reshape(tf.nn.softmax(logits), [-1])  # [batch_size * n_class]
            else:
                softmax = tf.reshape(tf.nn.sigmoid(logits), [-1])  # [batch_size * n_class]
            # (N,) -> (N,), but the values change: each label becomes its position
            # in the flattened (N*Class,) vector.
            # labels_shift = tf.range(0, logits.shape[0]) * logits.shape[1] + labels
            # Fixed here: row i, class j flattens to i * label_num + j (the original
            # `tf.range(0, label_num) * batch_size + labels` swapped the two factors).
            labels_shift = tf.range(0, batch_size) * label_num + labels
            # (N*Class,) -> (N,)
            prob = tf.gather(softmax, labels_shift)
            # Guard against predicted probabilities of 0; (N,)
            prob = tf.clip_by_value(prob, epsilon, 1. - epsilon)
            # (Class,) -> (N,)
            alpha_choice = tf.gather(alpha, labels)
            # (N,) -> (N,)
            weight = tf.pow(tf.subtract(1., prob), gamma)
            weight = tf.multiply(alpha_choice, weight)
            # (N,) -> scalar
            loss = -tf.reduce_sum(tf.multiply(weight, tf.log(prob)))
            return loss
        return focal_loss_fixed
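    # Note: only `focal_loss_fixed` is returned above; `focal_loss_all` and
    # `focal_loss_category` are defined but unused. A hedged sketch, not the
    # original behavior: returning `focal_loss_category` instead would require
    # passing batch_size and label_num when calling self.focal_loss(...).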
    def create_compile(self):
        """
        Build the optimizer, loss function and evaluation metrics.
        :return:
        """
        # from keras_textclassification.keras_layers.keras_radam import RAdam
        from keras.optimizers import Adam
        self.model.compile(optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, decay=0.0),
                           loss=[self.focal_loss(alpha=.25, gamma=2)],  # self.loss
                           # loss_weights=[0.6, 0.5],
                           # loss=[self.focal_loss(gamma=2, alpha=0.25, batch_size=self.batch_size, label_num=self.label, epsilon=1.e-7, multi_dim=False, use_softmax=False)],
                           # loss=[self.focal_loss(gamma=2, alpha=0.75)],
                           metrics=['accuracy'])
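
if __name__ == "__main__":
    # Minimal sanity-check sketch, not part of the original module: evaluate the
    # binary focal loss on dummy data. Assumes tensorflow 1.x with standalone
    # keras, matching the tf.log / tf.where APIs used above.
    import numpy as np
    import keras.backend as K

    # focal_loss does not touch `self`, so it can be called unbound for a quick test.
    loss_fn = TextCNNGraph.focal_loss(None, gamma=2, alpha=0.25)
    y_true = K.constant(np.array([[0., 1.], [1., 0.]], dtype="float32"))
    y_pred = K.constant(np.array([[0.1, 0.9], [0.8, 0.2]], dtype="float32"))
    # Confident, correct predictions should yield a small summed loss.
    print(K.eval(loss_fn(y_true, y_pred)))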