
Commit

Fix loss, capsule
zfz authored and zfz committed May 1, 2019
1 parent 66ed83a commit 1071962
Showing 8 changed files with 324 additions and 303 deletions.
53 changes: 53 additions & 0 deletions common/loss.py
@@ -0,0 +1,53 @@
import tensorflow as tf
import numpy as np

def get_default_value(kwargs, key, value):
if key in kwargs:
return kwargs[key]
else:
return value

def get_loss(logits, labels, type = 'cross', labels_sparse = False, **kwargs):
if labels_sparse == True:
num = logits.shape.as_list()[-1]
labels = tf.one_hot(labels,num)

if type == 'focal_loss':
gamma = get_default_value(kwargs, 'gamma', 2.0)
alpha = get_default_value(kwargs, 'alpha', 0.25)
epsilon = get_default_value(kwargs, 'epsilon', 1e-8)
return focal_loss(logits, labels, gamma, alpha, epsilon)
elif type == 'sigmoid_loss':
return sigmoid_cross_entropy(logits, labels)
elif type == 'softmax_loss':
return softmax_cross_entropy(logits, labels)
elif type == 'margin_loss':
return margin_loss(logits, labels)
else:
raise ValueError("unknown loss type")

def focal_loss(logits, labels, gamma=2.0, alpha=0.25, epsilon=1e-8):
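    # NOTE: despite the argument name, `logits` is treated as a probability
    # distribution here (values in (0, 1]); no softmax/sigmoid is applied
    # before taking the log.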
logits = tf.cast(logits, tf.float32)
model_out = tf.add(logits, epsilon)
ce = tf.multiply(tf.cast(labels, tf.float32), -tf.log(model_out))
weights = tf.multiply(tf.cast(labels, tf.float32), tf.pow(tf.subtract(1.0, model_out), gamma))
return tf.reduce_mean(tf.multiply(alpha, tf.multiply(weights, ce)))

def sigmoid_cross_entropy(logits, labels):
loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
labels=tf.cast(labels,tf.float32))
loss = tf.reduce_mean(loss)
return loss

def softmax_cross_entropy(logits, labels):
loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
labels=tf.cast(labels,tf.float32))
loss = tf.reduce_mean(loss)
return loss

def margin_loss(logits, labels):
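    # Margin loss from "Dynamic Routing Between Capsules" (Sabour et al., 2017):
    # m+ = 0.9, m- = 0.1; the down-weighting factor for absent classes is 0.25 here
    # (the paper uses 0.5). `logits` are per-class capsule lengths in [0, 1].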
labels = tf.cast(labels,tf.float32)
loss = labels * tf.square(tf.maximum(0., 0.9 - logits)) + \
0.25 * (1.0 - labels) * tf.square(tf.maximum(0., logits - 0.1))
loss = tf.reduce_mean(tf.reduce_sum(loss, axis=1))
return loss
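
A minimal call sketch for the new helper, showing how extra keyword arguments reach focal_loss through get_default_value. This assumes TF 1.x graph mode; the placeholder shapes and two-class setup are illustrative, not part of this commit:

import tensorflow as tf
from common.loss import get_loss

logits = tf.placeholder(tf.float32, [None, 2])   # raw scores for 2 classes (illustrative)
labels = tf.placeholder(tf.int64, [None])        # sparse class ids
probs = tf.nn.softmax(logits)                    # focal_loss expects probabilities, not raw logits
loss = get_loss(probs, labels, type='focal_loss', labels_sparse=True, gamma=2.0, alpha=0.25)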
2 changes: 2 additions & 0 deletions encoder/__init__.py
@@ -16,6 +16,7 @@
from fasttext import FastText
from fast_attention_text import FastAttentionText
from han import HAN
from capsule import Capsule

encoder["cnn"] = CNN
encoder["dcnn"] = DCNN
@@ -29,6 +30,7 @@
encoder["fasttext"] = FastText
encoder["fast_attention_text"] = FastAttentionText
encoder["han"] = HAN
encoder["capsule"] = Capsule


#pair sentence encoder
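The registration above makes the new capsule encoder addressable by name like the existing ones. A minimal lookup sketch, assuming the package is importable as `encoder` and that the keyword arguments mirror Capsule.__init__ in encoder/capsule.py below (how the project's config actually supplies them is not shown in this diff):

from encoder import encoder

EncoderCls = encoder["capsule"]
enc = EncoderCls(maxlen=100, embedding_size=300, keep_prob=0.9, num_output=2)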
267 changes: 255 additions & 12 deletions encoder/capsule.py
@@ -1,13 +1,245 @@
import tensorflow as tf
import keras
from keras import backend as K
from utils import _conv2d_wrapper
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers.python.layers import initializers
import pdb
#refer:https://github.com/andyweizhao/capsule_text_classification/blob/master/network.py

epsilon = 1e-9

def softmax(x, axis=-1):
ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
return ex/K.sum(ex, axis=axis, keepdims=True)

def squash_v1(x, axis=-1):
s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
scale = K.sqrt(s_squared_norm)/ (0.5 + s_squared_norm)
return scale * x

def squash_v0(s, axis=-1, epsilon=1e-7, name=None):
s_squared_norm = K.sum(K.square(s), axis, keepdims=True) + K.epsilon()
safe_norm = K.sqrt(s_squared_norm)
scale = 1 - tf.exp(-safe_norm)
return scale * s / safe_norm

def routing(u_hat_vecs, beta_a, iterations, output_capsule_num, i_activations):
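    # Dynamic routing-by-agreement (Sabour et al., 2017). u_hat_vecs has shape
    # [batch, output_capsule_num, input_capsule_num, dim]; b holds the routing
    # logits, c = softmax(b) over the output-capsule axis gives the coupling
    # coefficients, each iteration squashes the coupling-weighted sum of
    # prediction vectors, and b grows where an output capsule agrees with the
    # predictions feeding it. The disabled `if False:` branch is a leftover
    # leaky-routing variant. Returns the final poses and their lengths.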
b = keras.backend.zeros_like(u_hat_vecs[:,:,:,0])
if i_activations is not None:
i_activations = i_activations[...,tf.newaxis]
for i in range(iterations):
if False:
leak = tf.zeros_like(b, optimize=True)
leak = tf.reduce_sum(leak, axis=1, keep_dims=True)
leaky_logits = tf.concat([leak, b], axis=1)
leaky_routing = tf.nn.softmax(leaky_logits, dim=1)
c = tf.split(leaky_routing, [1, output_capsule_num], axis=1)[1]
else:
c = softmax(b, 1)
# if i_activations is not None:
# tf.transpose(tf.transpose(c, perm=[0,2,1]) * i_activations, perm=[0,2,1])
outputs = squash_v1(K.batch_dot(c, u_hat_vecs, [2, 2]))
if i < iterations - 1:
b = b + K.batch_dot(outputs, u_hat_vecs, [2, 3])
poses = outputs
activations = K.sqrt(K.sum(K.square(poses), 2))
return poses, activations

def _matmul_broadcast(x, y, name):
"""Compute x @ y, broadcasting over the first `N - 2` ranks.
"""
with tf.variable_scope(name) as scope:
return tf.reduce_sum(
tf.nn.dropout(x[..., tf.newaxis] * y[..., tf.newaxis, :, :],1), axis=-2
)


def _get_variable_wrapper(
name, shape=None, dtype=None, initializer=None,
regularizer=None,
trainable=True,
collections=None,
caching_device=None,
partitioner=None,
validate_shape=True,
custom_getter=None
):
"""Wrapper over tf.get_variable().
"""

with tf.device('/cpu:0'):
var = tf.get_variable(
name, shape=shape, dtype=dtype, initializer=initializer,
regularizer=regularizer, trainable=trainable,
collections=collections, caching_device=caching_device,
partitioner=partitioner, validate_shape=validate_shape,
custom_getter=custom_getter
)
return var


def _get_weights_wrapper(
name, shape, dtype=tf.float32, initializer=initializers.xavier_initializer(),
weights_decay_factor=None
):
"""Wrapper over _get_variable_wrapper() to get weights, with weights decay factor in loss.
"""

weights = _get_variable_wrapper(
name=name, shape=shape, dtype=dtype, initializer=initializer
)

if weights_decay_factor is not None and weights_decay_factor > 0.0:

weights_wd = tf.multiply(
tf.nn.l2_loss(weights), weights_decay_factor, name=name + '/l2loss'
)

tf.add_to_collection('losses', weights_wd)

return weights


def _get_biases_wrapper(
name, shape, dtype=tf.float32, initializer=tf.constant_initializer(0.0)
):
"""Wrapper over _get_variable_wrapper() to get bias.
"""

biases = _get_variable_wrapper(
name=name, shape=shape, dtype=dtype, initializer=initializer
)

return biases


def _conv2d_wrapper(inputs, shape, strides, padding, add_bias, activation_fn, name, stddev=0.1):
"""Wrapper over tf.nn.conv2d().
"""

with tf.variable_scope(name) as scope:
kernel = _get_weights_wrapper(
name='weights', shape=shape, weights_decay_factor=0.0, #initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
)
output = tf.nn.conv2d(
inputs, filter=kernel, strides=strides, padding=padding, name='conv'
)
if add_bias:
biases = _get_biases_wrapper(
name='biases', shape=[shape[-1]]
)
output = tf.add(
output, biases, name='biasAdd'
)
if activation_fn is not None:
output = activation_fn(
output, name='activation'
)

return output


def _separable_conv2d_wrapper(inputs, depthwise_shape, pointwise_shape, strides, padding, add_bias, activation_fn, name):
"""Wrapper over tf.nn.separable_conv2d().
"""

with tf.variable_scope(name) as scope:
dkernel = _get_weights_wrapper(
name='depthwise_weights', shape=depthwise_shape, weights_decay_factor=0.0
)
pkernel = _get_weights_wrapper(
name='pointwise_weights', shape=pointwise_shape, weights_decay_factor=0.0
)
output = tf.nn.separable_conv2d(
input=inputs, depthwise_filter=dkernel, pointwise_filter=pkernel,
strides=strides, padding=padding, name='conv'
)
if add_bias:
biases = _get_biases_wrapper(
name='biases', shape=[pointwise_shape[-1]]
)
output = tf.add(
output, biases, name='biasAdd'
)
if activation_fn is not None:
output = activation_fn(
output, name='activation'
)

return output


def _depthwise_conv2d_wrapper(inputs, shape, strides, padding, add_bias, activation_fn, name):
"""Wrapper over tf.nn.depthwise_conv2d().
"""

with tf.variable_scope(name) as scope:
dkernel = _get_weights_wrapper(
name='depthwise_weights', shape=shape, weights_decay_factor=0.0
)
output = tf.nn.depthwise_conv2d(
inputs, filter=dkernel, strides=strides, padding=padding, name='conv'
)
if add_bias:
d_ = output.get_shape()[-1].value
biases = _get_biases_wrapper(
name='biases', shape=[d_]
)
output = tf.add(
output, biases, name='biasAdd'
)
if activation_fn is not None:
output = activation_fn(
output, name='activation'
)

return output


def vec_transformationByConv(poses, input_capsule_dim, input_capsule_num, output_capsule_dim, output_capsule_num):
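    # This helper and vec_transformationByMat below turn input capsule poses into
    # prediction vectors u_hat. Here a single [input_dim, output_dim * output_num]
    # transform is shared across all input capsules, implemented as a kernel-size-1
    # conv1d over the capsule axis; vec_transformationByMat instead keeps a separate
    # transform per (input, output) capsule pair unless shared=True.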
kernel = _get_weights_wrapper(
name='weights', shape=[1, input_capsule_dim, output_capsule_dim*output_capsule_num], weights_decay_factor=0.0
)
tf.logging.info('poses: {}'.format(poses.get_shape()))
tf.logging.info('kernel: {}'.format(kernel.get_shape()))
u_hat_vecs = keras.backend.conv1d(poses, kernel)
u_hat_vecs = keras.backend.reshape(u_hat_vecs, (-1, input_capsule_num, output_capsule_num, output_capsule_dim))
u_hat_vecs = keras.backend.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
return u_hat_vecs

def vec_transformationByMat(poses, input_capsule_dim, input_capsule_num, output_capsule_dim, output_capsule_num, shared=True):
inputs_poses_shape = poses.get_shape().as_list()
poses = poses[..., tf.newaxis, :]
poses = tf.tile(
poses, [1, 1, output_capsule_num, 1]
)
if shared:
kernel = _get_weights_wrapper(
name='weights', shape=[1, 1, output_capsule_num, output_capsule_dim, input_capsule_dim], weights_decay_factor=0.0
)
kernel = tf.tile(
kernel, [inputs_poses_shape[0], input_capsule_num, 1, 1, 1]
)
else:
kernel = _get_weights_wrapper(
name='weights', shape=[1, input_capsule_num, output_capsule_num, output_capsule_dim, input_capsule_dim], weights_decay_factor=0.0
)
kernel = tf.tile(
kernel, [inputs_poses_shape[0], 1, 1, 1, 1]
)
tf.logging.info('poses: {}'.format(poses[...,tf.newaxis].get_shape()))
tf.logging.info('kernel: {}'.format(kernel.get_shape()))
u_hat_vecs = tf.squeeze(tf.matmul(kernel, poses[...,tf.newaxis]),axis=-1)
u_hat_vecs = keras.backend.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
return u_hat_vecs


class Capsule():
def __init__(self, **kwargs):
self.output_size = 128
pass
self.seq_length = kwargs['maxlen']
self.embedding_size = kwargs['embedding_size']
self.keep_prob = kwargs['keep_prob']
self.num_output = kwargs['num_output']

def capsules_init(self, inputs, shape, strides, padding, pose_shape, add_bias, name):
with tf.variable_scope(name):
@@ -90,12 +322,14 @@ def capsule_conv_layer(self, nets, shape, strides, iterations, name):
)
poses, activations = routing(u_hat_vecs, beta_a, iterations, shape[3], i_activations_patches)
poses = tf.reshape(poses, [
- inputs_poses_shape[0], inputs_poses_shape[1],
#inputs_poses_shape[0], inputs_poses_shape[1],
-1, inputs_poses_shape[1],
inputs_poses_shape[2], shape[3],
inputs_poses_shape[-1]]
)
activations = tf.reshape(activations, [
- inputs_poses_shape[0],inputs_poses_shape[1],
#inputs_poses_shape[0],inputs_poses_shape[1],
-1,inputs_poses_shape[1],
inputs_poses_shape[2],shape[3]]
)
nets = poses, activations
@@ -125,7 +359,7 @@ def capsule_model_B(self, X):
for _, ngram in enumerate([3,4,5]):
with tf.variable_scope('capsule_'+str(ngram)):
nets = _conv2d_wrapper(
- X, shape=[ngram, 300, 1, 32], strides=[1, 2, 1, 1], padding='VALID',
X, shape=[ngram, self.embedding_size, 1, 32], strides=[1, 2, 1, 1], padding='VALID',
add_bias=True, activation_fn=tf.nn.relu, name='conv1'
)
tf.logging.info('output shape: {}'.format(nets.get_shape()))
@@ -135,16 +369,16 @@
nets = self.capsule_conv_layer(nets, shape=[3, 1, 16, 16], strides=[1, 1, 1, 1], iterations=3, name='conv2')
nets = self.capsule_flatten(nets)
poses, activations = self.capsule_fc_layer(nets,
- self.output_size, 3, 'fc2')
self.num_output, 3, 'fc2')
poses_list.append(poses)
poses = tf.reduce_mean(tf.convert_to_tensor(poses_list), axis=0)
activations = K.sqrt(K.sum(K.square(poses), 2))
- return poses
return activations

def capsule_model_A(self, X):
with tf.variable_scope('capsule_'+str(3)):
nets = _conv2d_wrapper(
- X, shape=[3, 300, 1, 32], strides=[1, 2, 1, 1], padding='VALID',
X, shape=[3, self.embedding_size, 1, 32], strides=[1, 2, 1, 1], padding='VALID',
add_bias=True, activation_fn=tf.nn.relu, name='conv1'
)
tf.logging.info('output shape: {}'.format(nets.get_shape()))
@@ -153,8 +387,17 @@ def capsule_model_A(self, X):
name='primary')
nets = self.capsule_conv_layer(nets, shape=[3, 1, 16, 16], strides=[1, 1, 1, 1], iterations=3, name='conv2')
nets = self.capsule_flatten(nets)
- poses, activations = self.capsule_fc_layer(nets, self.output_size, 3, 'fc2')
- return poses
poses, activations = self.capsule_fc_layer(nets, self.num_output, 3, 'fc2')
return activations

def feed_dict(self, **kwargs):
feed_dict = {}
return feed_dict

def pb_feed_dict(self, graph, **kwargs):
feed_dict = {}
return feed_dict

def __call__(self, embed, reuse = tf.AUTO_REUSE):
- return capsule_model_A(embed)
embed = tf.expand_dims(embed, -1)
return self.capsule_model_A(embed)
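
End to end, the encoder's activations (per-class capsule lengths in [0, 1]) are what the new margin_loss in common/loss.py consumes. A minimal wiring sketch, assuming TF 1.x graph mode and illustrative shapes; the training loop and config plumbing are not part of this commit:

import tensorflow as tf
from encoder.capsule import Capsule
from common.loss import get_loss

embed = tf.placeholder(tf.float32, [None, 100, 300])   # [batch, maxlen, embedding_size]
labels = tf.placeholder(tf.int64, [None])              # sparse class ids
capsule = Capsule(maxlen=100, embedding_size=300, keep_prob=0.9, num_output=2)
activations = capsule(embed)                           # [batch, num_output]
loss = get_loss(activations, labels, type='margin_loss', labels_sparse=True)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)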
