A loss that does not change during training can be caused by NaN values in the data or in the tensors computed from it.
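Before touching the model, it is worth verifying that NaN really is the culprit; a minimal check with NumPy (x_train is only a placeholder for the real training array):

import numpy as np

# x_train stands in for the actual training data; a single NaN anywhere in it
# will propagate through the forward pass and the loss will stop improving.
x_train = np.array([[1.0, 2.0], [np.nan, 3.0]], dtype=np.float32)
print("NaN in inputs:", np.isnan(x_train).any())  # True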
For example, the original code was:
from keras import backend as K
from keras.layers import Layer
import tensorflow as tf

class MeanPool(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MeanPool, self).__init__(**kwargs)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        if mask is not None:
            # mask (batch, time)
            mask = K.cast(mask, K.floatx())
            # mask (batch, x_dim, time)
            mask = K.repeat(mask, x.shape[-1])
            # mask (batch, time, x_dim)
            mask = tf.transpose(mask, [0, 2, 1])
            x = x * mask
            return tf.divide(K.sum(x, axis=1), K.sum(mask, axis=1))
        else:
            return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        # remove temporal dimension
        return (input_shape[0], input_shape[2])
When the division is performed, the sum of the mask can be 0 (for a sample whose timesteps are all masked/padded), which produces NaN values, so the denominator has to be guarded.
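To see the failure mode in isolation, here is a small sketch (the shapes are arbitrary, chosen only for illustration): when every timestep of a sample is masked, both the numerator and the denominator are zero, and 0/0 evaluates to NaN.

import numpy as np

# One sample whose mask is all zeros, e.g. an entirely padded sequence.
x = np.zeros((1, 4, 3), dtype=np.float32)     # (batch, time, x_dim)
mask = np.zeros((1, 4, 3), dtype=np.float32)  # broadcast mask, every step padded

numerator = (x * mask).sum(axis=1)   # [[0. 0. 0.]]
denominator = mask.sum(axis=1)       # [[0. 0. 0.]]
print(numerator / denominator)       # 0/0 -> [[nan nan nan]], the same result tf.divide gives here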
The fix is to add a small constant to the denominator (K.epsilon() would also work). The modified code is as follows:
class MeanPool(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MeanPool, self).__init__(**kwargs)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        if mask is not None:
            # mask (batch, time)
            mask = K.cast(mask, K.floatx())
            # mask (batch, x_dim, time)
            mask = K.repeat(mask, x.shape[-1])
            # mask (batch, time, x_dim)
            mask = tf.transpose(mask, [0, 2, 1])
            x = x * mask
            # add a small constant so an all-masked sample cannot produce 0/0
            return tf.divide(K.sum(x, axis=1), K.sum(mask, axis=1) + 0.000001)
            # return K.sum(x, axis=1)
        else:
            return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        # remove temporal dimension
        return (input_shape[0], input_shape[2])
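For completeness, a minimal usage sketch (the vocabulary size, embedding dimension, sequence length, and downstream layers are assumptions, not part of the original code): MeanPool is meant to sit after a mask-producing layer such as Embedding(mask_zero=True), so that padded timesteps are excluded from the average.

from keras.models import Sequential
from keras.layers import Embedding, Dense

model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=100,
              mask_zero=True),                 # emits the padding mask
    MeanPool(),                                # averages only the non-padded timesteps
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')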