A loss that stays flat during training may be caused by NaN values appearing in the data.
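
One quick way to confirm this is to check the training arrays before fitting, and to abort training as soon as the loss turns into NaN (x_train, y_train and model are placeholder names here, not part of the original code):

import numpy as np
from keras.callbacks import TerminateOnNaN

# True if any feature / label value is NaN
print(np.isnan(x_train).any(), np.isnan(y_train).any())

# stop fitting the moment the loss itself becomes NaN
model.fit(x_train, y_train, callbacks=[TerminateOnNaN()])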

In this case, the original code was:

from keras import backend as K
from keras.layers import Layer
import tensorflow as tf


class MeanPool(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MeanPool, self).__init__(**kwargs)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        if mask is not None:
            # mask (batch, time)
            mask = K.cast(mask, K.floatx())
            # mask (batch, x_dim, time)
            mask = K.repeat(mask, x.shape[-1])
            # mask (batch, time, x_dim)
            mask = tf.transpose(mask, [0, 2, 1])
            x = x * mask
            return tf.divide(K.sum(x, axis=1), K.sum(mask, axis=1))
        else:
            return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        # remove temporal dimension
        return (input_shape[0], input_shape[2])

When the divide is performed, K.sum(mask, axis=1) can be 0 (for a sample whose timesteps are all masked), so the result is NaN; the denominator therefore needs to be guarded.
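
A minimal NumPy sketch of the failure mode (the shapes are illustrative): when every timestep of a sample is masked, both the numerator and the denominator sum to 0, and 0/0 yields NaN, which then propagates into the loss:

import numpy as np

x = np.random.rand(1, 3, 4).astype("float32")  # (batch, time, x_dim)
mask = np.zeros((1, 3, 4), dtype="float32")    # every timestep masked out

out = np.sum(x * mask, axis=1) / np.sum(mask, axis=1)
print(out)  # [[nan nan nan nan]]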

The modified code is as follows:

class MeanPool(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MeanPool, self).__init__(**kwargs)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        if mask is not None:
            # mask (batch, time)
            mask = K.cast(mask, K.floatx())
            # mask (batch, x_dim, time)
            mask = K.repeat(mask, x.shape[-1])
            # mask (batch, time, x_dim)
            mask = tf.transpose(mask, [0, 2, 1])
            x = x * mask
            # the small constant keeps the denominator non-zero
            # when every timestep of a sample is masked
            return tf.divide(K.sum(x, axis=1), K.sum(mask, axis=1) + 0.000001)
        else:
            return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        # remove temporal dimension
        return (input_shape[0], input_shape[2])
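
For completeness, a sketch of how such a layer is typically wired up so that a mask is actually present at call time; this assumes an Embedding layer with mask_zero=True, and the layer sizes below are made up for illustration:

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=128, mask_zero=True))  # emits the mask
model.add(LSTM(64, return_sequences=True))  # mask-aware, passes the mask through
model.add(MeanPool())                       # masked mean over the time dimension
model.add(Dense(1, activation="sigmoid"))
model.compile(optimizer="adam", loss="binary_crossentropy")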