def multi_target_attention(q, k, v, m, seq_name, num_heads=4):
    """Attend from a single query vector over a masked key/value sequence.

    The last axis of ``q``, ``k`` and ``v`` is split into ``num_heads`` equal
    slices that are stacked along the batch axis, attention is computed per
    head, and the heads are concatenated back along the last axis.

    Args:
        q: Query tensor, [B, H].
        k: Key tensor, [B, T, H].
        v: Value tensor, rank 3 with the same B and T as ``k``
            (presumably [B, T, H] like the keys -- confirm with callers).
        m: Mask, [B, T]. Positions whose value equals 1 are attended to;
            every other position is suppressed before the softmax.
        seq_name: Not used by this function; kept so existing call sites
            keep working.
        num_heads: Number of attention heads. Must evenly divide the last
            axis of ``q``, ``k`` and ``v`` (required by ``tf.split``).

    Returns:
        Tensor of shape [B, Hv], where Hv is the size of the last axis of
        ``v``.
    """
    # [B, H] -> [B, 1, H]: treat the query as a length-1 sequence.
    q = tf.expand_dims(q, axis=1)

    # Split the feature axis into heads and stack the heads on the batch axis:
    # [B, L, H] -> [B * num_heads, L, H / num_heads].
    q = tf.concat(tf.split(q, num_heads, axis=2), axis=0)
    k = tf.concat(tf.split(k, num_heads, axis=2), axis=0)
    v = tf.concat(tf.split(v, num_heads, axis=2), axis=0)

    # NOTE(review): layer_normal is defined elsewhere in the project;
    # presumably it applies layer normalization per head -- confirm.
    q = layer_normal(q)
    k = layer_normal(k)

    # Raw attention logits: [B * num_heads, 1, T].
    scores = tf.matmul(q, k, transpose_b=True)

    # Scale by 1 / sqrt(d_k), where d_k is the per-head key depth (last axis).
    # The previous code read axis 1, which is the sequence length T, so the
    # softmax temperature depended on how long the padded sequence was.
    key_depth = tf.shape(k)[-1]
    scores = scores * tf.pow(tf.cast(key_depth, scores.dtype), -0.5)

    # [B, T] -> [B, 1, T] -> [B * num_heads, 1, T]; tiling along axis 0
    # follows the same head-major ordering produced by the concat above.
    m = tf.expand_dims(m, axis=1)
    keep = tf.tile(m, [num_heads, 1, 1])
    keep = tf.equal(keep, tf.ones_like(keep))

    # Replace masked logits with a very large negative value so that they
    # receive (almost) zero weight after the softmax.
    paddings = tf.fill(
        tf.shape(scores), tf.constant(-2 ** 32 + 1, dtype=scores.dtype)
    )
    scores = tf.where(keep, scores, paddings)
    weights = tf.nn.softmax(scores)

    # Weighted sum of the values: [B * num_heads, 1, Hv / num_heads].
    outputs = tf.matmul(weights, v)

    # Undo the head stacking: [B * num_heads, 1, Hv / num_heads] -> [B, 1, Hv].
    outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)

    # Drop the length-1 query axis: [B, 1, Hv] -> [B, Hv].
    outputs = tf.squeeze(outputs, axis=1)
    return outputs
multi_target_attention
TensorFlow相关文章
最近热门
- 模型学习率预热 warm up
- Pandas实战:将分组后的结果展平并以列的形式展示
- 推荐系统 | LFM(Latent Factor Model) 隐因子模型的原理与应用
- Kendall秩相关系数,肯德尔秩相关系数
- 特征工程、特征设计、特征梳理
- mysql 显示运行的线程
- TimesNet:一个用于时间序列分析的通用基础模型
- 腾讯终身交叉网络LCN模型:Cross-Domain LifeLong Sequential Modeling for Online Click-Through Rate Prediction
- Can't reconnect until invalid transaction is rolled back
- PSI(Population Stability Index,群体稳定性指标)
最常浏览
- 016 推荐系统 | 排序学习(LTR - Learning To Rank)
- 偏微分符号
- i.i.d(又称IID)
- 利普希茨连续条件(Lipschitz continuity)
- (error) MOVED 原因和解决方案
- TextCNN详解
- 找不到com.google.protobuf.GeneratedMessageV3的类文件
- Deployment failed: repository element was not specified in the POM inside distributionManagement
- cannot access com.google.protobuf.GeneratedMessageV3 解决方案
- CLUSTERDOWN Hash slot not served 问题原因和解决办法
×