当前位置: 首页 > news >正文

网站建设培训四川设计制作小船

"""Transformer from scratch in PyTorch (encoder-decoder, "Attention Is All You Need").

Reconstructed from a scraped article whose HTML-to-text conversion stripped all
'=' operators, string quotes and newlines, and duplicated the listing.  The
implementation follows the tutorial the article cites.

References (from the original article):
    [1] https://www.youtube.com/watch?v=U0s0f995w14
    [2] https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/more_advanced/transformer_from_scratch/transformer_from_scratch.py
    [3] https://arxiv.org/abs/1706.03762
    [4] https://www.youtube.com/watch?v=pkVwUVEHmfI
"""
import torch
import torch.nn as nn


class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention."""

    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (self.head_dim * heads == embed_size), "Embed size needs to be div by heads"

        # Per-head projections, applied after the embedding is split into heads.
        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        N = query.shape[0]  # batch size (number of training examples)
        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

        # Split embedding into self.heads pieces.
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = query.reshape(N, query_len, self.heads, self.head_dim)

        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(queries)

        # Q·K^T via Einstein summation.
        # queries: (N, query_len, heads, head_dim)
        # keys:    (N, key_len, heads, head_dim)
        # energy:  (N, heads, query_len, key_len)
        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])

        if mask is not None:
            # masked_fill writes where the condition is True, i.e. at positions
            # where mask == 0; a large negative score zeroes them after softmax.
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        # NOTE(review): the tutorial scales by sqrt(embed_size); the paper uses
        # sqrt(head_dim).  Kept as-is to preserve the original behavior.
        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)

        # attention: (N, heads, query_len, key_len)
        # values:    (N, value_len, heads, head_dim)
        # einsum out: (N, query_len, heads, head_dim), then flatten last two dims.
        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim
        )

        out = self.fc_out(out)
        return out


class TransformerBlock(nn.Module):
    """Attention + position-wise feed-forward, each with residual + LayerNorm."""

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)
        x = self.dropout(self.norm1(attention + query))  # residual around attention
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))      # residual around FFN
        return out


class Encoder(nn.Module):
    """Word + learned positional embeddings followed by a stack of TransformerBlocks."""

    def __init__(self, src_vocab_size, embed_size, num_layers, heads, device,
                 forward_expansion, dropout, max_length):
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList(
            [
                TransformerBlock(embed_size, heads, dropout=dropout,
                                 forward_expansion=forward_expansion)
                for _ in range(num_layers)
            ]
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            # In the encoder, value, key and query are all the same tensor.
            out = layer(out, out, out, mask)
        return out


class DecoderBlock(nn.Module):
    """Masked self-attention, then a TransformerBlock doing cross-attention."""

    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(embed_size, heads, dropout, forward_expansion)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)  # causal self-attention
        query = self.dropout(self.norm(attention + x))
        # value/key are the encoder output (cross-attention), masked by src_mask.
        out = self.transformer_block(value, key, query, src_mask)
        return out


class Decoder(nn.Module):
    """Embeddings + DecoderBlock stack + final projection to target vocabulary."""

    def __init__(self, trg_vocab_size, embed_size, num_layers, heads,
                 forward_expansion, dropout, device, max_length):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)
        out = self.fc_out(x)  # (N, trg_len, trg_vocab_size) logits
        return out


class Transformer(nn.Module):
    """Full encoder-decoder Transformer with padding and causal masks."""

    def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx,
                 embed_size=256, num_layers=6, forward_expansion=4, heads=8,
                 dropout=0, device="cuda", max_length=100):
        super(Transformer, self).__init__()
        self.encoder = Encoder(src_vocab_size, embed_size, num_layers, heads,
                               device, forward_expansion, dropout, max_length)
        self.decoder = Decoder(trg_vocab_size, embed_size, num_layers, heads,
                               forward_expansion, dropout, device, max_length)
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        # (N, 1, 1, src_len): False (0) at padding positions.
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        # (N, 1, trg_len, trg_len): lower-triangular causal mask.
        N, trg_len = trg.shape
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(N, 1, trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    x = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0], [1, 8, 7, 3, 4, 5, 6, 7, 2]]).to(device)
    trg = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0], [1, 5, 6, 2, 4, 7, 6, 2]]).to(device)
    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = 10
    trg_vocab_size = 10
    model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx,
                        device=device).to(device)
    out = model(x, trg[:, :-1])
    print(out.shape)
http://www.hkea.cn/news/14267959/

相关文章:

  • 企业网站建设 论文浙江省住房和城乡建设厅电话
  • 徐州好点的做网站的公司有哪些做宣传类网站需要什么资质
  • 福州网站设计哪里比较好android手机版下载
  • 凡科建站的怎么取消手机网站网站开发配置表格
  • 佛山网站建设哪个好北京网站优化公司 卓立海创
  • 免费建设论坛网站公司网站程序
  • 东营网站关键词那个网站的系统好
  • 网站建设朋友圈怎么写商铺装修找谁
  • 新站网站收录减少怎么做网站关键词视频
  • 钓鱼网站开发系列教程wordpress编辑器文件大小
  • 网站建设推广总结如何设计响应式布局网站
  • 应届生在淮北招的网站建设类型岗位网站推广工作职责
  • 江油专业网站建设咨询中国最大的建材网站
  • 上海 房地产网站建设网站开发项目详细计划
  • 杭州市建设监理协会网站苏州论坛
  • wordpress 站群插件wordpress ajax加载
  • wordpress地址跟站点长沙建站找有为太极就治就
  • 教育企业网站源码xampp wordpress安装教程
  • 公司网站怎么注册ui设计师面试
  • 做网站没有数据库哈尔滨网站建设培训班
  • 深圳网站设计教程wordpress 主题和搭建
  • 网站模板怎么导入双流规划建设管理局网站
  • 网站开发 为什么要用缩略图汕头企业网站建设公司
  • 太原网站制作定制开发wordpress博客优秀
  • 山西省住房建设厅网站wordpress 搜索调用
  • 网站及微站建设合同验收盗版小说网站怎么做的
  • 二级学院网站建设方案澄海建网站
  • 微网站开发费用网站管理后台登录地址
  • 上传文档到网站上怎么做做淘宝店铺有哪些好的网站
  • 做微商代理去哪个网站淘宝网页视频如何下载