当前位置: 首页 > news >正文

北京建设商业网站广州天河网站制作

"""Transformer (encoder-decoder) built from scratch in PyTorch.

Sections, in the order the original article presents them:
    1. Self-attention
    2. Transformer block (shared by the encoder and the decoder)
    3. Encoder
    4. Decoder block
    5. Decoder
    6. The full Transformer
    7. Runnable example (``python this_file.py``)

References:
    [1] https://www.youtube.com/watch?v=U0s0f995w14
    [2] https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/more_advanced/transformer_from_scratch/transformer_from_scratch.py
    [3] https://arxiv.org/abs/1706.03762
    [4] https://www.youtube.com/watch?v=pkVwUVEHmfI
"""

from typing import Optional

import torch
import torch.nn as nn


class SelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The embedding is split into ``heads`` chunks of ``head_dim`` features.
    The same ``head_dim x head_dim`` bias-free projection is applied to every
    head for values, keys and queries; the concatenated heads then go through
    ``fc_out``.

    Args:
        embed_size: Size of the token embedding; must be divisible by ``heads``.
        heads: Number of attention heads.
    """

    def __init__(self, embed_size, heads):
        super().__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            self.head_dim * heads == embed_size
        ), "Embed size needs to be div by heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(
        self,
        values: torch.Tensor,
        keys: torch.Tensor,
        query: torch.Tensor,
        mask: Optional[torch.Tensor],
    ) -> torch.Tensor:
        """Attend from ``query`` over ``keys``/``values``.

        Args:
            values: ``(N, value_len, embed_size)``.
            keys: ``(N, key_len, embed_size)``.
            query: ``(N, query_len, embed_size)``.
            mask: Broadcastable to ``(N, heads, query_len, key_len)``;
                positions where ``mask == 0`` are excluded. ``None`` disables
                masking.

        Returns:
            Tensor of shape ``(N, query_len, embed_size)``.
        """
        N = query.shape[0]  # number of examples in the batch
        value_len, key_len, query_len = (
            values.shape[1],
            keys.shape[1],
            query.shape[1],
        )

        # Split the embedding into self.heads pieces, then project each head.
        values = self.values(
            values.reshape(N, value_len, self.heads, self.head_dim)
        )
        keys = self.keys(keys.reshape(N, key_len, self.heads, self.head_dim))
        queries = self.queries(
            query.reshape(N, query_len, self.heads, self.head_dim)
        )

        # Batched matrix product written in Einstein notation.
        # queries: (N, query_len, heads, head_dim)
        # keys:    (N, key_len, heads, head_dim)
        # energy:  (N, heads, query_len, key_len)
        energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)

        # Scale by sqrt(d_k), the per-head key dimension, as in reference [3]
        # (not by sqrt(embed_size)).
        energy = energy / (self.head_dim ** 0.5)

        if mask is not None:
            # Use the most negative finite value of the working dtype so the
            # fill also works in reduced precision, where -1e20 overflows.
            energy = energy.masked_fill(
                mask == 0, torch.finfo(energy.dtype).min
            )

        attention = torch.softmax(energy, dim=3)

        # attention: (N, heads, query_len, key_len)
        # values:    (N, value_len, heads, head_dim)
        # einsum ->  (N, query_len, heads, head_dim), then flatten the heads.
        out = torch.einsum("nhql,nlhd->nqhd", attention, values).reshape(
            N, query_len, self.heads * self.head_dim
        )

        return self.fc_out(out)


class TransformerBlock(nn.Module):
    """Attention + feed-forward block used by both the encoder and decoder.

    Each sub-layer is followed by a residual connection, LayerNorm and dropout.

    Args:
        embed_size: Size of the token embedding.
        heads: Number of attention heads.
        dropout: Dropout probability.
        forward_expansion: Width multiplier of the feed-forward hidden layer.
    """

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)

        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        """Return the block output, shaped like ``query``."""
        attention = self.attention(value, key, query, mask)

        # Residual connection around attention, then norm and dropout.
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        # Residual connection around the feed-forward network.
        return self.dropout(self.norm2(forward + x))


class Encoder(nn.Module):
    """Token + learned positional embeddings followed by Transformer blocks.

    Args:
        src_vocab_size: Size of the source vocabulary.
        embed_size: Size of the token embedding.
        num_layers: Number of stacked ``TransformerBlock`` layers.
        heads: Number of attention heads.
        device: Stored on the instance for compatibility; tensors created in
            ``forward`` follow the input's device instead.
        forward_expansion: Width multiplier of the feed-forward hidden layer.
        dropout: Dropout probability.
        max_length: Number of positions in the positional embedding table.
    """

    def __init__(
        self,
        src_vocab_size,
        embed_size,
        num_layers,
        heads,
        device,
        forward_expansion,
        dropout,
        max_length,
    ):
        super().__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout=dropout,
                    forward_expansion=forward_expansion,
                )
                for _ in range(num_layers)
            ]
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        """Encode token ids ``x`` of shape ``(N, seq_length)``.

        Returns:
            Tensor of shape ``(N, seq_length, embed_size)``.
        """
        N, seq_length = x.shape
        # Build the position ids on the input's device so the module works
        # wherever the model and data actually live.
        positions = torch.arange(seq_length, device=x.device).expand(
            N, seq_length
        )

        out = self.dropout(
            self.word_embedding(x) + self.position_embedding(positions)
        )

        for layer in self.layers:
            # Self-attention: value, key and query are all the same tensor.
            out = layer(out, out, out, mask)

        return out


class DecoderBlock(nn.Module):
    """Masked self-attention followed by a cross-attention TransformerBlock.

    Args:
        embed_size: Size of the token embedding.
        heads: Number of attention heads.
        forward_expansion: Width multiplier of the feed-forward hidden layer.
        dropout: Dropout probability.
        device: Accepted for interface compatibility; not used by the block.
    """

    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(
            embed_size, heads, dropout, forward_expansion
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        """Run one decoder layer.

        Args:
            x: Decoder input, ``(N, trg_len, embed_size)``.
            value: Values for cross-attention (the encoder output).
            key: Keys for cross-attention (the encoder output).
            src_mask: Mask applied in cross-attention.
            trg_mask: Mask applied in the decoder self-attention.
        """
        attention = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(attention + x))
        return self.transformer_block(value, key, query, src_mask)


class Decoder(nn.Module):
    """Decoder blocks on top of word and positional embeddings.

    Args:
        trg_vocab_size: Size of the target vocabulary.
        embed_size: Size of the token embedding.
        num_layers: Number of stacked ``DecoderBlock`` layers.
        heads: Number of attention heads.
        forward_expansion: Width multiplier of the feed-forward hidden layer.
        dropout: Dropout probability.
        device: Stored on the instance for compatibility; tensors created in
            ``forward`` follow the input's device instead.
        max_length: Number of positions in the positional embedding table.
    """

    def __init__(
        self,
        trg_vocab_size,
        embed_size,
        num_layers,
        heads,
        forward_expansion,
        dropout,
        device,
        max_length,
    ):
        super().__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                DecoderBlock(
                    embed_size, heads, forward_expansion, dropout, device
                )
                for _ in range(num_layers)
            ]
        )
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        """Decode target token ids ``x`` of shape ``(N, seq_length)``.

        Returns:
            Tensor of shape ``(N, seq_length, trg_vocab_size)``; no softmax is
            applied here.
        """
        N, seq_length = x.shape
        positions = torch.arange(seq_length, device=x.device).expand(
            N, seq_length
        )
        x = self.dropout(
            self.word_embedding(x) + self.position_embedding(positions)
        )

        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        return self.fc_out(x)


class Transformer(nn.Module):
    """Encoder-decoder Transformer.

    Args:
        src_vocab_size: Size of the source vocabulary.
        trg_vocab_size: Size of the target vocabulary.
        src_pad_idx: Padding token id in the source; masked out of attention.
        trg_pad_idx: Padding token id in the target (stored, not used for
            masking).
        embed_size: Size of the token embedding.
        num_layers: Number of encoder layers and of decoder layers.
        forward_expansion: Width multiplier of the feed-forward hidden layer.
        heads: Number of attention heads.
        dropout: Dropout probability.
        device: Kept for backward compatibility and stored on the instance.
            Masks and position ids are created on the input tensors' device,
            so the default does not require CUDA to be available.
        max_length: Maximum sequence length supported by the positional
            embeddings.
    """

    def __init__(
        self,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        trg_pad_idx,
        embed_size=256,
        num_layers=6,
        forward_expansion=4,
        heads=8,
        dropout=0,
        device="cuda",
        max_length=100,
    ):
        super().__init__()

        self.encoder = Encoder(
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length,
        )
        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length,
        )

        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Return a boolean mask ``(N, 1, 1, src_len)``, False at padding."""
        return (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

    def make_trg_mask(self, trg):
        """Return a lower-triangular mask of shape ``(N, 1, trg_len, trg_len)``."""
        N, trg_len = trg.shape
        # Created directly on the target's device; no CPU round trip.
        causal = torch.tril(torch.ones((trg_len, trg_len), device=trg.device))
        return causal.expand(N, 1, trg_len, trg_len)

    def forward(self, src, trg):
        """Map source and target token ids to target-vocabulary logits.

        Args:
            src: ``(N, src_len)`` integer token ids.
            trg: ``(N, trg_len)`` integer token ids.

        Returns:
            Tensor of shape ``(N, trg_len, trg_vocab_size)``.
        """
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        return self.decoder(trg, enc_src, src_mask, trg_mask)


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    x = torch.tensor(
        [[1, 5, 6, 4, 3, 9, 5, 2, 0], [1, 8, 7, 3, 4, 5, 6, 7, 2]]
    ).to(device)
    trg = torch.tensor(
        [[1, 7, 4, 3, 5, 9, 2, 0], [1, 5, 6, 2, 4, 7, 6, 2]]
    ).to(device)

    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = 10
    trg_vocab_size = 10

    model = Transformer(
        src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx, device=device
    ).to(device)
    out = model(x, trg[:, :-1])
    print(out.shape)
http://www.hkea.cn/news/14277484/

相关文章:

  • 茶网站设计素材下载网站seo跟短视频
  • 苏州网站开发公司兴田德润在那里做一个赚钱的网站
  • 江西建设银行招聘网站外资企业
  • 在线搭建网站扬州建设工程招标信息网
  • 张家口认证助手app南昌seo推广优化
  • 软件最全的网站营销 推广 网站
  • 网站后台如何添加新闻wordpress post meta
  • 青岛建设管理局网站wordpress怎么设计
  • 铲车找事做找哪些网站wordpress主题升级文件
  • 丽水做网站的公司启明星网站建设
  • 苍梧网站建设wordpress的安装界面
  • 杭州企业如何建网站新浪云上传wordpress
  • 新网站百度有审核期wordpress添加页面模块
  • seo外贸网站做网站需要哪类商标
  • 网站管理密码资阳网站设计
  • 德阳网站建设平台安徽鸿顺鑫城建设集团网站
  • 腾讯云怎么建网站建站系统破解
  • p2p网站建设小微金融php电子商务网站开发
  • 中山网站建设策划方案如何做vip视频网站
  • 福州企业制作网站遵义网签备案查询系统
  • 女的和女的做那个视频网站网站访问人数代码
  • 做个网站需要学会什么东莞企业网站开发
  • 织梦 帝国 php cms 媒体网站 哪个烟台 做网站
  • 十八把网站做扇子免费下载微信
  • 苏中建设集团网站官网深圳网站建设选哪家
  • 用软件做模板下载网站大型网站 cms
  • 邢台县教育局五库建设网站高效网站推广设计
  • 怎么申请建立个人免费网站天津哪里有做网站的
  • 请人做网站注意事项网站标题的关键字怎么写
  • 电力建设工程质量监督总网站周口高端网站建设