The First Part of the Transformer - the Input Section

1. Inputs

'''
Token embedding layer: maps token indices to dense vectors.
'''
import math

import torch.nn as nn


class Embeddings(nn.Module):
    def __init__(self, d_model, vocab):
        '''
        d_model: dimensionality of the token embeddings
        vocab:   size of the vocabulary
        '''
        super(Embeddings, self).__init__()
        self.d_model = d_model
        self.lut = nn.Embedding(vocab, d_model)  # lookup table of shape (vocab, d_model)

    def forward(self, x):
        x = self.lut(x)
        # Scale by sqrt(d_model) so the embeddings are not dwarfed by the
        # positional encodings that are added to them later.
        x = x * math.sqrt(self.d_model)
        return x

2. Test for Inputs

from torch.autograd import Variable
import torch

from Embedding import Embeddings

d_model = 8   # embedding dimension
vocab = 4     # vocabulary size
emb = Embeddings(d_model, vocab)

# One sequence of four tokens, all with index 1; shape (1, 4).
x = Variable(torch.LongTensor([[1, 1, 1, 1]]))
y = emb(x)

print(y)
print(y.shape)  # torch.Size([1, 4, 8])

(Output: the embedded tensor y, followed by its shape torch.Size([1, 4, 8]).)
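Because every token index in x is 1, all four rows of y are the same scaled embedding vector. A quick sanity check one could append to the test above:

print(torch.equal(y[0, 0], y[0, 1]))  # True: identical indices map to identical rows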

3. PositionEncoding

'''
Positional encoder: the same token carries different information at
different positions, so a position-dependent signal is added to the
embeddings. The sinusoidal scheme is used:
    PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
    PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
'''
import math

import torch
import torch.nn as nn
from torch.autograd import Variable


class PositionEncoding(nn.Module):
    def __init__(self, d_model, dropout, max_len=5000) -> None:
        super(PositionEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Precompute the encodings for all positions up to max_len.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)          # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2) *
                             -(math.log(10000.0) / d_model))      # (d_model / 2,)
        pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
        pe = pe.unsqueeze(0)                          # (1, max_len, d_model)
        # Not a parameter, but saved and moved together with the module.
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Add the encodings for the first x.size(1) positions to the embeddings.
        x = x + Variable(self.pe[:, :x.size(1)], requires_grad=False)
        return self.dropout(x)
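To mirror the test in section 2, here is a minimal sketch of a test for PositionEncoding fed with the output of Embeddings. The module filename PositionEncoding used in the import and the dropout value 0.1 are assumptions of this sketch, not taken from the original.

from torch.autograd import Variable
import torch

from Embedding import Embeddings
from PositionEncoding import PositionEncoding  # assumed filename

d_model = 8
vocab = 4
dropout = 0.1  # assumed value

emb = Embeddings(d_model, vocab)
pe = PositionEncoding(d_model, dropout)

x = Variable(torch.LongTensor([[1, 2, 3, 0]]))
y = pe(emb(x))  # token embeddings plus positional encodings

print(y)
print(y.shape)  # torch.Size([1, 4, 8])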