[I2S] 어텐션모델이 하는 역할은? 무슨 효과가 있는가?

seq2seq 구조에서는 하나의 고정된 크기의 벡터에 모든 정보를 압축하려고 하니까 정보 손실이 발생한다. 어텐션은 디코더가 매 시점마다 인코더의 전체 출력(features)을 다시 참고하여, 가중합으로 만든 컨텍스트 벡터를 사용함으로써 이 문제를 완화한다.

I2S에서 사용한 모델의 구조는 다음과 같다.

Model: "cnn__encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
efficientnet-b1 (Model)      (None, 7, 7, 1280)        6575232   
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  655872    
=================================================================
Total params: 7,231,104
Trainable params: 655,872
Non-trainable params: 6,575,232
_________________________________________________________________

Model: "rnn__decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding (Embedding)        multiple                  18944     
_________________________________________________________________
gru (GRU)                    multiple                  6297600   
_________________________________________________________________
dropout_1 (Dropout)          multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  1049600   
_________________________________________________________________
dense_2 (Dense)              multiple                  37925     
_________________________________________________________________
bahdanau_attention (Bahdanau multiple                  1575937   
=================================================================
Total params: 8,980,006
Trainable params: 8,980,006
Non-trainable params: 0
_________________________________________________________________

  # Instantiate the image encoder and the attention-based GRU decoder.
  # embedding_dim, units and vocab_size are defined outside this excerpt.
  encoder = CNN_Encoder(embedding_dim)
  decoder = RNN_Decoder(embedding_dim, units, vocab_size)
  
class CNN_Encoder(tf.keras.Model):
    """Image encoder: a frozen pretrained backbone plus a trainable projection.

    The backbone's feature map is flattened over its spatial grid, and every
    spatial location is projected to ``embedding_dim`` by a Dense layer
    followed by ReLU.
    """

    # Layers are defined in __init__ (Keras subclassing convention).
    def __init__(self, embedding_dim):
        """Create the backbone, dropout and projection layers.

        Args:
            embedding_dim: Output width of the per-location projection.
        """
        super(CNN_Encoder, self).__init__()
        # EFNS and ef are module-level names defined outside this class.
        # Per the model summary the backbone output is (None, 7, 7, 1280)
        # -- presumably EfficientNet-B1; confirm against the value of ef.
        self.base = EFNS[ef](input_shape=(224, 224, 3), weights='imagenet', include_top=False)
        # Freeze every backbone layer so only the projection is trained.
        for layer in self.base.layers:
            layer.trainable = False
        self.dropout = tf.keras.layers.Dropout(0.25)
        self.fc = tf.keras.layers.Dense(embedding_dim, dtype='float32')

    # The forward pass is implemented in call (Keras subclassing convention).
    def call(self, x):
        """Encode a batch of images.

        Args:
            x: Input batch accepted by the backbone
                (built with input_shape=(224, 224, 3)).

        Returns:
            Tensor of shape (batch, height * width, embedding_dim) holding one
            projected, ReLU-activated feature vector per spatial location.
        """
        x = self.base(x)
        x = self.dropout(x)
        # Collapse the two spatial axes into one. The channel count is taken
        # from the tensor's static shape rather than from an external global,
        # so the Dense layer below still sees a known last dimension.
        x = tf.reshape(x, [tf.shape(x)[0], tf.shape(x)[1] * tf.shape(x)[2], x.shape[-1]])
        x = self.fc(x)
        x = tf.nn.relu(x)

        return x
      
class RNN_Decoder(tf.keras.Model):
    """Single-step GRU decoder that attends over encoder features.

    Each call computes an attention context from the encoder features and the
    supplied hidden state, concatenates it with the embedded input token, runs
    the GRU, and maps the GRU output to vocabulary logits.
    """

    def __init__(self, embedding_dim, units, vocab_size):
        """Create the decoder layers.

        Args:
            embedding_dim: Width of the token embedding.
            units: Width of the GRU, the first Dense layer and the attention.
            vocab_size: Number of tokens; size of the output logits.
        """
        super().__init__()
        self.units = units

        # Layers are created in a fixed order: embedding, GRU, dropout,
        # fc1, fc2, attention.
        layers = tf.keras.layers
        self.embedding = layers.Embedding(vocab_size, embedding_dim)
        self.gru = layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.dropout = layers.Dropout(0.25)
        self.fc1 = layers.Dense(self.units)
        self.fc2 = layers.Dense(vocab_size, dtype='float32')

        self.attention = BahdanauAttention(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        Args:
            x: Input token ids -- presumably (batch, 1); confirm with caller.
            features: Encoder output attended over.
            hidden: Decoder state used as the attention query.

        Returns:
            Tuple ``(logits, state, attention_weights)`` where ``logits`` has
            its leading axes flattened to 2-D, ``state`` is the GRU's final
            state and ``attention_weights`` comes from the attention layer.
        """
        context_vector, attention_weights = self.attention(features, hidden)

        token_embedding = self.embedding(x)

        # Give the context a length-1 axis at index 1 so it can be joined
        # with the embedded token along the feature axis.
        context_step = tf.expand_dims(context_vector, 1)
        gru_input = tf.concat([context_step, token_embedding], axis=-1)

        # NOTE(review): `hidden` is only used as the attention query; it is
        # not passed to the GRU as initial_state -- confirm this is intended.
        output, state = self.gru(gru_input)

        projected = self.dropout(self.fc1(output))

        # Flatten to 2-D: (-1, units).
        flattened = tf.reshape(projected, (-1, projected.shape[2]))

        logits = self.fc2(flattened)

        return logits, state, attention_weights
        
class BahdanauAttention(tf.keras.Model):
    """Additive attention over a set of feature vectors.

    Scores each feature location with ``V(tanh(W1(features) + W2(hidden)))``,
    normalises the scores with a softmax over axis 1, and returns the weighted
    sum of the features together with the weights.
    """

    def __init__(self, units):
        """Create the three Dense layers.

        Args:
            units: Width of the two projection layers W1 and W2.
        """
        super().__init__()
        dense = tf.keras.layers.Dense
        self.W1 = dense(units)
        self.W2 = dense(units)
        self.V = dense(1)

    def call(self, features, hidden):
        """Compute the context vector and attention weights.

        Args:
            features: Encoder output -- presumably (batch, locations, dim);
                confirm with caller.
            hidden: Decoder state -- presumably (batch, units); confirm.

        Returns:
            Tuple ``(context_vector, attention_weights)``: the features summed
            over axis 1 after weighting, and the softmax weights themselves.
        """
        # Insert a length-1 axis at index 1 so the query broadcasts against
        # every feature location.
        query = tf.expand_dims(hidden, 1)

        projected_features = self.W1(features)
        projected_query = self.W2(query)
        score = tf.nn.tanh(projected_features + projected_query)

        # One scalar per location, normalised across axis 1.
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return context_vector, attention_weights

시퀀스 투 시퀀스(seq2seq) 모델부터 공부하고 오자.

앙창

[I2S] 어텐션모델이 하는 역할은? 무슨 효과가 있는가?

티스토리툴바