I’m trying to train a seq2seq model, but the encoder of the model has to be trained on pairs where the input and the output are the same sequence. Essentially, I want to train only the encoder on those identity pairs and then freeze it, but I couldn’t get this to work without keeping the decoder class around.
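To make that concrete, the setup I have in mind is roughly the following (only a sketch to show the intent; EncoderPretrainer and ds_identity are placeholder names, not part of my actual code, and masked_loss / masked_acc are the helpers used further down):

class EncoderPretrainer(tf.keras.Model):
    """Wraps the encoder with a temporary vocab-sized head so it can be
    trained on (tokens -> same tokens) pairs."""
    def __init__(self, encoder, vocab_size):
        super().__init__()
        self.encoder = encoder
        self.head = tf.keras.layers.Dense(vocab_size)

    def call(self, tokens):
        # (batch, s) token IDs -> (batch, s, vocab_size) logits
        return self.head(self.encoder(tokens))

encoder = Encoder2(context_text_processor, UNITS)
pretrainer = EncoderPretrainer(encoder, context_text_processor.vocabulary_size())
pretrainer.compile(optimizer='adam', loss=masked_loss, metrics=[masked_acc])

# ds_identity would yield (tokens, tokens) batches, i.e. the same sequence
# as both input and target.
pretrainer.fit(ds_identity, epochs=10)

# Afterwards the encoder would be frozen and reused inside the full model.
encoder.trainable = False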

Here is my encoder

class Encoder2(tf.keras.layers.Layer):
    def __init__(self, text_processor, units):
        super(Encoder2, self).__init__()
        self.text_processor = text_processor
        self.vocab_size = text_processor.vocabulary_size()
        self.units = units

        # The embedding layer converts tokens to vectors
        self.embedding = tf.keras.layers.Embedding(self.vocab_size, units,
                                                   mask_zero=True)

        # The RNN layer processes those vectors sequentially.
        self.rnn = tf.keras.layers.Bidirectional(
            merge_mode='sum',
            layer=tf.keras.layers.GRU(units,
                                      # Return the sequence and state
                                      return_sequences=True,
                                      recurrent_initializer='glorot_uniform'))

    def call(self, x):
        shape_checker = ShapeChecker()
        shape_checker(x, 'batch s')

        # 2. The embedding layer looks up the embedding vector for each token.
        x = self.embedding(x)
        shape_checker(x, 'batch s units')

        # 3. The GRU processes the sequence of embeddings.
        x = self.rnn(x)
        shape_checker(x, 'batch s units')

        # 4. Returns the new sequence of embeddings.
        return x

    def convert_input(self, texts):
        texts = tf.convert_to_tensor(texts)
        if len(texts.shape) == 0:
            texts = tf.convert_to_tensor(texts)[tf.newaxis]
        context = self.text_processor(texts).to_tensor()
        context = self(context)
        return context
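For reference, a standalone check of the encoder (using the same ex_context_tok2 example batch that appears in the training code below) should produce a (batch, s, units) shaped output:

# Run the encoder by itself on a tokenized example batch and look at the shape.
test_encoder = Encoder2(context_text_processor, UNITS)
enc_out = test_encoder(ex_context_tok2)
print('encoder output shape:', enc_out.shape)  # expected: (batch, s, units)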

and below is my decoder

class Decoder2(tf.keras.layers.Layer):
    @classmethod
    def add_method(cls, fun):
        setattr(cls, fun.__name__, fun)
        return fun

    def __init__(self, text_processor, units):
        super(Decoder2, self).__init__()
        self.text_processor = text_processor
        self.word_to_id = tf.keras.layers.StringLookup(
            vocabulary=text_processor.get_vocabulary(),
            mask_token='', oov_token='[UNK]')
        self.id_to_word = tf.keras.layers.StringLookup(
            vocabulary=text_processor.get_vocabulary(),
            mask_token='', oov_token='[UNK]',
            invert=True)
        self.start_token = self.word_to_id('[START]')
        self.end_token = self.word_to_id('[END]')

        self.vocab_size = text_processor.vocabulary_size()
        self.units = units

        # 1. The embedding layer converts token IDs to vectors
        self.embedding = tf.keras.layers.Embedding(self.vocab_size,
                                                   units, mask_zero=True)

        # 2. The RNN keeps track of what's been generated so far.
        self.rnn = tf.keras.layers.GRU(units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

        # 3. The RNN output will be the query for the attention layer.
        self.attention = CrossAttention(units)

        # 4. This fully connected layer produces the logits for each
        #    output token.
        self.output_layer = tf.keras.layers.Dense(self.vocab_size)


# The remaining methods are attached after the class definition via add_method
# (they can't be decorated with @Decoder2.add_method inside the class body,
# since the name Decoder2 doesn't exist yet at that point).
@Decoder2.add_method
def call(self, context, x, state=None, return_state=False):
    shape_checker = ShapeChecker()
    shape_checker(x, 'batch t')
    shape_checker(context, 'batch s units')

    # 1. Lookup the embeddings
    x = self.embedding(x)
    shape_checker(x, 'batch t units')

    # 2. Process the target sequence.
    x, state = self.rnn(x, initial_state=state)
    shape_checker(x, 'batch t units')

    # 3. Use the RNN output as the query for the attention over the context.
    x = self.attention(x, context)
    self.last_attention_weights = self.attention.last_attention_weights
    shape_checker(x, 'batch t units')
    shape_checker(self.last_attention_weights, 'batch t s')

    # 4. Generate logit predictions for the next token.
    logits = self.output_layer(x)
    shape_checker(logits, 'batch t target_vocab_size')

    if return_state:
        return logits, state
    else:
        return logits


@Decoder2.add_method
def tokens_to_text(self, tokens):
    words = self.id_to_word(tokens)
    result = tf.strings.reduce_join(words, axis=-1, separator=' ')
    result = tf.strings.regex_replace(result, '^ *\[START\] *', '')
    result = tf.strings.regex_replace(result, ' *\[END\] *$', '')
    return result


@Decoder2.add_method
def get_next_token(self, context, next_token, done, state, temperature=0.0):
    logits, state = self(
        context, next_token,
        state=state,
        return_state=True)

    if temperature == 0.0:
        next_token = tf.argmax(logits, axis=-1)
    else:
        logits = logits[:, -1, :] / temperature
        next_token = tf.random.categorical(logits, num_samples=1)

    # If a sequence produces an `end_token`, set it `done`.
    done = done | (next_token == self.end_token)
    # Once a sequence is done it only produces 0-padding.
    next_token = tf.where(done, tf.constant(0, dtype=tf.int64), next_token)

    return next_token, done, state


@Decoder2.add_method
def get_initial_state(self, context):
    batch_size = tf.shape(context)[0]
    start_tokens = tf.fill([batch_size, 1], self.start_token)
    done = tf.zeros([batch_size, 1], dtype=tf.bool)
    embedded = self.embedding(start_tokens)
    return start_tokens, done, self.rnn.get_initial_state(embedded)[0]

The translator

class Translator2(tf.keras.Model):
    @classmethod
    def add_method(cls, fun):
        setattr(cls, fun.__name__, fun)
        return fun

    def __init__(self, units,
                 context_text_processor,
                 target_text_processor):
        super().__init__()
        # Build the encoder and decoder
        encoder = Encoder2(context_text_processor2, units)
        decoder = Decoder2(target_text_processor2, units)

        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs):
        context, x = inputs
        context = self.encoder(context)
        logits = self.decoder(context, x)

        # TODO(b/250038731): remove this
        try:
            # Delete the keras mask, so keras doesn't scale the loss+accuracy.
            del logits._keras_mask
        except AttributeError:
            pass

        return context

The model training code:

model2 = Translator2(UNITS, context_text_processor, target_text_processor2)
logits2 = model2((ex_context_tok2, ex_context_tok2))

model2.compile(optimizer='adam',
               loss=masked_loss,
               metrics=[masked_acc, masked_loss])

history2 = model2.fit(
    train_ds2.repeat(),
    epochs=200,
    steps_per_epoch=100,
    validation_data=val_ds2,
    validation_steps=20,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)])
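I also checked what the datasets actually yield before compiling; the snippet below assumes they follow the tutorial's ((context, target_in), target_out) structure, which is what Translator2.call unpacks:

# Sanity check of the element structure the datasets yield.
print(train_ds2.element_spec)
print(val_ds2.element_spec)

for (ex_context, ex_target_in), ex_target_out in train_ds2.take(1):
    print('context tokens:', ex_context.shape)
    print('decoder input: ', ex_target_in.shape)
    print('labels:        ', ex_target_out.shape)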

I do understand that an "InvalidArgumentError: Graph execution error" like this is usually caused by a dimension mismatch between the input provided to the loss or metric and the input it expects. I fixed every mismatch I could find and printed the shapes as well, but I still can't pin down or fix the issue. Please help.
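For what it's worth, this is roughly how I print the shapes that actually reach the loss from inside the compiled graph (a throwaway wrapper; it assumes masked_loss takes the usual (y_true, y_pred) arguments):

def debug_masked_loss(y_true, y_pred):
    # tf.print works inside the tf.function graph, unlike a plain print().
    tf.print('y_true shape:', tf.shape(y_true),
             'y_pred shape:', tf.shape(y_pred))
    return masked_loss(y_true, y_pred)

model2.compile(optimizer='adam',
               loss=debug_masked_loss,
               metrics=[masked_acc])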
