1

我在 TensorFlow(Python)中使用 Seq2Seq 已經幾個星期了,現在有一個可運作的模型,它使用雙向編碼器和基於注意力機制的解碼器,運作正常。今天我加入了 Beam Search,但我注意到,只要波束寬度(beam width)為 1 或更大,推論就會耗時極久、似乎永遠不會結束;而先前只使用雙向編碼器和注意力解碼器時,推論只需要幾秒鐘。(標題:BeamSearch 在 TensorFlow 中永遠跑不完)

環境詳細資訊:TensorFlow 版本 1.3.0,macOS 10.12.4

下面是我的程式碼的相關部分:

def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size, encoder_outputs):
    """
    Build the training decoder and the inference decoder.

    Both decoders use a stacked LSTM cell (with output dropout) wrapped in
    Bahdanau attention over ``encoder_outputs``. The inference path is chosen
    from the module-level ``beam_width``: ``0`` uses ``decoding_layer_infer``,
    any other value uses ``decoding_layer_infer_with_Beam``. When building the
    inference cell with ``beam_width > 0``, the encoder outputs, the encoder
    state and the zero-state batch size are tiled/multiplied by ``beam_width``.

    :param dec_input: Decoder input ids, looked up in the decoder embeddings
    :param encoder_state: Encoder state, used as the attention cell's initial cell state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: Units per LSTM layer; also the attention size
    :param num_layers: Number of stacked LSTM layers
    :param target_vocab_to_int: Dictionary from target words to ids; must contain '<GO>' and '<EOS>'
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :param encoder_outputs: Encoder outputs, used as the attention memory
    :return: Tuple of (training decoder output, inference decoder output), as
        returned by the respective helper functions
    """
    # Decoder embedding table and the embedded training inputs.
    dec_embeddings = tf.Variable(
        tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    def _dropout_lstm():
        # One LSTM layer with dropout applied to its outputs.
        cell = tf.contrib.rnn.LSTMCell(
            rnn_size,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)

    def _attention_cell_and_state(for_inference):
        # Returns (initial_state, attention_cell) for training or inference.
        memory = encoder_outputs
        start_state = encoder_state
        state_batch = batch_size

        if for_inference and beam_width > 0:
            # Beam search keeps beam_width hypotheses per example, so every
            # per-example tensor is repeated beam_width times.
            memory = tf.contrib.seq2seq.tile_batch(memory, multiplier=beam_width)
            start_state = tf.contrib.seq2seq.tile_batch(start_state, multiplier=beam_width)
            state_batch = state_batch * beam_width

        layers = []
        for _ in range(num_layers):
            layers.append(_dropout_lstm())
        stacked = tf.contrib.rnn.MultiRNNCell(layers)

        attention = tf.contrib.seq2seq.BahdanauAttention(
            num_units=rnn_size, memory=memory)
        wrapped = tf.contrib.seq2seq.AttentionWrapper(
            stacked, attention,
            attention_layer_size=rnn_size,
            output_attention=False)

        # Start from the wrapper's zero state but carry over the encoder state.
        initial = wrapped.zero_state(state_batch, tf.float32).clone(
            cell_state=start_state)
        return initial, wrapped

    train_state, train_cell = _attention_cell_and_state(False)
    infer_state, infer_cell = _attention_cell_and_state(True)

    output_layer = Dense(
        target_vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(
            train_state, train_cell, dec_embed_input,
            target_sequence_length, max_target_sequence_length,
            output_layer, keep_prob)

    # Same scope with reuse=True so inference shares the training variables.
    with tf.variable_scope("decode", reuse=True):
        infer_args = (infer_state, infer_cell, dec_embeddings,
                      target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'],
                      max_target_sequence_length, target_vocab_size,
                      output_layer, batch_size, keep_prob)
        if beam_width != 0:
            infer_decoder_out = decoding_layer_infer_with_Beam(*infer_args)
            print('beam search')
        else:
            infer_decoder_out = decoding_layer_infer(*infer_args)

    return train_decoder_out, infer_decoder_out

""" 
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE 
""" 
#tests.test_decoding_layer(decoding_layer) 


def decoding_layer_infer_with_Beam(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
         end_of_sequence_id, max_target_sequence_length,
         vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a beam-search decoding layer for inference.

    The beam width is read from the module-level ``beam_width``.

    :param encoder_state: Initial decoder state handed to the BeamSearchDecoder
        (the caller, decoding_layer, passes a state already tiled by beam_width)
    :param dec_cell: Decoder RNN Cell
    :param dec_embeddings: Decoder embeddings
    :param start_of_sequence_id: GO ID
    :param end_of_sequence_id: EOS Id
    :param max_target_sequence_length: Maximum length of target sequences;
        used as the upper bound on the number of decoding steps
    :param vocab_size: Size of decoder/target vocabulary (unused here; kept
        so the signature matches the other inference helper)
    :param output_layer: Function to apply the output layer
    :param batch_size: Batch size (not multiplied by the beam width); one
        start token is generated per batch entry
    :param keep_prob: Dropout keep probability (unused here; kept for
        signature compatibility)
    :return: First element of the tuple returned by dynamic_decode (the
        decoder's final outputs)
    """
    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32),
                           [batch_size], name='start_tokens')

    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        cell=dec_cell,
        embedding=dec_embeddings,
        start_tokens=start_tokens,
        end_token=end_of_sequence_id,
        initial_state=encoder_state,
        beam_width=beam_width,
        output_layer=output_layer)

    # Without maximum_iterations the decode loop has no step limit and only
    # stops once the decoder itself reports completion, which made inference
    # appear to run forever. Cap it at the longest target sequence length.
    inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(
        inference_decoder,
        impute_finished=False,
        maximum_iterations=max_target_sequence_length)[0]
    return inference_decoder_output



""" 
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE 
""" 
#tests.test_decoding_layer_infer(decoding_layer_infer) 
下面是模型參數:

# Model / training hyperparameters, defined as module-level globals.
# Number of training epochs
epochs = 200 
# Batch size
batch_size = 30 
# RNN size (passed as rnn_size: units per LSTM layer and attention size)
rnn_size = 512 
# Number of stacked RNN layers
num_layers = 2 
# Embedding sizes for the encoder and decoder vocabularies
encoding_embedding_size = 100 
decoding_embedding_size = 100 
# Learning rate
learning_rate = 0.001 
# Dropout keep probability (presumably fed to the keep_prob input -- confirm
# against the training loop)
keep_probability = 0.55 
# presumably how often (in batches) progress is reported -- confirm against
# the training loop
display_step = 10 
# Beam width, read as a global by decoding_layer and
# decoding_layer_infer_with_Beam; 0 selects the non-beam inference path.
beam_width=1 

我真的很感激你的幫助,我不確定到底發生了什麼問題。

謝謝

+0

多大的詞彙? – Aaron

+0

源詞彙是47個單詞,目標詞彙約74個單詞 –

回答

1

好,我剛剛發現自己哪裡做錯了。

我只需要在 dynamic_decode 函式中設定最大迭代次數(maximum_iterations),如下:

# Cap the number of decoding steps at the maximum target sequence length;
# leaving maximum_iterations unset is what made beam-search inference
# appear to never finish.
inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(inference_decoder, 
                 impute_finished=False, 
                 maximum_iterations=max_target_sequence_length)[0]