
I am trying to build an RNN encoder and decoder in TensorFlow with variable-length sequence inputs, so I want both the encoder and the decoder to be dynamic. In addition, the decoder inputs are conditioned on the encoder's final hidden state (the context vector), similar to figure (a) on page 3 of the Related Paper. The decoder performs full inference during training, feeding the previous output together with the context vector as input at every step. How can I use ScheduledOutputTrainingHelper to condition the dynamic RNN decoder's inputs on the encoder's final hidden state?

import tensorflow as tf
import copy
import math
from tensorflow.python.layers.core import Dense


class RNNEncoder_Decoder(object):

    def __init__(self, input_dim,
                 context_dim, output_dim, hidden_dim,
                 layers_stacked_count, learning_rate):

        self.graph = tf.get_default_graph()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.context_dim = context_dim
        self.hidden_dim = hidden_dim
        self.layers_stacked_count = layers_stacked_count
        self.learning_rate = learning_rate
        self.sampling_probability = tf.constant(dtype=tf.float32, value=1.0)

        # [batch_size, sequence_length, input_dimension]
        self.enc_inp = tf.placeholder(tf.float32, [None, None, self.input_dim], name='encoder_inputs')
        self.expected_out = tf.placeholder(tf.float32, [None, None, self.input_dim], name='expected_outs')
        # full inference during training
        self.dec_inp = tf.zeros_like(self.expected_out, dtype=tf.float32, name='decoder_inputs')

        seq_length = tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(self.enc_inp), 2)), 1)
        self.seq_length = tf.cast(seq_length, tf.int32)

        with tf.variable_scope('RNNEncoderDecoder'):
            with tf.variable_scope("Encoder") as encoder_varscope:
                # create the encoder LSTM cells
                encoder_cells = []
                for i in range(self.layers_stacked_count):
                    with tf.variable_scope('EncoderCell_{}'.format(i)):
                        encoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
                                                                     use_peepholes=True))
                self.encoder_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)

                # run the dynamic RNN encoder
                _, enc_state = tf.nn.dynamic_rnn(cell=self.encoder_cell,
                                                 initial_state=None,
                                                 dtype=tf.float32,
                                                 inputs=self.enc_inp,
                                                 sequence_length=self.seq_length)

                # extract the top-layer hidden state as the feature representation
                self.context_vector = enc_state[-1].h

                cell_state0 = tf.zeros_like(enc_state[0].c, dtype=tf.float32)
                hidden_state0 = tf.zeros_like(enc_state[0].h, dtype=tf.float32)

                # pass the top-layer encoder state to the bottom layer of the decoder
                dec_init_state = (enc_state[1],
                                  tf.nn.rnn_cell.LSTMStateTuple(cell_state0, hidden_state0))

                # condition the decoder inputs on the extracted features,
                # with a shape that matches the decoder inputs in all but (potentially) the final dimension:
                # tile the context vector from [batch_size, context_dim]
                # to [batch_size, decoder_sequence_length, context_dim]
                context_vector_shape = tf.shape(self.context_vector)
                context_vector_reshaped = tf.reshape(self.context_vector,
                                                     [context_vector_shape[0], 1, context_vector_shape[1]])
                enc_inp_shape = tf.shape(self.enc_inp)
                self.auxiliary_inputs = tf.tile(context_vector_reshaped,
                                                multiples=[1, enc_inp_shape[1], 1])

            with tf.variable_scope("Decoder") as decoder_varscope:
                # create the decoder LSTM cells
                decoder_cells = []
                for i in range(self.layers_stacked_count):
                    with tf.variable_scope('DecoderCell_{}'.format(i)):
                        decoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
                                                                     use_peepholes=True))
                self.decoder_cell = tf.nn.rnn_cell.MultiRNNCell(decoder_cells)

                dec_out_dense = Dense(units=self.output_dim,
                                      activation=None,
                                      use_bias=False,
                                      kernel_initializer=tf.truncated_normal_initializer(
                                          dtype=tf.float32,
                                          stddev=1.0 / math.sqrt(float(self.hidden_dim))),
                                      name='dec_outp_linear_projection')

                training_helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
                    inputs=self.dec_inp,
                    sequence_length=self.seq_length,
                    auxiliary_inputs=self.auxiliary_inputs,  # condition on the context vector
                    sampling_probability=1.0,  # always feed back the previous output (full inference)
                    name='feeding_conditional_input')

                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=dec_init_state,
                    output_layer=dec_out_dense)

                outputs, _, final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    impute_finished=True)
            self.outputs = outputs

    ### optimize loss part

    def get_decoder_prediction(self, X, session):
        feed_dict = {
            self.enc_inp: X
        }
        feed_dict.update({self.expected_out: X})
        run = [self.outputs]
        return session.run(run, feed_dict=feed_dict)


context_dim = 32
output_dim = input_dim = 1
hidden_dim = 32
layers_stacked_count = 2
learning_rate = 0.01
test = RNNEncoder_Decoder(input_dim=input_dim,
                          context_dim=context_dim,
                          output_dim=output_dim,
                          hidden_dim=hidden_dim,
                          layers_stacked_count=layers_stacked_count,
                          learning_rate=learning_rate)

Without "auxiliary_inputs = self.auxiliary_inputs" it runs successfully,

but with "auxiliary_inputs = self.auxiliary_inputs" I get the following error:

--------------------------------------------------------------------------- 
ValueError        Traceback (most recent call last) 
<ipython-input-3-02522a01f0d8> in <module>() 
     9       hidden_dim=hidden_dim, 
    10       layers_stacked_count=layers_stacked_count, 
---> 11       learning_rate=learning_rate 
    12       ) 

<ipython-input-2-86494b8d99fa> in __init__(self, input_dim, context_dim, output_dim, hidden_dim, layers_stacked_count, learning_rate) 
    98 
    99     outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder, 
--> 100                     impute_finished = True 
    101                     ) 
    102    self.outputs = outputs 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope) 
    284   ], 
    285   parallel_iterations=parallel_iterations, 
--> 286   swap_memory=swap_memory) 
    287 
    288  final_outputs_ta = res[1] 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name) 
    2773  context = WhileContext(parallel_iterations, back_prop, swap_memory, name) 
    2774  ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context) 
-> 2775  result = context.BuildLoop(cond, body, loop_vars, shape_invariants) 
    2776  return result 
    2777 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants) 
    2602  self.Enter() 
    2603  original_body_result, exit_vars = self._BuildLoop(
-> 2604   pred, body, original_loop_vars, loop_vars, shape_invariants) 
    2605  finally: 
    2606  self.Exit() 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants) 
    2552   structure=original_loop_vars, 
    2553   flat_sequence=vars_for_body_with_tensor_arrays) 
-> 2554  body_result = body(*packed_vars_for_body) 
    2555  if not nest.is_sequence(body_result): 
    2556  body_result = [body_result] 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths) 
    232  """ 
    233  (next_outputs, decoder_state, next_inputs, 
--> 234  decoder_finished) = decoder.step(time, inputs, state) 
    235  next_finished = math_ops.logical_or(decoder_finished, finished) 
    236  if maximum_iterations is not None: 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name) 
    137  """ 
    138  with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)): 
--> 139  cell_outputs, cell_state = self._cell(inputs, state) 
    140  if self._output_layer is not None: 
    141   cell_outputs = self._output_layer(cell_outputs) 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope) 
    178  with vs.variable_scope(vs.get_variable_scope(), 
    179        custom_getter=self._rnn_get_variable): 
--> 180   return super(RNNCell, self).__call__(inputs, state) 
    181 
    182 def _rnn_get_variable(self, getter, *args, **kwargs): 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs) 
    448   # Check input assumptions set after layer building, e.g. input shape. 
    449   self._assert_input_compatibility(inputs) 
--> 450   outputs = self.call(inputs, *args, **kwargs) 
    451 
    452   # Apply activity regularization. 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state) 
    936          [-1, cell.state_size]) 
    937   cur_state_pos += cell.state_size 
--> 938   cur_inp, new_state = cell(cur_inp, cur_state) 
    939   new_states.append(new_state) 
    940 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope) 
    178  with vs.variable_scope(vs.get_variable_scope(), 
    179        custom_getter=self._rnn_get_variable): 
--> 180   return super(RNNCell, self).__call__(inputs, state) 
    181 
    182 def _rnn_get_variable(self, getter, *args, **kwargs): 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs) 
    448   # Check input assumptions set after layer building, e.g. input shape. 
    449   self._assert_input_compatibility(inputs) 
--> 450   outputs = self.call(inputs, *args, **kwargs) 
    451 
    452   # Apply activity regularization. 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state) 
    554  input_size = inputs.get_shape().with_rank(2)[1] 
    555  if input_size.value is None: 
--> 556  raise ValueError("Could not infer input size from inputs.get_shape()[-1]") 
    557  scope = vs.get_variable_scope() 
    558  with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: 

ValueError: Could not infer input size from inputs.get_shape()[-1] 

I have just started using TensorFlow, so could anyone help me: Is this the right way to condition the decoder inputs on the encoder's final hidden state? And why do the decoder's inputs become None (triggering the error above) once I pass auxiliary_inputs?
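A quick way to see what the decoder actually receives is to check the static shape of the tiled context tensor right after it is built (a minimal debugging sketch using the names from the code above):

# with the tf.shape-based reshape, the last dimension of
# self.auxiliary_inputs is not statically known
print(self.auxiliary_inputs.get_shape())
# the helper concatenates dec_inp with auxiliary_inputs, so the decoder
# cell sees an input whose last dimension is None and raises
# "Could not infer input size from inputs.get_shape()[-1]"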

Answer


Just realized the mistake I made: using "context_vector_shape" (obtained from tf.shape) to define the shape of the auxiliary_inputs tensor leaves all of its dimensions unknown (?, ?, ?), which causes "ValueError: Could not infer input size from inputs.get_shape()[-1]".

Defining the auxiliary inputs tensor with the shape (?, ?, context_dim) directly solves the problem.
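A minimal sketch of the fix, assuming the variable names from the question's code; the point is to give auxiliary_inputs a statically known last dimension of context_dim before handing it to ScheduledOutputTrainingHelper:

# reshape the context vector with a static last dimension
# instead of building the shape from tf.shape(self.context_vector)
context_vector_reshaped = tf.reshape(self.context_vector,
                                     [-1, 1, self.context_dim])
enc_inp_shape = tf.shape(self.enc_inp)
self.auxiliary_inputs = tf.tile(context_vector_reshaped,
                                multiples=[1, enc_inp_shape[1], 1])
# pin the static shape down to (?, ?, context_dim) so that, after the
# helper concatenates dec_inp and auxiliary_inputs, the decoder cell
# can infer its input size
self.auxiliary_inputs.set_shape([None, None, self.context_dim])

With this change the LSTM cell can build its kernel, and dynamic_decode runs without the ValueError.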