2017-08-28 126 views
0

我試圖在TensorFlow中構建一個相當特殊的網絡,並且我有點讓它工作。可悲的是,我偶然發現了一個我無法解決的錯誤,甚至找不到合適的地方。 據我所知,網絡已成功構建,直到損失函數被定義。然後該錯誤消息說,有關未對應的形狀:在梯度計算MaxPoolWithArgmax時,形狀在TF中不兼容

ValueError: Shapes (1, 17, 17, 44) and (1, 16, 16, 44) are not compatible

的東西是錯誤不說在哪張或代碼行出現問題。我已經打印出所有可以提出的形狀,而且我甚至找不到這種形狀(1,17,17,44)。

from tensorflow.python.framework import ops 
from tensorflow.python.ops import gen_nn_ops 
@ops.RegisterGradient("MaxPoolWithArgmax") 
def _MaxPoolWithArgmaxGrad(op, grad, some_other_arg): 
    return gen_nn_ops._max_pool_grad(op.inputs[0], 
            op.outputs[0], 
            grad, 
            op.get_attr("ksize"), 
            op.get_attr("strides"), 
            padding=op.get_attr("padding"), 
            data_format='NHWC') 
class FCN_RGBD: 

    def __init__(self, checkpoint_dir='./checkpoints/'): 
     self.build(1) 

     # "allow_soft_placement = True" makes TensorFlow automatically choose an existing and supported GPU device 
     self.config = tf.ConfigProto(allow_soft_placement = True) 
     self.session = tf.Session(config = self.config) 
     self.session.run(tf.global_variables_initializer()) 

    def weight_variable(self, shape): 
     initial = tf.truncated_normal(shape, stddev=0.1) 
     return tf.Variable(initial) 

    def bias_variable(self, shape): 
     initial = tf.constant(0.1, shape=shape) 
     return tf.Variable(initial) 

    def conv_layer(self, x, W_shape, b_shape, strides, name, padding): 
     W = self.weight_variable(W_shape) 
     b = self.bias_variable([b_shape]) 
     return tf.nn.relu(tf.nn.conv2d(x, W, strides=strides, padding=padding) + b) 

    def conv_skip_layer(self, x, W_shape, b_shape, name, padding): 
     W = self.weight_variable(W_shape) 
     b = self.bias_variable([b_shape]) 
     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding=padding) + b 

    def deconv_layer(self, x, out_shape, W_shape, b_shape, strides, name, padding): 
     W = self.weight_variable(W_shape) 
     b = self.bias_variable([b_shape]) 
     return tf.nn.conv2d_transpose(x, W, output_shape=out_shape, strides=strides, padding=padding) + b 

    def pool_layer3x3(self, x): 
     with tf.device('/gpu:0'): 
      return tf.nn.max_pool_with_argmax(x, ksize=[1, 3, 3, 1], strides=[1, 3, 3, 1], padding='SAME') 

    def pool_layer2x2(self, x): 
     with tf.device('/gpu:0'): 
      return tf.nn.max_pool_with_argmax(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 

    def build(self, batchsize): 

     print('Building the FCN...') 

     with tf.device('/gpu:0'): 

      self.x = tf.placeholder(tf.float32, shape=(batchsize, 250, 250, 1)) 
      self.y = tf.placeholder(tf.int64, shape=(batchsize, 250, 250, 1)) 

      self.rate = tf.placeholder(tf.float32, shape=[]) 

      conv1 = self.conv_layer(self.x, [5, 5, 1, 64], 64, [1, 2, 2, 1], 'conv1', 'SAME') 

      pool1, pool_1_argmax = self.pool_layer3x3(conv1) 

      conv1_skip = self.conv_skip_layer(pool1, [1, 1, 64, 44], 44, 'conv1_skip', 'VALID') 

      conv2 = self.conv_layer(pool1, [3, 3, 64, 128], 128, [1, 1, 1, 1], 'conv2', 'VALID') 

      pool2, pool_2_argmax = self.pool_layer2x2(conv2) 

      conv2_skip = self.conv_skip_layer(pool2, [1, 1, 128, 44], 44, 'conv2_skip', 'VALID') 

      conv3 = self.conv_layer(pool2, [5, 5, 128, 256], 256, [1, 1, 1, 1], 'conv3', 'VALID') 

      conv4 = self.conv_layer(conv3, [3, 3, 256, 44], 44, [1, 1, 1, 1], 'conv4', 'SAME') 

      deconv1 = self.deconv_layer(conv4, tf.stack([batchsize, 16, 16, 44]), [3, 3, 44, 44], 44, [1, 1, 1, 1], 'deconv1', 'SAME') 

      conv2_skip = tf.image.resize_image_with_crop_or_pad(conv2_skip, 16, 16) 

      sum1 = conv2_skip + deconv1 

      dropout1 = tf.nn.dropout(sum1, keep_prob=0.5) 

      deconv2 = self.deconv_layer(dropout1, tf.stack([batchsize, 34, 34, 44]), [4, 4, 44, 44], 44, [1, 2, 2, 1], 'deconv2', 'SAME') 

      conv1_skip = tf.image.resize_image_with_crop_or_pad(conv1_skip, 34, 34) 

      sum2 = conv1_skip + deconv2 

      dropout2 = tf.nn.dropout(sum2, keep_prob=0.5) 

      deconv_final = self.deconv_layer(dropout2, tf.stack([batchsize, 250, 250, 44]), [19, 19, 44, 44], 44, [1, 7, 7, 1], 'deconv_final', 'VALID') 

      annotation_pred = tf.argmax(deconv_final, dimension=3, name='prediction') 

      cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.squeeze(self.y, squeeze_dims=[3]), logits=deconv_final) 

      self.loss = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 

      self.train_step = tf.train.AdamOptimizer(self.rate).minimize(self.loss) 

      self.prediction = tf.argmax(tf.reshape(tf.nn.softmax(logits), tf.shape(deconv_final)), dimension=3) 

這是錯誤消息:

Traceback (most recent call last): 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 560, in merge_with 
    new_dims.append(dim.merge_with(other[i])) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 135, in merge_with 
    self.assert_is_compatible_with(other) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 108, in assert_is_compatible_with 
    % (self, other)) 
ValueError: Dimensions 17 and 16 are not compatible 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "main.py", line 5, in <module> 
    fcn_rgbd = FCN_RGBD() 
    File "C:\Users\user\netcase\Workspace\Depth_BPC_v1\FCN_RGBD.py", line 23, in __init__ 
    self.build(1) 
    File "C:\Users\user\netcase\Workspace\Depth_BPC_v1\FCN_RGBD.py", line 162, in build 
    self.train_step = tf.train.AdamOptimizer(self.rate).minimize(self.loss) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 315, in minimize 
    grad_loss=grad_loss) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 386, in compute_gradients 
    colocate_gradients_with_ops=colocate_gradients_with_ops) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 580, in gradients 
    in_grad.set_shape(t_in.get_shape()) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 413, in set_shape 
    self._shape = self._shape.merge_with(shape) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 564, in merge_with 
    (self, other)) 
ValueError: Shapes (1, 17, 17, 44) and (1, 16, 16, 44) are not compatible 

我這個隱約說明問題很抱歉,但我真的沒有一個想法從哪裏開始。

+1

你爲什麼要定義你自己的漸變?我猜想這是一個錯誤的來源,因爲在梯度計算過程中拋出異常。 – lejlot

+0

因爲據我所知,沒有針對MaxPoolWithArgmax的漸變atm(https://github.com/tensorflow/tensorflow/issues/1793)。但我已經嘗試了與'普通'max_pool一樣的方法,而且我也遇到了同樣的錯誤。 – Gizmo

+1

你確定有沒有它在乾淨的運行**有相同的錯誤**? (人們有時只是註釋掉筆記本中的行,它不會從內存中刪除已註冊的漸變)。由於這個錯誤純粹是在梯度計算中,如果這與你的覆蓋無關,那看起來就像是tf中的一個錯誤,這很不可能。 – lejlot

回答

1

原來是不同層次尺寸錯誤的問題。不幸的是,conv2d_transpose的錯誤消息不是很有幫助。這個貼子幫了我很多:Confused about conv2d_transpose

+1

很有意思,它並沒有在網絡建設期間引發任何錯誤。這是否涉及一些廣播? – lejlot

+0

我不知道,但我相信答案是否定的。無論如何謝謝你! :) – Gizmo