2016-09-19 47 views
0

我想通過TensorFlow建立一個CNN + LSTM + CTC模型,但是我在訓練過程中總是得到NaN值,如何避免這種情況?另一方面,我發現輸入(INPUT)似乎需要專門處理。我還發現LOSS值保持在大約30不變,再也不會減少,這種情況是否正常?(我之前用Theano寫過這個模型,並遇到同樣的問題)感謝您的幫助!如何避免CTC訓練中出現NaN值?

#incoding:utf-8 
import tensorflow as tf 
import numpy as np 
import cv2 
import Get_Data 
import random 
import time 

#load data
# Path where tf.train.Saver writes checkpoints (see saver.save below).
model_data_path = "checkpoints.data" 
images_path = "/home/liuyi/test/images" 
#images_path = "/home/night/test/images" 
ans_name = "answer" 
# Get_Data is a project-local helper; presumably returns (images, labels)
# as numpy-compatible arrays -- TODO confirm shapes match the
# [batch, 200, 60, 1] image placeholder defined further down.
images_data, ans_data = Get_Data.get_data(images_path, ans_name) 
# Cache the loaded dataset to disk so later runs can skip re-parsing.
np.save("/home/liuyi/test/tf_images.npy", images_data) 
np.save("/home/liuyi/test/tf_labels.npy", ans_data) 
print images_data.shape 
print ans_data 
#build model 
#----define---- 
def conv2d(x, w, b, strides=1):
    """2-D convolution with bias and ReLU activation, SAME padding.

    x: input tensor [batch, height, width, channels]; w: filter weights;
    b: per-channel bias; strides: spatial stride (same in both dims).
    """
    stride_spec = (1, strides, strides, 1)
    conv = tf.nn.conv2d(x, w, stride_spec, "SAME")
    return tf.nn.relu(tf.nn.bias_add(conv, b))

def dropout(x, rate):
    """Apply dropout to x.

    NOTE(review): in this TensorFlow version the second argument of
    tf.nn.dropout is keep_prob, so `rate` here is the probability of
    KEEPING a unit, not of dropping it -- confirm at call sites.
    """
    dropped = tf.nn.dropout(x, rate)
    return dropped

def maxpool2d(x, poolsize=(2,2)):
    """Max pooling where the window and the stride are both `poolsize`,
    so pooled regions do not overlap. SAME padding."""
    kh, kw = poolsize
    window = (1, kh, kw, 1)
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

def flatten(x):
    """Collapse every dimension of x except the leading batch dimension."""
    flat = tf.contrib.layers.flatten(x)
    return flat

def full_con(x, w, b):
    """Fully connected layer with no activation: returns x @ w + b."""
    return tf.nn.bias_add(tf.matmul(x, w), b)

def LSTM(x, n_input, hidden_units, out_dim, forget_bias = 1.0, layer_num = 1):
    """Run a stack of `layer_num` projected LSTM layers over x and return
    the output in time-major order [steps, batch, out_dim].

    x: 2-D tensor [batch, steps]; it is reshaped to [batch, steps, n_input]
       (NOTE(review): the reshape is only valid for n_input == 1 as called
       in this file -- confirm before passing n_input > 1).
    hidden_units: LSTM cell size; out_dim: projection size of each cell.

    Fix: build a distinct LSTMCell object per layer instead of the original
    [lstm] * layer_num, which put the SAME cell object in every layer.
    Sharing one cell object is rejected outright by TF >= 1.1, and with
    num_proj the layers have different input sizes (n_input vs out_dim),
    so they must be independent cells.
    """
    def make_cell():
        # One fresh projected cell per layer.
        return tf.nn.rnn_cell.LSTMCell(
            hidden_units, forget_bias=forget_bias,
            state_is_tuple=True, num_proj=out_dim)
    lstms = tf.nn.rnn_cell.MultiRNNCell(
        [make_cell() for _ in range(layer_num)], state_is_tuple=True)
    x = tf.reshape(x, (int(x.get_shape()[0]), int(x.get_shape()[1]), n_input))
    out, _ = tf.nn.dynamic_rnn(lstms, x, dtype="float")
    # dynamic_rnn returns batch-major output; the CTC ops downstream
    # expect time-major, hence the transpose.
    out = tf.transpose(out, [1, 0, 2])
    return out
#----define weight---- 
# Fix: give every random_normal init an explicit small stddev.
# tf.random_normal defaults to stddev=1.0; with a 3200->1000 dense layer
# that yields pre-activations on the order of sqrt(3200), which saturates
# the network and is a classic cause of the CTC loss going to NaN within
# the first few batches.  Shapes and dict keys are unchanged.
weights = { 
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 8], stddev=0.1)), 
    'wc2': tf.Variable(tf.random_normal([5, 5, 8, 16], stddev=0.1)), 
    'wc3': tf.Variable(tf.random_normal([5, 5, 16, 16], stddev=0.1)), 
    'wc4': tf.Variable(tf.random_normal([5, 5, 16, 16], stddev=0.1)), 
    'wc5': tf.Variable(tf.random_normal([5, 5, 16, 16], stddev=0.1)), 
    'wc6': tf.Variable(tf.random_normal([5, 5, 16, 16], stddev=0.1)), 
    # Fully-connected layers have large fan-in; use an even smaller scale.
    'wf1': tf.Variable(tf.random_normal([3200, 1000], stddev=0.01)), 
    'wf2': tf.Variable(tf.random_normal([1000, 50], stddev=0.01)), 
} 
biases = { 
    'bc1': tf.Variable(tf.random_normal([8], stddev=0.1)), 
    'bc2': tf.Variable(tf.random_normal([16], stddev=0.1)), 
    'bc3': tf.Variable(tf.random_normal([16], stddev=0.1)), 
    'bc4': tf.Variable(tf.random_normal([16], stddev=0.1)), 
    'bc5': tf.Variable(tf.random_normal([16], stddev=0.1)), 
    'bc6': tf.Variable(tf.random_normal([16], stddev=0.1)), 
    'bf1': tf.Variable(tf.random_normal([1000], stddev=0.1)), 
    'bf2': tf.Variable(tf.random_normal([50], stddev=0.1)), 
} 
#----define model---- 
batch_size = 200 
# presumably 26 letters + blank + one extra label -- TODO confirm this
# matches the label alphabet produced by Get_Data.
num_classes = 26+1+1 
max_len = 21 
# Every sample is fed to CTC with the same (maximum) sequence length.
sequence_length = np.full((batch_size),max_len,dtype=np.int32)#! 
# Input images: [batch, 200, 60, 1] grayscale.
x = tf.placeholder("float", [batch_size, 200, 60, 1], "images") 
# Sparse-label triple for tf.SparseTensor: indices, values, dense shape.
y_i = tf.placeholder(tf.int64, [None, 2], "y_i") 
y_v = tf.placeholder(tf.int32, [None,], "y_v") 
y_shape = tf.placeholder(tf.int64, [2,], "y_shape") 
#--------CNN Layer-------- 
# Three conv-conv-pool stages; each max-pool halves both spatial dims,
# so 200x60 -> 100x30 -> 50x15 -> 25x8 (SAME padding rounds 15 up to 8).
conv2do1 = conv2d(x, weights['wc1'], biases['bc1']) 
conv2do2 = conv2d(conv2do1, weights['wc2'], biases['bc2']) 
conv2do2 = maxpool2d(conv2do2) 
#--------CNN Layer-------- 
conv2do3 = conv2d(conv2do2, weights['wc3'], biases['bc3']) 
conv2do4 = conv2d(conv2do3, weights['wc4'], biases['bc4']) 
conv2do4 = maxpool2d(conv2do4) 
#--------CNN Layer-------- 
conv2do5 = conv2d(conv2do4, weights['wc5'], biases['bc5']) 
conv2do6 = conv2d(conv2do5, weights['wc6'], biases['bc6']) 
conv2do6 = maxpool2d(conv2do6) 
#--------Flatten Layer-------- 
conv2do6 = flatten(conv2do6) 
#--------Full Connection-------- 
# wf1 is [3200, 1000], so the flattened CNN output must be 3200 wide
# -- TODO confirm 25*8*16 == 3200 holds for the actual input size.
fc1 = full_con(conv2do6, weights['wf1'], biases['bf1']) 
fc2 = full_con(fc1, weights['wf2'], biases['bf2']) 
#--------LSTM Layer-------- 
# fc2 is [batch, 50]; LSTM reshapes it to [batch, 50, 1] (n_input=1),
# i.e. 50 time steps of width 1, and returns time-major output
# [50, batch, num_classes].
lstms = LSTM(fc2, n_input=1, hidden_units=32, out_dim=num_classes, layer_num=3) 
#--------CTC Layer-------- 
# ctc_loss here takes time-major logits; the labels come in as the
# sparse (indices, values, shape) placeholder triple.
# NOTE(review): sequence_length is fixed at max_len=21 for every sample
# while the LSTM emits 50 steps -- looks intentional but worth confirming,
# since a sequence_length longer than the true label capacity or shorter
# than the label length is a known source of inf/NaN CTC loss.
ctc_o = tf.nn.ctc_loss(lstms, tf.SparseTensor(y_i, y_v, y_shape), sequence_length) 
#---------------- 
# Mean per-sample CTC loss over the batch.
loss = tf.reduce_mean(ctc_o) 
# Greedy (best-path) decoding; [0][0] is the top decoded SparseTensor.
ctc_p = tf.nn.ctc_greedy_decoder(lstms, sequence_length)[0][0] 
o = ctc_p 
train = tf.train.AdagradOptimizer(learning_rate=0.01).minimize(loss) 
saver = tf.train.Saver(tf.all_variables()) 
#run model 
epoch = 200 
images_sum = 10000 
train_rate = 0.8 
slice_pos = 9800 

train_images = images_data[:slice_pos] 
train_labels = ans_data[:slice_pos] 

test_images = images_data[slice_pos:] 
test_labels = ans_data[slice_pos:] 

random_list = np.arange(slice_pos) 

batch_sum = int(slice_pos/batch_size) 
test_batch = int(images_sum-slice_pos)/batch_size 

init = tf.initialize_all_variables() 
sess = tf.InteractiveSession() 
sess.run(init) 
file_name = "out" 
for e in range(epoch): 
    random.shuffle(random_list) 
    for i in range(batch_sum): 
     begin_time = time.clock() 
     train_x = [train_images[m] for m in random_list[i*batch_size:(i+1)*batch_size]] 
     train_y = [train_labels[m] for m in random_list[i*batch_size:(i+1)*batch_size]] 
     train_yi, train_yv, train_ys = Get_Data.SparseDataFrom(train_y) 
     batch_loss = sess.run(loss, feed_dict={x: train_x, y_i: train_yi, y_v: train_yv, y_shape: train_ys}) 
     sess.run(train, feed_dict={x: train_x, y_i: train_yi, y_v: train_yv, y_shape: train_ys}) 
     end_time = time.clock() 
     print "epoch{0}/{1}: batch{2}/{3} loss={4} time={5}s".format(e+1, epoch, (i+1)*batch_size, slice_pos, batch_loss,(end_time-begin_time)*(batch_sum-i)) 
    right_num = 0 
    loss_sum = 0 
    begin_time = time.clock() 
    for i in range(test_batch): 
     test_x = [test_images[i * batch_size:(i + 1) * batch_size]] 
     test_y = [test_labels[i * batch_size:(i + 1) * batch_size]] 
     test_yi, test_yv, test_ys = Get_Data.SparseDataFrom(test_y) 
     tmp_loss = sess.run(loss, feed_dict={x: test_x, y_i: test_yi, y_v: test_yv, y_shape: test_ys}) 
     test_ans = sess.run(o, feed_dict={x: test_x, y_i: test_yi, y_v: test_yv, y_shape: test_ys}) 
     test_ans = Get_Data.SparseDatatoDense(test_ans) 
     tmp_right_num = Get_Data.data_difference(test_ans, test_y) 
     loss_sum += tmp_loss 
     right_num += tmp_right_num 
    end_time = time.clock() 
    print "epoch{0}/{1}: loss={2} right_num = {3} time={4}s".format(e + 1, epoch, loss_sum, right_num, end_time - begin_time) 
    saver.save(sess, model_data_path) 
sess.close() 
+0

NaN值通常由某些運算產生,例如對零取對數,或除以非常接近零的數。考慮發佈您的代碼以獲得更詳細的答案。 –

回答

0

您可能已經解決了這個給定的問題有多老,但...

我在使用CTC的Torch綁定時也遇到過NaN,直到我為BLANK(空白)標籤增加了一個額外的輸出維度才解決。

在您的LSTM中嘗試out_dim=num_classes+1

我希望有幫助。

+0

另外,確保輸入到CTC損失函數的序列長度反映了經卷積下採樣調整後的真實輸出長度;同時,掩蓋(mask)小批量張量中的填充部分也非常重要,否則您將得到無效結果。 –