2017-09-01 124 views

I've run into a very strange problem: my squared loss becomes negative. Why does a squared loss turn negative in TensorFlow? Here is my code.

#!/usr/bin/python 
# -*- coding:utf8 -*- 
from __future__ import print_function 
from models.vgg16 import VGG16_fixed 
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from scipy.misc import imsave 
from models.generative_model_v2 import gen_model_v2 
from scripts.image_process import * 
from scripts.utils_func import * 
from tensorflow.python import debug as tf_debug 

import tensorflow as tf 
import os 
import time 

# configure gpu usage 
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "1" 
config = tf.ConfigProto() 
config.gpu_options.per_process_gpu_memory_fraction = 0.5 
set_session(tf.Session(config=config)) # pass gpu setting to Keras 

# set learning phase, or batch norm won't work 
K.set_learning_phase(1) 

# dataset setting 
width, height = 256, 256 
coco_img_path = '../../dataset/coco/images/train2014/' 
sl_img_path = './images/style/' 

# trade-off coefficient between content and style loss; it scales the style term
alpha = 1 

# create placeholders for input images 
if K.image_data_format() == 'channels_last': 
    content_img_shape = [width, height, 3] 
    style_img_shape = [width, height, 3] 
else: 
    content_img_shape = [3, width, height] 
    style_img_shape = [3, width, height] 

with tf.name_scope('input'): 
    content_img = tf.placeholder(dtype='float32', 
           shape=(None, content_img_shape[0], content_img_shape[1], content_img_shape[2]), 
           name='content_img') 
    style_img = tf.placeholder(dtype='float32', 
           shape=(None, style_img_shape[0], style_img_shape[1], style_img_shape[2]), 
           name='style_img') 

# load model 
main_model, outputs = gen_model_v2(input_content_tensor=content_img, input_style_tensor=style_img) 
concat_input = K.concatenate([content_img,
                              outputs,
                              style_img], axis=0)
vgg16_model = VGG16_fixed(input_tensor=concat_input,
                          weights='imagenet', include_top=False)

# get the symbolic outputs of each "key" layer (we gave them unique names). 
vgg16_outputs_dict = dict([(layer.name, layer.output) for layer in vgg16_model.layers]) 

# get relevant layers 
content_feature_layers = 'block3_conv3' 
style_feature_layers = ['block1_conv2', 'block2_conv2', 
         'block3_conv3', 'block4_conv3'] 

# content loss 
ct_loss = K.variable(0.) 
layer_features = vgg16_outputs_dict[content_feature_layers] 
content_img_features = layer_features[0, :, :, :] 
outputs_img_features = layer_features[1, :, :, :] 
ct_loss += content_loss(content_img_features, outputs_img_features) 

# style loss 
sl_loss_temp = K.variable(0.) 
for layer_name in style_feature_layers: 
    layer_features = vgg16_outputs_dict[layer_name] 
    outputs_img_features = layer_features[1, :, :, :] 
    style_img_features = layer_features[2, :, :, :] 
    sl = style_loss(style_img_features, outputs_img_features) 
    sl_loss_temp += (alpha/len(style_feature_layers)) * sl 
sl_loss = sl_loss_temp 

# combine loss 
loss = ct_loss + sl_loss 

# write in summary 
tf.summary.scalar('content_loss', ct_loss) 
tf.summary.scalar("style_loss", sl_loss) 
tf.summary.scalar("loss", loss) 

# optimization 
train_op = tf.train.AdamOptimizer(learning_rate=0.001, 
            beta1=0.9, 
            beta2=0.999, 
            epsilon=1e-08).minimize(loss) 

with tf.Session(config=config) as sess: 
    # merge all the summaries and write them out to ./logs/gen_model_v2
    merged = tf.summary.merge_all() 
    train_writer = tf.summary.FileWriter('./logs/gen_model_v2', 
             sess.graph) 

    # initialize all variables 
    tf.global_variables_initializer().run() 

    # get training image 
    ct_img_name = [x for x in os.listdir(coco_img_path) if x.endswith(".jpg")] 
    ct_img_num = len(ct_img_name) 
    print("content image number: ", ct_img_num) 
    sl_img_name = [x for x in os.listdir(sl_img_path) if x.endswith(".jpg")] 
    sl_img_num = len(sl_img_name) 
    print("style image number: ", sl_img_num) 

    # start training 
    start_time = time.time() 
    for i in range(1):
        itr = 0
        for ct_name in ct_img_name:
            if itr > 10:  # used to train on only a small sample of MS COCO
                break
            sl_name = sl_img_name[itr % sl_img_num]
            _, loss_val, summary = sess.run(
                [train_op, loss, merged],
                feed_dict={content_img: preprocess_image(coco_img_path + ct_name, height, width),
                           style_img: preprocess_image(sl_img_path + sl_name, height, width)})
            train_writer.add_summary(summary, itr * (i + 1))
            print('iteration', itr, 'loss =', loss_val)
            itr += 1
    end_time = time.time() 
    print('Training completed in %ds' % (end_time - start_time)) 

    # save model 
    main_model.save('./models/gen_model_v2_1.h5') 

    # use images to test 
    test_ct_img_path = './images/content/train-1.jpg' 
    test_ct_img = preprocess_image(test_ct_img_path, height, width) 
    test_sl_img_path = './images/style/starry_night.jpg' 
    test_sl_img = preprocess_image(test_sl_img_path, height, width)

    # feed test images into model 
    output = sess.run(outputs, feed_dict={content_img: test_ct_img, style_img: test_sl_img}) 
    output = deprocess_image(output) 
    print('Output image shape:', output.shape[1:4]) 
    imsave('./images/autoencoder/test_v2_1.png', output[0]) 

My loss functions are defined as follows:

#!/usr/bin/python 
# -*- coding:utf8 -*- 
import numpy as np 
from keras import backend as K 
import tensorflow as tf 

# the gram matrix of an image tensor (feature-wise outer product) 


def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram


def style_loss(featuremap_1, featuremap_2):
    assert K.ndim(featuremap_1) == 3
    assert K.ndim(featuremap_2) == 3
    g1 = gram_matrix(featuremap_1)
    g2 = gram_matrix(featuremap_2)
    channels = 3
    if K.image_data_format() == 'channels_first':
        size = K.shape(featuremap_1)[1] * K.shape(featuremap_1)[2]
    else:
        size = K.shape(featuremap_1)[0] * K.shape(featuremap_1)[1]
    size = K.cast(size, tf.float32)
    return K.sum(K.square(g1 - g2)) / (4. * (channels ** 2) * (size ** 2))


def content_loss(base, combination): 
    return K.sum(K.square(combination - base)) 

So, as you can see, my loss values are computed with K.square(). How can a sum of squares be negative?
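A quick sanity check (my sketch, with the two functions above in scope, assuming the same TF 1.x session-style setup as the rest of the code) confirms the functions themselves cannot go negative on concrete inputs:

import numpy as np
import tensorflow as tf
from keras import backend as K

# random channels-last feature maps of shape (rows, cols, channels)
a = K.constant(np.random.randn(32, 32, 64).astype('float32'))
b = K.constant(np.random.randn(32, 32, 64).astype('float32'))

with tf.Session() as sess:
    print(sess.run(content_loss(a, b)))  # a sum of squares, always >= 0
    print(sess.run(style_loss(a, b)))    # a scaled sum of squares, always >= 0

So whatever drives the reported loss negative must come from the surrounding graph, not from these functions themselves.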

This is the result of running my code: the loss drops drastically into negative values, which seems impossible. (Screenshot of the per-iteration loss: result)

Answer


You start off with ct_loss as a K.variable. Just set it to the content loss directly:

ct_loss = content_loss(content_img_features, outputs_img_features) 
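The likely mechanism (my reading of the graph, worth verifying): K.variable(0.) creates a *trainable* TensorFlow variable, and AdamOptimizer.minimize(loss) updates every trainable variable in the graph by default. Since your total loss is effectively variable + sum-of-squares terms, the optimizer is free to push that variable arbitrarily far below zero, which would produce exactly the steep dive into negative values you observed. A minimal sketch reproducing the effect (hypothetical names, not your actual graph):

import tensorflow as tf
from keras import backend as K

leak = K.variable(0.)                # same pattern as ct_loss / sl_loss_temp
loss = leak + tf.constant(1.0)       # stand-in for the genuinely non-negative terms
train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):
        sess.run(train_op)
    print(sess.run(loss))            # far below zero: Adam trained the variable itself

The same reasoning applies to sl_loss_temp: accumulate the style loss as a plain Python sum of tensors rather than starting from K.variable(0.).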

Hey Jack, I modified my code as you said, but it doesn't seem to work. The result no longer changes. –


What do you get if you run 'tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)'? – DomJack


Yes, it lists all of the variables in my graph, but there are far too many of them... –
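If the full list is overwhelming, looking only at the trainable variables is enough, since those are the only ones minimize() updates; the stray variables created by K.variable(0.) should show up there alongside the model weights. A short sketch:

import tensorflow as tf

for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
    print(v.name, v.shape)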
