Segmentation fault (core dumped) error with the TensorFlow cifar10 example

I am trying to adapt the example cifar10 code, and I don't know why I get a segmentation fault (core dumped) error when I run my modified cifar10_eval.py. The code seems to work on a Mac, and I don't understand why it doesn't work on Linux.

Thanks for your help.

----------------------- Code below ------------------------------

# Copyright 2015 Google Inc. All Rights Reserved. 
# 
# Licensed under the Apache License, Version 2.0 (the "License"); 
# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at 
# 
#  http://www.apache.org/licenses/LICENSE-2.0 
# 
# Unless required by applicable law or agreed to in writing, software 
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and 
# limitations under the License.
# ============================================================================== 

"""Evaluation for CIFAR-10 
Accuracy: 
cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs 
of data) as judged by cifar10_eval.py. 
Speed: 
On a single Tesla K40, cifar10_train.py processes a single batch of 128 images
in 0.25-0.35 sec (i.e. 350 - 600 images /sec). The model reaches ~86% 
accuracy after 100K steps in 8 hours of training time. 
Usage: 
Please see the tutorial and website for how to download the CIFAR-10 
data set, compile the program and train the model. 
http://tensorflow.org/tutorials/deep_cnn/ 
""" 
from __future__ import absolute_import 
from __future__ import division 
from __future__ import print_function 

from datetime import datetime 
import math 
import time 

import numpy as np 
import tensorflow as tf 
import os 
import StringIO 
import cv 
import cv2 
import urllib 


from PIL import Image 

import matplotlib 

import glob 

import cifar10 

cur_dir = os.getcwd() 

FLAGS = tf.app.flags.FLAGS 

tf.app.flags.DEFINE_string('eval_dir', '/tmp/cifar10_eval',
                           """Directory where to write event logs.""")
tf.app.flags.DEFINE_string('eval_data', 'test',
                           """Either 'test' or 'train_eval'.""")
tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/cifar10_train',
                           """Directory where to read model checkpoints.""")
tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
                            """How often to run the eval.""")
tf.app.flags.DEFINE_integer('num_examples', 128,
                            """Number of examples to run.""")
tf.app.flags.DEFINE_boolean('run_once', False,
                            """Whether to run eval only once.""")


def eval_once(saver, summary_writer, top_k_op, summary_op, images, labels, logits):
  """Run Eval once.

  Args:
    saver: Saver.
    summary_writer: Summary writer.
    top_k_op: Top K op.
    summary_op: Summary op.
  """
  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
      print('No checkpoint file found')
      return

    # Start the queue runners.
    coord = tf.train.Coordinator()
    try:
      threads = []
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      num_iter = int(math.ceil(FLAGS.num_examples/FLAGS.batch_size))
      true_count = 0  # Counts the number of correct predictions.
      total_sample_count = num_iter * FLAGS.batch_size
      step = 0

      while step < num_iter and not coord.should_stop():
        predictions = sess.run([top_k_op])
        true_count += np.sum(predictions)
        step += 1

        # Compute precision @ 1.
        precision = true_count/total_sample_count
        print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
        e = tf.nn.softmax(logits)
        log = sess.run(e)
        # print(log)
        predict = np.zeros([FLAGS.batch_size])
        max_logi = np.zeros([FLAGS.batch_size])

        for i in xrange(FLAGS.batch_size):
          predict[i] = np.argmax(log[i, :])
          max_logi[i] = log[i, :].max()

        lab = sess.run(labels)
        top = sess.run([top_k_op])
        predictions = sess.run([top_k_op])
        true_count = 0
        true_count += np.sum(predictions)
        # chk = sess.run(images)
        # print(top)
        for i in xrange(FLAGS.batch_size):
          # tf.cast(images, tf.uint8)
          img = sess.run(images)
          save_img = img[i, :]

          save_img = ((save_img - save_img.min())/(save_img.max() - save_img.min()) * 255)

          # save_img2 = Image.fromarray(save_img, "RGB")

          path = cur_dir + "/result/"

          if not os.path.exists(path):
            os.mkdir(path, 0755)
          if predictions[0][i] == True:
            path = path + "Correct/"
          else:
            path = path + "Incorrect/"

          if not os.path.exists(path):
            os.mkdir(path, 0755)
          class_fold = path + str(predict[i]) + "/"
          # class_fold = path + str(max_logi[i]) + "/"
          if not os.path.exists(path + str(predict[i]) + "/"):
            os.mkdir(class_fold, 0755)

          cv2.imwrite(os.path.join(class_fold, str(i) + ".jpeg"), save_img)

      summary = tf.Summary()
      summary.ParseFromString(sess.run(summary_op))
      summary.value.add(tag='Precision @ 1', simple_value=precision)
      summary_writer.add_summary(summary, global_step)
    except Exception as e:  # pylint: disable=broad-except
      coord.request_stop(e)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)


def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)
    true_count = 0
    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

    # while True:
    eval_once(saver, summary_writer, top_k_op, summary_op, images, labels, logits)
    #   if False:
    #     break
    #   time.sleep(FLAGS.eval_interval_secs)


def main(argv=None):  # pylint: disable=unused-argument
  cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.eval_dir):
    tf.gfile.DeleteRecursively(FLAGS.eval_dir)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  evaluate()


if __name__ == '__main__':
  tf.app.run()

Answer


This looks like a recurring issue in which code that imports the TensorFlow Python module conflicts with the OpenCV and/or PIL libraries. The root cause is usually an incompatible version of libjpeg or libpng bundled with those libraries.
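
One way to confirm an import-order conflict like this (not part of the original answer, just a diagnostic sketch that assumes cv2, PIL, and tensorflow are installed) is to bisect the imports in a tiny script and see which combination crashes:

# import_check.py -- diagnostic sketch, not from the original answer.
# Comment the imports in and out and rerun; the last line only prints
# if none of the import combinations triggered the segmentation fault.
import cv2               # OpenCV first
from PIL import Image    # then PIL
import tensorflow as tf  # TensorFlow last; swap the order to compare

print('all imports succeeded')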

In the latest nightly builds of TensorFlow, this problem should be fixed. As an alternative workaround, you can try moving the line:

import tensorflow as tf 

...below the import statements for cv, cv2, and PIL.
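
Applied to the script above, the workaround amounts to reordering the top of cifar10_eval.py roughly as follows (a sketch of the suggested reordering; the rest of the imports stay unchanged):

# Reordered imports for cifar10_eval.py -- a sketch of the suggested
# workaround: load cv/cv2 and PIL before TensorFlow so that a compatible
# libjpeg/libpng gets resolved first.
import cv
import cv2
from PIL import Image

import tensorflow as tf  # now imported after cv, cv2, and PIL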


There doesn't seem to be any error message any more. Thank you for your help! – RSBS