2017-05-28 67 views
0

我正在使用下面的類來序列化和反序列化我的類。OptionalDataException沒有明顯的原因

import java.io.*; 

public final class Serialization { 

    public static void writeObject(Object obj, String path){ 
     try (ObjectOutputStream oos = 
        new ObjectOutputStream(new FileOutputStream(path))) { 

      oos.writeObject(obj); 
      //System.out.println("Done"); 
     } catch (Exception ex) { 
      ex.printStackTrace(); 
     } 
    } 


    public static Object readObject(String path){ 
     Object obj = null; 
     FileInputStream fin = null; 
     ObjectInputStream ois = null; 

     try { 
      fin = new FileInputStream(path); 
      ois = new ObjectInputStream(fin); 
      obj = ois.readObject(); 
     } catch (Exception ex) { 
      ex.printStackTrace(); 
     } finally { 
      if (fin != null) { 
       try { 
        fin.close(); 
       } catch (IOException e) { 
        e.printStackTrace(); 
       } 
      } 

      if (ois != null) { 
       try { 
        ois.close(); 
       } catch (IOException e) { 
        e.printStackTrace(); 
       } 
      } 
     } 

     return obj; 
    } 
} 

我有一個實現了Serializable接口的類:TextCategorizator。我正在嘗試使用這個類作爲分類模型。因此,序列化這個類的對象,我用

TextCategorizator tc = new TextCategorizator(trainingFiles, vecFile); 
Serialization.writeObject(tc, MODEL_PATH); 

,然後當我嘗試用

TextCategorizator model = (TextCategorizator) Serialization.readObject(MODEL_PATH); 

閱讀本序列化對象,我得到了下面的異常跟蹤:

java.io.OptionalDataException 
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1373) 
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373) 
at java.util.HashMap.readObject(HashMap.java:1402) 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
at java.lang.reflect.Method.invoke(Method.java:498) 
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058) 
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1909) 
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808) 
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353) 
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018) 
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942) 
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808) 
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353) 
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373) 
at utils.Serialization.readObject(Serialization.java:27) 
at Main.main(Main.java:33) 

引發異常的部分是:

obj = ois.readObject(); 

當我看到這個異常的reference page時,它說在這個異常中有兩個選項用eof標誌和長度變量表示。 我打印出來看看。這意味着,根據參考頁面,

嘗試讀取類可定義的readObject或readExternal方法消耗的數據的末尾。在這種情況下,OptionalDataException的eof字段被設置爲true,並且長度字段被設置爲0.

我之前使用過這些方法,但沒有遇到此異常。什麼是錯的,什麼是「讀過去」的意思?

編輯: TextCategorizator類是在這裏:

import utils.FileUtils; 

import java.io.File; 
import java.io.Serializable; 
import java.util.*; 
import java.util.stream.Collectors; 

public class TextCategorizator implements Serializable { 
    private Map<String, String> wordVectors; 
    private Map<File, List<List<Double>>> docVectors; 
    private Map<File, String> trainingFiles; 
    private Set<String> classes; 

    public TextCategorizator(Map<File, String> trainingFiles, String trainedVectors) { 
     wordVectors = new HashMap<>(); 
     docVectors = new HashMap<>(); 
     classes = new HashSet<>(); 

     this.trainingFiles = trainingFiles; 
     List<String> lines = FileUtils.readFileAsList(new File(trainedVectors)); 

     System.out.println("> Reading word vector file."); 
     lines.parallelStream().forEach(line -> { 
      String name = line.substring(0, line.indexOf(' ')); 
      wordVectors.put(name, line); 
     }); 

     train(trainingFiles); 
    } 

    private void train(Map<File, String> trainingFiles) { 
     System.out.println("> Starting training parallel."); 
     trainingFiles.entrySet().parallelStream().forEach(entry -> { 
      docVectors.put(entry.getKey(), getVectorsOfDoc(entry.getKey())); 
      classes.add(entry.getValue()); 
     }); 
    } 

    private List<List<Double>> getVectorsOfDoc(File doc) { 
     List<List<Double>> lists = new ArrayList<>(); 

     List<Double> resultVecAvg = new ArrayList<>(); 
     List<Double> resultVecMax = new ArrayList<>(); 
     List<Double> resultVecMin = new ArrayList<>(); 
     int vecSize = 100; 

     for (int i = 0; i < vecSize; i++) { 
      resultVecAvg.add(0.0); 
      resultVecMax.add(0.0); 
      resultVecMin.add(0.0); 
     } 

     String[] words = FileUtils.readWords(doc); 
     for (String word : words) { 
      String line = wordVectors.get(word); 
      if (line != null) { 
       List<Double> vec = new ArrayList<>(); 
       String[] tokens = line.split(" "); 
       for (int i = 1; i < tokens.length; i++) { 
        vec.add(Double.parseDouble(tokens[i])); 
       } 

       for (int i = 0; i < vec.size(); i++) { 
        resultVecAvg.set(i, resultVecAvg.get(i) + (vec.get(i)/vecSize)); 
        resultVecMax.set(i, Math.max(resultVecMax.get(i), vec.get(i))); 
        resultVecMin.set(i, Math.min(resultVecMin.get(i), vec.get(i))); 
       } 
      } 
     } 

     lists.add(resultVecAvg); lists.add(resultVecMax); lists.add(resultVecMin); 

     return lists; 
    } 

    private void getCosineSimilarities(List<Double> givenVec, int option, Map<File, Double> distances) { 
     for (Map.Entry<File, List<List<Double>>> entry : docVectors.entrySet()) { 
      List<Double> vec = null; 
      if (option == 1) // AVG 
       vec = entry.getValue().get(0); 
      else if (option == 2) // MAX 
       vec = entry.getValue().get(1); 
      else if (option == 3) // MIN 
       vec = entry.getValue().get(2); 

      distances.put(entry.getKey(), cosSimilarity(givenVec, vec)); 
     } 
    } 

    private double cosSimilarity(List<Double> vec1, List<Double> vec2) { 
     double norm1 = 0.0; 
     double norm2 = 0.0; 
     double dotProduct = 0.0; 
     for (int i = 0; i < vec1.size(); i++) { 
      norm1 += Math.pow(vec1.get(i), 2); 
      norm2 += Math.pow(vec2.get(i), 2); 
      dotProduct += vec1.get(i) * vec2.get(i); 
     } 
     return dotProduct/(Math.sqrt(norm1) * Math.sqrt(norm2)); 
    } 

    // from http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java 
    private <K, V extends Comparable<? super V>> Map<K, V> 
    sortByValue(Map<K, V> map, boolean reverse) { 
     return map.entrySet() 
       .stream() 
       .sorted((reverse ? 
         Map.Entry.comparingByValue(Collections.reverseOrder()) : 
         Map.Entry.comparingByValue())) 
       .collect(Collectors.toMap(
         Map.Entry::getKey, 
         Map.Entry::getValue, 
         (e1, e2) -> e1, 
         LinkedHashMap::new 
       )); 
    } 

    private int countClass(List<File> files, String c) { 
     int counter = 0; 
     for (File file : files) { 
      if (trainingFiles.get(file).equals(c)) 
       ++counter; 
     } 
     return counter; 
    } 

    public Map.Entry<String, Integer> classifyKnn(File file, int k, int option) { 
     List<List<Double>> vecs = getVectorsOfDoc(file); 
     List<Double> vec = getProperVector(vecs, option); 

     Map<File, Double> distances = new HashMap<>(); 

     getCosineSimilarities(vec, option, distances); 
     distances = sortByValue(distances, true); 

     List<File> sortedFiles = new ArrayList<>(distances.keySet()); 
     sortedFiles = sortedFiles.subList(0, k); 
     Map<String, Integer> counts = new HashMap<>(); 
     for (String category : classes) { 
      counts.put(category, countClass(sortedFiles, category)); 
     } 

     ArrayList<Map.Entry<String, Integer>> resultList = 
       new ArrayList(sortByValue(counts, true).entrySet()); 

     return resultList.get(0); 
    } 

    private List<Double> getProperVector(List<List<Double>> lists, int option) { 
     List<Double> vec = null; 
     if (option == 1) // AVG 
      vec = lists.get(0); 
     else if (option == 2) // MAX 
      vec = lists.get(1); 
     else if (option == 3) // MIN 
      vec = lists.get(2); 

     return vec; 
    } 

    public Map.Entry<String, Double> classifyRocchio(File file, int option) { 
     List<List<Double>> vecs = getVectorsOfDoc(file); 
     List<Double> vec = getProperVector(vecs, option); 

     Map<File, Double> distances = new HashMap<>(); 
     getCosineSimilarities(vec, option, distances); 

     distances = sortByValue(distances, true); 
     List<Map.Entry<File, Double>> sortedFiles = 
       new ArrayList<>(distances.entrySet()); 

     return new AbstractMap.SimpleEntry<> 
       (trainingFiles.get(sortedFiles.get(0).getKey()), 
         sortedFiles.get(0).getValue()); 
    } 
} 
+0

發佈您的'TextCategorizator'代碼。 – EJP

+0

@EJP我添加了TextCategorizator類的鏈接。 – eytireyup

+1

不可接受。張貼在這裏。 – EJP

回答

1

最後我得到了這個工作。問題是我試圖序列化和反序列化的對象的大小(即wordVectors是480 MB)。

爲了解決這個問題,我使用了同步映射。所以

wordVectors = new HashMap<>(); 

改爲

wordVectors = Collections.synchronizedMap(new HashMap<>()); 

我從here的想法。

+1

問題是缺乏同步。看到你自己的鏈接。 – EJP

+0

是的。這很難找到,因爲我之前序列化小hashmaps所以這個例外從未拋出。如果對象很大,我不知道應該同步。謝謝。 – eytireyup

相關問題