2013-04-08 35 views
0

你好,我正在開發一個應用程序,通過掃描郵件並應用電子郵件過濾器來判斷郵件是否爲垃圾郵件。這是我的類(出現的異常:java.io.UnsupportedEncodingException):

import java.io.BufferedReader; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.IOException; 
import java.io.InputStreamReader; 
import java.util.ArrayList; 
import java.util.List; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern; 

import javax.mail.MessagingException; 
import javax.mail.internet.MimeMessage; 

import ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer; 

/**
 * Keyword based spam filter.
 *
 * <p>Each non-blank line of {@code SpamWords.txt} (resolved against the working
 * directory) is compiled as a regular expression. A message is scored by counting,
 * for every textual part returned by {@link EmailSplicer#getMessageContent}, how many
 * of those expressions occur somewhere in the part. The count is then mapped to a
 * confidence level by {@link #checkSpam(MimeMessage)}.
 */
public class MotsClesFilter implements EmailFilter {

    final String NAME = "Filtrage par mots cles";

    /** Number of (message part, spam expression) hits found by the last {@link #analyze} call. */
    private int nbOccMotSpam = 0;

    /** Confidence level computed by the last {@link #checkSpam} call. */
    private byte confidenceLevel;

    /**
     * @return the display name of this filter.
     */
    @Override
    public String getFilterName() {
        return this.NAME;
    }

    /**
     * Analyzes the message and converts the number of keyword hits into a confidence level.
     *
     * <p>Mapping: 0 hits gives 1, 1 hit gives {@code CANT_SAY} (constant not declared in this
     * class, presumably inherited from {@code EmailFilter}), 2 hits give 50, 3 hits give 70 and
     * 4 or more hits give 80.
     *
     * @param message the message to score.
     * @return the confidence level for this message.
     */
    @Override
    public byte checkSpam(MimeMessage message) {
        analyze(message);
        switch (this.nbOccMotSpam) {
        case 0:
            this.confidenceLevel = 1;
            break;
        case 1:
            this.confidenceLevel = CANT_SAY;
            break;
        case 2:
            this.confidenceLevel = 50;
            break;
        case 3:
            this.confidenceLevel = 70;
            break;
        default:
            // The count is never negative, so this branch means "4 or more". Without it, a
            // message with 5+ hits silently kept the level computed for the previous message.
            this.confidenceLevel = 80;
            break;
        }
        return getConfidenceLevel();
    }

    /**
     * Counts the spam expressions occurring in the message and stores the result for
     * {@link #checkSpam(MimeMessage)}.
     *
     * <p>The counter is reset on every call so that one filter instance can score several
     * messages. If the word list or the message cannot be read, the stack trace is printed and
     * the count stays at 0.
     *
     * @param message the message whose textual parts are scanned.
     */
    public void analyze(MimeMessage message) {
        // Reset first: the counter used to accumulate across messages.
        this.nbOccMotSpam = 0;
        try {
            List<Pattern> motifs = loadSpamPatterns();
            String[] parties = EmailSplicer.getMessageContent(message);
            int occurrences = 0;
            for (String partie : parties) {
                if (partie == null) {
                    continue;
                }
                for (Pattern motif : motifs) {
                    Matcher recherche = motif.matcher(partie);
                    // Each entry is a whole message part, not a single word, so the expression
                    // must be searched for inside it; matches() would require the entire part
                    // to be equal to the spam word.
                    if (recherche.find()) {
                        occurrences++;
                    }
                }
            }
            this.nbOccMotSpam = occurrences;
        } catch (FileNotFoundException e) {
            // Word list missing: the message is scored with 0 hits.
            e.printStackTrace();
        } catch (IOException e) {
            // Includes UnsupportedEncodingException raised while decoding the message.
            e.printStackTrace();
        } catch (MessagingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code SpamWords.txt} and compiles every non-blank line, once, as a regular
     * expression. Lines are trimmed; blank lines are skipped because an empty expression
     * would be found in every message.
     *
     * <p>The file is decoded with the platform default charset, as before.
     * A line that is not a valid regular expression makes {@link Pattern#compile(String)}
     * throw an unchecked exception.
     *
     * @return the compiled expressions, in file order.
     * @throws IOException if the file is missing or cannot be read.
     */
    private List<Pattern> loadSpamPatterns() throws IOException {
        List<Pattern> motifs = new ArrayList<Pattern>();
        BufferedReader lecteur = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File("SpamWords.txt"))));
        try {
            String ligne;
            // readLine() returning null is the end-of-file signal; ready() only reports
            // whether a read would block.
            while ((ligne = lecteur.readLine()) != null) {
                String mot = ligne.trim();
                if (mot.length() == 0) {
                    continue;
                }
                motifs.add(Pattern.compile(mot));
            }
        } finally {
            // The reader used to be leaked on every call.
            lecteur.close();
        }
        return motifs;
    }

    /**
     * @return the confidence level computed by the last {@link #checkSpam(MimeMessage)} call.
     */
    @Override
    public byte getConfidenceLevel() {
        return this.confidenceLevel;
    }

    /**
     * @return always {@code true}; this filter cannot be disabled.
     */
    @Override
    public boolean enabled() {
        return true;
    }
}

和這裏的EmailSplicer工具類我使用:

import java.io.IOException; 
import java.util.ArrayList; 

import javax.mail.MessagingException; 
import javax.mail.Multipart; 
import javax.mail.internet.MimeMessage; 

/** 
* Utility class to return all the content of a MimeMessage 
* @author Maxime Caumartin <[email protected]> 
*/ 
public class EmailSplicer { 

    /** 
    * Contains the types of email parts that can be analyzed by this class. 
    * @author Maxime Caumartin <[email protected]> 
    */ 
    private enum ContentTypes 
    { 
     Plain("text/plain"), HTML("text/html"), Multipart("multipart"), Unknown(
       "?"); 

     private String type; 

     ContentTypes(String type) 
     { 
      this.type = type; 
     } 

     public static ContentTypes getType(String type) 
     { 
      if (type.contains(Plain.type)) 
       return Plain; 
      if (type.contains(HTML.type)) 
       return HTML; 
      if (type.contains(Multipart.type)) 
       return Multipart; 
      return Unknown; 
     } 

    } 

    /** 
    * Recursive method that passes through all the parts of the Mutlipart message and returns an ArrayList<String> of the content of these parts. 
    * @param multiPartMsg The Multipart that needs to be dissected. 
    * @return The ArrayList<String> containing all the content of the Mutlipart message. 
    * @throws MessagingException Exception thrown if the analyzer cannot read the message. 
    * @throws IOException Exception thrown if the encoding type isn't valid. 
    */ 
    private static ArrayList<String> getMutlipartContent(Multipart multiPartMsg) 
      throws MessagingException, IOException 
    { 
     ArrayList<String> returnTable = new ArrayList<String>(
       multiPartMsg.getCount()); 

     for (int i = 0; i < multiPartMsg.getCount(); i++) 
     { 
      switch (ContentTypes.getType(multiPartMsg.getBodyPart(i) 
        .getContentType())) 
      { 
       case Plain: 
        returnTable.add((String) multiPartMsg.getBodyPart(i) 
          .getContent()); 
        break; 
       case HTML: 
        String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) multiPartMsg.getBodyPart(i) 
          .getContent()).trim(); 
        if (s.length() != 0) 
         returnTable.add(s); 
        break; 
       case Multipart: 
        returnTable 
          .addAll(getMutlipartContent((Multipart) multiPartMsg 
            .getBodyPart(i).getContent())); 
        break; 
       default: 
      } 
     } 
     return returnTable; 
    } 

    /** 
    * Returns all the content of the MimeMessage passed as a parameter. The whole content will be parsed. 
    * @param message The MimeMessage containing textual information. 
    * @return The array of string containing all the strings from the content of the message. 
    * @throws MessagingException Exception thrown if the analyzer cannot read the message. 
    * @throws IOException Exception thrown if the encoding type isn't valid. 
    */ 
    public static String[] getMessageContent(MimeMessage message) 
      throws MessagingException, IOException 
    { 
     String contentType = message.getContentType(); 

     switch (ContentTypes.getType(contentType)) 
     { 
      case Plain: 
       return new String[] { (String) message.getContent() }; 
      case Multipart: 
       return getMutlipartContent(
         (Multipart) message.getContent()).toArray(new String[0]); 
      case HTML: 
       String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) message 
         .getContent()).trim(); 
       if (s.length() != 0) 
        return new String[] {s}; 
      default: 
       return new String[0]; 
     } 

    } 

} 

現在,當我執行整個應用程序的主要方法,這是我收到的例外:

java.io.UnsupportedEncodingException: iso-0621-9 
at sun.nio.cs.StreamDecoder.forInputStreamReader(Unknown Source) 
at java.io.InputStreamReader.<init>(Unknown Source) 
at com.sun.mail.handlers.text_plain.getContent(text_plain.java:82) 
at javax.activation.DataSourceDataContentHandler.getContent(Unknown Source) 
at javax.activation.DataHandler.getContent(Unknown Source) 
at javax.mail.internet.MimeBodyPart.getContent(MimeBodyPart.java:629) 
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMutlipartContent(EmailSplicer.java:69) 
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMessageContent(EmailSplicer.java:101) 
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.analyze(MotsClesFilter.java:66) 
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.checkSpam(MotsClesFilter.java:34) 
at ca.etsmtl.logti.log619.lab05.Application.main(Application.java:107) 

有人可以啓發我如何解決它?

回答

2

This document列出了Java支持的編碼。

iso-0621-9不在列表中。

+0

鏈接似乎已失效,但這裏是 Java 1.6 的等效文檔:https://docs.oracle.com/javase/6/docs/technotes/guides/intl/encoding.doc.html – 2015-10-05 17:01:20

+0

發佈更新爲Java 6 – Aubin 2015-10-06 16:30:10

2

ISO-0621-9 不是一種編碼,至少它不受 Java 支持(supported by Java)。我猜這可能是垃圾郵件的一個很好的指標:沒有有效的編碼 => 垃圾郵件。

用 Google 稍微搜索一下 ISO 621,會發現 ISO 621 是一項國際標準:「錳礦石 - 金屬鐵含量的測定(金屬鐵含量不超過2%) - 磺基水楊酸光度法」。

我認爲這與計算機無關,更不用說與編碼有關了 ;)

+0

我應該在 EmailSplicer 中的哪裏捕獲這個異常? – user2133558 2013-04-08 20:17:25

+0

這取決於你想如何處理它。 'EmailSplicer'可能不是正確的地方。 – 2013-04-08 20:48:40

相關問題