2012-03-06 60 views
3

我想借助 Java 的 HTMLEditorKit 檢索 TITLE 屬性。這是我寫的程式碼,但它一直返回「null」,Eclipse 的檢查器(inspector)也幫不上什麼忙!如何通過 HTMLEditorKit 檢索 HTML 的 TITLE?

import java.io.FileReader; 
import java.io.InputStream; 
import java.io.InputStreamReader; 
import java.io.Reader; 
import java.net.URL; 
import javax.swing.text.MutableAttributeSet; 
import javax.swing.text.html.HTML; 
import javax.swing.text.html.HTMLEditorKit; 
import javax.swing.text.html.parser.ParserDelegator; 
/**
 * Entry point that parses a local HTML file and forwards the parse events to a
 * {@code LinkPage} callback.
 */
public class testHTML
{
    /**
     * Parses the HTML file at the hard-coded path with a {@link ParserDelegator}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String args[]) throws Exception
    {
        // try-with-resources closes the file even when parsing throws.
        try (Reader reader = new FileReader("C:\\wamp\\www\\t\\index.html"))
        {
            // Third argument: ignore any charset declared inside the document.
            new ParserDelegator().parse(reader, new LinkPage(), true);
        }
    }
}
/**
 * Parser callback that prints the text of the document's {@code <title>} element.
 *
 * <p>The title is character data between the start and end tags, not an
 * attribute of the tag, so looking up {@code HTML.Attribute.TITLE} in the tag's
 * attribute set yields {@code null}. The parser reports that character data
 * through {@link #handleText(char[], int)}, so this callback remembers when a
 * TITLE element is open and collects the text it receives until the end tag.
 */
class LinkPage extends HTMLEditorKit.ParserCallback
{
    /** True while the parser is between {@code <title>} and {@code </title>}. */
    private boolean inTitle = false;

    /** Characters collected for the title currently being parsed. */
    private final StringBuilder titleText = new StringBuilder();

    /**
     * Starts collecting text when a TITLE element opens.
     *
     * @param t   the tag that was opened
     * @param a   the attributes of the tag (unused)
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos)
    {
        if (t == HTML.Tag.TITLE)
        {
            inTitle = true;
            titleText.setLength(0); // discard anything left from an earlier title
        }
    }

    /**
     * Collects character data while a TITLE element is open; other text is ignored.
     *
     * @param data the characters reported by the parser
     * @param pos  the position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] data, int pos)
    {
        if (inTitle)
        {
            titleText.append(data);
        }
    }

    /**
     * Prints the collected title when the TITLE element closes.
     *
     * @param t   the tag that was closed
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos)
    {
        if (inTitle && t == HTML.Tag.TITLE)
        {
            System.out.println(titleText);
            inTitle = false;
        }
    }
}

和HTML的內容是:

<html> 
<head> 
<title>test</title> 
</head> 
<body> 
test 
<a href="http://localhost/t/1.html">link1</a> 
sdf 
<a href="http://localhost/t/2.html">link2</a> 
sdf 
<a href="http://localhost/t/1.html">link3</a> 
sdf 
<a href="http://localhost/t/2.html">link3</a> 
</body> 
</html> 

PS:我知道可以用 XPath、正則表達式或其他第三方組件以簡單的方式檢索 HTML 屬性(attributes),但我也想學習這種比較難的方法。

+2

您可以發佈SSCCE嗎? – StanislavL 2012-03-06 09:44:52

+0

編輯.......... – 2012-03-06 09:53:39

回答

6
import javax.swing.text.MutableAttributeSet; 
import javax.swing.text.html.HTML; 
import javax.swing.text.html.HTMLEditorKit; 
import javax.swing.text.html.parser.ParserDelegator; 
import java.io.Reader; 
import java.io.StringReader; 

/**
 * Self-contained demo that parses an in-memory HTML document and forwards the
 * parse events to a {@code LinkPage} callback.
 */
public class Test2 {
    /** Sample HTML document that {@link #main(String[])} feeds to the parser. */
    public static final String content = "<html> \n" +
      "<head> \n" +
      "<title>test</title> \n" +
      "</head> \n" +
      "<body> \n" +
      "test \n" +
      "<a href=\"http://localhost/t/1.html\">link1</a> \n" +
      "sdf \n" +
      "<a href=\"http://localhost/t/2.html\">link2</a> \n" +
      "sdf \n" +
      "<a href=\"http://localhost/t/1.html\">link3</a> \n" +
      "sdf \n" +
      "<a href=\"http://localhost/t/2.html\">link3</a> \n" +
      "</body> \n" +
      "</html> ";

    /**
     * Runs the Swing HTML parser over {@link #content}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if reading the document fails
     */
    public static void main(String[] args) throws Exception {
        final ParserDelegator delegator = new ParserDelegator();
        final Reader source = new StringReader(content);
        // Third argument: ignore any charset declared inside the document.
        delegator.parse(source, new LinkPage(), true);
    }
}

/**
 * Parser callback that prints the text content of each {@code <title>} element.
 *
 * <p>The title is built from the characters the parser passes to
 * {@link #handleText(char[], int)} while a TITLE element is open. It does not
 * slice the original source by parser offsets, so it works for any input
 * (string, file, stream) and an empty title prints an empty line instead of
 * raw markup.
 */
class LinkPage extends HTMLEditorKit.ParserCallback {
    /** Text collected for the open TITLE element; {@code null} when none is open. */
    private StringBuilder title = null;

    /**
     * Appends character data to the title while a TITLE element is open.
     *
     * @param data the characters reported by the parser
     * @param pos  the position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] data, int pos) {
        if (title != null) {
            title.append(data);
        }
    }

    /**
     * Prints the collected title when the TITLE element closes.
     *
     * @param t   the tag that was closed
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos) {
        super.handleEndTag(t, pos);
        // The null guard ignores a stray </title> that had no matching start tag.
        if (t == HTML.Tag.TITLE && title != null) {
            System.out.println(title);
            title = null;
        }
    }

    /**
     * Begins collecting text when a TITLE element opens.
     *
     * @param t   the tag that was opened
     * @param a   the attributes of the tag (unused)
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        super.handleStartTag(t, a, pos);
        if (t == HTML.Tag.TITLE) {
            title = new StringBuilder();
        }
    }
}