日期:2014-05-17 浏览次数:20714 次
import org.cyberneko.html.parsers.DOMParser; DOMParser parser = new DOMParser(); //下面这段代码缺少了</p></pre>标记 InputStream in = new ByteArrayInputStream("<pre><pre>Product Name:lady slipper </pre><pre>Model Number: 816</pre><pre>Size:36-41#</pre><p>Color: all colors available </p><pre>Place of Origin: China</pre><pre> </pre><pre>Feature:</pre><pre>1)UPPER: pvc</pre><pre>2)OUTSOLE:pvc</pre><pre>3)Suitable age:women</pre><p>Packing:polybag or according to your requests".getBytes()); InputSource source = new InputSource(in); //解析源 parser.parse(source); //得到解析完成的DOCUMENT Document doc = parser.getDocument(); // 获得将DOM文档转化为XML文件的转换器。 TransformerFactory tfactory = TransformerFactory.newInstance(); Transformer transformer = tfactory.newTransformer(); // 将DOM对象转化为DOMSource类对象 DOMSource dsource = new DOMSource(doc); StringWriter write = new StringWriter(); // 获得一个StreamResult类对象,该对象是DOM文档转化成的其他形式的文档的容器,可以是XML文件,文本文件,HTML文件。这里为一个XML文件 StreamResult result = new StreamResult(write); // 调用API,将DOM文档转化成XML文件 transformer.transform(dsource, result); System.out.println(write.getBuffer().toString());