用 Java 将 HTML 保存为 txt 文本时,怎样去掉输出中的 CSS 样式内容 body { font-family: SimSun; font-size:22px; ... }?
我编写了一个 Java 类,用来将一个 HTML 网页保存为 txt 文本。保存后的 txt 文本内容都正确,但是总是带着下面这段 CSS 样式:
body {
font-family: SimSun;
font-size:22px;
font-style:italic;
font-weight:bold;
color:#00F;
}
不知道该怎样去掉这段样式文本,请各位帮忙看看。
部分 Java 代码如下:
package format.conversion;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import
java.io.IOException;
import java.io.InputStreamReader;
import javax.servlet.jsp.tagext.BodyTag;
import javax.swing.JFileChooser;
import javax.swing.filechooser.FileNameExtensionFilter;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.nodes.TextNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.tags.StyleTag;
import org.htmlparser.tags.TitleTag;
import org.htmlparser.util.NodeList;
import
org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
public class HtmlToTxt {
/**
 * Program entry point: creates an {@code HtmlToTxt} instance and invokes
 * its {@code go()} method.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception declared by the signature; nothing in this method
 *         throws directly, so any exception would have to propagate
 *         from {@code go()}
 */
public static void main(String[] args) throws Exception {
    // No need to keep a reference: the instance is used for this one call only.
    new HtmlToTxt().go();
}
public void go(){
try{
JFileChooser fileSave=new JFileChooser(".");
FileNameExtensionFilter extension=new FileNameExtensionFilter("txt Files(.txt)","txt");
fileSave.setFileFilter(extension);
fileSave.showSaveDialog(null);
File file=fileSave.getSelectedFile();
if(!file.getPath().endsWith(".txt")){
file=new File(file.getPath()+".txt");
}
String outputFile =file.toString();