跪求解决 !!!利用Htmlparser抓取网页正文时出错,求教大神解决!
跪求解决 !!!
利用Htmlparser抓取网页正文时出错,求教大神解决!
/** The main text of a page usually sits inside a TABLE, DIV or ParagraphTag, so the DIV or TABLE that contains the most text is usually the main content. **/
// Record of a table's validity
/**
 * Record of a table's validity: a plain mutable bean holding five integer
 * counters. Every counter starts at 0 and is only changed through its setter;
 * no validation is applied to the values.
 *
 * NOTE(review): the counter meanings below are inferred from the field names
 * only (rows, cells, links, text, scripts) - confirm against the code that
 * fills this bean.
 */
public class TableValid {

    /** Presumably the number of table rows (tr) - confirm with caller. */
    private int trnum;

    /** Presumably the number of table cells (td) - confirm with caller. */
    private int tdnum;

    /** Presumably the number of links - confirm with caller. */
    private int linknum;

    /** Presumably the amount of text - confirm with caller. */
    private int textnum;

    /** Presumably the number of script elements - confirm with caller. */
    private int scriptnum;

    /** @return the current value of the {@code trnum} counter */
    public int getTrnum() {
        return trnum;
    }

    /** @param trnum new value of the {@code trnum} counter */
    public void setTrnum(int trnum) {
        this.trnum = trnum;
    }

    /** @return the current value of the {@code tdnum} counter */
    public int getTdnum() {
        return tdnum;
    }

    /** @param tdnum new value of the {@code tdnum} counter */
    public void setTdnum(int tdnum) {
        this.tdnum = tdnum;
    }

    /** @return the current value of the {@code linknum} counter */
    public int getLinknum() {
        return linknum;
    }

    /** @param linknum new value of the {@code linknum} counter */
    public void setLinknum(int linknum) {
        this.linknum = linknum;
    }

    /** @return the current value of the {@code textnum} counter */
    public int getTextnum() {
        return textnum;
    }

    /** @param textnum new value of the {@code textnum} counter */
    public void setTextnum(int textnum) {
        this.textnum = textnum;
    }

    /** @return the current value of the {@code scriptnum} counter */
    public int getScriptnum() {
        return scriptnum;
    }

    /** @param scriptnum new value of the {@code scriptnum} counter */
    public void setScriptnum(int scriptnum) {
        this.scriptnum = scriptnum;
    }
}
// Content of a table
import java.util.List;
/**
 * Content of a table: a plain mutable bean bundling a link list, a text
 * buffer, two row counters and a string marker. Object fields start as
 * {@code null} and int fields as 0; setters store the given reference or
 * value as-is, without copying or validation.
 *
 * NOTE(review): what each field represents is inferred from its name only -
 * confirm against the code that populates this bean.
 */
public class TableContext {

    /** Links associated with the table; element type is not fixed here. */
    private List<?> linkList;

    /** Text associated with the table. */
    private StringBuffer textBuffer;

    /** Presumably the row count of this table - confirm with caller. */
    private int tableRow;

    /** Presumably an overall row count - confirm with caller. */
    private int totalRow;

    /** Marker string; its meaning is not visible in this class. */
    private String sign;

    /** @return the stored link list, or {@code null} if none was set */
    public List<?> getLinkList() {
        return linkList;
    }

    /** @param linkList list to store; the reference is kept, not copied */
    public void setLinkList(List<?> linkList) {
        this.linkList = linkList;
    }

    /** @return the stored text buffer, or {@code null} if none was set */
    public StringBuffer getTextBuffer() {
        return textBuffer;
    }

    /** @param textBuffer buffer to store; the reference is kept, not copied */
    public void setTextBuffer(StringBuffer textBuffer) {
        this.textBuffer = textBuffer;
    }

    /** @return the current {@code tableRow} value */
    public int getTableRow() {
        return tableRow;
    }

    /** @param tableRow new {@code tableRow} value */
    public void setTableRow(int tableRow) {
        this.tableRow = tableRow;
    }

    /** @return the current {@code totalRow} value */
    public int getTotalRow() {
        return totalRow;
    }

    /** @param totalRow new {@code totalRow} value */
    public void setTotalRow(int totalRow) {
        this.totalRow = totalRow;
    }

    /** @return the stored marker string, or {@code null} if none was set */
    public String getSign() {
        return sign;
    }

    /** @param sign marker string to store */
    public void setSign(String sign) {
        this.sign = sign;
    }
}
// Record of a column's validity
public class TableColumnValid {
int tdNum;
boolean valid;