日期:2014-05-16  浏览次数:20477 次

Java 抓取页面数据保存至数据库时出现乱码,怎么解决?
public class subject {
public static void getOutpatientService(String url) {
try {
Parser myParser = new Parser(url);
NodeList nodeList = null;
myParser.setEncoding("gbk");

NodeFilter tableFilter = new NodeClassFilter(TableTag.class);
OrFilter lastFilter = new OrFilter();
lastFilter.setPredicates(new NodeFilter[] { tableFilter });

NodeFilter linkFilter = new NodeClassFilter(LinkTag.class);

nodeList = myParser.parse(lastFilter);

int class1 = 0;
int class2 = 0;
int class3 = 0;

for (int i = 0; i <= nodeList.size(); i++) {
if (nodeList.elementAt(i) instanceof TableTag) {
TableTag tag = (TableTag) nodeList.elementAt(i);

String tableid = tag.getAttribute("id");
if (tableid != null
&& tableid
.trim()
.equals(
"_ctl0_cphMain_WucOPRegister_Step1_wucStep1_WucOPDepartmentList1_dgOPDepartment")) {
TableRow[] rows = tag.getRows();
System.out.println(rows.length);

for (int j = 0; j < rows.length; j++) {
TableRow tr = (TableRow) rows[j];

TableColumn[] td = tr.getColumns();


if (td.length == 5) {

class1 = insertIntoDb(td[0].toPlainTextString()
.trim(), "", 0);

class2 = insertIntoDb(td[1].toPlainTextString()
.trim(), "", class1);



//测试
       byte [] s=td[0].toPlainTextString().trim().getBytes();
       for (byte b : s) {
  System.err.println(b);
   }


for (int tdl = 2; tdl < 5; tdl++) {
NodeList aList = td[tdl].getChildren();
for (int aListcount = 0; aListcount < aList
.size(); aListcount++) {
if (aList.elementAt(aListcount) instanceof LinkTag) {
LinkTag linkTag = (LinkTag) aList
.elementAt(aListcount);
if (linkTag.getAttribute("href") != null) {
insertIntoDb(td[tdl]
.toPlainTextString()
.trim(), linkTag
.getAttribute("href")
.replace("&amp;", "&"),
class2);
}
}
}
}
}

if (td.length == 4) {
// 二类
class2 = insertIntoDb(td[0].toPlainTextString()
.trim(), "", class1);

for (int tdl = 1; tdl < 4; tdl++) {
NodeList aList = td[tdl].getChildren();
for (int aListcount = 0; aListcount < aList
.size(); aListcount++) {
if (aList.elementAt(aListcount) instanceof LinkTag) {
LinkTag linkTag = (LinkTag) aList
.elementAt(aListcount);

if (linkTag.getAttribute("href") != null) {
insertIntoDb(td[tdl]
.toPlainTextString()
.trim(), linkTag
.getAttribute("href")
.replace("&amp;", "&"),
class2);
}
}
}
}

}

if (td.length == 3) {
// 三类
for (int tdl = 0; tdl < 3; tdl++) {
NodeList aList = td[tdl].getChildren();
for (int aListcount = 0; aListcount < aList
.size(); aListcount++) {
if (aList.elementAt(aListcount) instanceof LinkTag) {
LinkTag linkTag = (LinkTag) aList.elementAt(aLis