日期:2014-05-17 浏览次数:21174 次
import java.io.*;
import java.net.HttpURLConnection;
import jxl.*;
import java.net.*;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
/**
 * Small command-line helper that downloads pages over HTTP.
 *
 * <p>{@link #getHtml(String)} fetches one URL as text; {@link #main(String[])} reads addresses
 * from the first sheet of a spreadsheet and prints every fetched page to standard output.
 */
public class URLUtil {
    // NOTE(review): none of these static fields is read or written inside this class; they are
    // kept unchanged because code outside this file may reference them.
    static int x;
    static int y;
    static String ip = null;
    static String input1 = null;
    static String input2 = null;

    /**
     * Downloads the resource at {@code urlString} and returns its body as text.
     *
     * <p>Every line of the response is appended followed by a single {@code "\n"}, so the result
     * always ends with a newline when the body is non-empty and original line terminators are
     * normalised. The body is decoded with the charset named in the response's
     * {@code Content-Type} header when one is present and supported, otherwise with the platform
     * default charset.
     *
     * @param urlString absolute HTTP or HTTPS URL to fetch
     * @return the response body, or {@code null} if anything goes wrong (malformed URL, non-HTTP
     *     URL, I/O failure, error response); the stack trace is printed to standard error
     */
    public static String getHtml(String urlString) {
        HttpURLConnection conn = null;
        try {
            URL url = new URL(urlString);
            conn = (HttpURLConnection) url.openConnection();
            java.nio.charset.Charset charset = charsetOf(conn.getContentType());
            StringBuilder html = new StringBuilder();
            // try-with-resources closes the reader (and the wrapped stream) even when a read
            // fails half-way through the body.
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream(), charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    html.append(line).append("\n");
                }
            }
            return html.toString();
        } catch (Exception e) {
            // Broad catch kept on purpose: callers rely on a null result instead of an exception.
            e.printStackTrace();
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Picks the charset to decode a response with.
     *
     * @param contentType value of the {@code Content-Type} header, may be {@code null}
     * @return the charset named by the header's {@code charset=} parameter, or the platform
     *     default charset when the header is missing, has no such parameter, or names a charset
     *     this JVM does not know
     */
    private static java.nio.charset.Charset charsetOf(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!param.regionMatches(true, 0, key, 0, key.length())) {
                    continue;
                }
                String name = param.substring(key.length()).trim();
                // The parameter value may be quoted, e.g. charset="utf-8".
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    return java.nio.charset.Charset.forName(name);
                } catch (IllegalArgumentException unknownCharset) {
                    // Illegal or unsupported charset name: fall back to the default below.
                    break;
                }
            }
        }
        return java.nio.charset.Charset.defaultCharset();
    }

    /**
     * Opens the workbook {@code 产品分类.map.xls} from the working directory and, for every row of
     * its first sheet except row 0, fetches the address stored in column index 5 and prints the
     * result ({@code null} is printed for addresses that could not be fetched).
     *
     * <p>Any failure while reading the workbook is printed to standard output and ends the run.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Workbook book = null;
        try {
            book = Workbook.getWorkbook(new File("产品分类.map.xls"));
            Sheet sheet = book.getSheet(0);
            int rowCount = sheet.getRows();
            // Row 0 is skipped — presumably a header row; confirm against the spreadsheet.
            for (int row = 1; row < rowCount; row++) {
                String address = sheet.getCell(5, row).getContents();
                System.out.println(URLUtil.getHtml(address));
            }
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            // Release the workbook even when reading a row failed.
            if (book != null) {
                book.close();
            }
        }
    }
}
// Standalone example: fetch a page and print the HTML of every <a> element whose parent is an
// <li> element.
// NOTE(review): Parser, NodeFilter, AndFilter, TagNameFilter, HasParentFilter, NodeList and
// NodeIterator are not imported in the visible part of this file; they look like the HTML Parser
// (org.htmlparser) API — confirm. These statements also sit outside any class or method and have
// to be moved into a method body before they can compile.
Parser parser = new Parser("http://****");
NodeFilter filter = new AndFilter(new TagNameFilter("a"), new HasParentFilter(new TagNameFilter("li")));
NodeList nodes = parser.extractAllNodesThatMatch(filter);
int resultNum = nodes.size();
// Fixed: the original condition tested the undeclared name "resulNum".
if (resultNum > 0) {
    for (NodeIterator ni = nodes.elements(); ni.hasMoreNodes();) {
        System.out.println(ni.nextNode().toHtml());
    }
}