日期:2014-05-17 浏览次数:20778 次
import java.io.*;
import java.net.HttpURLConnection;
import jxl.*;
import java.net.*;
import java.nio.charset.Charset;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

/**
 * Small command-line helper that reads URLs out of an Excel workbook (via the
 * jxl library) and prints the body fetched from each URL to standard output.
 */
public class URLUtil {

    // None of these fields is read or written inside this class. They are kept
    // (package-private, as before) in case other code in the package uses them.
    static int x;
    static int y;
    static String ip = null;
    static String input1 = null;
    static String input2 = null;

    /** Workbook opened by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_WORKBOOK = "产品分类.map.xls";

    /** Zero-based index of the column the URLs are read from. */
    private static final int URL_COLUMN = 5;

    /** First row that is processed; row 0 is skipped (presumably a header row -- confirm). */
    private static final int FIRST_DATA_ROW = 1;

    /**
     * Downloads the resource at {@code urlString} and returns its body as text.
     *
     * <p>The body is read line by line and every line is terminated with
     * {@code "\n"} in the result. It is decoded with the charset named in the
     * response's {@code Content-Type} header when that charset is present and
     * supported, and with the platform default charset otherwise.
     *
     * @param urlString absolute HTTP(S) URL to fetch
     * @return the response body, or {@code null} if anything goes wrong
     *         (malformed or non-HTTP URL, connection failure, read error, ...);
     *         the stack trace of the failure is printed to standard error
     */
    public static String getHtml(String urlString) {
        HttpURLConnection conn = null;
        try {
            URL url = new URL(urlString);
            conn = (HttpURLConnection) url.openConnection();
            StringBuilder html = new StringBuilder();
            // try-with-resources closes the stream and reader even when reading fails.
            try (InputStream in = conn.getInputStream();
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(in, charsetOf(conn.getContentType())))) {
                String line;
                while ((line = br.readLine()) != null) {
                    html.append(line).append("\n");
                }
            }
            return html.toString();
        } catch (Exception e) {
            // Best-effort contract kept from the original: report and return null.
            e.printStackTrace();
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Extracts the charset from a {@code Content-Type} header value such as
     * {@code "text/html; charset=GBK"}.
     *
     * @param contentType header value, may be {@code null}
     * @return the declared charset, or the platform default charset when none
     *         is declared or the declared name is illegal or unsupported
     */
    private static Charset charsetOf(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, key, 0, key.length())) {
                    String name = param.substring(key.length()).trim();
                    if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                        name = name.substring(1, name.length() - 1);
                    }
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Illegal or unsupported charset name: use the fallback below.
                        break;
                    }
                }
            }
        }
        return Charset.defaultCharset();
    }

    /**
     * Opens a workbook, takes the first sheet, and for every row from
     * {@code FIRST_DATA_ROW} onward prints the result of {@link #getHtml(String)}
     * for the contents of the cell in column {@code URL_COLUMN}. A failed fetch
     * prints {@code null}.
     *
     * @param args optional; {@code args[0]} overrides the workbook path, which
     *             otherwise defaults to {@code DEFAULT_WORKBOOK}
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_WORKBOOK;
        Workbook book = null;
        try {
            book = Workbook.getWorkbook(new File(path));
            Sheet sheet = book.getSheet(0);
            for (int row = FIRST_DATA_ROW; row < sheet.getRows(); row++) {
                Cell cell = sheet.getCell(URL_COLUMN, row);
                System.out.println(getHtml(cell.getContents()));
            }
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            // Release the workbook even if opening the sheet or reading a row failed.
            if (book != null) {
                book.close();
            }
        }
    }
}
// Print the HTML of every <a> node that has an <li> parent.
// NOTE(review): "http://****" is a placeholder; substitute the real page address.
Parser parser = new Parser("http://****");
// Both conditions must hold: the tag name is "a" AND the node's parent matches tag name "li".
NodeFilter filter = new AndFilter(
        new TagNameFilter("a"),
        new HasParentFilter(new TagNameFilter("li")));
NodeList nodes = parser.extractAllNodesThatMatch(filter);
int resultNum = nodes.size();
if (resultNum > 0) {
    for (NodeIterator ni = nodes.elements(); ni.hasMoreNodes();) {
        System.out.println(ni.nextNode().toHtml());
    }
}