日期:2014-05-17  浏览次数:20778 次

下面代码,已经实现网页抓取,但是我想用indexOf选取其中的代码
Java code

import java.io.*;
import java.net.HttpURLConnection;
import jxl.*;
import java.net.*;

import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

/**
 * Small utility that downloads the text of web pages.
 *
 * <p>{@link #main(String[])} reads URLs from a spreadsheet via the jxl API and
 * prints the downloaded text of each one to standard output.
 */
public class URLUtil {
    // These static fields are not used anywhere in this class; they are kept
    // because code outside this file may still reference them.
    static int x;
    static int y;
    static String ip=null;
    static String input1=null;
    static String input2=null;

    /**
     * Downloads the resource at {@code urlString} and returns its text.
     *
     * <p>The response is read line by line and every line is terminated with
     * {@code "\n"} in the returned string. The bytes are decoded with the
     * platform default charset.
     * NOTE(review): pages served in a different encoding will come back
     * garbled — consider honouring the Content-Type charset.
     *
     * @param urlString the URL to fetch
     * @return the page text, or {@code null} if anything goes wrong (malformed
     *         URL, a URL that does not open an {@link HttpURLConnection}, or an
     *         I/O error); the exception's stack trace is printed in that case
     */
    public static String getHtml(String urlString){
        HttpURLConnection conn = null;
        try {
            URL url = new URL(urlString);
            conn = (HttpURLConnection) url.openConnection();
            StringBuilder html = new StringBuilder();
            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() throws part-way through the response.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(conn.getInputStream()))) {
                String line;
                while ((line = br.readLine()) != null) {
                    html.append(line).append("\n");
                }
            }
            return html.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Release the connection on both the success and the failure path.
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Reads the workbook "产品分类.map.xls", and for every row after the first
     * fetches the URL stored in column index 5 of the first sheet and prints
     * the downloaded text (or {@code null} when the fetch failed).
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args){
        Workbook book = null;
        try {
            book = Workbook.getWorkbook(new File("产品分类.map.xls"));
            Sheet sheet = book.getSheet(0);
            // Starts at row 1; presumably row 0 is a header row — TODO confirm.
            for (int i = 1; i < sheet.getRows(); i++) {
                Cell c = sheet.getCell(5, i);
                System.out.println(URLUtil.getHtml(c.getContents()));
            }
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            // Close the workbook even if reading a cell or fetching a page failed.
            if (book != null) {
                book.close();
            }
        }
    }
}

    
    




坐等高人指教!!!小弟不胜感激

------解决方案--------------------
Java code
 
// Extract every <a> tag matched by the filter from the page and print its HTML.
// NOTE(review): the class names look like the HTML Parser library
// (org.htmlparser) — confirm the dependency before use.
Parser parser = new Parser("http://****");
// Match nodes that satisfy both conditions: tag name "a" AND a parent tag named "li".
NodeFilter filter = new AndFilter(new TagNameFilter("a"), new HasParentFilter(new TagNameFilter("li")));

NodeList nodes = parser.extractAllNodesThatMatch(filter);
int resultNum = nodes.size();
// Fixed: the original tested the undeclared name "resulNum", which does not compile.
if (resultNum > 0) {
    for (NodeIterator ni = nodes.elements(); ni.hasMoreNodes();) {
        System.out.println(ni.nextNode().toHtml());
    }
}