日期:2014-05-17  浏览次数:20580 次

Html parser 代码集锦 3
通过百度获取天气预报

通过百度获取天气预报
http://htmlparser.com.cn/post/20090917323.html

/**
 * Scrapes the weather box from a Baidu search-result page with HTMLParser and
 * downloads the weather icons it contains with Commons HttpClient.
 */
public class Getweather {

    /**
     * Baidu search URL used when no other URL is supplied. The query value is
     * the GB2312 URL-encoded form of the Chinese word for "weather".
     */
    private static final String DEFAULT_WEATHER_URL = "http://www.baidu.com/s?wd=%CC%EC%C6%F8";

    /** Value of the {@code class} attribute that marks the weather element. */
    private static final String WEATHER_NODE_CLASS = "al_tr";

    /** Directory the downloaded weather icons are written to. */
    private static final String IMAGE_DIR = "/home/weather/";

    /**
     * Prints the weather forecast found on a search-result page, one
     * space-separated token per line, to standard output.
     *
     * @param url page to parse; when {@code null} or blank the default Baidu
     *            weather search is used
     * @throws ParserException if the page cannot be fetched or parsed
     */
    public static void getWeather(String url) throws ParserException {
        String target = (url == null || url.trim().length() == 0) ? DEFAULT_WEATHER_URL : url;
        NodeList matches = findWeatherNodes(target);
        // Guard against a changed page layout: without a match elementAt(0) is not usable.
        if (matches == null || matches.size() == 0) {
            System.err.println("No element with class \"" + WEATHER_NODE_CLASS + "\" found at " + target);
            return;
        }
        String text = matches.elementAt(0).toPlainTextString().trim();
        for (String token : text.split(" ")) {
            // Consecutive spaces produce empty tokens; skip them.
            if (!"".equals(token)) {
                System.out.println(token);
            }
        }
    }

    /**
     * Finds every image inside the weather element of the default Baidu
     * weather search and downloads each one, naming the files by their index
     * (0.gif, 1.gif, ...).
     *
     * @throws ParserException if the page cannot be fetched or parsed
     */
    public static void getWeatherImage() throws ParserException {
        NodeList matches = findWeatherNodes(DEFAULT_WEATHER_URL);
        if (matches == null || matches.size() == 0) {
            System.err.println("No element with class \"" + WEATHER_NODE_CLASS + "\" found at "
                    + DEFAULT_WEATHER_URL);
            return;
        }
        NodeList children = matches.elementAt(0).getChildren();
        if (children == null) {
            // The weather element has no child nodes, hence no images.
            return;
        }
        // 'true' makes the search recurse into nested children.
        NodeList images = children.extractAllNodesThatMatch(new NodeClassFilter(ImageTag.class), true);
        for (int i = 0; i < images.size(); i++) {
            ImageTag image = (ImageTag) images.elementAt(i);
            DownLoadImg(image.getImageURL(), String.valueOf(i));
        }
    }

    /**
     * Downloads one weather image and stores it as
     * {@code /home/weather/<name>.gif}, overwriting an existing file.
     * Failures are reported on standard error and never propagate to the
     * caller, so one broken image does not abort the remaining downloads.
     *
     * @param url  address of the image to download
     * @param name file name (without extension) to store the image under
     */
    public static void DownLoadImg(String url, String name) {
        HttpClient client = new HttpClient();
        GetMethod request = new GetMethod(url);
        try {
            int status = client.executeMethod(request);
            if (status < 200 || status >= 300) {
                // Do not save an error page under a .gif name.
                System.err.println("Skipping " + url + ": HTTP status " + status);
                return;
            }
            byte[] body = request.getResponseBody();
            if (body == null) {
                System.err.println("Skipping " + url + ": empty response body");
                return;
            }
            File dir = new File(IMAGE_DIR);
            if (!dir.isDirectory() && !dir.mkdirs()) {
                System.err.println("Cannot create directory " + IMAGE_DIR);
                return;
            }
            // FileOutputStream creates the file itself; it is opened only once the body is available.
            FileOutputStream out = new FileOutputStream(new File(IMAGE_DIR + name + ".gif"));
            try {
                out.write(body);
            } finally {
                out.close();
            }
        } catch (Exception e) {
            // Best effort: report the failure instead of silently dropping it.
            System.err.println("Failed to download " + url + ": " + e);
        } finally {
            // Always hand the connection back to the connection manager.
            request.releaseConnection();
        }
    }

    /**
     * Entry point: downloads the weather icons from the default Baidu search.
     *
     * @param args unused
     * @throws UnsupportedEncodingException declared for compatibility with existing callers
     * @throws ParserException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws UnsupportedEncodingException, ParserException {
        getWeatherImage();
    }

    /** Returns all nodes of the given page whose class attribute marks the weather element. */
    private static NodeList findWeatherNodes(String url) throws ParserException {
        Parser parser = new Parser(url);
        NodeFilter filter = new HasAttributeFilter("class", WEATHER_NODE_CLASS);
        return parser.extractAllNodesThatMatch(filter);
    }

}




nekohtml 用法

// Using NekoHTML together with XPath: fetch a page, parse it into a DOM
// document, then address nodes in it with an XPath expression.
DOMParser parser = new DOMParser();      
    try {      
           // Set the default encoding used for the web page.
           parser.setProperty("http://cyberneko.org/html/properties/default-encoding","gb2312");      
           /*The Xerces HTML DOM implementation does not support namespaces     
           and cannot represent XHTML documents with namespace information.     
           Therefore, in order to use the default HTML DOM implementation with NekoHTML's     
           DOMParser to parse XHTML documents, you must turn off namespace processing.*/     
           parser.setFeature("http://xml.org/sax/features/namespaces", false);      
     
           String strURL = "http://product.dangdang.com/product.aspx?product_id=9317290";      
           // NOTE(review): the InputStreamReader is created without an explicit
           // charset, so the bytes are decoded with the platform default before
           // the parser sees them - confirm whether the gb2312 default-encoding
           // set above still has any effect for a character stream.
           BufferedReader in = new BufferedReader(      
                   new InputStreamReader(      
                           new URL(strURL).openStream()));      
           parser.parse(new InputSource(in));      
           // NOTE(review): close() is only reached when parse() succeeds; on an
           // exception the reader is left open.
           in.close();      
          } catch (Exception e) {      
           // Any fetch/parse failure is only printed; execution continues below.
           e.printStackTrace();      
          }      
          Document doc = parser.getDocument();      
          // tags should be in upper case      
          String productsXpath = "/HTML/BODY/DIV[2]/DIV[4]/DIV[2]/DIV/DIV[3]/UL[@class]/LI[9]";