日期:2014-05-17 浏览次数:20866 次
通过 jsoup 解析页面 HTML，获取优酷页面视频列表
作者: javaboy2012
Email:yanek@163.com
qq: 1046011462
代码如下:
package com.yanek;

import java.io.IOException;
import java.util.HashMap;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line tool that downloads a Youku page with jsoup and prints the
 * video links found on it.
 */
public class Tool {

    /**
     * Page that is fetched and scanned for video links. Other Youku pages,
     * e.g. http://movie.youku.com, can be substituted here.
     */
    private static final String START_URL = "http://www.youku.com";

    /** Scheme-less prefix shared by the video page URLs that are reported. */
    private static final String VIDEO_URL_PREFIX = "://v.youku.com/v_show";

    /**
     * Fetches {@link #START_URL}, prints one {@code <url>-<link text>} line
     * for every distinct video link that has text of its own, and finally
     * prints {@code total:<count>}.
     *
     * <p>If the page cannot be fetched, a message is written to standard
     * error and nothing else is printed.
     *
     * @param args command-line arguments; ignored
     */
    public static void main(String[] args) {
        // Maps each reported video URL to its link text; the keys double as
        // the "already printed" set so every URL is listed only once.
        HashMap<String, String> seen = new HashMap<String, String>();

        try {
            Document doc = Jsoup.connect(START_URL).get();
            Elements links = doc.select("a[href]");

            for (Element link : links) {
                // Skip anchors with no text of their own (e.g. ones that
                // only wrap an image): there is no title to print for them.
                String title = link.ownText();
                if (title.length() == 0) {
                    continue;
                }

                // "abs:" makes jsoup resolve the href against the page URL.
                String videoUrl = link.attr("abs:href");
                if (!isVideoPage(videoUrl) || seen.containsKey(videoUrl)) {
                    continue;
                }

                System.out.println(videoUrl + "-" + title);
                seen.put(videoUrl, title);
            }

            System.out.println("total:" + seen.size());
        } catch (IOException e) {
            System.err.println("Failed to fetch " + START_URL + ": " + e);
        }
    }

    /**
     * Tells whether {@code url} points at a Youku video page. Both the
     * {@code http} and {@code https} schemes are accepted so that links are
     * still recognised when the site serves them over HTTPS.
     */
    private static boolean isVideoPage(String url) {
        return url.startsWith("http" + VIDEO_URL_PREFIX)
                || url.startsWith("https" + VIDEO_URL_PREFIX);
    }
}