日期:2014-05-16  浏览次数:20357 次

jsoup java 抓取百度MP3 top500 ----冰之龙代码
冰之龙原创 冰之龙代码。因为正则表达式不支持中文,所以在 MP3 下载列表中没有包含中文链接。要包含中文链接也很容易,自己抓取 http MP3 地址后用字符串查找即可。jsoup 下载地址为 [url=http://jsoup.org/download]http://jsoup.org/download[/url]。代码:
1 楼 zzjb011 2012-04-12  
package soso.tool;

import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
//import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/*
* jsoup java 抓取百度MP3  top500 ----冰之龙代码
* 冰之龙原创 冰之龙代码
* 时间 2012-4-12 1:10:49
* */

public class MyUrls {

/**
 * Creates a scraper bound to a single start page.
 *
 * @param url address of the page to scrape; it is stored in {@code startUrl}
 *            and later handed to {@code Jsoup.connect} by {@link #getUrlContent()}
 */
public MyUrls(String url) {
    this.startUrl = url;
}

// Address of the page to scrape; assigned once by the constructor and read by getUrlContent().
String startUrl;

// Parsed page; assigned by getUrlContent() and stays null until that call succeeds.
Document doc;
// Elements matching "a[href]" in doc; assigned by initElements().
Elements links;
// Elements matching "[src]" in doc; assigned by initElements().
Elements media;
// Elements matching "link[href]" in doc; assigned by initElements().
Elements imports;
// URL/title pair for every entry of links; rebuilt by initUrlsLinksArrayList().
ArrayList<UrlAndTitle> al = new ArrayList<UrlAndTitle>();
// Song entries derived from al; appended to by initEverySongSoSoPara().
ArrayList<SongInfo> songInfolList = new ArrayList<MyUrls.SongInfo>();

/**
 * Downloads and parses the page at {@code startUrl}, storing the result in {@code doc}.
 *
 * <p>An {@link IOException} raised by the fetch is not propagated: its message is
 * printed to standard output and the method reports failure through its return value.
 * In that case {@code doc} keeps whatever value it had before the call.
 *
 * @return {@code true} if the page was fetched and stored, {@code false} if the
 *         fetch failed with an {@link IOException}
 */
public boolean getUrlContent() {
    boolean fetched;
    try {
        doc = Jsoup.connect(startUrl).get();
        fetched = true;
    } catch (IOException fetchError) {
        System.out.println(fetchError.getMessage());
        fetched = false;
    }
    return fetched;
}

/**
 * Runs the three CSS selectors against the parsed page and caches the results
 * in {@code links}, {@code media} and {@code imports}.
 *
 * <p>Reads {@code doc}, so {@link #getUrlContent()} must have succeeded first;
 * with {@code doc} still {@code null} this method throws a {@link NullPointerException}.
 */
public void initElements() {
    final Document page = doc;
    links = page.select("a[href]");      // anchors that carry an href
    media = page.select("[src]");        // any element that carries a src attribute
    imports = page.select("link[href]"); // <link> elements that carry an href
}

/**
 * Prints every entry of {@code al} to standard output: a title line, a URL line
 * and a blank separator line per entry, followed by one summary line with the
 * number of entries printed.
 */
public void UrlsLinksArrayListDisplay() {
    for (UrlAndTitle entry : al) {
        System.out.println("标题:" + entry.title);

        System.out.println("网址:" + entry.myURL);
        System.out.println();
    }
    // Every entry is printed exactly once, so the printed total equals the list size.
    final int total = al.size();
    System.out.println("共有" + total + "个符合结果");
}

/**
 * Rebuilds {@code al} from {@code links}.
 *
 * <p>The list is emptied first; then one {@code UrlAndTitle} is appended per link,
 * holding the link's {@code abs:href} attribute value and its text shortened to at
 * most 350 characters by {@link #trim(String, int)}.
 *
 * <p>Reads {@code links}, so {@link #initElements()} must have run first.
 */
public void initUrlsLinksArrayList() {
    final int maxTitleLength = 35 * 10;

    al.clear();
    for (Element anchor : links) {
        UrlAndTitle entry = new UrlAndTitle();
        entry.myURL = anchor.attr("abs:href");
        entry.title = trim(anchor.text(), maxTitleLength);
        al.add(entry);
    }
}

/**
 * Shortens a string so that it is at most {@code width} characters long.
 *
 * @param s     text to shorten; must not be {@code null}
 * @param width maximum length of the result
 * @return {@code s} itself when it already fits; otherwise its first
 *         {@code width - 1} characters followed by a single {@code "."}
 */
private static String trim(String s, int width) {
    final boolean fits = s.length() <= width;
    return fits ? s : s.substring(0, width - 1) + ".";
}

/**
 * Derives song entries from the collected links and appends them to
 * {@code songInfolList}.
 *
 * <p>Only links whose URL contains the Baidu MP3 top-list search prefix are
 * considered. For those, the text between {@code "&word="} and the following
 * {@code "&lm="} is taken as the raw query word. If the word contains a
 * {@code '+'}, it is split into a song name (first part) and an artist (second
 * part, when present); otherwise the whole word is the song name. The word is
 * stored as found in the URL; no URL decoding is performed.
 *
 * <p>Links that match the prefix but lack either marker are skipped instead of
 * raising {@link StringIndexOutOfBoundsException} or yielding a bogus name. A
 * word made up only of separators produces an entry with an empty name instead
 * of raising {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>The target list is not cleared, so repeated calls accumulate entries.
 */
public void initEverySongSoSoPara() {
    final String linkPrefix = "http://mp3.baidu.com/m?rf=top-index&tn=baidump";
    final String wordMarker = "&word=";
    final String endMarker = "&lm=";

    for (UrlAndTitle uat : al) {
        final String url = uat.myURL;
        if (!url.contains(linkPrefix)) {
            continue;
        }

        // Locate the query word; both markers must be present, in this order.
        final int markerAt = url.indexOf(wordMarker);
        if (markerAt < 0) {
            continue;
        }
        final int begin = markerAt + wordMarker.length();
        final int end = url.indexOf(endMarker, begin);
        if (end < 0) {
            continue;
        }
        final String song = url.substring(begin, end);

        SongInfo songInfo = new SongInfo();
        if (song.contains("+")) {
            // '+' separates name and artist. Mapping it to ':' before splitting
            // means a literal ':' in the word acts as a separator as well.
            String[] parts = song.replace('+', ':').split(":");
            // split() drops trailing empty strings, so a separators-only word
            // yields an empty array.
            songInfo.name = parts.length > 0 ? parts[0] : "";
            if (parts.length > 1) {
                songInfo.actor = parts[1];
            }
        } else {
            songInfo.name = song;
        }
        songInfo.sosoList = url;
        songInfolList.add(songInfo);
    }
}

/**
 * Prints every entry of {@code songInfolList} to standard output.
 *
 * <p>Each entry is written as a 1-based ordinal line, followed by the song name,
 * the artist, the source URL and a blank separator line. A final summary line
 * reports how many entries were printed.
 */
public void SongInfoArrayListDisplay() {
    final int total = songInfolList.size();
    for (int idx = 0; idx < total; idx++) {
        SongInfo song = songInfolList.get(idx);
        int ordinal = idx + 1;
        System.out.println(ordinal + ":");
        System.out.println("歌曲名称:" + song.name);

        System.out.println("艺术家:" + song.actor);

        System.out.println("网址:" + song.sosoList);
        System.out.println();
    }
    System.out.println("共有" + total + "个符合结果");
}

public void fillMp3DownList(SongInfo songInfo) {
if (songInfo.mp3downList == null) {