日期:2014-05-20 浏览次数:21234 次
public class 代码分析 { public static void main(String[] args) throws Exception { Matcher ma; String caricatureUrl, caricatureName, html, picUrl, temp, temp1; StringBuffer sb; Pattern pa; int bookmark_1, bookmark_2, i, j; System.out.println("分析地址是:http://www.kkkmh.com/manhua/0503/jie-jie-shi.html"); html = getOneHtml("http://www.kkkmh.com/manhua/0503/jie-jie-shi.html"); // <li><a href="/manhua/0503/151/3051.html" title="结界师 第1本" target="_blank">第1本</a></li> pa = Pattern.compile("<li><a href=\"/manhua/(.*?) title=(.*?)target=\"_blank\">(.*?)</a></li>", Pattern.DOTALL); ma = pa.matcher(html); while (ma.find()) { temp1 = ma.group(); temp = "target=\"_blank\">"; bookmark_2 = temp.length(); bookmark_1 = temp1.indexOf(temp) + temp.length(); bookmark_2 = temp1.indexOf("<", bookmark_1); caricatureName = temp1.substring(bookmark_1, bookmark_2); temp = "<li><a href=\""; bookmark_1 = temp.length(); bookmark_2 = temp1.indexOf("\"", bookmark_1); caricatureUrl = "http://www.kkkmh.com" + temp1.substring(bookmark_1, bookmark_2); System.out.println("[" + caricatureName + "]" + caricatureUrl); } System.out.println("分析地址是:http://www.kkkmh.com/manhua/0503/151/29376.html"); html = getOneHtml("http://www.kkkmh.com/manhua/0503/151/29376.html"); // pic[0] = '2f636f6d696364617465322f636f6d6963616263642f652d682f6a6a732f7462702f3030316c7763796b62632e706e67'; pa = Pattern.compile("pic\\W(\\d.*?)\\W(.*?);", Pattern.DOTALL); ma = pa.matcher(html); while (ma.find()) { System.out.println(ma.group()); temp = ma.group(); sb = new StringBuffer(temp.substring(temp.indexOf("'")).replaceAll("'", "").replaceAll(";", "").trim()); j = sb.length() / 2; for (i = 0; i < j; i++) { sb.insert(i * 3, "%"); } picUrl = URLDecoder.decode(sb.toString(), "UTF-8"); System.out.println("http://mhc1.kkkmh.com" + picUrl); } } private static String getOneHtml(String htmlurl) throws Exception { URL url; String temp; StringBuffer sb = new StringBuffer(); url = new URL(htmlurl); BufferedReader in = new BufferedReader(new 
InputStreamReader(url.openStream(), "utf-8")); while ((temp = in.readLine()) != null) { sb.append(temp); } in.close(); return sb.toString(); } }
// Percent-encode the hex digits held in sb in place ("2f63" -> "%2f%63").
// Each earlier insertion shifts the remaining text right by one character,
// so pair number i begins at index 3 * i. j is the number of pairs to prefix
// (the enclosing code sets it to sb.length() / 2).
i = 0;
while (i < j) {
    sb.insert(3 * i, "%");
    i++;
}
// URL-decode the percent-encoded text as UTF-8 to recover the readable path.
String percentEncoded = sb.toString();
picUrl = URLDecoder.decode(percentEncoded, "UTF-8");
// Build the percent-encoded form of the hex digits in sb without modifying sb:
// every two-character pair "xy" is appended to ss as "%xy".
StringBuffer ss = new StringBuffer();
int pairedLength = sb.length() - sb.length() % 2;
for (int i = 0; i < pairedLength; i += 2) {
    ss.append("%").append(sb.substring(i, i + 2));
}
// An odd-length input leaves one unpaired character. Reading it with
// substring(i, i + 2) would throw StringIndexOutOfBoundsException, so copy it
// through unescaped instead, which is the result the in-place insert loop gives.
if (pairedLength < sb.length()) {
    ss.append(sb.substring(pairedLength));
}