日期:2014-05-17 浏览次数:20885 次
// String u="http://music.qq.com"; // URL url=new URL(u); // URLConnection con=url.openConnection(); // InputStream is=con.getInputStream(); // OutputStream os=new FileOutputStream(new File("D:\\test\\aa.html")); // int len=0; // byte[] buffer=new byte[1024]; // while ((len=(is.read(buffer)))!=-1) { // os.write(buffer, 0, len); // } // os.close(); // is.close(); //********************************************************************************************************* // String regex="<a[^>]*href=\"([^\"]*)\"[^>]*>(.*?)<\\/a>"; String hrefRegex="href=['\"]?(.*?)['\" ]\""; StringBuilder sb=new StringBuilder(); Map<String, String> map=new HashMap<String, String>(); String href=""; String text=""; String matchedText=""; Pattern pattern=Pattern.compile(hrefRegex); File file=new File("D:\\test\\aa.html"); InputStream is=new FileInputStream(file); BufferedReader reader=new BufferedReader(new InputStreamReader(is, "GBK")); while (reader.read()!=-1) { sb.append(reader.readLine()); } Matcher matcher=pattern.matcher(sb); while (matcher.find()) { System.out.println(matcher.group(1)); } }