日期:2014-05-19 浏览次数:20726 次
/** * 文件中读取 目标文件 * @return * @author wangjikuan */ private static StringBuffer getSb(){ StringBuffer sb = new StringBuffer(); File f = new File("c:/xx.txt"); try { BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(f), "gbk")); String s = ""; while((s = reader.readLine()) != null){ sb.append(s); } } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return sb; } /** * 解析 字符串,得到目标 * @param sb * @author wangjikuan */ private static void parse(StringBuffer sb){ String regx = "<a.*?</a>"; Pattern p = Pattern.compile(regx); Matcher m = p.matcher(sb.toString()); String regx1 = "(?<=href=\").*(?=\")"; Pattern p1 = Pattern.compile(regx1); String regx2 = "(?<=>).*(?=<)"; Pattern p2 = Pattern.compile(regx2); while(m.find()){ String child = m.group(); Matcher m1 = p1.matcher(child); if(m1.find()){ System.out.print(m1.group()); } Matcher m2 = p2.matcher(child); if(m2.find()){ System.out.println(m2.group()); } } } public static void main(String[] args) { parse(getSb()); }
------解决方案--------------------
public static void main(String args[]) { String str = "<img src=\"http://www.baidu.com/icon.png\" /><a href=\"http://guide.sina.cn/?pos=1&vt=1\">导航</a><a href=\"http://sina.cn/nc.php?pos=1&vt=1\">新闻</a><a href=\"http://mil.sina.cn/?pos=1&vt=1\">军事</a><a href=\"http://weibo.cn/?gotoreg=1&from=index&s2w=index&wm=ig_0001_index&pos=1&vt=1\">微博</a><a href=\"http://finance.sina.cn/?sa=t60d13v512&pos=1&vt=1\">股票</a><br/>"; String regex = "href=\"(.*?)\">(.*?)<"; Pattern p = Pattern.compile(regex); Matcher m = p.matcher(str); while (m.find()) { System.out.println(m.group(1)); System.out.println(m.group(2)); System.out.println("-------------"