日期:2014-05-19 浏览次数:20957 次
/**
 * Reads the text file {@code c:/xx.txt}, decoding it as GBK, and returns its
 * content with all lines concatenated (line terminators are dropped).
 * <p>
 * I/O failures are reported through {@code printStackTrace()}; whatever was read
 * before the failure (possibly nothing) is still returned.
 *
 * @return buffer holding the concatenated lines of the file; never {@code null}
 * @author wangjikuan
 */
private static StringBuffer getSb(){
    StringBuffer sb = new StringBuffer();
    File f = new File("c:/xx.txt");
    // Keep a handle on the raw stream so the file is released even when
    // constructing the decoder or reading a line fails.
    FileInputStream in = null;
    try {
        in = new FileInputStream(f);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "gbk"));
        String s;
        while((s = reader.readLine()) != null){
            // readLine() strips the line terminator, so the buffer ends up as one long line.
            sb.append(s);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // The original never closed the stream, leaking the file handle.
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return sb;
}
/**
 * Scans the given text for {@code <a ...>...</a>} elements and, for each one,
 * prints the value of its {@code href} attribute immediately followed by the
 * element's inner content and a line break.
 * <p>
 * If an anchor has no {@code href="..."} only the inner content is printed; if
 * no inner content is found no line break is emitted for that anchor.
 *
 * @param sb text to scan (HTML markup is expected)
 * @author wangjikuan
 */
private static void parse(StringBuffer sb){
    // Reluctant match so that each anchor element is picked up separately.
    Pattern anchorPattern = Pattern.compile("<a.*?</a>");
    // [^"]* stops at the closing quote of href. The previous greedy ".*" ran on
    // to the LAST quote in the tag and swallowed any following attributes
    // (e.g. href="x" target="_blank" yielded: x" target="_blank).
    Pattern hrefPattern = Pattern.compile("(?<=href=\")[^\"]*(?=\")");
    // Greedy on purpose: everything between the first '>' and the last '<',
    // i.e. the anchor's inner content including nested markup.
    Pattern textPattern = Pattern.compile("(?<=>).*(?=<)");

    Matcher anchors = anchorPattern.matcher(sb.toString());
    while(anchors.find()){
        String child = anchors.group();
        Matcher href = hrefPattern.matcher(child);
        if(href.find()){
            // print (not println): the link text follows on the same line.
            System.out.print(href.group());
        }
        Matcher text = textPattern.matcher(child);
        if(text.find()){
            System.out.println(text.group());
        }
    }
}
/**
 * Entry point: loads the file content with getSb() and passes it to parse().
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    StringBuffer content = getSb();
    parse(content);
}
------解决方案--------------------
public static void main(String args[]) {
String str = "<img src=\"http://www.baidu.com/icon.png\" /><a href=\"http://guide.sina.cn/?pos=1&vt=1\">导航</a><a href=\"http://sina.cn/nc.php?pos=1&vt=1\">新闻</a><a href=\"http://mil.sina.cn/?pos=1&vt=1\">军事</a><a href=\"http://weibo.cn/?gotoreg=1&from=index&s2w=index&wm=ig_0001_index&pos=1&vt=1\">微博</a><a href=\"http://finance.sina.cn/?sa=t60d13v512&pos=1&vt=1\">股票</a><br/>";
String regex = "href=\"(.*?)\">(.*?)<";
Pattern p = Pattern.compile(regex);
Matcher m = p.matcher(str);
while (m.find()) {
System.out.println(m.group(1));
System.out.println(m.group(2));
System.out.println("-------------"