日期:2014-05-18  浏览次数:20669 次

JSP中如何构造正则表达式
类似:
<a href="xxx.xxxx.com/ssss/sss/fff/ddd.html">xxxx</a>
请问如何构造能够匹配这类HTML的正则表达式?

如何将符合规则的字符串返回?
多谢了!!

------解决方案--------------------
String regEx = "([\\w-]+\\.)+[\\w-]+(/[\\w-\\./?%=]*)?";
------解决方案--------------------
package test;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates flattening a simple tag-structured string (HTML/XML-like) into a
 * flat list of opening tags, text content and closing tags by applying a
 * regular expression recursively.
 *
 * <p>Known limitations, all of which follow directly from the pattern used:
 * <ul>
 *   <li>Only attribute-less tags such as {@code <name>} and {@code <name/>} are
 *       recognised; a tag carrying attributes is not matched.</li>
 *   <li>The content group is greedy, so two sibling elements with the same name
 *       (for example two consecutive {@code <li>} elements) are treated as one
 *       element whose content spans from the first opening tag to the last
 *       closing tag.</li>
 *   <li>{@code .} does not match line terminators by default, so an element
 *       whose content spans several lines is not matched.</li>
 *   <li>When an element contains child elements, any free text next to those
 *       children is not emitted.</li>
 * </ul>
 */
public class Test {

    /**
     * Matches either a paired element {@code <name>content</name>}
     * (group 1 = name, group 2 = content) or a self-closing element
     * {@code <name/>} (group 3 = name).
     *
     * <p>The expression deliberately contains no literal padding spaces: with
     * padding, every tag would have to be surrounded by exactly one space and
     * each match would consume the space needed by the following tag, causing
     * elements to be skipped.
     */
    private static final String REGEX = "<(\\w+)>(.*)</\\1>|<(\\w+)/>";

    /** Compiled once and reused; {@link Pattern} instances are immutable. */
    private static final Pattern PATTERN = Pattern.compile(REGEX);

    /**
     * Runs {@link #foo(String)} on a sample document and prints every resulting
     * entry on its own line, prefixed with its 1-based position.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        String test = " <html> <head> <title> 333 < 444 </title> </head> <h1> 4 < 6 </h1> <h2> 55 < 66 </h2> <br/> </html> ";

        List<String> list = foo(test);
        for (int i = 0; i < list.size(); i++) {
            System.out.println((i + 1) + ": " + list.get(i));
        }
    }

    /**
     * Flattens the elements found in {@code element} into a list, in document
     * order.
     *
     * <p>For each match of the pattern:
     * <ul>
     *   <li>a self-closing element contributes one entry, the matched text
     *       (e.g. {@code <br/>});</li>
     *   <li>a paired element without nested elements contributes two entries:
     *       the opening tag immediately followed by its content verbatim
     *       (e.g. {@code <b>x}), then the closing tag;</li>
     *   <li>a paired element with nested elements contributes the opening tag,
     *       then the entries produced recursively for its content, then the
     *       closing tag.</li>
     * </ul>
     *
     * @param element the text to scan; {@code null} is treated like an empty string
     * @return a new mutable list of entries; empty when nothing matches
     */
    public static List<String> foo(String element) {
        List<String> list = new ArrayList<String>();
        if (element == null) {
            // Pattern.matcher(null) would throw; there is simply nothing to scan.
            return list;
        }

        Matcher m = PATTERN.matcher(element);
        while (m.find()) {
            // Group 3 participates only when the self-closing alternative matched.
            if (m.group(3) != null) {
                list.add(m.group());
                continue;
            }

            String name = m.group(1);
            String content = m.group(2);
            List<String> sublist = foo(content);

            if (sublist.isEmpty()) {
                // Leaf element: keep its text together with the opening tag.
                list.add("<" + name + ">" + content);
            } else {
                list.add("<" + name + ">");
            }
            list.addAll(sublist);
            list.add("</" + name + ">");
        }
        return list;
    }
}