日期:2014-05-17 浏览次数:20716 次
package com.gwideal.jxwfkjlweb.util; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 去除字符串中的HTML元素 * @author zhou_chaofei * */ public class TxtWithoutHTMLElement { public static String getTxtWithoutHTMLElement (String element) { if(null==element||"".equals(element.trim())) { return element; } Pattern pattern=Pattern.compile("<[^<|^>]*>"); Matcher matcher=pattern.matcher(element); StringBuffer txt=new StringBuffer(); while(matcher.find()) { String group=matcher.group(); if(group.matches("<[\\s]*>")) { matcher.appendReplacement(txt,group); } else { matcher.appendReplacement(txt,""); } } matcher.appendTail(txt); repaceEntities(txt,"&","&"); repaceEntities(txt,"<","<"); repaceEntities(txt,">",">"); repaceEntities(txt,""","\""); repaceEntities(txt," ",""); return txt.toString(); } private static void repaceEntities ( StringBuffer txt,String entity,String replace) { int pos=-1; while(-1!=(pos=txt.indexOf(entity))) { txt.replace(pos,pos+entity.length(),replace); } } public static void main(String[] args) { System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>")); System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>")); } }
?