// Date: 2014-05-17  Views: 20716

// Remove HTML elements from a string
package com.gwideal.jxwfkjlweb.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility that removes HTML elements (tags) from a string and decodes a small,
 * fixed set of character entities.
 *
 * @author zhou_chaofei
 */
public class TxtWithoutHTMLElement {

    /** Matches one tag-like span: '<', any run of characters other than '<' or '>', then '>'. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^<>]*>");

    /** Matches a tag whose interior is empty or whitespace only, e.g. "<>" or "<  >". */
    private static final Pattern BLANK_TAG_PATTERN = Pattern.compile("<\\s*>");

    /**
     * Entity-to-replacement table. Note that "&nbsp;" is deleted rather than
     * turned into a space.
     */
    private static final String[][] ENTITIES = {
        {"&amp;", "&"},
        {"&lt;", "<"},
        {"&gt;", ">"},
        {"&quot;", "\""},
        {"&nbsp;", ""},
    };

    /**
     * Strips tags from {@code element} and then decodes the entities
     * {@code &amp;}, {@code &lt;}, {@code &gt;}, {@code &quot;} and {@code &nbsp;}.
     *
     * <p>Tags that contain nothing but whitespace between the angle brackets
     * (such as {@code <>}) are kept verbatim. Entities are decoded after tag
     * removal, so text like {@code &lt;b&gt;} becomes the literal {@code <b>}.
     * Each entity occurrence is decoded exactly once: {@code &amp;lt;} yields
     * {@code &lt;}, not {@code <}.
     *
     * @param element the text to clean; may be {@code null}
     * @return {@code element} itself when it is {@code null} or blank after
     *         trimming, otherwise the text with tags removed and entities decoded
     */
    public static String getTxtWithoutHTMLElement(String element) {
        if (null == element || "".equals(element.trim())) {
            return element;
        }

        Matcher matcher = TAG_PATTERN.matcher(element);
        StringBuffer stripped = new StringBuffer(element.length());
        while (matcher.find()) {
            String tag = matcher.group();
            // Keep whitespace-only "tags" as they are; quote the replacement so it
            // is never interpreted as a group reference.
            String replacement = BLANK_TAG_PATTERN.matcher(tag).matches()
                    ? Matcher.quoteReplacement(tag)
                    : "";
            matcher.appendReplacement(stripped, replacement);
        }
        matcher.appendTail(stripped);

        return decodeEntities(stripped.toString());
    }

    /**
     * Decodes the entities listed in {@link #ENTITIES} in a single left-to-right
     * pass, so the output of one replacement is never scanned again.
     */
    private static String decodeEntities(String text) {
        int length = text.length();
        StringBuilder decoded = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            char current = text.charAt(index);
            if (current == '&') {
                String[] hit = null;
                for (String[] entity : ENTITIES) {
                    if (text.startsWith(entity[0], index)) {
                        hit = entity;
                        break;
                    }
                }
                if (hit != null) {
                    decoded.append(hit[1]);
                    index += hit[0].length();
                    continue;
                }
            }
            decoded.append(current);
            index++;
        }
        return decoded.toString();
    }

    /** Small manual demo: prints the stripped form of a sample anchor element twice. */
    public static void main(String[] args) {
        System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>"));
        System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>"));
    }
}

?