日期:2014-05-20  浏览次数:20482 次

正则表达式,急!
如何取得 <form action="lc.asp" method="post"><BR><BR><BR>正文<BR><BR></form> 之间"正文"的内容啊?
还请高手指教.

------解决方案--------------------
顶..
------解决方案--------------------
// Sample markup: the wanted text ("正文") sits between <BR> tags inside the form.
string a = "<form action=\"lc.asp\" method=\"post\"><BR><BR><BR>正文<BR><BR></form>";
// (?<=<BR>) and (?=<BR>) are zero-width look-arounds: the match must be
// immediately preceded and followed by <BR>, but the tags themselves are not
// part of the match. [^<]+ requires at least one non-'<' character, so the
// empty gaps between adjacent <BR> tags are skipped.
Regex reg = new Regex("(?<=<BR>)[^<]+(?=<BR>)");
// Match.Value is the empty string when nothing matches.
a = reg.Match(a).Value;
------解决方案--------------------
// Collects the text content of every <form action=...>...</form> element in
// `str`, with all markup tags inside the form removed.
ArrayList results = new ArrayList();
string str = "<form action=\"lc.asp\" method=\"post\"><BR><BR><BR>正文<BR><BR></form>";
// Group 1 captures everything between the opening <form ...> tag and the
// closing </form>. The quantifier is lazy so that, when the input contains
// several forms, each form produces its own match instead of one match that
// spans from the first <form> to the last </form>.
string pattern1 = @"<form action=[^<>]*>(.*?)</form>";
// The part to delete from the captured body: any tag. It can be narrowed to
// "<BR>" or another pattern if only specific tags should be stripped.
string pattern2 = @"<[^<>]*>";
// Index of the capture group in pattern1 that holds the form body.
int pos = 1;

// Singleline lets '.' match newlines, so forms spanning several lines work.
Regex re1 = new Regex(pattern1, RegexOptions.Singleline);
Regex re2 = new Regex(pattern2, RegexOptions.Singleline);

foreach (Match m in re1.Matches(str))
{
    // Replace every tag in the captured body with nothing, leaving the text.
    results.Add(re2.Replace(m.Groups[pos].Value, ""));
}

return results;
------解决方案--------------------
去掉所有HTML标记,便可以得到正文内容!

// Strip every markup tag from the string so that only the text remains.
string str = "<form action=\"lc.asp\" method=\"post\"><BR><BR><BR>正文<BR><BR></form>";
// A tag is '<', then any run of characters that are neither '<' nor '>',
// then '>'. Excluding '>' inside the tag keeps a stray '>' in the text from
// being swallowed together with the text that precedes it.
str = Regex.Replace(str, @"<[^<>]*>", "");

ok!