去除网页注释的正则式
// Sample page fragment: visible text lines interleaved with HTML comments
// (<!-- ... -->) that the snippet below tries to strip.
string mm1 = @ "稿源:
<!--function source_without_pub_date() parse begin-->
北方网—天津日报
<!--function: source_without_pub_date() parse end 0ms cost! -->
编辑:
<!--function init_editor() parse begin-->
朱豪然
<!--function: init_editor() parse end 0ms cost! -->
<!--function comment_this_news() parse begin-->
[发表评论]
<!--function: comment_this_news() parse end 0ms cost! --> ";
// Attempt under discussion: `[\\S\\s]+` is a greedy quantifier, so one match
// extends from the first "<!--" to the LAST "-->" and swallows the text between
// comments as well.
// NOTE(review): the spaces inside the pattern are literal characters (no
// IgnorePatternWhitespace option is passed); they were presumably introduced
// when the snippet was posted — confirm against the original code.
string ss = Regex.Replace(mm1, "( <!--[\\S\\s]+--> )+ ", " ");
我本想去除 <!-- --> 里面的字符,可是这样写把所有都去除了,该怎么改,请教!
------解决方案--------------------可以不用正则表达式..
/// <summary>
/// Removes every span that begins with <paramref name="FlagStart"/> and ends with the
/// next <paramref name="FlagEnd"/> (both markers included), e.g. HTML comments when
/// called with "&lt;!--" and "--&gt;".
/// </summary>
/// <param name="MainText">Text to clean. Returned unchanged when null or empty.</param>
/// <param name="FlagStart">Opening marker of a span to remove.</param>
/// <param name="FlagEnd">Closing marker of a span to remove.</param>
/// <returns>
/// <paramref name="MainText"/> with all complete start/end spans removed. A start marker
/// that has no closing marker after it is left in place.
/// </returns>
string ChangeWithDoubleFlag(string MainText, string FlagStart, string FlagEnd)
{
    // Nothing to search in, or nothing meaningful to search for. An empty marker
    // matches at every position and previously caused unbounded recursion.
    if (string.IsNullOrEmpty(MainText) || string.IsNullOrEmpty(FlagStart) || string.IsNullOrEmpty(FlagEnd))
    {
        return MainText;
    }

    string result = MainText;

    // Iterative instead of recursive: one stack frame per removed span could
    // overflow the stack on pages that contain many comments.
    while (true)
    {
        // Ordinal comparison: the markers are syntax, not linguistic text.
        int startIndex = result.IndexOf(FlagStart, System.StringComparison.Ordinal);
        if (startIndex == -1)
        {
            break;
        }

        int endIndex = result.IndexOf(FlagEnd, startIndex, System.StringComparison.Ordinal);
        if (endIndex == -1)
        {
            // Unterminated span: keep the remaining text as it is.
            break;
        }

        // Rescanning from the beginning on the next pass also catches a marker
        // pair that only becomes adjacent after this removal.
        result = result.Remove(startIndex, endIndex - startIndex + FlagEnd.Length);
    }

    return result;
}
/// <summary>
/// Removes every span that begins with <paramref name="Flag"/> and runs to the end of
/// that line, including the terminating CRLF (single-line-comment style).
/// </summary>
/// <param name="MainText">Text to clean. Returned unchanged when null or empty.</param>
/// <param name="Flag">Marker that starts a span to remove.</param>
/// <returns>
/// <paramref name="MainText"/> with all flag-to-line-break spans removed. A flag that has
/// no CRLF after it (e.g. on the last line) is left in place.
/// </returns>
string ChangeWithSingleFlag(string MainText, string Flag)
{
    // The removal length is derived from this constant, so the searched terminator
    // and the number of characters removed can no longer disagree.
    const string LineBreak = "\r\n";

    // An empty flag matches at index 0 on every pass and would strip whole lines.
    if (string.IsNullOrEmpty(MainText) || string.IsNullOrEmpty(Flag))
    {
        return MainText;
    }

    string result = MainText;

    // Iterative instead of recursive to keep stack usage constant.
    while (true)
    {
        // Ordinal comparison: culture-sensitive searches may treat "\r\n" specially.
        int flagIndex = result.IndexOf(Flag, System.StringComparison.Ordinal);
        if (flagIndex == -1)
        {
            break;
        }

        int lineBreakIndex = result.IndexOf(LineBreak, flagIndex, System.StringComparison.Ordinal);
        if (lineBreakIndex == -1)
        {
            // No line terminator after the flag: leave the tail untouched.
            break;
        }

        result = result.Remove(flagIndex, lineBreakIndex - flagIndex + LineBreak.Length);
    }

    return result;
}
// Usage
string sxml = mm1;

// Comma-separated list of comment markers. An entry of the form "start..end"
// describes a delimited comment; any other entry is treated as a single-line
// comment flag that runs to the end of the line.
string strRemark = "<!--..-->";
const string PairSeparator = "..";

string[] RemarkFlag = strRemark.Split(',');
foreach (string Rf in RemarkFlag)
{
    // Look the separator up once; its length drives the offset of the end marker.
    int separatorIndex = Rf.IndexOf(PairSeparator, System.StringComparison.Ordinal);
    if (separatorIndex != -1)
    {
        string Flag1 = Rf.Substring(0, separatorIndex);
        string Flag2 = Rf.Substring(separatorIndex + PairSeparator.Length);
        sxml = ChangeWithDoubleFlag(sxml, Flag1, Flag2);
    }
    else
    {
        sxml = ChangeWithSingleFlag(sxml, Rf);
    }
}
------解决方案--------------------try
// The lazy quantifier `*?` stops at the nearest "-->", so each comment is matched
// on its own instead of everything between the first "<!--" and the last "-->".
// The pattern contains no literal spaces (they would be required in the input,
// since IgnorePatternWhitespace is not set), and the replacement is empty so the
// comments are removed rather than replaced by blanks.
string ss = Regex.Replace(mm1, "(<!--[\\S\\s]*?-->)+", "");