日期:2014-05-17 浏览次数:20573 次
<!--
  Sample table: five rows. Each full row has two label cells followed by three numeric cells.
  In the third row the two label cells (EG, FH) carry rowspan="2", so the fourth row contains
  only its three numeric cells (10, 11, 12) and no label cells of its own.
-->
<table width="85%" height="216" border="0" cellpadding="0" cellspacing="0">
<tr>
<td>A</td>
<td>B</td>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>C</td>
<td>D</td>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
<tr>
<td rowspan="2">EG</td>
<td rowspan="2">FH</td>
<td>7</td>
<td>8</td>
<td>9</td>
</tr>
<tr>
<td>10</td>
<td>11</td>
<td>12</td>
</tr>
<tr>
<td>I</td>
<td>J</td>
<td>13</td>
<td>14</td>
<td>15</td>
</tr>
</table>
// Load the HTML text from disk, decoding the file as GB2312.
string tempStr = File.ReadAllText(@"C:\Users\M\Desktop\Test.txt", Encoding.GetEncoding("GB2312"));// read the document
// Match every <tr> whose first two children are <td> cells (attributes allowed on these tags),
// then project each match into an anonymous object. Capture groups used below:
//   group 1 - text of the first <td>
//   group 2 - text of the second <td>
//   group 3 - the run of following attribute-less <td>digits</td> cells. Inside the repeated group an
//             optional </tr> and an optional bare <tr> may surround each cell, which lets the run
//             continue across a row boundary into a following row made only of numeric cells
//             (the situation produced by rowspan="2" label cells).
// The trailing lookahead (?=\s*?</tr>) requires a closing </tr> after the match without consuming it.
var result = Regex.Matches(tempStr, @"<tr[^>]*?>\s*?<td[^>]*?>([^<]*?)</td>\s*?<td[^>]*?>([^<]*?)</td>\s*?(((?:\s*?<tr>\s*?)?\s*?<td>\d+?</td>\s*?(?:\s*?</tr>\s*?)?)+)(?=\s*?</tr>)").Cast<Match>().Select(a => new {
列1=a.Groups[1].Value,// text of the first cell
列2=a.Groups[2].Value,// text of the second cell
列3=Regex.Replace(a.Groups[3].Value,@"\s+",string.Empty)// strip all whitespace from the captured numeric cells
});
/*
+ [0] { 列1 = "A", 列2 = "B", 列3 = "<td>1</td><td>2</td><td>3</td>" } <Anonymous Type>
+ [1] { 列1 = "C", 列2 = "D", 列3 = "<td>4</td><td>5</td><td>6</td>" } <Anonymous Type>
+ [2] { 列1 = "EG", 列2 = "FH", 列3 = "<td>7</td><td>8</td><td>9</td></tr><tr><td>10</td><td>11</td><td>12</td>" } <Anonymous Type>
+ [3] { 列1 = "I", 列2 = "J", 列3 = "<td>13</td><td>14</td><td>15</td>" } <Anonymous Type>
*/
------解决方案--------------------
// Regex for a single <tr> row with a fixed number of attribute-less <td> cells, capturing the text of
// each cell in its own group. Every [^<]+ between tags requires at least one non-'<' character
// (for example a line break) to separate consecutive tags, and each cell must contain non-empty text.
string Pattern = "<tr>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+<td>([^<]+)</td>[^<]+</tr>";
Match m = Regex.Match("InputString"