日期:2014-05-17 浏览次数:20496 次
<!-- Sample input: five rows; the third row's first two cells use rowspan="2",
     so the fourth row carries only the three numeric cells. -->
<table width="85%" height="216" border="0" cellpadding="0" cellspacing="0">
  <tr>
    <td>A</td>
    <td>B</td>
    <td>1</td>
    <td>2</td>
    <td>3</td>
  </tr>
  <tr>
    <td>C</td>
    <td>D</td>
    <td>4</td>
    <td>5</td>
    <td>6</td>
  </tr>
  <tr>
    <td rowspan="2">EG</td>
    <td rowspan="2">FH</td>
    <td>7</td>
    <td>8</td>
    <td>9</td>
  </tr>
  <tr>
    <td>10</td>
    <td>11</td>
    <td>12</td>
  </tr>
  <tr>
    <td>I</td>
    <td>J</td>
    <td>13</td>
    <td>14</td>
    <td>15</td>
  </tr>
</table>
// Read the saved HTML document from disk, decoding it as GB2312.
// NOTE(review): on .NET Core / .NET 5+ the GB2312 code page is presumably only
// available after registering CodePagesEncodingProvider — confirm the target framework.
string tempStr = File.ReadAllText(@"C:\Users\M\Desktop\Test.txt", Encoding.GetEncoding("GB2312"));

// One match per logical table row:
//   group 1 - text of the first <td>
//   group 2 - text of the second <td>
//   group 3 - the run of numeric <td>…</td> cells that follows; the run may cross a
//             "</tr> <tr>" boundary, which is how a rowspan row pulls in the cells
//             of the row beneath it.
// The trailing lookahead requires the run to stop right before a closing </tr>.
// The query is lazy: the projection runs each time `result` is enumerated.
var result = Regex.Matches(tempStr, @"<tr[^>]*?>\s*?<td[^>]*?>([^<]*?)</td>\s*?<td[^>]*?>([^<]*?)</td>\s*?(((?:\s*?<tr>\s*?)?\s*?<td>\d+?</td>\s*?(?:\s*?</tr>\s*?)?)+)(?=\s*?</tr>)")
    .Cast<Match>()
    .Select(a => new
    {
        列1 = a.Groups[1].Value,
        列2 = a.Groups[2].Value,
        // Strip every whitespace run from the captured cell markup.
        列3 = Regex.Replace(a.Groups[3].Value, @"\s+", string.Empty)
    });

/*
Result reported by the original author (debugger view):
  [0] { 列1 = "A",  列2 = "B",  列3 = "<td>1</td><td>2</td><td>3</td>" }
  [1] { 列1 = "C",  列2 = "D",  列3 = "<td>4</td><td>5</td><td>6</td>" }
  [2] { 列1 = "EG", 列2 = "FH", 列3 = "<td>7</td><td>8</td><td>9</td></tr><tr><td>10</td><td>11</td><td>12</td>" }
  [3] { 列1 = "I",  列2 = "J",  列3 = "<td>13</td><td>14</td><td>15</td>" }
*/
------解决方案--------------------
// Regex fragment for one cell: at least one non-'<' character (e.g. whitespace)
// followed by <td>…</td>, capturing the cell's text.
const string Cell = "[^<]+<td>([^<]+)</td>";

// Matches a <tr> that holds exactly thirteen attribute-free <td> cells,
// capturing each cell's text in groups 1 through 13.
string Pattern =
    "<tr>"
    + Cell + Cell + Cell + Cell + Cell
    + Cell + Cell + Cell + Cell + Cell
    + Cell + Cell + Cell
    + "[^<]+</tr>";
Match m = Regex.Match("InputString"