日期:2014-05-17 浏览次数:21251 次
<div class="dTrans cl">
<p><span class="dt">n.</span><span class="dd">u8bd5u9a8cuff1bu8003u9a8cuff1bu6d4bu9a8cuff1bu5316u9a8cuff1b</span></p>
<p><span class="dt">vt.</span><span class="dd">u6d4bu9a8cuff1bu8003u67e5uff1bu8003u9a8cuff1bu52d8u63a2uff1b</span></p>
<p><span class="dt">vi.</span><span class="dd">u53d7u8bd5u9a8cuff1bu53d7u6d4bu9a8cuff1bu53d7u8003u9a8cuff1bu6d4bu5f97u7ed3u679cuff1b</span></p>
</div>
               string tempStr = File.ReadAllText(@"C:\Users\myx\Desktop\Test.txt", Encoding.GetEncoding("GB2312"));//读取txt
               var _list = Regex.Matches(tempStr, @"(?i)(?<=<div[^>]*?class=(['""]?)dTrans cl(['""]?)[^>]*?>((?!</div>)[\s\S])*?)<p[^>]*?><span[^>]*?>(?<V1>[^<>]*?)</span>\s*?<span[^>]*?>(?<V2>[^<>]*?)</span>\s*?</p>").Cast<Match>().Select(a => 
                   {
                       string unicode_str = string.Empty;
                       MatchCollection mc = Regex.Matches(a.Groups["V2"].Value, @"u([\w]{2})([\w]{2})", RegexOptions.Compiled | RegexOptions.IgnoreCase);
                       byte[] bts = new byte[2];
                       foreach (Match m in mc)
                       {
                           bts[0] = (byte)int.Parse(m.Groups[2].Value, NumberStyles.HexNumber);
                           bts[1] = (byte)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber);
       &n