winform中怎么提取指定网页中的类似<a href="news.asp?id=3"的超连接-C#教程-爱易网页

winform中怎么提取指定网页中的类似<a href="news.asp?id=3"的超连接

日期：2014-05-19　浏览次数：21016 次

winform中如何提取指定网页中的类似<a href="news.asp?id=3"的超连接
如标题
CSDN上给出的都是如下的答案，但这样只能获得以 http:// 开头的超链接；而网页中形如 <a href="news.asp?id=3">内容</a> 的相对链接 news.asp?id=3 该如何获得？又该如何检测它是否可以正常打开？
using System;
using System.Xml;
using System.Text;
using System.Net;
using System.IO;
using System.Collections;
using System.Text.RegularExpressions;

public class App
{
/// <summary>
/// Console entry point: reads a page address from standard input, downloads the
/// page, extracts its hyperlinks and writes them to an XML file.
/// </summary>
public static void Main()
{
    Console.Write( "请输入一个网页地址： ");
    string strURL = Console.ReadLine();

    // ReadLine() returns null at end of input; blank input cannot be turned into a URL.
    if (strURL == null || strURL.Trim().Length == 0)
    {
        Console.Error.WriteLine("未输入网页地址。");
        return;
    }

    strURL = strURL.Trim();

    // StartsWith is safe for inputs shorter than the prefix (Substring(0,7) would throw),
    // and an address that already carries https:// must not get http:// prepended.
    bool hasScheme =
        strURL.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
        strURL.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasScheme)
    {
        strURL = "http://" + strURL;
    }

    Console.WriteLine( "正在获取页面代码，请稍侯... ");
    string strCode = GetPageSource(strURL);

    Console.WriteLine( "正在提取超链接，请稍侯... ");
    ArrayList alLinks = GetHyperLinks(strCode);

    Console.WriteLine( "正在写入文件，请稍侯... ");
    WriteToXml(strURL, alLinks);
}

// 获取指定网页的HTML代码
/// <summary>
/// Downloads the page at <paramref name="URL"/> with an HTTP GET request and
/// returns the response body decoded as GB2312 text.
/// </summary>
/// <param name="URL">Absolute URL of the page to fetch.</param>
/// <returns>The entire response body as a string.</returns>
/// <exception cref="UriFormatException"><paramref name="URL"/> is not a valid URI.</exception>
/// <exception cref="WebException">The request fails or the server returns an error status.</exception>
static string GetPageSource(string URL)
{
    Uri uri = new Uri(URL);

    HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);

    // Request settings only take effect if applied before GetResponse() sends the request.
    hwReq.Method = "GET";
    hwReq.KeepAlive = false;

    // Dispose the response, its stream and the reader so the connection is released
    // even when reading throws.
    using (HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse())
    using (Stream body = hwRes.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, System.Text.Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}

// 提取HTML代码中的网址
/// <summary>
/// Extracts the distinct absolute http:// URLs that appear in a block of HTML text.
/// </summary>
/// <param name="htmlCode">HTML source to scan.</param>
/// <returns>
/// An <see cref="ArrayList"/> of unique URL strings, ordered by <see cref="ArrayList.Sort()"/>.
/// </returns>
/// <remarks>
/// Only text matching the http:// pattern is returned; relative href values such as
/// news.asp?id=3 do not match and are not included.
/// </remarks>
static ArrayList GetHyperLinks(string htmlCode)
{
    ArrayList al = new ArrayList();

    // "http://", a dotted host name, then an optional path/query part.
    string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";

    Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);

    // Remembers URLs already added so duplicates are dropped in one pass; ordinal
    // comparison keeps the exact-match semantics of string ==.
    System.Collections.Generic.HashSet<string> seen =
        new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);

    foreach (Match match in r.Matches(htmlCode))
    {
        string strNew = match.Value;
        if (seen.Add(strNew))
        {
            al.Add(strNew);
        }
    }

    al.Sort();

    return al;
}

// 把网址写入xml文件
static void WriteToXml(string strURL, ArrayList alHyperLinks)
{
XmlTextWriter writer = new XmlTextWriter( "HyperLinks.xml ",Encoding.UTF8);

writer.Formatting = Formatting.Indented;
writer.WriteStartDocument(false);
writer.WriteDocType( "HyperLinks ", null, "urls.dtd ", null);
writer.WriteComment( "提取自 " + strURL + "的超链接 ");

免责声明： 本文仅代表作者个人观点，与爱易网无关。其原创性以及文中陈述文字和内容未经本站证实，对本文以及其中全部或者部分内容、文字的真实性、完整性、及时性本站不作任何保证或承诺，请读者仅作参考，并请自行核实相关内容。

winform中怎么提取指定网页中的类似<a href="news.asp?id=3"的超连接

相关资料更多>

推荐阅读更多>