日期:2014-05-18  浏览次数:20832 次

倾家荡产求解:如何获取网页内容?
我想通过程序从下面这个网页中提取内容(例如:姓名、电话、地址等):

http://kevdb.infospace.com/_1_2TICU1D026IBWZ9__intldb/wp/results/kevdb?KCFG=UK&otmpl=/wp/results.htm&qsubcat=1&KSN=intl-uk&KS=_0_n_0_0_Hrnd_1_3s_0_19qiua_149sPF&MinKS=_7BuK_0&MaxKS=_7DwR_0&CurKS=_7BuK_14i&QN=john&QF=&QC=&QS=&QP=&QST=&QHN=&QTAPPEND=qn,qf&QFM=N&QK=5&QO=uk&QD=&DM=&qi=35  


------解决方案--------------------
先用 System.Net.WebClient 下载网页数据,
再用正则表达式分析数据,提取指定内容。
------解决方案--------------------
#region 解析企业信息
/// <summary>
/// 解析企业信息
/// </summary>
/// <param name= "url "> </param>
private string[] AnalysisHtml(string strIndex,string strProfile,string strContact)
{
string[] companyInfo = new string[19];
// "公司名称 ", "企业性质 ", "所属省 ", "地区 ", "公司简介 ", "主要产品 ", "注册资金 ", "总经理 ", "主要客户 ", "成立时间 ",
// "年营业额 ", "主营产品 ", "公司地址 ", "邮编 ", "联系电话 ", "传真 ", "联系人 ", "电子信箱 ", "网站地址 "

// <b> 公司名称: </b> 林燕企业有限公司 </font> </td>
string companyName = Regex.Match(strProfile, @ " <b> 公司名称: </b> (.*) </font> </td> ").Groups[1].Value;
companyName = companyName.Trim();
//string companyName = Regex.Match(strProfile, @ " <td\s*width=\ " "575\ " "\s*height=\ " "23\ " "\s*valign=\ " "top\ " "\s*class=\ " "t10h18\ " "> (.*) </td> ").Groups[1].Value;

// <b> 企业性质: </b> 贸易商 </font> </td>
string company_type = Regex.Match(strProfile, @ " <b> 企业性质: </b> (.*)\s* </font> </td> ").Groups[1].Value;
company_type = company_type.Trim();

string province = " ";
string city = " ";

string companyIntro = Regex.Match(strIndex, @ " <td\s*width=\ " "575\ " "\s*height=\ " "23\ " "\s*valign=\ " "top\ " "\s*class=\ " "t10h18\ " "> (.*)\s* </td> ").Groups[1].Value;
companyIntro = companyIntro.Trim();

string main_product = Regex.Match(strIndex, @ " <b> 主要产品: </b> </span> <br> (.*)\s* </td> ").Groups[1].Value;
main_product = main_product.Trim();

string registMoney = Regex.Match(strProfile, @ " <b> 注册资金: </b> (.*)\s* </font> </td> ").Groups[1].Value;
registMoney = registMoney.Trim();

string manager = Regex.Match(strProfile, @ " <b> 总\s*经\s*理: </b> (.*)\s* </font> </td> ").Groups[1].Value;
manager = manager.Trim();

string main_customer = Regex.Match(strProfile, @ " <b> 主要客户: </b> (.*)\s* </font> </td> ").Groups[1].Value;
main_customer = main_customer.Trim();

string registTime = Regex.Match(strProfile, @ " <b> 成立时间: </b> (.*)\s* </font> </td> ").Groups[1].Value;
registTime = registTime.Trim();


s