日期:2014-05-18  浏览次数:20430 次

asp.net自动采集
求思路:在 B/S 架构下实现自动采集新闻,注意“自动”两个字。问题在于,当没有任何人访问服务器时(例如 IE 已经关闭),该如何触发采集?

------解决方案--------------------
写个winform程序或windows服务放在后台.

------解决方案--------------------
为什么要用B/S?
------解决方案--------------------
在 Global.asax 的 Application_Start 中启动一个 Timer,定时循环执行采集。

------解决方案--------------------
在 Global.asax 的 Application_Start 中启动一个 Timer,定时循环执行采集。
------解决方案--------------------
// Scraper routine body (the enclosing method header is not part of this fragment).
// Steps, in order:
//   1. POST a DWR "SecurityAccess.validate" call carrying the login parameters.
//   2. GET the enterprise list page and parse the page counters out of the HTML.
//   3. GET list pages in a loop, extract the PK0 value from each and pass it to DetailData.
//   4. Record the update time through WebService.DataUpDate and write a status string to the response.

// POST the login user name and password.
// NOTE(review): the credentials are hard-coded in the request body below
// (c0-param0 / c0-param1 — presumably user name and password; confirm).
MSXML2.XMLHTTP req = new MSXML2.XMLHTTP();
// Request body: key=value lines separated by CR LF ((char)13 + (char)10).
string query = "callCount=1" + (char)13 + (char)10 + "c0-scriptName=SecurityAccess" + (char)13 + (char)10 + "c0-methodName=validate" + (char)13 + (char)10 + "c0-id=990_1180409315890" + (char)13 + (char)10 + "c0-param0=string:cx_330300" + (char)13 + (char)10 + "c0-param1=string:123456" + (char)13 + (char)10 + "c0-param2=null:null" + (char)13 + (char)10 + "xml=true";
// NOTE(review): the URL literal has a leading and a trailing space — confirm this is intentional.
// The third argument (false) is presumably the async flag, i.e. the call is synchronous — confirm against the MSXML docs.
req.open("POST", " http://10.1.0.3/JDApp/dwr/exec/SecurityAccess.validate.dwr ", false, "", "");
req.setRequestHeader("Content-Type", "text/plain");
req.send(query);

// Fetch the enterprise list page that contains the page counters.
MSXML2.XMLHTTP Zpages = new MSXML2.XMLHTTP();
Zpages.open("GET", "http://10.1.0.3/JDApp/enterprise/enterpriseListAction.do?searchChilds=true&method=manageList&areaId=20050124181116206783432593202462", false, "", "");
Zpages.send("");

// Get the total number of pages.
// The raw response bytes are decoded as GB2312 before matching.
Byte[] cons = (Byte[])Zpages.responseBody;
string HtmlCodes = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(cons, 0, cons.Length);
// Captures the named groups "pageid" and "pages" from the pager markup.
Match z = Regex.Match(HtmlCodes, @"页次:<b>(?<pageid>[^<]*)</b>页/<b>(?<pages>[^<]*)", RegexOptions.IgnoreCase);
// NOTE(review): if the pattern does not match, the group value is "" and Convert.ToInt32 throws FormatException.
// NOTE(review): pagesz is not used by the loop below, which has a hard-coded upper bound of 2.
int pagesz=Convert.ToInt32( z.Groups["pages"].Value);

// Page through the list.
for (int pageid = 1; pageid <= 2; pageid++)
{

// NOTE(review): this request targets host "wwww.aaa.com" while the other requests use 10.1.0.3 — confirm.
MSXML2.XMLHTTP oBao = new MSXML2.XMLHTTP();
oBao.open("GET", "http://wwww.aaa.com/JDApp/enterprise/enterpriseListAction.do?method=manageList&areaId=20050124181116206783432593202462&pageId=" + (int)pageid + "", false, "", "");
oBao.send("");

// Decode the page body as GB2312.
Byte[] b1 = (Byte[])oBao.responseBody;
string HtmlCode1 = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(b1, 0, b1.Length);

// Strip "&nbsp;" entities before matching.
string yourStr = HtmlCode1;
yourStr = yourStr.Replace("&nbsp;", "");

// Regex.Match returns only the first match, so at most one PK0 value per page is processed.
Match H0 = Regex.Match(yourStr, @"PK0([""']?)\svalue=([""']?)(?<PK0>[^""']*)", RegexOptions.IgnoreCase);

// Fetch the detail page only when a non-empty PK0 value was captured.
if (H0.Groups["PK0"].Value != "") DetailData("" + H0.Groups["PK0"].Value + "");


oBao.abort();
}

// After uploading the file, record the update time; "DM" is the module name.
// NOTE(review): the comment above mentions "DM" but the call passes "CJ" — confirm which is correct.
// WebService is declared outside this fragment.
WebService webService = new WebService();
webService.DataUpDate("CJ");

// Writes the status text to the HTTP response.
Response.Write("采集成功|");
}




 protected void DetailData(string pageid)
{
MSXML2.XMLHTTP Bao = new MSXML2.XMLHTTP();
Bao.open("GET", "http://10.1.0.3/JDApp/enterprise/view.jsp?id=" + pageid + "", false, "", "");
Bao.send("");