日期:2014-05-16  浏览次数:20434 次

读取解析购物网站的目录,链接和文字,并存到数据库
package com.yihaodian.pricehisotry;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import javax.swing.tree.TreeNode;

import org.apache.log4j.Logger;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

import com.yihaodian.pis.dto.SiteCategoryDto;
import com.yihaodian.pis.timer.DailyIterator;
import com.yihaodian.pis.timer.Scheduler;
import com.yihaodian.pis.timer.SchedulerTask;
import com.yihaodian.pricehisotry.dao.SiteCategoryDao;

public class ExecuteTimerTask {
	// Logs through the log4j root logger rather than a class-specific logger.
	private static final Logger logger = Logger.getRootLogger();
	// Scheduler that start() registers the recurring task with.
	private final Scheduler scheduler = new Scheduler();
	// Formats the wake-up timestamp written to the log each time the task runs.
	private final SimpleDateFormat dateFormat = new SimpleDateFormat(
			"dd MMM yyyy HH:mm:ss.SSS");
	// Schedule values, passed in this order to DailyIterator by start().
	// NOTE(review): dayOfMonth is assigned from the constructor's dayOfWeek
	// argument -- confirm which unit DailyIterator actually expects.
	private int dayOfMonth = 0;
	private int hourOfDay = 0;
	private int minute = 0;
	private int second = 0;

	// Spring application context; assigned in the static initializer below.
	private static ApplicationContext context = null;
	
	// Builds the Spring context from the two classpath XML files when the
	// class is initialized, then logs that initialization has happened.
	static {
		String[] configLocations = { "/spring-bean.xml", "/spring-dao.xml" };
		context = new ClassPathXmlApplicationContext(configLocations);
		logger.info("------> init program .....");
	}
	
	/**
	 * Stores the schedule values that start() later hands to DailyIterator.
	 *
	 * @param dayOfWeek stored in the {@code dayOfMonth} field.
	 *                  NOTE(review): the parameter and field names disagree --
	 *                  confirm which unit DailyIterator expects.
	 * @param hourOfDay stored in the {@code hourOfDay} field
	 * @param minute    stored in the {@code minute} field
	 * @param second    stored in the {@code second} field
	 */
	public ExecuteTimerTask(int dayOfWeek,int hourOfDay, int minute, int second) {
		this.second = second;
		this.minute = minute;
		this.hourOfDay = hourOfDay;
		this.dayOfMonth = dayOfWeek;
	}

	/**
	 * Registers a recurring task with the scheduler, using a DailyIterator
	 * built from the schedule fields of this instance.
	 *
	 * Each run downloads the page at http://www.suning.cn/, looks up the
	 * element whose id attribute is "SNmenuNav", and passes each "dl" node
	 * among that element's first three children to getallLink.
	 *
	 * Failures are logged and never propagate out of the task.
	 */
	public void start() {
		scheduler.schedule(new SchedulerTask() {
			public void run() {
				try {
					excuteTimerTask();
				} catch (Exception e) {
					// Last line of defence: log with the stack trace instead
					// of printing to stderr.
					logger.error("Scheduled category crawl failed", e);
				}
			}

			/** Fetches the page, locates the navigation menu and walks its first children. */
			private void excuteTimerTask() {
				logger.info("Wake up! " + "It's " + dateFormat.format(new Date()));
				try {
					String pageHtml = fetchPage("http://www.suning.cn/");

					Parser parser = Parser.createParser(pageHtml, "utf-8");
					NodeFilter filter = new HasAttributeFilter("id", "SNmenuNav");
					NodeList matches = parser.extractAllNodesThatMatch(filter);
					if (matches == null || matches.size() == 0) {
						// Happens when the download failed or the page layout
						// changed; previously this surfaced as a NullPointerException.
						logger.warn("No element with id \"SNmenuNav\" found; nothing to parse");
						return;
					}

					NodeList dlList = matches.elementAt(0).getChildren();
					if (dlList == null) {
						logger.warn("Element \"SNmenuNav\" has no children; nothing to parse");
						return;
					}

					// Only the first three child nodes are inspected, as before,
					// but never more than the list actually holds.
					final int maxNodes = 3;
					int limit = Math.min(maxNodes, dlList.size());
					for (int i = 0; i < limit; i++) {
						Node node = dlList.elementAt(i);
						if ("dl".equals(node.getText())) {
							getallLink(node, i); // hand over the first-level category node
						}
					}

					System.out.println("test成功。。。。。。。--------");
				} catch (Exception e) {
					logger.error("Failed to parse the category menu", e);
				}
			}

			/**
			 * Downloads the given URL and returns its lines concatenated
			 * without line separators, then trimmed.
			 *
			 * Best effort: on a read error the failure is logged and whatever
			 * was read so far is returned (possibly an empty string).
			 */
			private String fetchPage(String pageUrl) {
				StringBuilder page = new StringBuilder();
				BufferedReader in = null;
				try {
					// Decode as UTF-8 to agree with the charset passed to the
					// parser, instead of depending on the platform default.
					in = new BufferedReader(new InputStreamReader(
							new URL(pageUrl).openStream(), "utf-8"));
					String line;
					while ((line = in.readLine()) != null) {
						page.append(line);
					}
				} catch (Exception e) {
					logger.warn("Failed to read " + pageUrl, e);
				} finally {
					if (in != null) {
						try {
							in.close();
						} catch (java.io.IOException ignored) {
							// Nothing useful can be done if close fails.
						}
					}
				}
				return page.toString().trim();
			}
		}, new DailyIterator(dayOfMonth,hourOfDay, minute, second));
	}
	int nextid=0;// Tracks the ID of the second-level category
	int firstid=0;// Tracks the ID of the first-level category
	public TreeNode getallLink(Node d,int n){
		SiteCategoryDao siteCategoryDao = (SiteCategoryDao)(context.getBean("siteCategoryDao"));
		if (d.getText().indexOf("dl")>=0) {
			//System.out.println("1");
		}else if (d.getText().indexOf("dt")>=0) {
			getallLink(d.getChildren().elementAt(0),100); 
			return null;
		}else if (d.getText().indexOf("dd")>=0) {
			//System.out.println("12");
		}else if (d.getText().indexOf("ul class=\"sideleft\"")>=0) {
			//System.out.println("121");
		}else if (d.getText().indexOf("li")>=0) {
			if(d.getChildren().size()>