日期:2014-05-16  浏览次数:20509 次

利用Lucene对整个数据库建立索引(Lucene,SQL,JDBC)

导言:

如果要对整个数据库做精确查询或模糊查询,我们怎么才可以做到?还是通过SQL查询吗?答案是否定的。因为通过SQL对整个数据库做精确查询或模糊查询,速度将非常慢。

lucene解决了这个问题。通过对表或者文本文件预先建立索引,可以很快的实现全文检索。

思路:

1、通过SQL得到所有表名的集合---->2、遍历所有的表,分别为每个表的每条记录建立索引;同时添加表的中文名以及表的说明的索引---->3、按Writer\analyzer\document\field的顺序建索引。

package com.jrj.datamart.tree;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Writer;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
//import com.jrj.datamart.model.ApiIndicator;
//import com.jrj.datamart.model.ApiInfo;
//import com.jrj.datamart.model.ApiInfoQuery;
//import com.jrj.datamart.service.ApiInfoService;


//对整个数据库建立索引,并为每个表添加表的说明和字段的中文名,方便查询
//索引 Lucene 3.0.2
public class IndexerDB {

?// 保存索引文件的地方
?private static? String INDEX_DIR = "F:\\MyLuceneDB2\\LuceneFileIndexDir";
?private String index_dir;
?private File file=new File(INDEX_DIR);
?// 将要搜索TXT文件的地方
?private String data_dir;
?private String DATA_DIR = "F:\\Lucene";
?private String entityName;
//?private ApiInfo apiInfo;
//?private ApiInfoQuery apiInfoQuery = new ApiInfoQuery();
//?private ApiInfoService apiInfoService;
//?private List<ApiIndicator> apiIndicators = new ArrayList<ApiIndicator>();
//?private ApiIndicator apiIndicator;
?private StringBuilder newsb = new StringBuilder();
?private ResultSet rs = null;
?private ResultSet tempRs = null;
?private ResultSetMetaData rsmd = null;
//?private SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
//???Version.LUCENE_30, true);
?private Document doc;
?private String tableName;

?public static void main(String[] args) throws Exception {
??IndexerDB indexDB=new IndexerDB();
??????? File file=new File(INDEX_DIR);
??//1
??indexDB.execute(file);
?}
?

//执行对数据库的索引化
//@param file
//@return
//?@throws Exception
//??
?public String execute(File file) throws Exception {
??long start = new Date().getTime();
??//2
??int numIndexed=getAllTableNameFromDBAndIndexing(file);
??long end = new Date().getTime();
??System.out.println("Indexing " + numIndexed + " files took "
?? + (end - start) + " milliseconds");
??return "success";
?}
?
// 查询所有的表,并遍历所有的表;
//调用getDataFromTable(tableName)获取表的记录;
//调用indexData(writer, rs, tableName)对单张表做索引
?public int get