日期:2014-05-16  浏览次数:20426 次

运用Lucene+庖丁解牛分词器,集成SSH,实现对数据库的全文搜索
使用Lucene+庖丁解牛,主要是为了实现对数据库中某张表的某些字段的全文搜索。Lucene本身对中文的支持很差,例如StandardAnalyzer基本上是一个字一个字地切分和匹配;而庖丁解牛分词器自带一个dic词典,按词典进行分词,效率很高。
首先我封装了一个lucene类;为了简单起见,数据库操作直接使用JDBC。

package lucene;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import com.model.searchResult;

public class lucene {

// Query text most recently supplied through the one-argument constructor or setSearchString.
private String searchString;
// Search results exposed through getList/setList; starts out as an empty list.
private List<searchResult> list = new ArrayList<searchResult>();

/**
 * Returns the result list currently held by this object.
 *
 * @return the stored list itself (not a copy)
 */
public List<searchResult> getList() {
    return this.list;
}

/**
 * Replaces the result list held by this object.
 *
 * @param list the list to store; the reference is kept as-is, no copy is made
 */
public void setList(final List<searchResult> list) {
    this.list = list;
}

/**
 * Creates an instance without a search string; fields keep their declared defaults.
 */
public lucene() {
    // Intentionally empty: nothing to initialise beyond the field initialisers.
}

/**
 * Creates an instance with the given search string.
 *
 * @param searchString the query text to remember; stored without validation
 */
public lucene(String searchString) {
    // Assign the field directly rather than calling setSearchString: this class is
    // not final, and invoking an overridable method from a constructor would run a
    // subclass override before the subclass has been initialised.
    this.searchString = searchString;
}

/**
 * Stores the query text on this object.
 *
 * @param searchString the new query text; stored without validation
 */
public void setSearchString(final String searchString) {
    this.searchString = searchString;
}

/**
 * Returns the query text stored on this object.
 *
 * @return the stored search string, or {@code null} if none has been set
 */
public String getSearchString() {
    return this.searchString;
}

/**
 * Runs a full-text search against the "name" field of the index stored in
 * {@code d:/index/}, parsing the query with the Paoding analyzer.
 *
 * <p>Any failure (index cannot be opened, query cannot be parsed, search error)
 * is printed to standard output and reported to the caller as {@code null}.
 *
 * @param queryString the query text in QueryParser syntax
 * @return the matching hits, or {@code null} if the search failed
 */
public Hits seacher(String queryString) {
    Hits hits = null;
    IndexReader reader = null;

    try {
        File indexFile = new File("d:/index/");
        reader = IndexReader.open(indexFile);
        Analyzer analyzer = new PaodingAnalyzer();
        QueryParser parser = new QueryParser("name", analyzer);
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = parser.parse(queryString);
        hits = searcher.search(query);

    } catch (Exception e) {
        System.out.print(e);
    } finally {
        // Close the reader only on the failure path. On success the returned Hits
        // still needs the open searcher/reader to load documents (see the Lucene
        // Hits API), so closing here would break the caller.
        if (hits == null && reader != null) {
            try {
                reader.close();
            } catch (IOException closeError) {
                System.out.print(closeError);
            }
        }
    }
    return hits;
}

//创建索引
public void indexCreateUtil() {
String driver = "com.mysql.jdbc.Driver";
String url = "jdbc:mysql://localhost:3306/rent";
String user = "root";
String password = "111111";
//查询所有房屋信息放在index索引中
String query = "select a.house_id,a.house_name,b.house_type_name,house_rent_type_name,a.single_day_price from t_house a,t_house_type b,t_house_rent_type c where a.house_type_id=b.house_type_id and a.house_rent_type_id=c.house_rent_type_id";
try {
Class.forName(driver);
Connection conn = DriverManager.getConnection(url, user, password);

if (!conn.isClosed())
System.out.println("数据库连接成功!");
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(query);
Analyzer analyzer = new PaodingAnalyzer();
try {
// 使用索引文件夹,庖丁解牛分词器创建IndexWriter
IndexWriter indexWriter = new IndexWriter("d:/index/",
analyzer, true);

while (rs.next()) {
Document doc = new Document();
doc.add(new Field("id", rs.getString("house_id"),
Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
doc.add(new Field("name", rs.getString("house_name"),
Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.NO));
doc.add(new Field("type_name", rs
.getString("house_type_name"), Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field("rent_name", rs
.getString("house_rent_type_name"),
Field.Sto