日期:2014-05-17 浏览次数:20913 次
/**
 * Indexes the extracted text of PDF files into a Lucene index stored on disk.
 *
 * <p>Callers must invoke {@link #close()} when they are done adding documents;
 * closing the underlying {@code IndexWriter} is what commits the pending
 * documents and releases the index write lock.
 */
public class testIndexer {
    /** Writer used to add documents to the on-disk index. */
    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     *
     * <p>The writer is constructed with the create flag set to {@code true} and an
     * unlimited maximum field length, using {@code IKAnalyzer} for tokenization.
     *
     * @param indexDir path of the directory that holds the index
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on any other low-level I/O failure
     */
    public testIndexer(String indexDir) throws CorruptIndexException, LockObtainFailedException, IOException
    {
        Directory dir = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(dir, new IKAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O failure
     */
    public void close() throws CorruptIndexException, IOException
    {
        writer.close();
    }

    /**
     * Extracts the text of one PDF file and adds it to the index as a single
     * document with a stored, analyzed {@code "content"} field.
     *
     * @param filename path of the PDF file to index
     * @throws Exception if text extraction or adding the document fails
     */
    public void indexPDFile(String filename) throws Exception
    {
        File file = new File(filename);
        String content = PdfExtractor.getText(file);
        Document doc = new Document();
        doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }

    /**
     * Demo entry point: indexes one hard-coded PDF into a hard-coded index directory.
     *
     * @param args unused
     */
    public static void main(String args[])
    {
        String path = "k:/aaaa";
        String pdfile = "k:/kaks.pdf";
        testIndexer indx = null;
        try {
            indx = new testIndexer(path);
            indx.indexPDFile(pdfile);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always close the writer: without this the added document is never
            // committed and the write lock on the index directory is left behind.
            // indx is still null when the constructor itself failed.
            if (indx != null) {
                try {
                    indx.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
/**
 * Runs a query against the {@code "content"} field of a Lucene index on disk
 * and prints the stored content of the top matching documents.
 */
public class testSearch {
    /**
     * Demo entry point: searches a hard-coded index directory for a hard-coded term.
     *
     * @param args unused
     * @throws IOException on a low-level I/O failure while reading the index
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String args[]) throws IOException, ParseException
    {
        String indexDir = "K:/aaaa";
        String q = "分子";
        search(indexDir, q);
    }

    /**
     * Parses {@code q} against the {@code "content"} field, retrieves up to 10 hits
     * from the index in {@code indexDir}, and prints each hit's stored
     * {@code "content"} value to standard output.
     *
     * @param indexDir path of the directory that holds the index
     * @param q query string, parsed by {@code QueryParser} with {@code IKAnalyzer}
     * @throws IOException on a low-level I/O failure while reading the index
     * @throws ParseException if {@code q} cannot be parsed
     */
    public static void search(String indexDir, String q) throws IOException, ParseException
    {
        // Parse first: a malformed query then fails before any index resource is opened.
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new IKAnalyzer());
        Query query = parser.parse(q);

        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir, true);
            try {
                TopDocs hits = is.search(query, 10);
                for (ScoreDoc scoreDoc : hits.scoreDocs)
                {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("content"));
                }
            } finally {
                // Close the searcher even when search() or doc() throws.
                is.close();
            }
        } finally {
            // The original never closed the directory.
            dir.close();
        }
    }
}