日期:2014-05-17 浏览次数:20755 次
public class testIndexer { private IndexWriter writer; public testIndexer(String indexDir) throws CorruptIndexException, LockObtainFailedException, IOException { Directory dir = FSDirectory.open(new File(indexDir)); writer = new IndexWriter(dir,new IKAnalyzer(),true,IndexWriter.MaxFieldLength.UNLIMITED); } public void close() throws CorruptIndexException, IOException { writer.close(); } public void indexPDFile(String filename) throws Exception { File file = new File(filename); String content = PdfExtractor.getText(file); Document doc = new Document(); doc.add(new Field("content",content,Field.Store.YES,Field.Index.ANALYZED)); writer.addDocument(doc); } public static void main(String args[]) { String path="k:/aaaa"; String pdfile="k:/kaks.pdf"; try { testIndexer indx = new testIndexer(path); indx.indexPDFile(pdfile); }catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
/**
 * Minimal Lucene search example: parses a query against the {@code content}
 * field and prints the stored content of the top hits.
 */
public class testSearch {
    /**
     * Runs a sample query against the sample index directory.
     *
     * @param args unused
     * @throws IOException on any low-level I/O error
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String args[]) throws IOException, ParseException {
        String indexDir = "K:/aaaa";
        String q = "分子";
        search(indexDir, q);
    }

    /**
     * Searches the index for the given query and prints the stored
     * {@code content} field of up to ten matching documents to standard output.
     *
     * @param indexDir file-system path of the index directory
     * @param q query string, parsed against the {@code content} field
     * @throws IOException on any low-level I/O error
     * @throws ParseException if the query string cannot be parsed
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            // Second argument opens the underlying reader in read-only mode.
            IndexSearcher is = new IndexSearcher(dir, true);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new IKAnalyzer());
                Query query = parser.parse(q);
                TopDocs hits = is.search(query, 10);
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("content"));
                }
            } finally {
                // Release the searcher even when parsing or searching fails.
                is.close();
            }
        } finally {
            // The directory was opened here, so it is closed here as well.
            dir.close();
        }
    }
}