日期:2014-05-20 浏览次数:20733 次
//定义存放索引的目录 。 File indexDir = new File("D:\\luceneIndex"); //测试用法的一种,对目录中的txt文件的内容进行索引,供查询。 File dataDir = new File("D:\\luceneData"); //确定分词的实现方法。这是Lucene自带的分词器 //Analyzer writerAnalyzer = new SimpleAnalyzer(Version.LUCENE_33); //庖丁解牛 建立中文分词解析 Analyzer writerAnalyzer = new PaodingAnalyzer(); // IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_33, writerAnalyzer); //设定是对索引增量,还是新建索引。 indexWriterConfig.setOpenMode(OpenMode.CREATE); //索引写入流 IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir),indexWriterConfig); File[] dataFiles = dataDir.listFiles(); long startTime = new Date().getTime(); for(int i = 0; i < dataFiles.length; i++){ if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){ System.out.println("Indexing file " + dataFiles[i].getCanonicalPath()); Document document = new Document(); FileInputStream fileInputStream = new FileInputStream(dataFiles[i]); InputStreamReader reader = new InputStreamReader(fileInputStream,"GBK"); document.add(new Field("path",dataFiles[i].getCanonicalPath(),Field.Store.YES,Field.Index.ANALYZED)); document.add(new Field("filename",dataFiles[i].getName(),Field.Store.YES,Field.Index.ANALYZED,TermVector.WITH_POSITIONS_OFFSETS)); document.add(new Field("contents",reader,TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(document); } } //对IndexWriter进行优化 indexWriter.optimize(); indexWriter.close();