日期:2014-05-16 浏览次数:20557 次
    <!-- Registers a request handler under the path "/dataimport", implemented by
         org.apache.solr.handler.dataimport.DataImportHandler.
         Presumably this snippet belongs in solrconfig.xml; confirm. -->
    <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
        <lst name="defaults">
          <!-- Default value of the "config" parameter: the import definition file data-config.xml. -->
          <str name="config">data-config.xml</str>
        </lst>
    </requestHandler>
    <!-- Data import definition: one JDBC data source and one document with a single entity.
         Presumably this is the content of the data-config.xml file; confirm. -->
    <dataConfig>
  <!-- JDBC data source using the MySQL driver against database "dbname" on localhost.
       The user and password values look like placeholders; replace them with real credentials. -->
  <dataSource type="JdbcDataSource" 
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/dbname" 
              user="user-name" 
              password="password"/>
  <document>
    <!-- Entity whose rows come from the query on mytable (columns id, title, detail),
         with ClobTransformer configured as its transformer.
         NOTE(review): the name "myentyty" looks like a typo for "myentity";
         confirm nothing refers to it before renaming. -->
    <entity name="myentyty" transformer="ClobTransformer"
        query="select id, title, detail from mytable">
        <!-- Flags the DETAIL column with clob="true" for the ClobTransformer.
             NOTE(review): the query selects "detail" in lower case while this column is written
             "DETAIL"; confirm the case matches what the JDBC driver reports. -->
        <field column="DETAIL" clob="true"/>
    </entity>
  </document>
  <!-- The next line closes the data import definition and, on the same line, opens a separate
       snippet: the definition of field type "text" (solr.TextField).
       Presumably the fieldType, field and copyField elements belong in the schema file rather
       than in data-config.xml; confirm. -->
</dataConfig>    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">  
       <!-- Index-time analyzer: Paoding ChineseTokenizerFactory in "most-words" mode, followed by
            StopFilter (stopwords.txt, ignoreCase), WordDelimiterFilter (catenateWords and
            catenateNumbers enabled), LowerCaseFilter and RemoveDuplicatesTokenFilter. -->
       <analyzer type="index">  
           <tokenizer class="net.paoding.analysis.analyzer.ChineseTokenizerFactory" mode="most-words"/>  
           <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>  
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>  
           <filter class="solr.LowerCaseFilterFactory"/>  
           <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>  
       </analyzer>  
       <!-- Query-time analyzer: same tokenizer and filters as the index analyzer, except that a
            SynonymFilter (synonyms.txt, expand="true") is inserted after the tokenizer and the
            WordDelimiterFilter has catenateWords and catenateNumbers set to 0 instead of 1. -->
       <analyzer type="query">  
           <tokenizer class="net.paoding.analysis.analyzer.ChineseTokenizerFactory" mode="most-words"/>                  
           <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>  
           <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>  
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>  
           <filter class="solr.LowerCaseFilterFactory"/>  
           <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>  
       </analyzer>  
   <!-- The next line closes the fieldType, declares the field "detail" (type "text", indexed and
        stored), and copies both "title" and "detail" into the field "text". -->
   </fieldType><field name="detail" type="text" indexed="true" stored="true" /> <!-- Add to the default search field; adjust as needed --> <copyField source="title" dest="text"/> <copyField source="detail" dest="text"/>
package net.paoding.analysis.analyzer;
import java.io.Reader;
import java.util.Map;
import net.paoding.analysis.analyzer.impl.MaxWordLengthTokenCollector;
import net.paoding.analysis.analyzer.impl.MostWordsTokenCollector;
import net.paoding.analysis.knife.PaodingMaker;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.analysis.BaseTokenizerFactory;
/**
 * Created by IntelliJ IDEA. 
 * User: ronghao 
 * Date: 2007-11-3 
 * Time: 14:40:59 中文切词 对庖丁