日期:2014-05-16 浏览次数:20478 次
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> <lst name="defaults"> <str name="config">data-config.xml</str> </lst> </requestHandler>
<dataConfig> <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost/dbname" user="user-name" password="password"/> <document> <entity name="myentyty" transformer="ClobTransformer" query="select id, title, detail from mytable"> <field column="DETAIL" clob="true"/> </entity> </document> </dataConfig>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="net.paoding.analysis.analyzer.ChineseTokenizerFactory" mode="most-words"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer> <analyzer type="query"> <tokenizer class="net.paoding.analysis.analyzer.ChineseTokenizerFactory" mode="most-words"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer> </fieldType>
<field name="detail" type="text" indexed="true" stored="true" /> <!-- 添加到默认的查询字段,可根据需要修改 --> <copyField source="title" dest="text"/> <copyField source="detail" dest="text"/>
package net.paoding.analysis.analyzer; import java.io.Reader; import java.util.Map; import net.paoding.analysis.analyzer.impl.MaxWordLengthTokenCollector; import net.paoding.analysis.analyzer.impl.MostWordsTokenCollector; import net.paoding.analysis.knife.PaodingMaker; import org.apache.lucene.analysis.Tokenizer; import org.apache.solr.analysis.BaseTokenizerFactory; /** * Created by IntelliJ IDEA. * User: ronghao * Date: 2007-11-3 * Time: 14:40:59 中文切词 对庖丁