// Date: 2014-05-20  Views: 20998
package ICTCLAS.I3S.Test;
import java.io.UnsupportedEncodingException;
import ICTCLAS.I3S.AC.ICTCLAS50;
/**
 * Demo showing the effect of importing a user dictionary on ICTCLAS50 word segmentation.
 *
 * <p>The program initialises the segmenter, segments a sample sentence, imports
 * {@code userDict.txt}, segments the same sentence again, and prints both results
 * so they can be compared.
 */
public class Test_UserDic {

    /** Charset name used for every String/byte[] conversion handed to the segmenter. */
    private static final String ENCODING = "UTF-8";

    /**
     * Runs the before/after user-dictionary segmentation demo.
     *
     * @param args command-line arguments (unused)
     * @throws UnsupportedEncodingException if the UTF-8 charset is not available
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        ICTCLAS50 ictclas = new ICTCLAS50();

        // Initialise the segmenter, passing the current directory as its path argument.
        String argu = ".";
        if (!ictclas.ICTCLAS_Init(argu.getBytes(ENCODING))) {
            System.err.println("Initail fail!");
            return;
        }

        // From here on the segmenter is initialised, so it must always be released,
        // even if one of the calls below throws.
        try {
            System.out.println("Initial success!");

            String input = "中国科学院计算技术研究所在多年研究工作积累的基础上,研制出了汉语词法分析系统ICTCLAS。千万科学家";
            System.out.println(input);

            // Segment once before any user dictionary is imported.
            ictclas.ICTCLAS_SetPOSmap(ictclas.PKU_POS_MAP_FIRST);
            String result = segment(ictclas, input);
            System.out.println("未导入用户词典的分词结果是:\t" + result);

            // Import the user dictionary. The path is encoded explicitly with the same
            // charset as the path given to ICTCLAS_Init instead of the platform default.
            // NOTE(review): the second argument (3) is presumably an encoding code for
            // the dictionary file - confirm against the ICTCLAS50 API.
            String userDir = "userDict.txt";
            int count = ictclas.ICTCLAS_ImportUserDictFile(userDir.getBytes(ENCODING), 3);
            System.out.println("\n导入用户词个数:\t" + count);

            // Segment the same sentence again now that the user dictionary is loaded.
            String result1 = segment(ictclas, input);
            System.out.println("导入用户词典后的分词结果是:\t" + result1);
        } finally {
            // Shut the segmenter down and release its resources.
            ictclas.ICTCLAS_Exit();
        }
    }

    /**
     * Segments {@code text} with the given segmenter and decodes the result as UTF-8.
     *
     * @param ictclas an already initialised segmenter
     * @param text    the text to segment
     * @return the segmenter output decoded as a UTF-8 string
     * @throws UnsupportedEncodingException if the UTF-8 charset is not available
     */
    private static String segment(ICTCLAS50 ictclas, String text) throws UnsupportedEncodingException {
        // NOTE(review): 0 and 1 are passed through unchanged from the original demo;
        // presumably an encoding-type code and a POS-tagging flag - confirm against the API.
        byte[] nativeBytes = ictclas.ICTCLAS_ParagraphProcess(text.getBytes(ENCODING), 0, 1);
        return new String(nativeBytes, ENCODING);
    }
}