日期:2014-05-20  浏览次数:20682 次

Java程序的乱码问题
这段代码可以直接运行,但输入输出存在乱码问题,请高手指点一下。
package com.robot.analyzer;

import java.io.*;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * *****************************************************
 * Word segmentation (分词)
 * *****************************************************
 */

public class SegCn {

private String separator=" ";
private static SegCn segmenter = null;
private TreeMap<String, Boolean> cnWords;
private TreeSet<String> cForeign, cNumbers;

/**
 * Loads the number set, the foreign-character set and then the word dictionary.
 *
 * <p>Every dictionary line that contains no {@code #} and is shorter than five
 * characters is stored in {@code cnWords} with the value {@code true}. Each
 * proper prefix of such a word that is at least two characters long is stored
 * with the value {@code false} unless that key is already present, so an
 * existing {@code true} entry is never overwritten by a prefix.
 *
 * <p>The dictionary is decoded explicitly as UTF-8, the same charset
 * {@code loadset} uses for the other data files, rather than the platform
 * default charset, which garbles the text on systems whose default differs.
 * NOTE(review): assumes Dictionary.txt is stored as UTF-8 like Number.txt and
 * Foreign.txt - confirm.
 *
 * <p>An I/O failure is reported on standard output and is not rethrown; the
 * dictionary then holds whatever was read before the failure.
 */
private SegCn() {
    cForeign = new TreeSet<String>();
    cNumbers = new TreeSet<String>();

    loadset(cNumbers, "..//Number.txt");
    loadset(cForeign, "..//Foreign.txt");

    System.out.print("Loading Lexicon");
    cnWords = new TreeMap<String, Boolean>();

    try {
        BufferedReader in = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File("..//Dictionary.txt")), "UTF-8"));
        int lineCount = 0;
        try {
            String word;
            while ((word = in.readLine()) != null) {
                // Print a progress dot for every 10000 lines read.
                if (++lineCount % 10000 == 0) {
                    System.out.print('.');
                }
                // Skip lines containing '#' and entries of five or more characters.
                if (word.indexOf("#") != -1 || word.length() >= 5) {
                    continue;
                }

                cnWords.put(word.intern(), true);

                // Register the proper prefixes (length 2 .. length-1) as
                // non-word entries, without touching keys that already exist.
                for (int end = 2; end < word.length(); end++) {
                    String prefix = word.substring(0, end).intern();
                    if (!cnWords.containsKey(prefix)) {
                        cnWords.put(prefix, false);
                    }
                }
            }
        } finally {
            // Release the file handle even when reading fails part-way.
            in.close();
        }
        System.out.println();
        System.out.println("词典加载成功");
        // The reported number is the count of lines read, including skipped ones.
        System.out.println("load words number is " + lineCount);
    } catch (IOException e) {
        System.out.println("Loading Lexicon failuer");
        e.printStackTrace();
    }
}

/**
 * Discards the cached instance so that the next {@code getSegmenter()} call
 * constructs a new one.
 */
public synchronized static void reset() {
    segmenter = null;
}

/**
 * Returns the shared instance, constructing it on the first call (or on the
 * first call after {@code reset()}).
 *
 * @return the shared {@code SegCn} instance
 */
public synchronized static SegCn getSegmenter() {
    SegCn current = segmenter;
    if (current == null) {
        current = new SegCn();
        segmenter = current;
    }
    return current;
}

/**
 * Reads {@code sourcefile} as UTF-8 text and adds each usable line to
 * {@code targetset}.
 *
 * <p>Lines that are empty or contain {@code #} are skipped. Any exception is
 * reported on standard error and not rethrown, so the set keeps whatever was
 * added before the failure.
 *
 * @param targetset  set that receives the accepted lines
 * @param sourcefile path of the file to read
 */
private void loadset(TreeSet<String> targetset, String sourcefile) {
    try {
        InputStream raw = new FileInputStream(new File(sourcefile));
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(raw, "UTF-8"));
            String dataline;
            while ((dataline = in.readLine()) != null) {
                if ((dataline.indexOf("#") > -1) || (dataline.length() == 0)) {
                    continue;
                }
                targetset.add(dataline.intern());
            }
        } finally {
            // Closing the underlying stream releases the file handle even
            // when decoding or reading fails part-way through the file.
            raw.close();
        }
    } catch (Exception e) {
        System.err.println("Exception loading data file" + sourcefile + " "
                + e);
        e.printStackTrace();
    }
}
/**
 * Checks whether the text consists only of entries from the number set.
 *
 * @param testword text to check
 * @return {@code false} as soon as a one-character substring of
 *         {@code testword} is missing from {@code cNumbers}; {@code true}
 *         otherwise, which includes the empty string
 */
public boolean isNumber(String testword) {
    final int length = testword.length();
    for (int pos = 0; pos < length; pos++) {
        String single = String.valueOf(testword.charAt(pos));
        if (!cNumbers.contains(single)) {
            return false;
        }
    }
    return true;
}
//全是外语的情况
public boolean isAllForeign(String testword) {
boolean result = true;
for (int i = 0; i < testword.length(); i++) {
if (!cForeign.contains(testword.substring(i, i + 1).intern())) {
result = false;
break;