当需要统一处理多个编码不同的文件时，如果不先把它们转换成同一种编码，就会出现中文乱码的问题。而要转换编码，又必须先知道源文件的编码。判断文件编码可以使用开源库 cpdetector：http://sourceforge.net/projects/cpdetector/
try {
File file = new File(lrcPath);
// 获取原始文件编码
CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
detector.add(JChardetFacade.getInstance());
Charset charset = null;
charset = detector.detectCodepage(file.toURL());
if (charset != null) {
//使用源文件的编码,读取文件
InputStreamReader bufRead = new InputStreamReader(new FileInputStream(file),charset.name());
StringBuilder sb = new StringBuilder();
char[] buffer = new char[2048];
int len = 0;
while ((len = bufRead.read(buffer, 0, 2048)) > 0) {
sb.append(buffer, 0, len);
}
String text = sb.toString();
bufRead.close();
//使用UTF-8编码写入文件
BufferedWriter bufWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
bufWriter.write(text);
bufWriter.flush();
bufWriter.close();
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}