0%

Java 判断文件编码,转换编码,解决中文乱码

当需要统一处理多个编码不同的文件时,如果不先把它们转换成同一种编码,就会出现中文乱码的问题。而要转换编码,又必须先知道源文件的编码。判断文件编码,可以使用开源库 cpdetector:http://sourceforge.net/projects/cpdetector/

        try {
            File file = new File(lrcPath);

            // 获取原始文件编码
            CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
            detector.add(JChardetFacade.getInstance());
            Charset charset = null;
            charset = detector.detectCodepage(file.toURL());
            
            if (charset != null) {
                                //使用源文件的编码,读取文件
                InputStreamReader bufRead = new InputStreamReader(new FileInputStream(file),charset.name());
                StringBuilder sb = new StringBuilder();
                char[] buffer = new char[2048];
                int len = 0;
                while ((len = bufRead.read(buffer, 0, 2048)) > 0) {
                    sb.append(buffer, 0, len);
                }
                String text = sb.toString();
                bufRead.close();
                                //使用UTF-8编码写入文件
                BufferedWriter bufWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
                bufWriter.write(text);
                bufWriter.flush();
                bufWriter.close();
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }