用java程序将GBK字符转成UTF-8编码格式
上网查找一下 用java程序将GBK字符转成UTF-8编码格式的代码,发现全是C++ 写的,而且很烦琐,
现在自己写了一个java的,以供参考。
package com.lang.string;
public class ConverFromGBKToUTF8 { public static void main(String[] args){ try { ConverFromGBKToUTF8 convert = new ConverFromGBKToUTF8(); byte [] fullByte = convert.gbk2utf8(chenese); String fullStr = new String(fullByte, "UTF-8"); System.out.println("string from GBK to UTF-8 byte: " + fullStr);
} catch (Exception e) { e.printStackTrace(); } } public byte[] gbk2utf8(String chenese){ char c[] = chenese.toCharArray(); byte [] fullByte =new byte[3*c.length]; for(int i=0; i<c.length; i++){ int m = (int)c[i]; String word = Integer.toBinaryString(m); // System.out.println(word); StringBuffer sb = new StringBuffer(); int len = 16 - word.length(); //补零 for(int j=0; j<len; j++){ sb.append("0"); } sb.append(word); sb.insert(0, "1110"); sb.insert(8, "10"); sb.insert(16, "10"); // System.out.println(sb.toString()); String s1 = sb.substring(0, 8); String s2 = sb.substring(8, 16); String s3 = sb.substring(16); byte b0 = Integer.valueOf(s1, 2).byteValue(); byte b1 = Integer.valueOf(s2, 2).byteValue(); byte b2 = Integer.valueOf(s3, 2).byteValue(); byte[] bf = new byte[3]; bf[0] = b0; fullByte[i*3] = bf[0]; bf[1] = b1; fullByte[i*3+1] = bf[1]; bf[2] = b2; fullByte[i*3+2] = bf[2]; } return fullByte; } }
UTF-8的编码原理和特性:
U+0000~U+007E 1 _ _ _ _ _ _ _ (7bits)
U+0080~U+07FF 1 1 0_ _ _ _ _ 1 0_ _ _ _ _ _ (11bits)
U+0800~U+FFFF 1 1 1 0 _ _ _ _ 1 0 _ _ _ _ _ _ 1 0 _ _ _ _ _ _ (16bits)