close
/**
 * Character classification helpers: detection of Chinese (CJK) characters and
 * punctuation, and of printable ASCII characters.
 *
 * @author Robert Chen
 */
public class CharUtil
{
    /**
     * Tells whether a UTF-16 code unit is a Chinese character or Chinese-style punctuation.
     * <p>
     * A single {@code char} can only represent code points of the Basic Multilingual
     * Plane. A surrogate half therefore never matches; use {@link #IsChinese(int)} with
     * a full code point to recognise supplementary characters such as those of
     * CJK Unified Ideographs Extension B.
     * <p>
     * Ref: http://www.micmiu.com/lang/java/java-check-chinese/
     *
     * @param ch the UTF-16 code unit to test
     * @return {@code true} if {@code ch} belongs to one of the accepted Unicode blocks
     */
    public static boolean IsChinese( char ch )
    {
        return IsChinese( (int) ch );
    }

    /**
     * Tells whether a Unicode code point is a Chinese character or Chinese-style punctuation.
     * <p>
     * Accepted blocks: CJK Unified Ideographs (including Extensions A and B),
     * CJK Compatibility Ideographs, CJK Symbols and Punctuation,
     * Halfwidth and Fullwidth Forms, and General Punctuation.
     *
     * @param codePoint the Unicode code point to test
     * @return {@code true} if {@code codePoint} belongs to one of the accepted Unicode blocks
     * @throws IllegalArgumentException if {@code codePoint} is not a valid Unicode code point
     */
    public static boolean IsChinese( int codePoint )
    {
        // May be null for a code point outside every defined block; the identity
        // comparisons below then simply all fail.
        Character.UnicodeBlock ub = Character.UnicodeBlock.of( codePoint );

        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
            || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
            || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION;
    }

    /**
     * Tells whether a character is a printable ASCII character (codes 32 to 126 inclusive).
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is between the space character and {@code '~'}
     */
    public static boolean IsPrintableAsciiChar( char ch )
    {
        return 32 <= ch && ch <= 126;
    }

    /**
     * Small demonstration that prints the Unicode block of a few sample code points.
     *
     * @param args unused
     */
    public static void main( String[] args )
    {
        // Ref: http://tw.gitbook.net/java/lang/character.unicodeblock_of.html
        // 32~126: Printable characters
        for ( char ch = 0; ch < 128; ++ch )
        {
            // All is "BASIC_LATIN".
            System.out.println( Character.UnicodeBlock.of( ch ) );
        }

        System.out.println();

        // Ref: https://www.yiibai.com/java/lang/character.unicodeblock_of_codepoint.html#article-start
        // "GREEK"
        System.out.println( Character.UnicodeBlock.of( 950 ) );
    }
}
使用:
String utf8Text = "" ; int utf8TextLen = utf8Text.length() ; StringBuffer illegalCharSB = new StringBuffer( utf8TextLen / 2 ); // It's legal character or not. for ( int i=0; i < utf8TextLen ; ++i ) { char testChar = utf8Text.charAt( i ) ; if ( CharUtil.IsPrintableAsciiChar( testChar ) || CharUtil.IsChinese( testChar ) ) continue; illegalCharSB.append( " '" + testChar + "'" ) ; }
還參考了:
● Regex Tutorial - Unicode Characters and Properties
● Regex Tutorial - Literal Characters and Special Characters
● Unicode等價性 - 維基百科,自由的百科全書
● 对字符串进行验证之前先进行规范化 - 我的技术旅程 - ITeye博客
● unicode - Detect Chinese character in java - Stack Overflow
● Java 中文字符判断 中文标点符号判断 - Tong Zeng - 博客园
● 使用Java代码过滤掉乱码字符 - CSDN博客
文章標籤
全站熱搜
留言列表