Rapicorn - Experimental UI Toolkit - Source Code
13.07.0
|
00001 // Licensed GNU LGPL v3 or later: http://www.gnu.org/licenses/lgpl.html 00002 #ifndef __RAPICORN_UNICODE_HH__ 00003 #define __RAPICORN_UNICODE_HH__ 00004 00005 #include <rcore/utilities.hh> 00006 00007 namespace Rapicorn { 00008 00009 namespace Unicode { 00010 inline bool isvalid (unichar uc) RAPICORN_CONST; 00011 bool isalnum (unichar uc) RAPICORN_CONST; 00012 bool isalpha (unichar uc) RAPICORN_CONST; 00013 bool iscntrl (unichar uc) RAPICORN_CONST; 00014 bool isdigit (unichar uc) RAPICORN_CONST; 00015 int digit_value (unichar uc) RAPICORN_CONST; 00016 bool isgraph (unichar uc) RAPICORN_CONST; 00017 bool islower (unichar uc) RAPICORN_CONST; 00018 unichar tolower (unichar uc) RAPICORN_CONST; 00019 bool isprint (unichar uc) RAPICORN_CONST; 00020 bool ispunct (unichar uc) RAPICORN_CONST; 00021 bool isspace (unichar uc) RAPICORN_CONST; 00022 bool isupper (unichar uc) RAPICORN_CONST; 00023 unichar toupper (unichar uc) RAPICORN_CONST; 00024 bool isxdigit (unichar uc) RAPICORN_CONST; 00025 int xdigit_value (unichar uc) RAPICORN_CONST; 00026 bool istitle (unichar uc) RAPICORN_CONST; 00027 unichar totitle (unichar uc) RAPICORN_CONST; 00028 bool isdefined (unichar uc) RAPICORN_CONST; 00029 bool iswide (unichar uc) RAPICORN_CONST; 00030 bool iswide_cjk (unichar uc) RAPICORN_CONST; 00031 typedef enum { 00032 CONTROL, FORMAT, UNASSIGNED, 00033 PRIVATE_USE, SURROGATE, LOWERCASE_LETTER, 00034 MODIFIER_LETTER, OTHER_LETTER, TITLECASE_LETTER, 00035 UPPERCASE_LETTER, COMBINING_MARK, ENCLOSING_MARK, 00036 NON_SPACING_MARK, DECIMAL_NUMBER, LETTER_NUMBER, 00037 OTHER_NUMBER, CONNECT_PUNCTUATION, DASH_PUNCTUATION, 00038 CLOSE_PUNCTUATION, FINAL_PUNCTUATION, INITIAL_PUNCTUATION, 00039 OTHER_PUNCTUATION, OPEN_PUNCTUATION, CURRENCY_SYMBOL, 00040 MODIFIER_SYMBOL, MATH_SYMBOL, OTHER_SYMBOL, 00041 LINE_SEPARATOR, PARAGRAPH_SEPARATOR, SPACE_SEPARATOR 00042 } Type; 00043 Type get_type (unichar uc) RAPICORN_CONST; 00044 typedef enum { 00045 BREAK_MANDATORY, BREAK_CARRIAGE_RETURN, BREAK_LINE_FEED, 00046 BREAK_COMBINING_MARK, BREAK_SURROGATE, BREAK_ZERO_WIDTH_SPACE, 00047 BREAK_INSEPARABLE, BREAK_NON_BREAKING_GLUE, BREAK_CONTINGENT, 00048 BREAK_SPACE, BREAK_AFTER, BREAK_BEFORE, 00049 BREAK_BEFORE_AND_AFTER, BREAK_HYPHEN, BREAK_NON_STARTER, 00050 BREAK_OPEN_PUNCTUATION, BREAK_CLOSE_PUNCTUATION, BREAK_QUOTATION, 00051 BREAK_EXCLAMATION, BREAK_IDEOGRAPHIC, BREAK_NUMERIC, 00052 BREAK_INFIX_SEPARATOR, BREAK_SYMBOL, BREAK_ALPHABETIC, 00053 BREAK_PREFIX, BREAK_POSTFIX, BREAK_COMPLEX_CONTEXT, 00054 BREAK_AMBIGUOUS, BREAK_UNKNOWN, BREAK_NEXT_LINE, 00055 BREAK_WORD_JOINER, BREAK_HANGUL_L_JAMO, BREAK_HANGUL_V_JAMO, 00056 BREAK_HANGUL_T_JAMO, BREAK_HANGUL_LV_SYLLABLE, BREAK_HANGUL_LVT_SYLLABLE 00057 } BreakType; 00058 BreakType get_break (unichar uc) RAPICORN_CONST; 00059 00060 } // Unicode 00061 00062 /* --- UTF-8 movement --- */ 00063 inline const char* utf8_next (const char *c); 00064 inline char* utf8_next (char *c); 00065 inline const char* utf8_prev (const char *c); 00066 inline char* utf8_prev (char *c); 00067 inline const char* utf8_find_next (const char *c, 00068 const char *bound = NULL); 00069 inline char* utf8_find_next (char *current, 00070 const char *bound = NULL); 00071 inline const char* utf8_find_prev (const char *start, 00072 const char *current); 00073 inline char* utf8_find_prev (const char *start, 00074 char *currrent); 00075 inline const char* utf8_align (const char *start, 00076 const char *current); 00077 inline char* utf8_align (const char *start, 00078 char *current); 00079 inline bool utf8_aligned (const char *c); 00080 unichar utf8_to_unichar (const char *str); 00081 int utf8_from_unichar (unichar uc, 00082 char str[8]); 00083 bool utf8_validate (const String &string, 00084 int *bound = NULL); 00085 00086 /* --- implementation bits --- */ 00087 namespace Unicode { 00088 inline bool 00089 isvalid (unichar uc) 00090 { 00091 if (RAPICORN_UNLIKELY (uc > 0xfdcf && uc < 0xfdf0)) 00092 return false; 00093 if (RAPICORN_UNLIKELY ((uc & 0xfffe) == 0xfffe)) 00094 return false; 00095 if (RAPICORN_UNLIKELY (uc > 0x10ffff)) 00096 return false; 00097 if (RAPICORN_UNLIKELY ((uc & 0xfffff800) == 0xd800)) 00098 return false; 00099 return true; 00100 } 00101 } // Unicode 00102 00103 extern const int8 utf8_skip_table[256]; 00104 00105 inline const char* 00106 utf8_next (const char *c) 00107 { 00108 return c + utf8_skip_table[(uint8) *c]; 00109 } 00110 00111 inline char* 00112 utf8_next (char *c) 00113 { 00114 return c + utf8_skip_table[(uint8) *c]; 00115 } 00116 00117 inline const char* 00118 utf8_prev (const char *c) 00119 { 00120 do 00121 c--; 00122 while ((*c & 0xc0) == 0x80); 00123 return c; 00124 } 00125 00126 inline char* 00127 utf8_prev (char *c) 00128 { 00129 do 00130 c--; 00131 while ((*c & 0xc0) == 0x80); 00132 return c; 00133 } 00134 00135 inline const char* 00136 utf8_find_next (const char *c, 00137 const char *bound) 00138 { 00139 if (*c) 00140 do 00141 c++; 00142 while ((!bound || c < bound) && (*c & 0xc0) == 0x80); 00143 return !bound || c < bound ? c : NULL; 00144 } 00145 00146 inline char* 00147 utf8_find_next (char *c, 00148 const char *bound) 00149 { 00150 return const_cast<char*> (utf8_find_next (const_cast<const char*> (c), bound)); 00151 } 00152 00153 inline const char* 00154 utf8_find_prev (const char *start, 00155 const char *current) 00156 { 00157 do 00158 current--; 00159 while (current >= start && (*current & 0xc0) == 0x80); 00160 return current >= start ? current : NULL; 00161 } 00162 00163 inline char* 00164 utf8_find_prev (const char *start, 00165 char *current) 00166 { 00167 return const_cast<char*> (utf8_find_prev (start, const_cast<const char*> (current))); 00168 } 00169 00170 inline const char* 00171 utf8_align (const char *start, 00172 const char *current) 00173 { 00174 while (current > start && (*current & 0xc0) == 0x80) 00175 current--; 00176 return current; 00177 } 00178 00179 inline char* 00180 utf8_align (const char *start, 00181 char *current) 00182 { 00183 return const_cast<char*> (utf8_align (start, const_cast<const char*> (current))); 00184 } 00185 00186 inline bool 00187 utf8_aligned (const char *c) 00188 { 00189 return (*c & 0xc0) == 0x80; 00190 } 00191 00192 } // Rapicorn 00193 00194 #endif /* __RAPICORN_UNICODE_HH__ */