Rapicorn - Experimental UI Toolkit - Source Code  13.07.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
unicode.hh
Go to the documentation of this file.
00001  // Licensed GNU LGPL v3 or later: http://www.gnu.org/licenses/lgpl.html
00002 #ifndef __RAPICORN_UNICODE_HH__
00003 #define __RAPICORN_UNICODE_HH__
00004 
00005 #include <rcore/utilities.hh>
00006 
00007 namespace Rapicorn {
00008 
00009 namespace Unicode {
00010 inline bool isvalid      (unichar uc) RAPICORN_CONST;
00011 bool        isalnum      (unichar uc) RAPICORN_CONST;
00012 bool        isalpha      (unichar uc) RAPICORN_CONST;
00013 bool        iscntrl      (unichar uc) RAPICORN_CONST;
00014 bool        isdigit      (unichar uc) RAPICORN_CONST;
00015 int         digit_value  (unichar uc) RAPICORN_CONST;
00016 bool        isgraph      (unichar uc) RAPICORN_CONST;
00017 bool        islower      (unichar uc) RAPICORN_CONST;
00018 unichar     tolower      (unichar uc) RAPICORN_CONST;
00019 bool        isprint      (unichar uc) RAPICORN_CONST;
00020 bool        ispunct      (unichar uc) RAPICORN_CONST;
00021 bool        isspace      (unichar uc) RAPICORN_CONST;
00022 bool        isupper      (unichar uc) RAPICORN_CONST;
00023 unichar     toupper      (unichar uc) RAPICORN_CONST;
00024 bool        isxdigit     (unichar uc) RAPICORN_CONST;
00025 int         xdigit_value (unichar uc) RAPICORN_CONST;
00026 bool        istitle      (unichar uc) RAPICORN_CONST;
00027 unichar     totitle      (unichar uc) RAPICORN_CONST;
00028 bool        isdefined    (unichar uc) RAPICORN_CONST;
00029 bool        iswide       (unichar uc) RAPICORN_CONST;
00030 bool        iswide_cjk   (unichar uc) RAPICORN_CONST;
// Character category constants, the return type of get_type().
// The numeric values are implicit (0, 1, 2, ...), so the order of the
// enumerators below is significant and must not be changed.
enum Type {
  CONTROL,
  FORMAT,
  UNASSIGNED,
  PRIVATE_USE,
  SURROGATE,
  LOWERCASE_LETTER,
  MODIFIER_LETTER,
  OTHER_LETTER,
  TITLECASE_LETTER,
  UPPERCASE_LETTER,
  COMBINING_MARK,
  ENCLOSING_MARK,
  NON_SPACING_MARK,
  DECIMAL_NUMBER,
  LETTER_NUMBER,
  OTHER_NUMBER,
  CONNECT_PUNCTUATION,
  DASH_PUNCTUATION,
  CLOSE_PUNCTUATION,
  FINAL_PUNCTUATION,
  INITIAL_PUNCTUATION,
  OTHER_PUNCTUATION,
  OPEN_PUNCTUATION,
  CURRENCY_SYMBOL,
  MODIFIER_SYMBOL,
  MATH_SYMBOL,
  OTHER_SYMBOL,
  LINE_SEPARATOR,
  PARAGRAPH_SEPARATOR,
  SPACE_SEPARATOR
};
00043 Type    get_type     (unichar uc) RAPICORN_CONST;
// Line-break class constants, the return type of get_break().
// The numeric values are implicit (0, 1, 2, ...), so the order of the
// enumerators below is significant and must not be changed.
enum BreakType {
  BREAK_MANDATORY,
  BREAK_CARRIAGE_RETURN,
  BREAK_LINE_FEED,
  BREAK_COMBINING_MARK,
  BREAK_SURROGATE,
  BREAK_ZERO_WIDTH_SPACE,
  BREAK_INSEPARABLE,
  BREAK_NON_BREAKING_GLUE,
  BREAK_CONTINGENT,
  BREAK_SPACE,
  BREAK_AFTER,
  BREAK_BEFORE,
  BREAK_BEFORE_AND_AFTER,
  BREAK_HYPHEN,
  BREAK_NON_STARTER,
  BREAK_OPEN_PUNCTUATION,
  BREAK_CLOSE_PUNCTUATION,
  BREAK_QUOTATION,
  BREAK_EXCLAMATION,
  BREAK_IDEOGRAPHIC,
  BREAK_NUMERIC,
  BREAK_INFIX_SEPARATOR,
  BREAK_SYMBOL,
  BREAK_ALPHABETIC,
  BREAK_PREFIX,
  BREAK_POSTFIX,
  BREAK_COMPLEX_CONTEXT,
  BREAK_AMBIGUOUS,
  BREAK_UNKNOWN,
  BREAK_NEXT_LINE,
  BREAK_WORD_JOINER,
  BREAK_HANGUL_L_JAMO,
  BREAK_HANGUL_V_JAMO,
  BREAK_HANGUL_T_JAMO,
  BREAK_HANGUL_LV_SYLLABLE,
  BREAK_HANGUL_LVT_SYLLABLE
};
00058 BreakType get_break  (unichar uc) RAPICORN_CONST;
00059 
00060 } // Unicode
00061 
00062 /* --- UTF-8 movement --- */
00063 inline const char*    utf8_next         (const char     *c);
00064 inline char*          utf8_next         (char           *c);
00065 inline const char*    utf8_prev         (const char     *c);
00066 inline char*          utf8_prev         (char           *c);
00067 inline const char*    utf8_find_next    (const char     *c,
00068                                          const char     *bound = NULL);
00069 inline char*          utf8_find_next    (char           *current,
00070                                          const char     *bound = NULL);
00071 inline const char*    utf8_find_prev    (const char     *start,
00072                                          const char     *current);
00073 inline char*          utf8_find_prev    (const char     *start,
00074                                          char           *currrent);
00075 inline const char*    utf8_align        (const char     *start,
00076                                          const char     *current);
00077 inline char*          utf8_align        (const char     *start,
00078                                          char           *current);
00079 inline bool           utf8_aligned      (const char     *c);
00080 unichar               utf8_to_unichar   (const char     *str);
00081 int                   utf8_from_unichar (unichar         uc,
00082                                          char            str[8]);
00083 bool                  utf8_validate     (const String   &string,
00084                                          int            *bound = NULL);
00085 
00086 /* --- implementation bits --- */
00087 namespace Unicode {
00088 inline bool
00089 isvalid (unichar uc)
00090 {
00091   if (RAPICORN_UNLIKELY (uc > 0xfdcf && uc < 0xfdf0))
00092     return false;
00093   if (RAPICORN_UNLIKELY ((uc & 0xfffe) == 0xfffe))
00094     return false;
00095   if (RAPICORN_UNLIKELY (uc > 0x10ffff))
00096     return false;
00097   if (RAPICORN_UNLIKELY ((uc & 0xfffff800) == 0xd800))
00098     return false;
00099   return true;
00100 }
00101 } // Unicode
00102 
00103 extern const int8 utf8_skip_table[256];
00104 
00105 inline const char*
00106 utf8_next (const char *c)
00107 {
00108   return c + utf8_skip_table[(uint8) *c];
00109 }
00110 
00111 inline char*
00112 utf8_next (char *c)
00113 {
00114   return c + utf8_skip_table[(uint8) *c];
00115 }
00116 
// Move c back by at least one byte and keep going while the byte reached is
// of the form 10xxxxxx. There is no lower bound: the caller must guarantee
// that a non-10xxxxxx byte precedes c.
inline const char*
utf8_prev (const char *c)
{
  --c;
  while ((*c & 0xc0) == 0x80)
    --c;
  return c;
}
00125 
// Mutable-pointer variant: move c back by at least one byte and keep going
// while the byte reached is of the form 10xxxxxx. There is no lower bound:
// the caller must guarantee that a non-10xxxxxx byte precedes c.
inline char*
utf8_prev (char *c)
{
  for (--c; (*c & 0xc0) == 0x80; --c)
    ; // skip over 10xxxxxx bytes
  return c;
}
00134 
// Find the next byte after c that is not of the form 10xxxxxx.
// - If *c is NUL, c is not advanced at all.
// - With a non-NULL bound, scanning stops once bound is reached, and NULL
//   is returned if the resulting position is at or beyond bound.
// - With a NULL bound the scan is unbounded and the position is returned.
inline const char*
utf8_find_next (const char *c,
                const char *bound)
{
  if (*c != 0)
    {
      ++c;
      while ((bound == NULL || c < bound) && (*c & 0xc0) == 0x80)
        ++c;
    }
  if (bound != NULL && c >= bound)
    return NULL;
  return c;
}
00145 
00146 inline char*
00147 utf8_find_next (char       *c,
00148                 const char *bound)
00149 {
00150   return const_cast<char*> (utf8_find_next (const_cast<const char*> (c), bound));
00151 }
00152 
// Search backwards from current (exclusive) down to start (inclusive) for a
// byte that is not of the form 10xxxxxx.
// Returns a pointer to that byte, or NULL if current <= start or if every
// byte in [start, current) is of the form 10xxxxxx.
inline const char*
utf8_find_prev (const char     *start,
                const char     *current)
{
  // Only decrement while still above start, so that no pointer before the
  // start of the buffer is ever formed (that would be undefined behaviour).
  while (current > start)
    {
      current--;
      if ((*current & 0xc0) != 0x80)
        return current;
    }
  return NULL;
}
00162 
00163 inline char*
00164 utf8_find_prev (const char     *start,
00165                 char           *current)
00166 {
00167   return const_cast<char*> (utf8_find_prev (start, const_cast<const char*> (current)));
00168 }
00169 
// Move current backwards, never below start, until it points at a byte that
// is not of the form 10xxxxxx. If current already points at such a byte, or
// current <= start, it is returned unchanged.
inline const char*
utf8_align (const char     *start,
            const char     *current)
{
  for (; current > start; --current)
    if ((*current & 0xc0) != 0x80)
      break;
  return current;
}
00178 
00179 inline char*
00180 utf8_align (const char *start,
00181             char       *current)
00182 {
00183   return const_cast<char*> (utf8_align (start, const_cast<const char*> (current)));
00184 }
00185 
// Return true if c points at the start of a UTF-8 sequence, i.e. the byte at
// c is NOT a continuation byte of the form 10xxxxxx. This is the same
// condition on which utf8_align() stops moving backwards.
// (The test was previously inverted and reported continuation bytes as
// "aligned".)
inline bool
utf8_aligned (const char *c)
{
  return (*c & 0xc0) != 0x80;
}
00191 
00192 } // Rapicorn
00193 
00194 #endif /* __RAPICORN_UNICODE_HH__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines