Main Page | Class Hierarchy | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

utf16.h File Reference

C API: UTF-16 macros. More...

Go to the source code of this file.

Defines

#define UTF_IS_FIRST_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xd800)
#define UTF_IS_SECOND_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xdc00)
#define UTF_IS_SURROGATE_FIRST(c)   (((c)&0x400)==0)
#define UTF_SURROGATE_OFFSET   ((0xd800<<10UL)+0xdc00-0x10000)
#define UTF16_GET_PAIR_VALUE(first, second)   (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
#define UTF_FIRST_SURROGATE(supplementary)   (UChar)(((supplementary)>>10)+0xd7c0)
 Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding.
#define UTF_SECOND_SURROGATE(supplementary)   (UChar)(((supplementary)&0x3ff)|0xdc00)
 Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding.
#define UTF16_LEAD(supplementary)   UTF_FIRST_SURROGATE(supplementary)
 alias for UTF_FIRST_SURROGATE
#define UTF16_TRAIL(supplementary)   UTF_SECOND_SURROGATE(supplementary)
 alias for UTF_SECOND_SURROGATE
#define UTF16_IS_SINGLE(uchar)   !UTF_IS_SURROGATE(uchar)
#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
#define UTF16_IS_TRAIL(uchar)   UTF_IS_SECOND_SURROGATE(uchar)
#define UTF16_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0xffff)
#define UTF16_CHAR_LENGTH(c)   ((uint32_t)(c)<=0xffff ? 1 : 2)
#define UTF16_MAX_CHAR_LENGTH   2
#define UTF16_ARRAY_SIZE(size)   (size)
#define UTF16_GET_CHAR_UNSAFE(s, i, c)
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF16_FWD_1_UNSAFE(s, i)
#define UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF16_SET_CHAR_START_UNSAFE(s, i)
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF16_FWD_1_SAFE(s, i, length)
#define UTF16_FWD_N_SAFE(s, i, length, n)
#define UTF16_SET_CHAR_START_SAFE(s, start, i)
#define UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF16_BACK_1_UNSAFE(s, i)
#define UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF16_BACK_1_SAFE(s, start, i)
#define UTF16_BACK_N_SAFE(s, start, i, n)
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)


Detailed Description

C API: UTF-16 macros.

This file defines macros to deal with UTF-16 code units and code points. "Safe" macros check for length overruns and illegal sequences, and also for irregular sequences when the strict option is set. "Unsafe" macros are designed for maximum speed. utf16.h is included by utf.h after unicode/umachine.h and some common definitions.

Usage: ICU coding guidelines for if() statements should be followed when using these macros. Compound statements (curly braces {}) must be used for the bodies of if, else, while, and similar statements, and every macro statement should be terminated with a semicolon.


Define Documentation

#define UTF16_APPEND_CHAR_SAFE(s, i, length, c)
 

Value:

{ \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

#define UTF16_APPEND_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

#define UTF16_BACK_1_SAFE(s, start, i)
 

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}

#define UTF16_BACK_1_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

#define UTF16_BACK_N_SAFE(s, start, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        UTF16_BACK_1_SAFE(s, start, i); \
        --__N; \
    } \
}

#define UTF16_BACK_N_UNSAFE(s, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

#define UTF16_FWD_1_SAFE(s, i, length)
 

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++]) && (i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
        ++(i); \
    } \
}

#define UTF16_FWD_1_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

#define UTF16_FWD_N_SAFE(s, i, length, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0 && (i)<(length)) { \
        UTF16_FWD_1_SAFE(s, i, length); \
        --__N; \
    } \
}

#define UTF16_FWD_N_UNSAFE(s, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

#define UTF16_GET_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}

#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

#define UTF16_NEXT_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

#define UTF16_PREV_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
 

Value:

{ \
    if((start)<(i) && (i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
        ++(i); \
    } \
}

#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

#define UTF16_SET_CHAR_START_SAFE(s, start, i)
 

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}

#define UTF16_SET_CHAR_START_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

#define UTF_FIRST_SURROGATE(supplementary)   (UChar)(((supplementary)>>10)+0xd7c0)
 

Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding.

#define UTF_SECOND_SURROGATE(supplementary)   (UChar)(((supplementary)&0x3ff)|0xdc00)
 

Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding.


Generated on Mon May 23 13:34:37 2005 for ICU 2.1 by  doxygen 1.4.2