diff options
author | Igor Sysoev <igor@sysoev.ru> | 2017-01-17 20:00:00 +0300 |
---|---|---|
committer | Igor Sysoev <igor@sysoev.ru> | 2017-01-17 20:00:00 +0300 |
commit | 16cbf3c076a0aca6d47adaf3f719493674cf2363 (patch) | |
tree | e6530480020f62a2bdbf249988ec3e2a751d3927 /src/nxt_utf8.h | |
download | unit-16cbf3c076a0aca6d47adaf3f719493674cf2363.tar.gz unit-16cbf3c076a0aca6d47adaf3f719493674cf2363.tar.bz2 |
Initial version.
Diffstat (limited to 'src/nxt_utf8.h')
-rw-r--r-- | src/nxt_utf8.h | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/src/nxt_utf8.h b/src/nxt_utf8.h new file mode 100644 index 00000000..92847545 --- /dev/null +++ b/src/nxt_utf8.h @@ -0,0 +1,60 @@ + +/* + * Copyright (C) Igor Sysoev + * Copyright (C) NGINX, Inc. + */ + +#ifndef _NXT_UTF8_H_INCLUDED_ +#define _NXT_UTF8_H_INCLUDED_ + + +/* + * Since the maximum valid Unicode character is 0x0010ffff, the maximum + * difference between Unicode characters is lesser 0x0010ffff and + * 0x0eee0eee can be used as value to indicate UTF-8 encoding error. + */ +#define NXT_UTF8_SORT_INVALID 0x0eee0eee + + +NXT_EXPORT u_char *nxt_utf8_encode(u_char *p, uint32_t u); +NXT_EXPORT uint32_t nxt_utf8_decode(const u_char **start, const u_char *end); +NXT_EXPORT uint32_t nxt_utf8_decode2(const u_char **start, const u_char *end); +NXT_EXPORT nxt_int_t nxt_utf8_casecmp(const u_char *start1, + const u_char *start2, size_t len1, size_t len2); +NXT_EXPORT uint32_t nxt_utf8_lowcase(const u_char **start, const u_char *end); +NXT_EXPORT ssize_t nxt_utf8_length(const u_char *p, size_t len); +NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len); + + +/* nxt_utf8_next() expects a valid UTF-8 string. */ + +nxt_inline const u_char * +nxt_utf8_next(const u_char *p, const u_char *end) +{ + u_char c; + + c = *p++; + + if ((c & 0x80) != 0) { + + do { + /* + * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx. + * The next UTF-8 bytes are 10xxxxxx. + */ + c = *p; + + if ((c & 0xc0) != 0x80) { + return p; + } + + p++; + + } while (p < end); + } + + return p; +} + + +#endif /* _NXT_UTF8_H_INCLUDED_ */ |