summaryrefslogtreecommitdiffhomepage
path: root/src/nxt_utf8.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/nxt_utf8.h60
1 files changed, 60 insertions, 0 deletions
diff --git a/src/nxt_utf8.h b/src/nxt_utf8.h
new file mode 100644
index 00000000..92847545
--- /dev/null
+++ b/src/nxt_utf8.h
@@ -0,0 +1,60 @@
+
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) NGINX, Inc.
+ */
+
+#ifndef _NXT_UTF8_H_INCLUDED_
+#define _NXT_UTF8_H_INCLUDED_
+
+
+/*
+ * Since the maximum valid Unicode character is 0x0010ffff, the maximum
+ * difference between Unicode characters is lesser 0x0010ffff and
+ * 0x0eee0eee can be used as value to indicate UTF-8 encoding error.
+ */
+#define NXT_UTF8_SORT_INVALID 0x0eee0eee
+
+
+NXT_EXPORT u_char *nxt_utf8_encode(u_char *p, uint32_t u);
+NXT_EXPORT uint32_t nxt_utf8_decode(const u_char **start, const u_char *end);
+NXT_EXPORT uint32_t nxt_utf8_decode2(const u_char **start, const u_char *end);
+NXT_EXPORT nxt_int_t nxt_utf8_casecmp(const u_char *start1,
+ const u_char *start2, size_t len1, size_t len2);
+NXT_EXPORT uint32_t nxt_utf8_lowcase(const u_char **start, const u_char *end);
+NXT_EXPORT ssize_t nxt_utf8_length(const u_char *p, size_t len);
+NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len);
+
+
+/* nxt_utf8_next() expects a valid UTF-8 string. */
+
+nxt_inline const u_char *
+nxt_utf8_next(const u_char *p, const u_char *end)
+{
+ u_char c;
+
+ c = *p++;
+
+ if ((c & 0x80) != 0) {
+
+ do {
+ /*
+ * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
+ * The next UTF-8 bytes are 10xxxxxx.
+ */
+ c = *p;
+
+ if ((c & 0xc0) != 0x80) {
+ return p;
+ }
+
+ p++;
+
+ } while (p < end);
+ }
+
+ return p;
+}
+
+
+#endif /* _NXT_UTF8_H_INCLUDED_ */