summaryrefslogtreecommitdiffhomepage
path: root/test/nxt_utf8_test.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/nxt_utf8_test.c')
-rw-r--r--test/nxt_utf8_test.c191
1 files changed, 191 insertions, 0 deletions
diff --git a/test/nxt_utf8_test.c b/test/nxt_utf8_test.c
new file mode 100644
index 00000000..c7323876
--- /dev/null
+++ b/test/nxt_utf8_test.c
@@ -0,0 +1,191 @@
+
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) NGINX, Inc.
+ */
+
+#include <nxt_main.h>
+#include "nxt_tests.h"
+
+
+#define NXT_UTF8_START_TEST 0xc2  /* first byte value of the exhaustive loop */
+//#define NXT_UTF8_START_TEST 0  /* alternative: start that loop at byte 0 */
+
+
+static u_char invalid[] = {
+ /* Five-byte records: the length to decode, then four sequence bytes. */
+ /* Invalid first byte less than 0xc2. */
+ 1, 0x80, 0x00, 0x00, 0x00,
+ 1, 0xc0, 0x00, 0x00, 0x00,
+ 2, 0xc0, 0x00, 0x00, 0x00,
+ 3, 0xc0, 0x00, 0x00, 0x00,
+ 4, 0xc0, 0x00, 0x00, 0x00,
+
+ /* Invalid 0x110000 value (one past the last valid code point 0x10ffff). */
+ 4, 0xf4, 0x90, 0x80, 0x80,
+
+ /* Incomplete length: only two bytes are offered to the decoder. */
+ 2, 0xe0, 0xaf, 0xb5, 0x00,
+
+ /* Overlong values. */
+ 2, 0xc0, 0x80, 0x00, 0x00,
+ 2, 0xc1, 0xb3, 0x00, 0x00,
+ 3, 0xe0, 0x80, 0x80, 0x00,
+ 3, 0xe0, 0x81, 0xb3, 0x00,
+ 3, 0xe0, 0x90, 0x9a, 0x00,
+ 4, 0xf0, 0x80, 0x8a, 0x80,
+ 4, 0xf0, 0x80, 0x81, 0xb3,
+ 4, 0xf0, 0x80, 0xaf, 0xb5,
+};
+
+
+/*
+ * Decodes the byte sequence overlong[0 .. len) with nxt_utf8_decode() and,
+ * unless the decoder returns 0xffffffff, checks that re-encoding the decoded
+ * value with nxt_utf8_encode() reproduces exactly the bytes the decoder
+ * consumed.  A sequence that decodes but does not round-trip is reported
+ * with an alert.
+ *
+ * Returns NXT_OK when the decoder returned 0xffffffff or the sequence
+ * round-trips, and NXT_ERROR otherwise.
+ */
+static nxt_int_t
+nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len)
+{
+    u_char        *end, encoded[4];
+    size_t        consumed, canonical;
+    uint32_t      raw, cp;
+    nxt_uint_t    n;
+    const u_char  *cursor;
+
+    cursor = overlong;
+    cp = nxt_utf8_decode(&cursor, overlong + len);
+
+    /* Number of input bytes the decoder advanced over. */
+    consumed = cursor - overlong;
+
+    if (cp == 0xffffffff) {
+        /* Nothing was decoded, so there is nothing to round-trip. */
+        return NXT_OK;
+    }
+
+    end = nxt_utf8_encode(encoded, cp);
+
+    /* An encoder failure is treated as a zero-length encoding. */
+    canonical = (end == NULL) ? 0 : (size_t) (end - encoded);
+
+    if (consumed == canonical
+        && nxt_memcmp(overlong, encoded, canonical) == 0)
+    {
+        return NXT_OK;
+    }
+
+    /* Pack the consumed bytes into one integer for the log message. */
+    raw = 0;
+    for (n = 0; n < consumed; n++) {
+        raw = (raw << 8) | overlong[n];
+    }
+
+    nxt_log_alert(thr->log,
+                  "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz",
+                  raw, consumed, cp, canonical);
+
+    return NXT_ERROR;
+}
+
+
+/*
+ * UTF-8 self-test.  In order, it:
+ *
+ *   1. encodes every value below 0x110000 with nxt_utf8_encode() and checks
+ *      that nxt_utf8_decode() returns the same value;
+ *   2. checks that nxt_utf8_decode() returns 0xffffffff for every record of
+ *      the invalid[] table;
+ *   3. passes every 1- to 4-byte sequence whose first byte is at least
+ *      NXT_UTF8_START_TEST to nxt_utf8_overlong();
+ *   4. checks that nxt_utf8_casecmp() reports upper-case and lower-case
+ *      Latin, Cyrillic and Greek strings as equal.
+ *
+ * Returns NXT_OK if every check passes, or NXT_ERROR at the first failed
+ * check (an alert describing the failure is logged to thr->log).
+ */
+nxt_int_t
+nxt_utf8_test(nxt_thread_t *thr)
+{
+    u_char        *p, utf8[4];
+    size_t        len;
+    int32_t       n;
+    uint32_t      u, d;
+    nxt_uint_t    i, k, l, m;
+    const u_char  *pp;
+
+    nxt_thread_time_update(thr);
+
+    nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test started");
+
+    /* Test valid UTF-8. */
+
+    for (u = 0; u < 0x110000; u++) {
+
+        p = nxt_utf8_encode(utf8, u);
+
+        if (p == NULL) {
+            nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u);
+            return NXT_ERROR;
+        }
+
+        pp = utf8;
+
+        d = nxt_utf8_decode(&pp, p);
+
+        if (u != d) {
+            nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD",
+                          u, d);
+            return NXT_ERROR;
+        }
+    }
+
+    /* Test some invalid UTF-8. */
+
+    for (i = 0; i < sizeof(invalid); i += 5) {
+
+        /* Each record is a length followed by four sequence bytes. */
+        len = invalid[i];
+        utf8[0] = invalid[i + 1];
+        utf8[1] = invalid[i + 2];
+        utf8[2] = invalid[i + 3];
+        utf8[3] = invalid[i + 4];
+
+        pp = utf8;
+
+        d = nxt_utf8_decode(&pp, utf8 + len);
+
+        if (d != 0xffffffff) {
+
+            /*
+             * Pack the offending bytes into one integer for the message.
+             * A separate index is used so that the record index "i" of
+             * the enclosing loop is not overwritten.
+             */
+            u = 0;
+            for (k = 0; k < len; k++) {
+                u = (u << 8) + utf8[k];
+            }
+
+            nxt_log_alert(thr->log,
+                          "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD",
+                          u, len, d);
+            return NXT_ERROR;
+        }
+    }
+
+    /* Test all overlong UTF-8. */
+
+    for (i = NXT_UTF8_START_TEST; i < 256; i++) {
+        utf8[0] = i;
+
+        if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) {
+            return NXT_ERROR;
+        }
+
+        for (k = 0; k < 256; k++) {
+            utf8[1] = k;
+
+            if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) {
+                return NXT_ERROR;
+            }
+
+            for (l = 0; l < 256; l++) {
+                utf8[2] = l;
+
+                if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) {
+                    return NXT_ERROR;
+                }
+
+                for (m = 0; m < 256; m++) {
+                    utf8[3] = m;
+
+                    if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) {
+                        return NXT_ERROR;
+                    }
+                }
+            }
+        }
+    }
+
+    /* Test case-insensitive comparison of non-ASCII letters. */
+
+    n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ",
+                         (u_char *) "abc абв αβγ",
+                         sizeof("ABC АБВ ΑΒΓ") - 1,
+                         sizeof("abc абв αβγ") - 1);
+
+    if (n != 0) {
+        nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed");
+        return NXT_ERROR;
+    }
+
+    nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test passed");
+    return NXT_OK;
+}