diff options
Diffstat (limited to '')
-rw-r--r-- | test/nxt_utf8_unit_test.c | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/test/nxt_utf8_unit_test.c b/test/nxt_utf8_unit_test.c new file mode 100644 index 00000000..3bba9681 --- /dev/null +++ b/test/nxt_utf8_unit_test.c @@ -0,0 +1,190 @@ + +/* + * Copyright (C) Igor Sysoev + * Copyright (C) NGINX, Inc. + */ + +#include <nxt_main.h> + + +#define NXT_UTF8_START_TEST 0xc2 +//#define NXT_UTF8_START_TEST 0 + + +static u_char invalid[] = { + + /* Invalid first byte less than 0xc2. */ + 1, 0x80, 0x00, 0x00, 0x00, + 1, 0xc0, 0x00, 0x00, 0x00, + 2, 0xc0, 0x00, 0x00, 0x00, + 3, 0xc0, 0x00, 0x00, 0x00, + 4, 0xc0, 0x00, 0x00, 0x00, + + /* Invalid 0x0x110000 value. */ + 4, 0xf4, 0x90, 0x80, 0x80, + + /* Incomplete length. */ + 2, 0xe0, 0xaf, 0xb5, 0x00, + + /* Overlong values. */ + 2, 0xc0, 0x80, 0x00, 0x00, + 2, 0xc1, 0xb3, 0x00, 0x00, + 3, 0xe0, 0x80, 0x80, 0x00, + 3, 0xe0, 0x81, 0xb3, 0x00, + 3, 0xe0, 0x90, 0x9a, 0x00, + 4, 0xf0, 0x80, 0x8a, 0x80, + 4, 0xf0, 0x80, 0x81, 0xb3, + 4, 0xf0, 0x80, 0xaf, 0xb5, +}; + + +static nxt_int_t +nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len) +{ + u_char *p, utf8[4]; + size_t size; + uint32_t u, d; + nxt_uint_t i; + const u_char *pp; + + pp = overlong; + + d = nxt_utf8_decode(&pp, overlong + len); + + len = pp - overlong; + + if (d != 0xffffffff) { + p = nxt_utf8_encode(utf8, d); + + size = (p != NULL) ? p - utf8 : 0; + + if (len != size || nxt_memcmp(overlong, utf8, size) != 0) { + + u = 0; + for (i = 0; i < len; i++) { + u = (u << 8) + overlong[i]; + } + + nxt_log_alert(thr->log, + "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz", + u, len, d, size); + + return NXT_ERROR; + } + } + + return NXT_OK; +} + + +nxt_int_t +nxt_utf8_unit_test(nxt_thread_t *thr) +{ + u_char *p, utf8[4]; + size_t len; + int32_t n; + uint32_t u, d; + nxt_uint_t i, k, l, m; + const u_char *pp; + + nxt_thread_time_update(thr); + + nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test started"); + + /* Test valid UTF-8. */ + + for (u = 0; u < 0x110000; u++) { + + p = nxt_utf8_encode(utf8, u); + + if (p == NULL) { + nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u); + return NXT_ERROR; + } + + pp = utf8; + + d = nxt_utf8_decode(&pp, p); + + if (u != d) { + nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD", + u, d); + return NXT_ERROR; + } + } + + /* Test some invalid UTF-8. */ + + for (i = 0; i < sizeof(invalid); i += 5) { + + len = invalid[i]; + utf8[0] = invalid[i + 1]; + utf8[1] = invalid[i + 2]; + utf8[2] = invalid[i + 3]; + utf8[3] = invalid[i + 4]; + + pp = utf8; + + d = nxt_utf8_decode(&pp, utf8 + len); + + if (d != 0xffffffff) { + + u = 0; + for (i = 0; i < len; i++) { + u = (u << 8) + utf8[i]; + } + + nxt_log_alert(thr->log, + "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD", + u, len, d); + return NXT_ERROR; + } + } + + /* Test all overlong UTF-8. */ + + for (i = NXT_UTF8_START_TEST; i < 256; i++) { + utf8[0] = i; + + if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) { + return NXT_ERROR; + } + + for (k = 0; k < 256; k++) { + utf8[1] = k; + + if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) { + return NXT_ERROR; + } + + for (l = 0; l < 256; l++) { + utf8[2] = l; + + if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) { + return NXT_ERROR; + } + + for (m = 0; m < 256; m++) { + utf8[3] = m; + + if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) { + return NXT_ERROR; + } + } + } + } + } + + n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ", + (u_char *) "abc абв αβγ", + sizeof("ABC АБВ ΑΒΓ") - 1, + sizeof("abc абв αβγ") - 1); + + if (n != 0) { + nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed"); + return NXT_ERROR; + } + + nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test passed"); + return NXT_OK; +} |