/* * Copyright (C) Igor Sysoev * Copyright (C) NGINX, Inc. */ #include #define NXT_UTF8_START_TEST 0xc2 //#define NXT_UTF8_START_TEST 0 static u_char invalid[] = { /* Invalid first byte less than 0xc2. */ 1, 0x80, 0x00, 0x00, 0x00, 1, 0xc0, 0x00, 0x00, 0x00, 2, 0xc0, 0x00, 0x00, 0x00, 3, 0xc0, 0x00, 0x00, 0x00, 4, 0xc0, 0x00, 0x00, 0x00, /* Invalid 0x0x110000 value. */ 4, 0xf4, 0x90, 0x80, 0x80, /* Incomplete length. */ 2, 0xe0, 0xaf, 0xb5, 0x00, /* Overlong values. */ 2, 0xc0, 0x80, 0x00, 0x00, 2, 0xc1, 0xb3, 0x00, 0x00, 3, 0xe0, 0x80, 0x80, 0x00, 3, 0xe0, 0x81, 0xb3, 0x00, 3, 0xe0, 0x90, 0x9a, 0x00, 4, 0xf0, 0x80, 0x8a, 0x80, 4, 0xf0, 0x80, 0x81, 0xb3, 4, 0xf0, 0x80, 0xaf, 0xb5, }; static nxt_int_t nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len) { u_char *p, utf8[4]; size_t size; uint32_t u, d; nxt_uint_t i; const u_char *pp; pp = overlong; d = nxt_utf8_decode(&pp, overlong + len); len = pp - overlong; if (d != 0xffffffff) { p = nxt_utf8_encode(utf8, d); size = (p != NULL) ? p - utf8 : 0; if (len != size || nxt_memcmp(overlong, utf8, size) != 0) { u = 0; for (i = 0; i < len; i++) { u = (u << 8) + overlong[i]; } nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz", u, len, d, size); return NXT_ERROR; } } return NXT_OK; } nxt_int_t nxt_utf8_unit_test(nxt_thread_t *thr) { u_char *p, utf8[4]; size_t len; int32_t n; uint32_t u, d; nxt_uint_t i, k, l, m; const u_char *pp; nxt_thread_time_update(thr); nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test started"); /* Test valid UTF-8. */ for (u = 0; u < 0x110000; u++) { p = nxt_utf8_encode(utf8, u); if (p == NULL) { nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u); return NXT_ERROR; } pp = utf8; d = nxt_utf8_decode(&pp, p); if (u != d) { nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD", u, d); return NXT_ERROR; } } /* Test some invalid UTF-8. */ for (i = 0; i < sizeof(invalid); i += 5) { len = invalid[i]; utf8[0] = invalid[i + 1]; utf8[1] = invalid[i + 2]; utf8[2] = invalid[i + 3]; utf8[3] = invalid[i + 4]; pp = utf8; d = nxt_utf8_decode(&pp, utf8 + len); if (d != 0xffffffff) { u = 0; for (i = 0; i < len; i++) { u = (u << 8) + utf8[i]; } nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD", u, len, d); return NXT_ERROR; } } /* Test all overlong UTF-8. */ for (i = NXT_UTF8_START_TEST; i < 256; i++) { utf8[0] = i; if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) { return NXT_ERROR; } for (k = 0; k < 256; k++) { utf8[1] = k; if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) { return NXT_ERROR; } for (l = 0; l < 256; l++) { utf8[2] = l; if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) { return NXT_ERROR; } for (m = 0; m < 256; m++) { utf8[3] = m; if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) { return NXT_ERROR; } } } } } n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ", (u_char *) "abc абв αβγ", sizeof("ABC АБВ ΑΒΓ") - 1, sizeof("abc абв αβγ") - 1); if (n != 0) { nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed"); return NXT_ERROR; } nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test passed"); return NXT_OK; }