summaryrefslogblamecommitdiffhomepage
path: root/test/nxt_utf8_unit_test.c
blob: 3bba968161bdda3fa05d7196117aa12dfef36f4f (plain) (tree)





























































































































































































                                                                             

/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#include <nxt_main.h>


/*
 * Lowest first-byte value used by the exhaustive 1-4 byte sequence scan
 * in nxt_utf8_unit_test().  With 0xc2 the scan skips every first byte
 * below 0xc2; the commented-out alternative below starts the scan from 0.
 */
#define NXT_UTF8_START_TEST  0xc2
//#define NXT_UTF8_START_TEST  0


/*
 * Byte sequences that nxt_utf8_decode() is expected to reject.
 *
 * The table is a flat list of five-byte records: the first byte is the
 * number of bytes handed to the decoder, the remaining four are the
 * sequence bytes.  Only the first "length" sequence bytes are visible to
 * the decoder, so bytes past the length act as padding.
 *
 * The table is read-only, hence "const".
 */
static const u_char  invalid[] = {

    /* Invalid first byte less than 0xc2. */
    1, 0x80, 0x00, 0x00, 0x00,
    1, 0xc0, 0x00, 0x00, 0x00,
    2, 0xc0, 0x00, 0x00, 0x00,
    3, 0xc0, 0x00, 0x00, 0x00,
    4, 0xc0, 0x00, 0x00, 0x00,

    /* Invalid 0x110000 value, one past the last valid code point. */
    4, 0xf4, 0x90, 0x80, 0x80,

    /* Incomplete length: a three-byte sequence cut down to two bytes. */
    2, 0xe0, 0xaf, 0xb5, 0x00,

    /* Overlong values. */
    2, 0xc0, 0x80, 0x00, 0x00,
    2, 0xc1, 0xb3, 0x00, 0x00,
    3, 0xe0, 0x80, 0x80, 0x00,
    3, 0xe0, 0x81, 0xb3, 0x00,
    3, 0xe0, 0x90, 0x9a, 0x00,
    4, 0xf0, 0x80, 0x8a, 0x80,
    4, 0xf0, 0x80, 0x81, 0xb3,
    4, 0xf0, 0x80, 0xaf, 0xb5,
};


/*
 * Checks that a byte sequence accepted by nxt_utf8_decode() is exactly the
 * sequence nxt_utf8_encode() produces for the decoded value.
 *
 * thr       thread whose log receives the alert on failure;
 * overlong  candidate byte sequence;
 * len       number of candidate bytes available to the decoder.
 *
 * Returns NXT_OK if the decoder yields 0xffffffff (the value the tests in
 * this file expect for rejected input) or if re-encoding the decoded value
 * reproduces the consumed bytes; otherwise logs an alert and returns
 * NXT_ERROR.
 */
static nxt_int_t
nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len)
{
    u_char        *end, canonical[4];
    size_t        encoded;
    uint32_t      raw, code;
    nxt_uint_t    n;
    const u_char  *cursor;

    cursor = overlong;
    code = nxt_utf8_decode(&cursor, overlong + len);

    /* From here on "len" is the number of bytes the decoder consumed. */
    len = cursor - overlong;

    if (code == 0xffffffff) {
        return NXT_OK;
    }

    end = nxt_utf8_encode(canonical, code);

    /* A failed encode counts as a zero-length result. */
    encoded = 0;
    if (end != NULL) {
        encoded = end - canonical;
    }

    if (len == encoded && nxt_memcmp(overlong, canonical, encoded) == 0) {
        return NXT_OK;
    }

    /* Pack the consumed bytes into one integer for the log message. */
    raw = 0;
    n = 0;

    while (n < len) {
        raw = (raw << 8) + overlong[n];
        n++;
    }

    nxt_log_alert(thr->log,
                  "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz",
                  raw, len, code, encoded);

    return NXT_ERROR;
}


/*
 * Unit test for nxt_utf8_encode(), nxt_utf8_decode() and nxt_utf8_casecmp().
 *
 * The test runs four stages and stops at the first failure:
 *
 *   1. every value in [0, 0x110000) is encoded and must decode back to
 *      itself;
 *   2. every record of the invalid[] table must make the decoder return
 *      0xffffffff;
 *   3. every 1-4 byte sequence whose first byte is at least
 *      NXT_UTF8_START_TEST is checked with nxt_utf8_overlong();
 *   4. an uppercase Latin/Cyrillic/Greek string must compare equal to its
 *      lowercase counterpart with nxt_utf8_casecmp().
 *
 * thr  thread whose log receives progress notices and failure alerts.
 *
 * Returns NXT_OK if all stages pass, NXT_ERROR otherwise.
 */
nxt_int_t
nxt_utf8_unit_test(nxt_thread_t *thr)
{
    u_char        *p, utf8[4];
    size_t        len;
    int32_t       n;
    uint32_t      u, d;
    nxt_uint_t    i, j, k, l, m;
    const u_char  *pp;

    nxt_thread_time_update(thr);

    nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test started");

    /* Test valid UTF-8. */

    for (u = 0; u < 0x110000; u++) {

        p = nxt_utf8_encode(utf8, u);

        if (p == NULL) {
            nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u);
            return NXT_ERROR;
        }

        pp = utf8;

        d = nxt_utf8_decode(&pp, p);

        if (u != d) {
            nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD",
                          u, d);
            return NXT_ERROR;
        }
    }

    /* Test some invalid UTF-8. */

    /*
     * Each invalid[] record is five bytes: a length and four sequence
     * bytes.  The bound only admits whole records.
     */
    for (i = 0; i + 5 <= sizeof(invalid); i += 5) {

        len = invalid[i];
        utf8[0] = invalid[i + 1];
        utf8[1] = invalid[i + 2];
        utf8[2] = invalid[i + 3];
        utf8[3] = invalid[i + 4];

        pp = utf8;

        d = nxt_utf8_decode(&pp, utf8 + len);

        if (d != 0xffffffff) {

            /*
             * Pack the offending bytes into one integer for the log.
             * A dedicated index is used so that the table cursor "i"
             * is never clobbered.
             */
            u = 0;
            for (j = 0; j < len; j++) {
                u = (u << 8) + utf8[j];
            }

            nxt_log_alert(thr->log,
                          "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD",
                          u, len, d);
            return NXT_ERROR;
        }
    }

    /* Test all overlong UTF-8. */

    /*
     * Exhaustive scan: each prefix of one, two, three and four bytes is
     * checked as soon as its last byte is set.
     */
    for (i = NXT_UTF8_START_TEST; i < 256; i++) {
        utf8[0] = i;

        if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) {
            return NXT_ERROR;
        }

        for (k = 0; k < 256; k++) {
            utf8[1] = k;

            if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) {
                return NXT_ERROR;
            }

            for (l = 0; l < 256; l++) {
                utf8[2] = l;

                if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) {
                    return NXT_ERROR;
                }

                for (m = 0; m < 256; m++) {
                    utf8[3] = m;

                    if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) {
                        return NXT_ERROR;
                    }
                }
            }
        }
    }

    /* Test case-insensitive comparison; sizeof() - 1 drops the final NUL. */

    n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ",
                         (u_char *) "abc абв αβγ",
                         sizeof("ABC АБВ ΑΒΓ") - 1,
                         sizeof("abc абв αβγ") - 1);

    if (n != 0) {
        nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed");
        return NXT_ERROR;
    }

    nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test passed");
    return NXT_OK;
}