summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Valentin Bartenev <vbart@nginx.com> 2020-03-27 17:22:52 +0300
committer Valentin Bartenev <vbart@nginx.com> 2020-03-27 17:22:52 +0300
commit 35d6f84426cfaa27587456a8ebb81b13f60e697a (patch)
tree fb1d8cf6c49910275308700a9f29d848c4500f91
parent d4b4cb0438d753e7694f8f76c41207bbe01fe790 (diff)
download unit-35d6f84426cfaa27587456a8ebb81b13f60e697a.tar.gz
unit-35d6f84426cfaa27587456a8ebb81b13f60e697a.tar.bz2
Added nxt_is_complex_uri_encoded()/nxt_encode_complex_uri().
-rw-r--r-- src/nxt_string.c 199
-rw-r--r-- src/nxt_string.h 3
2 files changed, 158 insertions, 44 deletions
diff --git a/src/nxt_string.c b/src/nxt_string.c
index dfaea6bc..667146d6 100644
--- a/src/nxt_string.c
+++ b/src/nxt_string.c
@@ -457,34 +457,54 @@ nxt_strvers_match(u_char *version, u_char *prefix, size_t length)
}
+/*
+ * Byte-indexed lookup table for hexadecimal digits: '0'-'9' map to 0..9,
+ * 'A'-'F' and 'a'-'f' map to 10..15, and every other byte maps to 16.
+ * Because 16 is outside the 4-bit digit range, two lookups can be
+ * validated at once by testing (d0 | d1) >= 16.
+ */
+static const uint8_t nxt_hex2int[256]
+ nxt_aligned(32) =
+{
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 16, 16, 16, 16, 16,
+ 16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+};
+
+
+/*
+ * 256-bit map of bytes that must be percent-encoded in a URI, stored as
+ * eight 32-bit words.  Byte c is looked up as
+ *
+ *     nxt_uri_escape[c >> 5] & (1U << (c & 0x1f))
+ *
+ * A set bit means "escape".  The set bits cover the control bytes
+ * 0x00-0x1f, space, '"', '#', '%', '<', '>', '?', '\', '^', '`', '{',
+ * '|', '}', 0x7f, and every byte >= 0x80.  The character rulers in the
+ * comments below list the bytes of each word from bit 31 down to bit 0.
+ */
+static const uint32_t nxt_uri_escape[] = {
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0xd000002d, /* 1101 0000 0000 0000 0000 0000 0010 1101 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */
+
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+};
+
+
u_char *
nxt_decode_uri(u_char *dst, u_char *src, size_t length)
{
u_char *end, ch;
uint8_t d0, d1;
- static const uint8_t hex[256]
- nxt_aligned(32) =
- {
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 16, 16, 16, 16, 16,
- 16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- };
-
- nxt_prefetch(&hex['0']);
+ nxt_prefetch(&nxt_hex2int['0']);
end = src + length;
@@ -496,8 +516,8 @@ nxt_decode_uri(u_char *dst, u_char *src, size_t length)
return NULL;
}
- d0 = hex[*src++];
- d1 = hex[*src++];
+ d0 = nxt_hex2int[*src++];
+ d1 = nxt_hex2int[*src++];
if (nxt_slow_path((d0 | d1) >= 16)) {
return NULL;
@@ -521,24 +541,6 @@ nxt_encode_uri(u_char *dst, u_char *src, size_t length)
static const u_char hex[16] = "0123456789ABCDEF";
- static const uint32_t escape[] = {
- 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
-
- /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
- 0xd000002d, /* 1101 0000 0000 0000 0000 0000 0010 1101 */
-
- /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
- 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */
-
- /* ~}| {zyx wvut srqp onml kjih gfed cba` */
- 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */
-
- 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
- 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
- 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
- 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
- };
-
end = src + length;
if (dst == NULL) {
@@ -549,7 +551,7 @@ nxt_encode_uri(u_char *dst, u_char *src, size_t length)
while (src < end) {
- if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+ if (nxt_uri_escape[*src >> 5] & (1U << (*src & 0x1f))) {
n++;
}
@@ -561,7 +563,7 @@ nxt_encode_uri(u_char *dst, u_char *src, size_t length)
while (src < end) {
- if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+ if (nxt_uri_escape[*src >> 5] & (1U << (*src & 0x1f))) {
*dst++ = '%';
*dst++ = hex[*src >> 4];
*dst++ = hex[*src & 0xf];
@@ -575,3 +577,112 @@ nxt_encode_uri(u_char *dst, u_char *src, size_t length)
return (uintptr_t) dst;
}
+
+
+/*
+ * Percent-encodes a "complex" URI, i.e. one that may carry a query and a
+ * fragment part.  Every byte flagged in nxt_uri_escape[] is written as
+ * "%XX" (uppercase hex), except for the delimiters that introduce those
+ * parts:
+ *
+ *   - the first '?' is copied as is, provided no '#' has been seen yet;
+ *   - the first '#' is copied as is.
+ *
+ * Any later '?' or '#' is escaped like every other flagged byte.
+ *
+ * If dst is NULL nothing is written and the number of bytes that would
+ * be escaped is returned; each of them expands to three output bytes.
+ * Otherwise the encoded data is stored at dst and the pointer just past
+ * the last written byte is returned, cast to uintptr_t.
+ *
+ * The delimiter state is kept in two explicit flags rather than by
+ * walking a pointer over a "?#\0" literal: with the pointer walk, an
+ * embedded 0x00 byte matched the literal's padding, was passed through
+ * unescaped and could move the pointer beyond the end of the literal.
+ */
+uintptr_t
+nxt_encode_complex_uri(u_char *dst, u_char *src, size_t length)
+{
+    u_char      *end, ch;
+    nxt_uint_t  n, allow_query, allow_fragment;
+
+    static const u_char  hex[16] = "0123456789ABCDEF";
+
+    allow_query = 1;
+    allow_fragment = 1;
+
+    end = src + length;
+
+    if (dst == NULL) {
+
+        /* Find the number of the characters to be escaped. */
+
+        n = 0;
+
+        while (src < end) {
+            ch = *src++;
+
+            if ((nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) == 0) {
+                continue;
+            }
+
+            if (ch == '?' && allow_query) {
+                allow_query = 0;
+                continue;
+            }
+
+            if (ch == '#' && allow_fragment) {
+                /* Nothing after the fragment delimiter is reserved. */
+                allow_query = 0;
+                allow_fragment = 0;
+                continue;
+            }
+
+            n++;
+        }
+
+        return (uintptr_t) n;
+    }
+
+    while (src < end) {
+        ch = *src++;
+
+        if (nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) {
+            if (ch == '?' && allow_query) {
+                allow_query = 0;
+
+            } else if (ch == '#' && allow_fragment) {
+                /* Nothing after the fragment delimiter is reserved. */
+                allow_query = 0;
+                allow_fragment = 0;
+
+            } else {
+                *dst++ = '%';
+                *dst++ = hex[ch >> 4];
+                *dst++ = hex[ch & 0xf];
+                continue;
+            }
+        }
+
+        *dst++ = ch;
+    }
+
+    return (uintptr_t) dst;
+}
+
+
+/*
+ * Checks whether a "complex" URI (one that may carry a query and a
+ * fragment part) is already percent-encoded.
+ *
+ * Returns 1 if every byte flagged in nxt_uri_escape[] is one of:
+ *
+ *   - a '%' followed by two hexadecimal digits;
+ *   - the first '?', provided no '#' has been seen yet;
+ *   - the first '#'.
+ *
+ * Returns 0 otherwise, including for a '%' that is not followed by two
+ * more bytes within the given length.
+ */
+nxt_bool_t
+nxt_is_complex_uri_encoded(u_char *src, size_t length)
+{
+    u_char      *end, ch;
+    uint8_t     d0, d1;
+    nxt_bool_t  allow_query, allow_fragment;
+
+    allow_query = 1;
+    allow_fragment = 1;
+
+    for (end = src + length; src < end; src++) {
+        ch = *src;
+
+        if ((nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) == 0) {
+            continue;
+        }
+
+        if (ch == '%') {
+            /*
+             * src still points at the '%' itself, so the two digits
+             * live at src[1] and src[2]: at least three bytes must
+             * remain, otherwise src[2] would be read past the end.
+             */
+            if (end - src < 3) {
+                return 0;
+            }
+
+            d0 = nxt_hex2int[*++src];
+            d1 = nxt_hex2int[*++src];
+
+            if ((d0 | d1) >= 16) {
+                return 0;
+            }
+
+            continue;
+        }
+
+        if (ch == '?' && allow_query) {
+            allow_query = 0;
+            continue;
+        }
+
+        if (ch == '#' && allow_fragment) {
+            /* Nothing after the fragment delimiter is reserved. */
+            allow_query = 0;
+            allow_fragment = 0;
+            continue;
+        }
+
+        return 0;
+    }
+
+    return 1;
+}
diff --git a/src/nxt_string.h b/src/nxt_string.h
index de498048..d10658f7 100644
--- a/src/nxt_string.h
+++ b/src/nxt_string.h
@@ -170,6 +170,9 @@ NXT_EXPORT nxt_bool_t nxt_strvers_match(u_char *version, u_char *prefix,
NXT_EXPORT u_char *nxt_decode_uri(u_char *dst, u_char *src, size_t length);
NXT_EXPORT uintptr_t nxt_encode_uri(u_char *dst, u_char *src, size_t length);
+/*
+ * Percent-encodes a URI that may contain a query and a fragment part,
+ * leaving the leading '?' and '#' delimiters unescaped.  With dst == NULL
+ * it returns the number of bytes that need escaping; otherwise it returns
+ * the position just past the encoded output, cast to uintptr_t.
+ */
+NXT_EXPORT uintptr_t nxt_encode_complex_uri(u_char *dst, u_char *src,
+ size_t length);
+/*
+ * Returns 1 if such a URI is already percent-encoded, 0 otherwise.
+ */
+NXT_EXPORT nxt_bool_t nxt_is_complex_uri_encoded(u_char *s, size_t length);
#endif /* _NXT_STRING_H_INCLUDED_ */