diff options
author | Tiago de Bem Natel de Moura <t.nateldemoura@f5.com> | 2019-09-19 15:25:23 +0300 |
---|---|---|
committer | Tiago de Bem Natel de Moura <t.nateldemoura@f5.com> | 2019-09-19 15:25:23 +0300 |
commit | c554941b4f826d83d92d5ca8d7713bea4167896e (patch) | |
tree | 86afb0a5efc790e1852124426acb73d8164341af | |
parent | 6346e641eef4aacf92e81e0f1ea4f42ed1e62834 (diff) | |
download | unit-c554941b4f826d83d92d5ca8d7713bea4167896e.tar.gz unit-c554941b4f826d83d92d5ca8d7713bea4167896e.tar.bz2 |
Initial applications isolation support using Linux namespaces.
-rw-r--r-- | auto/capability | 19 | ||||
-rw-r--r-- | auto/isolation | 52 | ||||
-rw-r--r-- | auto/sources | 7 | ||||
-rw-r--r-- | auto/summary | 2 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | src/nxt_application.h | 2 | ||||
-rw-r--r-- | src/nxt_capability.c | 104 | ||||
-rw-r--r-- | src/nxt_capability.h | 17 | ||||
-rw-r--r-- | src/nxt_clone.c | 263 | ||||
-rw-r--r-- | src/nxt_clone.h | 17 | ||||
-rw-r--r-- | src/nxt_conf_validation.c | 303 | ||||
-rw-r--r-- | src/nxt_main.h | 1 | ||||
-rw-r--r-- | src/nxt_main_process.c | 207 | ||||
-rw-r--r-- | src/nxt_process.c | 251 | ||||
-rw-r--r-- | src/nxt_process.h | 31 | ||||
-rw-r--r-- | src/nxt_runtime.c | 14 | ||||
-rw-r--r-- | src/nxt_runtime.h | 1 | ||||
-rw-r--r-- | src/nxt_unit.c | 2 | ||||
-rw-r--r-- | test/go/ns_inspect/app.go | 79 | ||||
-rw-r--r-- | test/test_go_isolation.py | 135 | ||||
-rw-r--r-- | test/unit/feature/isolation.py | 87 |
21 files changed, 1431 insertions, 165 deletions
diff --git a/auto/capability b/auto/capability new file mode 100644 index 00000000..48777665 --- /dev/null +++ b/auto/capability @@ -0,0 +1,19 @@ + +# Copyright (C) Igor Sysoev +# Copyright (C) NGINX, Inc. + +# Linux capability + +nxt_feature="Linux capability" +nxt_feature_name=NXT_HAVE_LINUX_CAPABILITY +nxt_feature_test="#include <linux/capability.h> + #include <unistd.h> + #include <sys/syscall.h> + + int main() { + struct __user_cap_header_struct hdr; + hdr.version = _LINUX_CAPABILITY_VERSION; + syscall(SYS_capget, &hdr, 0); + return 0; + }" +. auto/feature diff --git a/auto/isolation b/auto/isolation new file mode 100644 index 00000000..c26a4991 --- /dev/null +++ b/auto/isolation @@ -0,0 +1,52 @@ +# Copyright (C) Igor Sysoev +# Copyright (C) NGINX, Inc. + +# Linux clone syscall. + +NXT_ISOLATION=NO +NXT_HAVE_CLONE=NO + +nsflags="USER NS PID NET UTS CGROUP" + +nxt_feature="clone(2)" +nxt_feature_name=NXT_HAVE_CLONE +nxt_feature_run=no +nxt_feature_incs= +nxt_feature_libs= +nxt_feature_test="#include <sys/wait.h> + #include <sys/syscall.h> + + int main() { + return __NR_clone | SIGCHLD; + }" +. auto/feature + +if [ $nxt_found = yes ]; then + NXT_HAVE_CLONE=YES + + # Test all isolation flags + for flag in $nsflags; do + nxt_feature="CLONE_NEW${flag}" + nxt_feature_name=NXT_HAVE_CLONE_NEW${flag} + nxt_feature_run=no + nxt_feature_incs= + nxt_feature_libs= + nxt_feature_test="#define _GNU_SOURCE + #include <sys/wait.h> + #include <sys/syscall.h> + #include <sched.h> + + int main() { + return CLONE_NEW$flag; + }" + . auto/feature + + if [ $nxt_found = yes ]; then + if [ "$NXT_ISOLATION" = "NO" ]; then + NXT_ISOLATION=$flag + else + NXT_ISOLATION="$NXT_ISOLATION $flag" + fi + fi + done +fi diff --git a/auto/sources b/auto/sources index 8ac8fb19..858eaa8c 100644 --- a/auto/sources +++ b/auto/sources @@ -71,6 +71,7 @@ NXT_LIB_SRCS=" \ src/nxt_upstream_round_robin.c \ src/nxt_http_parse.c \ src/nxt_app_log.c \ + src/nxt_capability.c \ src/nxt_runtime.c \ src/nxt_conf.c \ src/nxt_conf_validation.c \ @@ -132,6 +133,7 @@ NXT_LIB_SOLARIS_SENDFILEV_SRCS="src/nxt_solaris_sendfilev.c" NXT_LIB_MACOSX_SENDFILE_SRCS="src/nxt_macosx_sendfile.c" NXT_LIB_AIX_SEND_FILE_SRCS="src/nxt_aix_send_file.c" NXT_LIB_HPUX_SENDFILE_SRCS="src/nxt_hpux_sendfile.c" +NXT_LIB_CLONE_SRCS="src/nxt_clone.c" NXT_TEST_BUILD_DEPS="src/nxt_test_build.h" NXT_TEST_BUILD_SRCS="src/nxt_test_build.c" @@ -257,6 +259,11 @@ if [ "$NXT_HAVE_HPUX_SENDFILE" = "YES" \ fi +if [ "$NXT_HAVE_CLONE" = "YES" ]; then + NXT_LIB_SRCS="$NXT_LIB_SRCS $NXT_LIB_CLONE_SRCS" +fi + + if [ "$NXT_TEST_BUILD" = "YES" ]; then NXT_LIB_SRCS="$NXT_LIB_SRCS $NXT_TEST_BUILD_SRCS" fi diff --git a/auto/summary b/auto/summary index 1c9df4b1..59267f6c 100644 --- a/auto/summary +++ b/auto/summary @@ -26,6 +26,8 @@ Unit configuration summary: Unix domain sockets support: $NXT_UNIX_DOMAIN TLS support: ............... $NXT_OPENSSL + process isolation: ......... $NXT_ISOLATION + debug logging: ............. $NXT_DEBUG END @@ -126,6 +126,8 @@ NXT_LIBRT= . auto/os/conf . auto/ssltls . auto/pcre +. auto/isolation +. auto/capability case "$NXT_SYSTEM_PLATFORM" in diff --git a/src/nxt_application.h b/src/nxt_application.h index 7ff4bb11..2a1fa39e 100644 --- a/src/nxt_application.h +++ b/src/nxt_application.h @@ -88,6 +88,8 @@ struct nxt_common_app_conf_s { char *working_directory; nxt_conf_value_t *environment; + nxt_conf_value_t *isolation; + union { nxt_external_app_conf_t external; nxt_python_app_conf_t python; diff --git a/src/nxt_capability.c b/src/nxt_capability.c new file mode 100644 index 00000000..805faff6 --- /dev/null +++ b/src/nxt_capability.c @@ -0,0 +1,104 @@ +/* + * Copyright (C) Igor Sysoev + * Copyright (C) NGINX, Inc. + */ + +#include <nxt_main.h> + +#if (NXT_HAVE_LINUX_CAPABILITY) + +#include <linux/capability.h> +#include <sys/syscall.h> + +#define nxt_capget(hdrp, datap) \ + syscall(SYS_capget, hdrp, datap) +#define nxt_capset(hdrp, datap) \ + syscall(SYS_capset, hdrp, datap) + +#endif /* NXT_HAVE_LINUX_CAPABILITY */ + + +static nxt_int_t nxt_capability_specific_set(nxt_task_t *task, + nxt_capabilities_t *cap); + + +nxt_int_t +nxt_capability_set(nxt_task_t *task, nxt_capabilities_t *cap) +{ + nxt_assert(cap->setid == 0); + + if (geteuid() == 0) { + cap->setid = 1; + return NXT_OK; + } + + return nxt_capability_specific_set(task, cap); +} + + +#if (NXT_HAVE_LINUX_CAPABILITY) + +static uint32_t +nxt_capability_linux_get_version() +{ + struct __user_cap_header_struct hdr; + + hdr.version = _LINUX_CAPABILITY_VERSION; + hdr.pid = nxt_pid; + + nxt_capget(&hdr, NULL); + return hdr.version; +} + + +static nxt_int_t +nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap) +{ + struct __user_cap_data_struct *val, data[2]; + struct __user_cap_header_struct hdr; + + /* + * Linux capability v1 fills an u32 struct. + * Linux capability v2 and v3 fills an u64 struct. + * We allocate data[2] for compatibility, we waste 4 bytes on v1. + * + * This is safe as we only need to check CAP_SETUID and CAP_SETGID + * that resides in the first 32-bit chunk. + */ + + val = &data[0]; + + /* + * Ask the kernel the preferred capability version + * instead of using _LINUX_CAPABILITY_VERSION from header. + * This is safer when distributing a pre-compiled Unit binary. + */ + hdr.version = nxt_capability_linux_get_version(); + hdr.pid = nxt_pid; + + if (nxt_slow_path(nxt_capget(&hdr, val) == -1)) { + nxt_alert(task, "failed to get process capabilities: %E", nxt_errno); + return NXT_ERROR; + } + + if ((val->effective & (1 << CAP_SETUID)) == 0) { + return NXT_OK; + } + + if ((val->effective & (1 << CAP_SETGID)) == 0) { + return NXT_OK; + } + + cap->setid = 1; + return NXT_OK; +} + +#else + +static nxt_int_t +nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap) +{ + return NXT_OK; +} + +#endif diff --git a/src/nxt_capability.h b/src/nxt_capability.h new file mode 100644 index 00000000..60bbd5f8 --- /dev/null +++ b/src/nxt_capability.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) Igor Sysoev + * Copyright (C) NGINX, Inc. + */ + +#ifndef _NXT_CAPABILITY_INCLUDED_ +#define _NXT_CAPABILITY_INCLUDED_ + +typedef struct { + uint8_t setid; /* 1 bit */ +} nxt_capabilities_t; + + +NXT_EXPORT nxt_int_t nxt_capability_set(nxt_task_t *task, + nxt_capabilities_t *cap); + +#endif /* _NXT_CAPABILITY_INCLUDED_ */ diff --git a/src/nxt_clone.c b/src/nxt_clone.c new file mode 100644 index 00000000..0fddd6c7 --- /dev/null +++ b/src/nxt_clone.c @@ -0,0 +1,263 @@ +/* + * Copyright (C) Igor Sysoev + * Copyright (C) NGINX, Inc. + */ + +#include <nxt_main.h> +#include <sys/types.h> +#include <nxt_conf.h> +#include <nxt_clone.h> + +#if (NXT_HAVE_CLONE) + +pid_t +nxt_clone(nxt_int_t flags) +{ +#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) + return syscall(__NR_clone, NULL, flags); +#else + return syscall(__NR_clone, flags, NULL); +#endif +} + +#endif + + +#if (NXT_HAVE_CLONE_NEWUSER) + +/* map uid 65534 to unit pid */ +#define NXT_DEFAULT_UNPRIV_MAP "65534 %d 1" + +nxt_int_t nxt_clone_proc_setgroups(nxt_task_t *task, pid_t child_pid, + const char *str); +nxt_int_t nxt_clone_proc_map_set(nxt_task_t *task, const char* mapfile, + pid_t pid, nxt_int_t defval, nxt_conf_value_t *mapobj); +nxt_int_t nxt_clone_proc_map_write(nxt_task_t *task, const char *mapfile, + pid_t pid, u_char *mapinfo); + + +typedef struct { + nxt_int_t container; + nxt_int_t host; + nxt_int_t size; +} nxt_clone_procmap_t; + + +nxt_int_t +nxt_clone_proc_setgroups(nxt_task_t *task, pid_t child_pid, const char *str) +{ + int fd, n; + u_char *p, *end; + u_char path[PATH_MAX]; + + end = path + PATH_MAX; + p = nxt_sprintf(path, end, "/proc/%d/setgroups", child_pid); + *p = '\0'; + + if (nxt_slow_path(p == end)) { + nxt_alert(task, "error write past the buffer: %s", path); + return NXT_ERROR; + } + + fd = open((char *)path, O_RDWR); + + if (fd == -1) { + /* + * If the /proc/pid/setgroups doesn't exists, we are + * safe to set uid/gid maps. But if the error is anything + * other than ENOENT, then we should abort and let user know. + */ + + if (errno != ENOENT) { + nxt_alert(task, "open(%s): %E", path, nxt_errno); + return NXT_ERROR; + } + + return NXT_OK; + } + + n = write(fd, str, strlen(str)); + close(fd); + + if (nxt_slow_path(n == -1)) { + nxt_alert(task, "write(%s): %E", path, nxt_errno); + return NXT_ERROR; + } + + return NXT_OK; +} + + +nxt_int_t +nxt_clone_proc_map_write(nxt_task_t *task, const char *mapfile, pid_t pid, + u_char *mapinfo) +{ + int len, mapfd; + u_char *p, *end; + ssize_t n; + u_char buf[256]; + + end = buf + sizeof(buf); + + p = nxt_sprintf(buf, end, "/proc/%d/%s", pid, mapfile); + if (nxt_slow_path(p == end)) { + nxt_alert(task, "writing past the buffer"); + return NXT_ERROR; + } + + *p = '\0'; + + mapfd = open((char*)buf, O_RDWR); + if (nxt_slow_path(mapfd == -1)) { + nxt_alert(task, "failed to open proc map (%s) %E", buf, nxt_errno); + return NXT_ERROR; + } + + len = nxt_strlen(mapinfo); + + n = write(mapfd, (char *)mapinfo, len); + if (nxt_slow_path(n != len)) { + + if (n == -1 && nxt_errno == EINVAL) { + nxt_alert(task, "failed to write %s: Check kernel maximum " \ + "allowed lines %E", buf, nxt_errno); + + } else { + nxt_alert(task, "failed to write proc map (%s) %E", buf, + nxt_errno); + } + + return NXT_ERROR; + } + + return NXT_OK; +} + + +nxt_int_t +nxt_clone_proc_map_set(nxt_task_t *task, const char* mapfile, pid_t pid, + nxt_int_t defval, nxt_conf_value_t *mapobj) +{ + u_char *p, *end, *mapinfo; + nxt_int_t container, host, size; + nxt_int_t ret, len, count, i; + nxt_conf_value_t *obj, *value; + + static nxt_str_t str_cont = nxt_string("container"); + static nxt_str_t str_host = nxt_string("host"); + static nxt_str_t str_size = nxt_string("size"); + + /* + * uid_map one-entry size: + * alloc space for 3 numbers (32bit) plus 2 spaces and \n. + */ + len = sizeof(u_char) * (10 + 10 + 10 + 2 + 1); + + if (mapobj != NULL) { + count = nxt_conf_array_elements_count(mapobj); + + if (count == 0) { + goto default_map; + } + + len = len * count + 1; + + mapinfo = nxt_malloc(len); + if (nxt_slow_path(mapinfo == NULL)) { + nxt_alert(task, "failed to allocate uid_map buffer"); + return NXT_ERROR; + } + + p = mapinfo; + end = mapinfo + len; + + for (i = 0; i < count; i++) { + obj = nxt_conf_get_array_element(mapobj, i); + + value = nxt_conf_get_object_member(obj, &str_cont, NULL); + container = nxt_conf_get_integer(value); + + value = nxt_conf_get_object_member(obj, &str_host, NULL); + host = nxt_conf_get_integer(value); + + value = nxt_conf_get_object_member(obj, &str_size, NULL); + size = nxt_conf_get_integer(value); + + p = nxt_sprintf(p, end, "%d %d %d", container, host, size); + if (nxt_slow_path(p == end)) { + nxt_alert(task, "write past the uid_map buffer"); + nxt_free(mapinfo); + return NXT_ERROR; + } + + if (i+1 < count) { + *p++ = '\n'; + + } else { + *p = '\0'; + } + } + + } else { + +default_map: + + mapinfo = nxt_malloc(len); + if (nxt_slow_path(mapinfo == NULL)) { + nxt_alert(task, "failed to allocate uid_map buffer"); + return NXT_ERROR; + } + + end = mapinfo + len; + p = nxt_sprintf(mapinfo, end, NXT_DEFAULT_UNPRIV_MAP, defval); + *p = '\0'; + + if (nxt_slow_path(p == end)) { + nxt_alert(task, "write past the %s buffer", mapfile); + nxt_free(mapinfo); + return NXT_ERROR; + } + } + + ret = nxt_clone_proc_map_write(task, mapfile, pid, mapinfo); + + nxt_free(mapinfo); + + return ret; +} + + +nxt_int_t +nxt_clone_proc_map(nxt_task_t *task, pid_t pid, nxt_process_clone_t *clone) +{ + nxt_int_t ret; + nxt_int_t uid, gid; + const char *rule; + nxt_runtime_t *rt; + + rt = task->thread->runtime; + uid = geteuid(); + gid = getegid(); + + rule = rt->capabilities.setid ? "allow" : "deny"; + + ret = nxt_clone_proc_map_set(task, "uid_map", pid, uid, clone->uidmap); + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + + ret = nxt_clone_proc_setgroups(task, pid, rule); + if (nxt_slow_path(ret != NXT_OK)) { + nxt_alert(task, "failed to write /proc/%d/setgroups", pid); + return NXT_ERROR; + } + + ret = nxt_clone_proc_map_set(task, "gid_map", pid, gid, clone->gidmap); + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + + return NXT_OK; +} + +#endif diff --git a/src/nxt_clone.h b/src/nxt_clone.h new file mode 100644 index 00000000..50dec0b4 --- /dev/null +++ b/src/nxt_clone.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) Igor Sysoev + * Copyright (C) NGINX, Inc. + */ + +#ifndef _NXT_CLONE_INCLUDED_ +#define _NXT_CLONE_INCLUDED_ + + +pid_t nxt_clone(nxt_int_t flags); + +#if (NXT_HAVE_CLONE_NEWUSER) +nxt_int_t nxt_clone_proc_map(nxt_task_t *task, pid_t pid, + nxt_process_clone_t *clone); +#endif + +#endif /* _NXT_CLONE_INCLUDED_ */ diff --git a/src/nxt_conf_validation.c b/src/nxt_conf_validation.c index ca8ec62e..078ddd17 100644 --- a/src/nxt_conf_validation.c +++ b/src/nxt_conf_validation.c @@ -39,9 +39,6 @@ typedef nxt_int_t (*nxt_conf_vldt_member_t)(nxt_conf_validation_t *vldt, nxt_conf_value_t *value); typedef nxt_int_t (*nxt_conf_vldt_element_t)(nxt_conf_validation_t *vldt, nxt_conf_value_t *value); -typedef nxt_int_t (*nxt_conf_vldt_system_t)(nxt_conf_validation_t *vldt, - char *name); - static nxt_int_t nxt_conf_vldt_type(nxt_conf_validation_t *vldt, nxt_str_t *name, nxt_conf_value_t *value, nxt_conf_vldt_type_t type); @@ -86,10 +83,6 @@ static nxt_int_t nxt_conf_vldt_object_iterator(nxt_conf_validation_t *vldt, nxt_conf_value_t *value, void *data); static nxt_int_t nxt_conf_vldt_array_iterator(nxt_conf_validation_t *vldt, nxt_conf_value_t *value, void *data); -static nxt_int_t nxt_conf_vldt_system(nxt_conf_validation_t *vldt, - nxt_conf_value_t *value, void *data); -static nxt_int_t nxt_conf_vldt_user(nxt_conf_validation_t *vldt, char *name); -static nxt_int_t nxt_conf_vldt_group(nxt_conf_validation_t *vldt, char *name); static nxt_int_t nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name, nxt_conf_value_t *value); static nxt_int_t nxt_conf_vldt_argument(nxt_conf_validation_t *vldt, @@ -101,6 +94,21 @@ static nxt_int_t nxt_conf_vldt_java_classpath(nxt_conf_validation_t *vldt, static nxt_int_t nxt_conf_vldt_java_option(nxt_conf_validation_t *vldt, nxt_conf_value_t *value); +static nxt_int_t +nxt_conf_vldt_isolation(nxt_conf_validation_t *vldt, nxt_conf_value_t *value, + void *data); +static nxt_int_t +nxt_conf_vldt_clone_namespaces(nxt_conf_validation_t *vldt, + nxt_conf_value_t *value, void *data); + +#if (NXT_HAVE_CLONE_NEWUSER) +static nxt_int_t nxt_conf_vldt_clone_procmap(nxt_conf_validation_t *vldt, + const char* mapfile, nxt_conf_value_t *value); +static nxt_int_t nxt_conf_vldt_clone_uidmap(nxt_conf_validation_t *vldt, + nxt_conf_value_t *value); +static nxt_int_t nxt_conf_vldt_clone_gidmap(nxt_conf_validation_t *vldt, + nxt_conf_value_t *value); +#endif static nxt_conf_vldt_object_t nxt_conf_vldt_websocket_members[] = { { nxt_string("read_timeout"), @@ -340,6 +348,100 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_app_processes_members[] = { }; +static nxt_conf_vldt_object_t nxt_conf_vldt_app_namespaces_members[] = { + +#if (NXT_HAVE_CLONE_NEWUSER) + { nxt_string("credential"), + NXT_CONF_VLDT_BOOLEAN, + NULL, + NULL }, +#endif + +#if (NXT_HAVE_CLONE_NEWPID) + { nxt_string("pid"), + NXT_CONF_VLDT_BOOLEAN, + NULL, + NULL }, +#endif + +#if (NXT_HAVE_CLONE_NEWNET) + { nxt_string("network"), + NXT_CONF_VLDT_BOOLEAN, + NULL, + NULL }, +#endif + +#if (NXT_HAVE_CLONE_NEWNS) + { nxt_string("mount"), + NXT_CONF_VLDT_BOOLEAN, + NULL, + NULL }, +#endif + +#if (NXT_HAVE_CLONE_NEWUTS) + { nxt_string("uname"), + NXT_CONF_VLDT_BOOLEAN, + NULL, + NULL }, +#endif + +#if (NXT_HAVE_CLONE_NEWCGROUP) + { nxt_string("cgroup"), + NXT_CONF_VLDT_BOOLEAN, + NULL, + NULL }, +#endif + + NXT_CONF_VLDT_END +}; + + +#if (NXT_HAVE_CLONE_NEWUSER) + +static nxt_conf_vldt_object_t nxt_conf_vldt_app_procmap_members[] = { + { nxt_string("container"), + NXT_CONF_VLDT_INTEGER, + NULL, + NULL }, + + { nxt_string("host"), + NXT_CONF_VLDT_INTEGER, + NULL, + NULL }, + + { nxt_string("size"), + NXT_CONF_VLDT_INTEGER, + NULL, + NULL }, +}; + +#endif + + +static nxt_conf_vldt_object_t nxt_conf_vldt_app_isolation_members[] = { + { nxt_string("namespaces"), + NXT_CONF_VLDT_OBJECT, + &nxt_conf_vldt_clone_namespaces, + (void *) &nxt_conf_vldt_app_namespaces_members }, + +#if (NXT_HAVE_CLONE_NEWUSER) + + { nxt_string("uidmap"), + NXT_CONF_VLDT_ARRAY, + &nxt_conf_vldt_array_iterator, + (void *) &nxt_conf_vldt_clone_uidmap }, + + { nxt_string("gidmap"), + NXT_CONF_VLDT_ARRAY, + &nxt_conf_vldt_array_iterator, + (void *) &nxt_conf_vldt_clone_gidmap }, + +#endif + + NXT_CONF_VLDT_END +}; + + static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = { { nxt_string("type"), NXT_CONF_VLDT_STRING, @@ -358,13 +460,13 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = { { nxt_string("user"), NXT_CONF_VLDT_STRING, - nxt_conf_vldt_system, - (void *) &nxt_conf_vldt_user }, + NULL, + NULL }, { nxt_string("group"), NXT_CONF_VLDT_STRING, - nxt_conf_vldt_system, - (void *) &nxt_conf_vldt_group }, + NULL, + NULL }, { nxt_string("working_directory"), NXT_CONF_VLDT_STRING, @@ -376,6 +478,11 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = { &nxt_conf_vldt_object_iterator, (void *) &nxt_conf_vldt_environment }, + { nxt_string("isolation"), + NXT_CONF_VLDT_OBJECT, + &nxt_conf_vldt_isolation, + (void *) &nxt_conf_vldt_app_isolation_members }, + NXT_CONF_VLDT_END }; @@ -1252,106 +1359,168 @@ nxt_conf_vldt_array_iterator(nxt_conf_validation_t *vldt, static nxt_int_t -nxt_conf_vldt_system(nxt_conf_validation_t *vldt, nxt_conf_value_t *value, - void *data) +nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name, + nxt_conf_value_t *value) { - size_t length; - nxt_str_t name; - nxt_conf_vldt_system_t validator; - char string[32]; + nxt_str_t str; + + if (name->length == 0) { + return nxt_conf_vldt_error(vldt, + "The environment name must not be empty."); + } - /* The cast is required by Sun C. */ - validator = (nxt_conf_vldt_system_t) data; + if (nxt_memchr(name->start, '\0', name->length) != NULL) { + return nxt_conf_vldt_error(vldt, "The environment name must not " + "contain null character."); + } - nxt_conf_get_string(value, &name); + if (nxt_memchr(name->start, '=', name->length) != NULL) { + return nxt_conf_vldt_error(vldt, "The environment name must not " + "contain '=' character."); + } + + if (nxt_conf_type(value) != NXT_CONF_STRING) { + return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must be " + "a string.", name); + } - length = name.length + 1; - length = nxt_min(length, sizeof(string)); + nxt_conf_get_string(value, &str); - nxt_cpystrn((u_char *) string, name.start, length); + if (nxt_memchr(str.start, '\0', str.length) != NULL) { + return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must " + "not contain null character.", name); + } - return validator(vldt, string); + return NXT_OK; } static nxt_int_t -nxt_conf_vldt_user(nxt_conf_validation_t *vldt, char *user) +nxt_conf_vldt_clone_namespaces(nxt_conf_validation_t *vldt, nxt_conf_value_t *value, + void *data) { - struct passwd *pwd; + return nxt_conf_vldt_object(vldt, value, data); +} - nxt_errno = 0; - pwd = getpwnam(user); +static nxt_int_t +nxt_conf_vldt_isolation(nxt_conf_validation_t *vldt, nxt_conf_value_t *value, + void *data) +{ + return nxt_conf_vldt_object(vldt, value, data); +} - if (pwd != NULL) { - return NXT_OK; - } - if (nxt_errno == 0) { - return nxt_conf_vldt_error(vldt, "User \"%s\" is not found.", user); - } +#if (NXT_HAVE_CLONE_NEWUSER) - return NXT_ERROR; -} +typedef struct { + nxt_int_t container; + nxt_int_t host; + nxt_int_t size; +} nxt_conf_vldt_clone_procmap_conf_t; + + +static nxt_conf_map_t nxt_conf_vldt_clone_procmap_conf_map[] = { + { + nxt_string("container"), + NXT_CONF_MAP_INT32, + offsetof(nxt_conf_vldt_clone_procmap_conf_t, container), + }, + + { + nxt_string("host"), + NXT_CONF_MAP_INT32, + offsetof(nxt_conf_vldt_clone_procmap_conf_t, host), + }, + + { + nxt_string("size"), + NXT_CONF_MAP_INT32, + offsetof(nxt_conf_vldt_clone_procmap_conf_t, size), + }, + +}; static nxt_int_t -nxt_conf_vldt_group(nxt_conf_validation_t *vldt, char *group) +nxt_conf_vldt_clone_procmap(nxt_conf_validation_t *vldt, const char *mapfile, + nxt_conf_value_t *value) { - struct group *grp; + nxt_int_t ret; + nxt_conf_vldt_clone_procmap_conf_t procmap; - nxt_errno = 0; + procmap.container = -1; + procmap.host = -1; + procmap.size = -1; - grp = getgrnam(group); + ret = nxt_conf_map_object(vldt->pool, value, + nxt_conf_vldt_clone_procmap_conf_map, + nxt_nitems(nxt_conf_vldt_clone_procmap_conf_map), + &procmap); + if (ret != NXT_OK) { + return ret; + } - if (grp != NULL) { - return NXT_OK; + if (procmap.container == -1) { + return nxt_conf_vldt_error(vldt, "The %s requires the " + "\"container\" field set.", mapfile); } - if (nxt_errno == 0) { - return nxt_conf_vldt_error(vldt, "Group \"%s\" is not found.", group); + if (procmap.host == -1) { + return nxt_conf_vldt_error(vldt, "The %s requires the " + "\"host\" field set.", mapfile); } - return NXT_ERROR; + if (procmap.size == -1) { + return nxt_conf_vldt_error(vldt, "The %s requires the " + "\"size\" field set.", mapfile); + } + + return NXT_OK; } static nxt_int_t -nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name, - nxt_conf_value_t *value) +nxt_conf_vldt_clone_uidmap(nxt_conf_validation_t *vldt, nxt_conf_value_t *value) { - nxt_str_t str; + nxt_int_t ret; - if (name->length == 0) { - return nxt_conf_vldt_error(vldt, - "The environment name must not be empty."); + if (nxt_conf_type(value) != NXT_CONF_OBJECT) { + return nxt_conf_vldt_error(vldt, "The \"uidmap\" array " + "must contain only object values."); } - if (nxt_memchr(name->start, '\0', name->length) != NULL) { - return nxt_conf_vldt_error(vldt, "The environment name must not " - "contain null character."); + ret = nxt_conf_vldt_object(vldt, value, + (void *) nxt_conf_vldt_app_procmap_members); + if (nxt_slow_path(ret != NXT_OK)) { + return ret; } - if (nxt_memchr(name->start, '=', name->length) != NULL) { - return nxt_conf_vldt_error(vldt, "The environment name must not " - "contain '=' character."); - } + return nxt_conf_vldt_clone_procmap(vldt, "uid_map", value); +} - if (nxt_conf_type(value) != NXT_CONF_STRING) { - return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must be " - "a string.", name); - } - nxt_conf_get_string(value, &str); +static nxt_int_t +nxt_conf_vldt_clone_gidmap(nxt_conf_validation_t *vldt, nxt_conf_value_t *value) +{ + nxt_int_t ret; - if (nxt_memchr(str.start, '\0', str.length) != NULL) { - return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must " - "not contain null character.", name); + if (nxt_conf_type(value) != NXT_CONF_OBJECT) { + return nxt_conf_vldt_error(vldt, "The \"gidmap\" array " + "must contain only object values."); } - return NXT_OK; + ret = nxt_conf_vldt_object(vldt, value, + (void *) nxt_conf_vldt_app_procmap_members); + if (nxt_slow_path(ret != NXT_OK)) { + return ret; + } + + return nxt_conf_vldt_clone_procmap(vldt, "gid_map", value); } +#endif + static nxt_int_t nxt_conf_vldt_argument(nxt_conf_validation_t *vldt, nxt_conf_value_t *value) diff --git a/src/nxt_main.h b/src/nxt_main.h index 23c55002..0afebb96 100644 --- a/src/nxt_main.h +++ b/src/nxt_main.h @@ -57,6 +57,7 @@ typedef uint16_t nxt_port_id_t; #include <nxt_fiber.h> #include <nxt_thread.h> #include <nxt_process_type.h> +#include <nxt_capability.h> #include <nxt_process.h> #include <nxt_utf8.h> #include <nxt_file_name.h> diff --git a/src/nxt_main_process.c b/src/nxt_main_process.c index 83c6d188..44deb272 100644 --- a/src/nxt_main_process.c +++ b/src/nxt_main_process.c @@ -14,6 +14,10 @@ #include <nxt_cert.h> #endif +#ifdef NXT_LINUX +#include <linux/sched.h> +#endif + typedef struct { nxt_socket_t socket; @@ -68,6 +72,10 @@ static void nxt_main_port_conf_store_handler(nxt_task_t *task, static void nxt_main_port_access_log_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg); +static nxt_int_t nxt_init_set_isolation(nxt_task_t *task, + nxt_process_init_t *init, nxt_conf_value_t *isolation); +static nxt_int_t nxt_init_set_ns(nxt_task_t *task, + nxt_process_init_t *init, nxt_conf_value_t *ns); const nxt_sig_event_t nxt_main_process_signals[] = { nxt_event_signal(SIGHUP, nxt_main_process_signal_handler), @@ -134,6 +142,12 @@ static nxt_conf_map_t nxt_common_app_conf[] = { NXT_CONF_MAP_PTR, offsetof(nxt_common_app_conf_t, environment), }, + + { + nxt_string("isolation"), + NXT_CONF_MAP_PTR, + offsetof(nxt_common_app_conf_t, isolation), + } }; @@ -271,12 +285,11 @@ nxt_port_main_start_worker_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg) nxt_int_t ret; nxt_buf_t *b; nxt_port_t *port; + nxt_runtime_t *rt; nxt_app_type_t idx; nxt_conf_value_t *conf; nxt_common_app_conf_t app_conf; - static nxt_str_t nobody = nxt_string("nobody"); - ret = NXT_ERROR; mp = nxt_mp_create(1024, 128, 256, 32); @@ -311,7 +324,10 @@ nxt_port_main_start_worker_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg) goto failed; } - app_conf.user = nobody; + rt = task->thread->runtime; + + app_conf.user.start = (u_char*)rt->user_cred.user; + app_conf.user.length = nxt_strlen(rt->user_cred.user); ret = nxt_conf_map_object(mp, conf, nxt_common_app_conf, nxt_nitems(nxt_common_app_conf), &app_conf); @@ -458,6 +474,8 @@ nxt_main_start_controller_process(nxt_task_t *task, nxt_runtime_t *rt) return NXT_ERROR; } + nxt_memzero(init, sizeof(nxt_process_init_t)); + init->start = nxt_controller_start; init->name = "controller"; init->user_cred = &rt->user_cred; @@ -552,6 +570,8 @@ nxt_main_start_discovery_process(nxt_task_t *task, nxt_runtime_t *rt) return NXT_ERROR; } + nxt_memzero(init, sizeof(nxt_process_init_t)); + init->start = nxt_discovery_start; init->name = "discovery"; init->user_cred = &rt->user_cred; @@ -576,6 +596,8 @@ nxt_main_start_router_process(nxt_task_t *task, nxt_runtime_t *rt) return NXT_ERROR; } + nxt_memzero(init, sizeof(nxt_process_init_t)); + init->start = nxt_router_start; init->name = "router"; init->user_cred = &rt->user_cred; @@ -589,7 +611,6 @@ nxt_main_start_router_process(nxt_task_t *task, nxt_runtime_t *rt) return nxt_main_create_worker_process(task, rt, init); } - static nxt_int_t nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt, nxt_common_app_conf_t *app_conf, uint32_t stream) @@ -597,41 +618,72 @@ nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt, char *user, *group; u_char *title, *last, *end; size_t size; + nxt_int_t ret; nxt_process_init_t *init; size = sizeof(nxt_process_init_t) - + sizeof(nxt_user_cred_t) - + app_conf->user.length + 1 - + app_conf->group.length + 1 - + app_conf->name.length + sizeof("\"\" application"); + + app_conf->name.length + + sizeof("\"\" application"); + + if (rt->capabilities.setid) { + size += sizeof(nxt_user_cred_t) + + app_conf->user.length + 1 + + app_conf->group.length + 1; + } init = nxt_malloc(size); if (nxt_slow_path(init == NULL)) { return NXT_ERROR; } - init->user_cred = nxt_pointer_to(init, sizeof(nxt_process_init_t)); - user = nxt_pointer_to(init->user_cred, sizeof(nxt_user_cred_t)); + nxt_memzero(init, sizeof(nxt_process_init_t)); - nxt_memcpy(user, app_conf->user.start, app_conf->user.length); - last = nxt_pointer_to(user, app_conf->user.length); - *last++ = '\0'; + if (rt->capabilities.setid) { + init->user_cred = nxt_pointer_to(init, sizeof(nxt_process_init_t)); + user = nxt_pointer_to(init->user_cred, sizeof(nxt_user_cred_t)); - init->user_cred->user = user; + nxt_memcpy(user, app_conf->user.start, app_conf->user.length); + last = nxt_pointer_to(user, app_conf->user.length); + *last++ = '\0'; - if (app_conf->group.start != NULL) { - group = (char *) last; + init->user_cred->user = user; - nxt_memcpy(group, app_conf->group.start, app_conf->group.length); - last = nxt_pointer_to(group, app_conf->group.length); - *last++ = '\0'; + if (app_conf->group.start != NULL) { + group = (char *) last; + + nxt_memcpy(group, app_conf->group.start, app_conf->group.length); + last = nxt_pointer_to(group, app_conf->group.length); + *last++ = '\0'; + + } else { + group = NULL; + } + + ret = nxt_user_cred_get(task, init->user_cred, group); + if (ret != NXT_OK) { + return NXT_ERROR; + } } else { - group = NULL; - } + if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, + nxt_strlen(rt->user_cred.user))) + { + nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " + "missing capabilities", &app_conf->user, &app_conf->name); + return NXT_ERROR; + } - if (nxt_user_cred_get(task, init->user_cred, group) != NXT_OK) { - return NXT_ERROR; + if (app_conf->group.length > 0 + && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, + nxt_strlen(rt->group))) + { + nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " + "missing capabilities", &app_conf->group, + &app_conf->name); + return NXT_ERROR; + } + + last = nxt_pointer_to(init, sizeof(nxt_process_init_t)); } title = last; @@ -648,6 +700,11 @@ nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt, init->stream = stream; init->restart = NULL; + ret = nxt_init_set_isolation(task, init, app_conf->isolation); + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + return nxt_main_create_worker_process(task, rt, init); } @@ -1246,7 +1303,7 @@ nxt_main_port_modules_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg) nxt_conf_value_t *conf, *root, *value; nxt_app_lang_module_t *lang; - static nxt_str_t root_path = nxt_string("/"); + static nxt_str_t root_path = nxt_string("/"); rt = task->thread->runtime; @@ -1438,3 +1495,105 @@ nxt_main_port_access_log_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg) msg->port_msg.stream, 0, NULL); } } + + +static nxt_int_t +nxt_init_set_isolation(nxt_task_t *task, nxt_process_init_t *init, + nxt_conf_value_t *isolation) +{ + nxt_int_t ret; + nxt_conf_value_t *object; + + static nxt_str_t nsname = nxt_string("namespaces"); + static nxt_str_t uidname = nxt_string("uidmap"); + static nxt_str_t gidname = nxt_string("gidmap"); + + if (isolation == NULL) { + return NXT_OK; + } + + object = nxt_conf_get_object_member(isolation, &nsname, NULL); + if (object != NULL) { + ret = nxt_init_set_ns(task, init, object); + if (ret != NXT_OK) { + return ret; + } + } + + object = nxt_conf_get_object_member(isolation, &uidname, NULL); + if (object != NULL) { + init->isolation.clone.uidmap = object; + } + + object = nxt_conf_get_object_member(isolation, &gidname, NULL); + if (object != NULL) { + init->isolation.clone.gidmap = object; + } + + return NXT_OK; +} + + +static nxt_int_t +nxt_init_set_ns(nxt_task_t *task, nxt_process_init_t *init, nxt_conf_value_t *namespaces) +{ + uint32_t index; + nxt_str_t name; + nxt_int_t flag; + nxt_conf_value_t *value; + + index = 0; + + while ((value = nxt_conf_next_object_member(namespaces, &name, &index)) != NULL) { + flag = 0; + +#if (NXT_HAVE_CLONE_NEWUSER) + if (nxt_str_eq(&name, "credential", 10)) { + flag = CLONE_NEWUSER; + } +#endif + +#if (NXT_HAVE_CLONE_NEWPID) + if (nxt_str_eq(&name, "pid", 3)) { + flag = CLONE_NEWPID; + } +#endif + +#if (NXT_HAVE_CLONE_NEWNET) + if (nxt_str_eq(&name, "network", 7)) { + flag = CLONE_NEWNET; + } +#endif + +#if (NXT_HAVE_CLONE_NEWUTS) + if (nxt_str_eq(&name, "uname", 5)) { + flag = CLONE_NEWUTS; + } +#endif + +#if (NXT_HAVE_CLONE_NEWNS) + if (nxt_str_eq(&name, "mount", 5)) { + flag = CLONE_NEWNS; + } +#endif + +#if (NXT_HAVE_CLONE_NEWCGROUP) + if (nxt_str_eq(&name, "cgroup", 6)) { + flag = CLONE_NEWCGROUP; + } +#endif + + if (!flag) { + nxt_alert(task, "unknown namespace flag: \"%V\"", &name); + return NXT_ERROR; + } + + if (nxt_conf_get_integer(value) == 0) { + continue; /* process shares everything by default */ + } + + init->isolation.clone.flags |= flag; + } + + return NXT_OK; +} diff --git a/src/nxt_process.c b/src/nxt_process.c index c4aef21c..638765a4 100644 --- a/src/nxt_process.c +++ b/src/nxt_process.c @@ -7,10 +7,16 @@ #include <nxt_main.h> #include <nxt_main_process.h> +#if (NXT_HAVE_CLONE) +#include <nxt_clone.h> +#endif + +#include <signal.h> static void nxt_process_start(nxt_task_t *task, nxt_process_t *process); static nxt_int_t nxt_user_groups_get(nxt_task_t *task, nxt_user_cred_t *uc); - +static nxt_int_t nxt_process_worker_setup(nxt_task_t *task, + nxt_process_t *process, int parentfd); /* A cached process pid. */ nxt_pid_t nxt_pid; @@ -34,84 +40,217 @@ nxt_bool_t nxt_proc_remove_notify_matrix[NXT_PROCESS_MAX][NXT_PROCESS_MAX] = { { 0, 0, 0, 1, 0 }, }; -nxt_pid_t -nxt_process_create(nxt_task_t *task, nxt_process_t *process) -{ - nxt_pid_t pid; + +static nxt_int_t +nxt_process_worker_setup(nxt_task_t *task, nxt_process_t *process, int parentfd) { + pid_t rpid, pid; + ssize_t n; + nxt_int_t parent_status; nxt_process_t *p; nxt_runtime_t *rt; + nxt_process_init_t *init; nxt_process_type_t ptype; - rt = task->thread->runtime; + pid = getpid(); + rpid = 0; + rt = task->thread->runtime; + init = process->init; - pid = fork(); + /* Setup the worker process. */ - switch (pid) { + n = read(parentfd, &rpid, sizeof(rpid)); + if (nxt_slow_path(n == -1 || n != sizeof(rpid))) { + nxt_alert(task, "failed to read real pid"); + return NXT_ERROR; + } - case -1: - nxt_alert(task, "fork() failed while creating \"%s\" %E", - process->init->name, nxt_errno); - break; + if (nxt_slow_path(rpid == 0)) { + nxt_alert(task, "failed to get real pid from parent"); + return NXT_ERROR; + } - case 0: - /* A child. */ - nxt_pid = getpid(); + nxt_pid = rpid; + + /* Clean inherited cached thread tid. */ + task->thread->tid = 0; + + process->pid = nxt_pid; + + if (nxt_pid != pid) { + nxt_debug(task, "app \"%s\" real pid %d", init->name, nxt_pid); + nxt_debug(task, "app \"%s\" isolated pid: %d", init->name, pid); + } - /* Clean inherited cached thread tid. */ - task->thread->tid = 0; + n = read(parentfd, &parent_status, sizeof(parent_status)); + if (nxt_slow_path(n == -1 || n != sizeof(parent_status))) { + nxt_alert(task, "failed to read parent status"); + return NXT_ERROR; + } - process->pid = nxt_pid; + if (nxt_slow_path(close(parentfd) == -1)) { + nxt_alert(task, "failed to close reader pipe fd"); + return NXT_ERROR; + } - ptype = process->init->type; + if (nxt_slow_path(parent_status != NXT_OK)) { + return parent_status; + } - nxt_port_reset_next_id(); + ptype = init->type; - nxt_event_engine_thread_adopt(task->thread->engine); + nxt_port_reset_next_id(); - /* Remove not ready processes */ - nxt_runtime_process_each(rt, p) { + nxt_event_engine_thread_adopt(task->thread->engine); - if (nxt_proc_conn_matrix[ptype][nxt_process_type(p)] == 0) { - nxt_debug(task, "remove not required process %PI", p->pid); + /* Remove not ready processes. */ + nxt_runtime_process_each(rt, p) { - nxt_process_close_ports(task, p); + if (nxt_proc_conn_matrix[ptype][nxt_process_type(p)] == 0) { + nxt_debug(task, "remove not required process %PI", p->pid); - continue; - } + nxt_process_close_ports(task, p); - if (!p->ready) { - nxt_debug(task, "remove not ready process %PI", p->pid); + continue; + } - nxt_process_close_ports(task, p); + if (!p->ready) { + nxt_debug(task, "remove not ready process %PI", p->pid); - continue; - } + nxt_process_close_ports(task, p); - nxt_port_mmaps_destroy(&p->incoming, 0); - nxt_port_mmaps_destroy(&p->outgoing, 0); + continue; + } - } nxt_runtime_process_loop; + nxt_port_mmaps_destroy(&p->incoming, 0); + nxt_port_mmaps_destroy(&p->outgoing, 0); - nxt_runtime_process_add(task, process); + } nxt_runtime_process_loop; - nxt_process_start(task, process); + nxt_runtime_process_add(task, process); - process->ready = 1; + nxt_process_start(task, process); - break; + process->ready = 1; - default: - /* A parent. */ - nxt_debug(task, "fork(\"%s\"): %PI", process->init->name, pid); + return NXT_OK; +} - process->pid = pid; - nxt_runtime_process_add(task, process); +nxt_pid_t +nxt_process_create(nxt_task_t *task, nxt_process_t *process) +{ + int pipefd[2]; + nxt_int_t ret; + nxt_pid_t pid; + nxt_process_init_t *init; - break; + if (nxt_slow_path(pipe(pipefd) == -1)) { + nxt_alert(task, "failed to create process pipe for passing rpid"); + return -1; + } + + init = process->init; + +#if (NXT_HAVE_CLONE) + pid = nxt_clone(SIGCHLD|init->isolation.clone.flags); +#else + pid = fork(); +#endif + + if (nxt_slow_path(pid < 0)) { +#if (NXT_HAVE_CLONE) + nxt_alert(task, "clone() failed while creating \"%s\" %E", + init->name, nxt_errno); +#else + nxt_alert(task, "fork() failed while creating \"%s\" %E", + init->name, nxt_errno); +#endif + + return pid; + } + + if (pid == 0) { + /* Child. */ + + if (nxt_slow_path(close(pipefd[1]) == -1)) { + nxt_alert(task, "failed to close writer pipe fd"); + return NXT_ERROR; + } + + ret = nxt_process_worker_setup(task, process, pipefd[0]); + if (nxt_slow_path(ret != NXT_OK)) { + exit(1); + } + + /* + * Explicitly return 0 to notice the caller function this is the child. + * The caller must return to the event engine work queue loop. + */ + return 0; + } + + /* Parent. */ + + if (nxt_slow_path(close(pipefd[0]) != 0)) { + nxt_alert(task, "failed to close pipe: %E", nxt_errno); + } + + /* + * At this point, the child process is blocked reading the + * pipe fd to get its real pid (rpid). + * + * If anything goes wrong now, we need to terminate the child + * process by sending a NXT_ERROR in the pipe. + */ + +#if (NXT_HAVE_CLONE) + nxt_debug(task, "clone(\"%s\"): %PI", init->name, pid); +#else + nxt_debug(task, "fork(\"%s\"): %PI", init->name, pid); +#endif + + if (nxt_slow_path(write(pipefd[1], &pid, sizeof(pid)) == -1)) { + nxt_alert(task, "failed to write real pid"); + goto fail_cleanup; + } + +#if (NXT_HAVE_CLONE_NEWUSER) + if ((init->isolation.clone.flags & CLONE_NEWUSER) == CLONE_NEWUSER) { + ret = nxt_clone_proc_map(task, pid, &init->isolation.clone); + if (nxt_slow_path(ret != NXT_OK)) { + goto fail_cleanup; + } + } +#endif + + ret = NXT_OK; + + if (nxt_slow_path(write(pipefd[1], &ret, sizeof(ret)) == -1)) { + nxt_alert(task, "failed to write status"); + goto fail_cleanup; } + process->pid = pid; + + nxt_runtime_process_add(task, process); + return pid; + +fail_cleanup: + + ret = NXT_ERROR; + + if (nxt_slow_path(write(pipefd[1], &ret, sizeof(ret)) == -1)) { + nxt_alert(task, "failed to write status"); + } + + if (nxt_slow_path(close(pipefd[1]) != 0)) { + nxt_alert(task, "failed to close pipe: %E", nxt_errno); + } + + waitpid(pid, NULL, 0); + + return -1; } @@ -133,22 +272,17 @@ nxt_process_start(nxt_task_t *task, nxt_process_t *process) nxt_process_title(task, "unit: %s", init->name); thread = task->thread; + rt = thread->runtime; nxt_random_init(&thread->random); - if (init->user_cred != NULL) { - /* - * Changing user credentials requires either root privileges - * or CAP_SETUID and CAP_SETGID capabilities on Linux. - */ + if (rt->capabilities.setid && init->user_cred != NULL) { ret = nxt_user_cred_set(task, init->user_cred); if (ret != NXT_OK) { goto fail; } } - rt = thread->runtime; - rt->type = init->type; engine = thread->engine; @@ -592,15 +726,8 @@ nxt_user_cred_set(nxt_task_t *task, nxt_user_cred_t *uc) uc->user, (uint64_t) uc->uid, (uint64_t) uc->base_gid); if (setgid(uc->base_gid) != 0) { - if (nxt_errno == NXT_EPERM) { - nxt_log(task, NXT_LOG_NOTICE, "setgid(%d) failed %E, ignored", - uc->base_gid, nxt_errno); - return NXT_OK; - - } else { - nxt_alert(task, "setgid(%d) failed %E", uc->base_gid, nxt_errno); - return NXT_ERROR; - } + nxt_alert(task, "setgid(%d) failed %E", uc->base_gid, nxt_errno); + return NXT_ERROR; } if (uc->gids != NULL) { diff --git a/src/nxt_process.h b/src/nxt_process.h index c6e19f97..df9ca038 100644 --- a/src/nxt_process.h +++ b/src/nxt_process.h @@ -7,6 +7,8 @@ #ifndef _NXT_PROCESS_H_INCLUDED_ #define _NXT_PROCESS_H_INCLUDED_ +#include <nxt_conf.h> + typedef pid_t nxt_pid_t; typedef uid_t nxt_uid_t; @@ -21,26 +23,35 @@ typedef struct { nxt_gid_t *gids; } nxt_user_cred_t; +typedef struct { + nxt_int_t flags; + nxt_conf_value_t *uidmap; + nxt_conf_value_t *gidmap; +} nxt_process_clone_t; + typedef struct nxt_process_init_s nxt_process_init_t; typedef nxt_int_t (*nxt_process_start_t)(nxt_task_t *task, void *data); typedef nxt_int_t (*nxt_process_restart_t)(nxt_task_t *task, nxt_runtime_t *rt, nxt_process_init_t *init); - struct nxt_process_init_s { - nxt_process_start_t start; - const char *name; - nxt_user_cred_t *user_cred; + nxt_process_start_t start; + const char *name; + nxt_user_cred_t *user_cred; + + nxt_port_handlers_t *port_handlers; + const nxt_sig_event_t *signals; - nxt_port_handlers_t *port_handlers; - const nxt_sig_event_t *signals; + nxt_process_type_t type; - nxt_process_type_t type; + void *data; + uint32_t stream; - void *data; - uint32_t stream; + nxt_process_restart_t restart; - nxt_process_restart_t restart; + union { + nxt_process_clone_t clone; + } isolation; }; diff --git a/src/nxt_runtime.c b/src/nxt_runtime.c index 06478f72..de41ba4d 100644 --- a/src/nxt_runtime.c +++ b/src/nxt_runtime.c @@ -692,14 +692,26 @@ nxt_runtime_conf_init(nxt_task_t *task, nxt_runtime_t *rt) rt->state = NXT_STATE; rt->control = NXT_CONTROL_SOCK; + nxt_memzero(&rt->capabilities, sizeof(nxt_capabilities_t)); + if (nxt_runtime_conf_read_cmd(task, rt) != NXT_OK) { return NXT_ERROR; } - if (nxt_user_cred_get(task, &rt->user_cred, rt->group) != NXT_OK) { + if (nxt_capability_set(task, &rt->capabilities) != NXT_OK) { return NXT_ERROR; } + if (rt->capabilities.setid) { + if (nxt_user_cred_get(task, &rt->user_cred, rt->group) != NXT_OK) { + return NXT_ERROR; + } + + } else { + nxt_log(task, NXT_LOG_WARN, "Unit is running unprivileged, then it " + "cannot use arbitrary user and group."); + } + /* An engine's parameters. */ interface = nxt_service_get(rt->services, "engine", rt->engine); diff --git a/src/nxt_runtime.h b/src/nxt_runtime.h index 496ae478..0791f8e7 100644 --- a/src/nxt_runtime.h +++ b/src/nxt_runtime.h @@ -59,6 +59,7 @@ struct nxt_runtime_s { uint32_t engine_connections; uint32_t auxiliary_threads; nxt_user_cred_t user_cred; + nxt_capabilities_t capabilities; const char *group; const char *pid; const char *log; diff --git a/src/nxt_unit.c b/src/nxt_unit.c index 4497d09d..9ccd1fd9 100644 --- a/src/nxt_unit.c +++ b/src/nxt_unit.c @@ -333,6 +333,7 @@ nxt_unit_init(nxt_unit_init_t *init) } } + lib->pid = read_port.id.pid; ctx = &lib->main_ctx.ctx; rc = lib->callbacks.add_port(ctx, &ready_port); @@ -398,7 +399,6 @@ nxt_unit_create(nxt_unit_init_t *init) lib->processes.slot = NULL; lib->ports.slot = NULL; - lib->pid = getpid(); lib->log_fd = STDERR_FILENO; lib->online = 1; diff --git a/test/go/ns_inspect/app.go b/test/go/ns_inspect/app.go new file mode 100644 index 00000000..ebecbb00 --- /dev/null +++ b/test/go/ns_inspect/app.go @@ -0,0 +1,79 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "nginx/unit" + "os" + "strconv" +) + +type ( + NS struct { + USER uint64 + PID uint64 + IPC uint64 + CGROUP uint64 + UTS uint64 + MNT uint64 + NET uint64 + } + + Output struct { + PID int + UID int + GID int + NS NS + } +) + +func abortonerr(err error) { + if err != nil { + panic(err) + } +} + +// returns: [nstype]:[4026531835] +func getns(nstype string) uint64 { + str, err := os.Readlink(fmt.Sprintf("/proc/self/ns/%s", nstype)) + if err != nil { + return 0 + } + + str = str[len(nstype)+2:] + str = str[:len(str)-1] + val, err := strconv.ParseUint(str, 10, 64) + abortonerr(err) + return val +} + +func handler(w http.ResponseWriter, r *http.Request) { + pid := os.Getpid() + out := &Output{ + PID: pid, + UID: os.Getuid(), + GID: os.Getgid(), + NS: NS{ + PID: getns("pid"), + USER: getns("user"), + MNT: getns("mnt"), + IPC: getns("ipc"), + UTS: getns("uts"), + NET: getns("net"), + CGROUP: getns("cgroup"), + }, + } + data, err := json.Marshal(out) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + return + } + + w.Write(data) +} + +func main() { + http.HandleFunc("/", handler) + unit.ListenAndServe(":7080", nil) +} diff --git a/test/test_go_isolation.py b/test/test_go_isolation.py new file mode 100644 index 00000000..780c2b03 --- /dev/null +++ b/test/test_go_isolation.py @@ -0,0 +1,135 @@ +import os +import json +import unittest +from unit.applications.lang.go import TestApplicationGo +from unit.feature.isolation import TestFeatureIsolation + + +class TestGoIsolation(TestApplicationGo): + prerequisites = {'modules': ['go'], 'features': ['isolation']} + + isolation = TestFeatureIsolation() + + @classmethod + def setUpClass(cls, complete_check=True): + unit = super().setUpClass(complete_check=False) + + TestFeatureIsolation().check(cls.available, unit.testdir) + + return unit if not complete_check else unit.complete() + + def isolation_key(self, key): + return key in self.available['features']['isolation'].keys() + + def conf_isolation(self, isolation): + self.assertIn( + 'success', + self.conf(isolation, 'applications/ns_inspect/isolation'), + 'configure isolation', + ) + + def test_isolation_values(self): + self.load('ns_inspect') + + obj = self.isolation.parsejson(self.get()['body']) + + for ns, ns_value in self.available['features']['isolation'].items(): + if ns.upper() in obj['NS']: + self.assertEqual( + obj['NS'][ns.upper()], ns_value, '%s match' % ns + ) + + def test_isolation_user(self): + if not self.isolation_key('unprivileged_userns_clone'): + print('unprivileged clone is not available') + raise unittest.SkipTest() + + self.load('ns_inspect') + obj = self.isolation.parsejson(self.get()['body']) + + self.assertTrue(obj['UID'] != 0, 'uid not zero') + self.assertTrue(obj['GID'] != 0, 'gid not zero') + self.assertEqual(obj['UID'], os.getuid(), 'uid match') + self.assertEqual(obj['GID'], os.getgid(), 'gid match') + + self.conf_isolation({"namespaces": {"credential": True}}) + + obj = self.isolation.parsejson(self.get()['body']) + + # default uid and gid maps current user to nobody + self.assertEqual(obj['UID'], 65534, 'uid nobody') + self.assertEqual(obj['GID'], 65534, 'gid nobody') + + self.conf_isolation( + { + "namespaces": {"credential": True}, + "uidmap": [ + {"container": 1000, "host": os.geteuid(), "size": 1} + ], + "gidmap": [ + {"container": 1000, "host": os.getegid(), "size": 1} + ], + } + ) + + obj = self.isolation.parsejson(self.get()['body']) + + # default uid and gid maps current user to root + self.assertEqual(obj['UID'], 1000, 'uid root') + self.assertEqual(obj['GID'], 1000, 'gid root') + + def test_isolation_mnt(self): + if not self.isolation_key('mnt'): + print('mnt namespace is not supported') + raise unittest.SkipTest() + + if not self.isolation_key('unprivileged_userns_clone'): + print('unprivileged clone is not available') + raise unittest.SkipTest() + + self.load('ns_inspect') + self.conf_isolation( + {"namespaces": {"mount": True, "credential": True}} + ) + + obj = self.isolation.parsejson(self.get()['body']) + + # all but user and mnt + allns = list(self.available['features']['isolation'].keys()) + allns.remove('user') + allns.remove('mnt') + + for ns in allns: + if ns.upper() in obj['NS']: + self.assertEqual( + obj['NS'][ns.upper()], + self.available['features']['isolation'][ns], + '%s match' % ns, + ) + + self.assertNotEqual( + obj['NS']['MNT'], self.isolation.getns('mnt'), 'mnt set' + ) + self.assertNotEqual( + obj['NS']['USER'], self.isolation.getns('user'), 'user set' + ) + + def test_isolation_pid(self): + if not self.isolation_key('pid'): + print('pid namespace is not supported') + raise unittest.SkipTest() + + if not self.isolation_key('unprivileged_userns_clone'): + print('unprivileged clone is not available') + raise unittest.SkipTest() + + self.load('ns_inspect') + self.conf_isolation({"namespaces": {"pid": True, "credential": True}}) + + obj = self.isolation.parsejson(self.get()['body']) + + self.assertEqual(obj['PID'], 1, 'pid of container is 1') + + +if __name__ == '__main__': + TestGoIsolation.main() diff --git a/test/unit/feature/isolation.py b/test/unit/feature/isolation.py new file mode 100644 index 00000000..9b06ab3c --- /dev/null +++ b/test/unit/feature/isolation.py @@ -0,0 +1,87 @@ +import os +import json +from unit.applications.proto import TestApplicationProto +from unit.applications.lang.go import TestApplicationGo +from unit.applications.lang.java import TestApplicationJava +from unit.applications.lang.node import TestApplicationNode +from unit.applications.lang.perl import TestApplicationPerl +from unit.applications.lang.php import TestApplicationPHP +from unit.applications.lang.python import TestApplicationPython +from unit.applications.lang.ruby import TestApplicationRuby + + +class TestFeatureIsolation(TestApplicationProto): + allns = ['pid', 'mnt', 'ipc', 'uts', 'cgroup', 'net'] + + def check(self, available, testdir): + test_conf = {"namespaces": {"credential": True}} + + module = '' + app = 'empty' + if 'go' in available['modules']: + module = TestApplicationGo() + + elif 'java' in available['modules']: + module = TestApplicationJava() + + elif 'node' in available['modules']: + module = TestApplicationNode() + app = 'basic' + + elif 'perl' in available['modules']: + module = TestApplicationPerl() + app = 'body_empty' + + elif 'php' in available['modules']: + module = TestApplicationPHP() + app = 'phpinfo' + + elif 'python' in available['modules']: + module = TestApplicationPython() + + elif 'ruby' in available['modules']: + module = TestApplicationRuby() + + if not module: + return + + module.testdir = testdir + module.load(app) + + resp = module.conf(test_conf, 'applications/' + app + '/isolation') + if 'success' not in resp: + return + + userns = self.getns('user') + if not userns: + return + + available['features']['isolation'] = {'user': userns} + + unp_clone_path = '/proc/sys/kernel/unprivileged_userns_clone' + if os.path.exists(unp_clone_path): + with open(unp_clone_path, 'r') as f: + if str(f.read()).rstrip() == '1': + available['features']['isolation'][ + 'unprivileged_userns_clone' + ] = True + + for ns in self.allns: + ns_value = self.getns(ns) + if ns_value: + available['features']['isolation'][ns] = ns_value + + def getns(self, nstype): + # read namespace id from symlink file: + # it points to: '<nstype>:[<ns id>]' + # # eg.: 'pid:[4026531836]' + nspath = '/proc/self/ns/' + nstype + data = None + + if os.path.exists(nspath): + data = int(os.readlink(nspath)[len(nstype) + 2 : -1]) + + return data + + def parsejson(self, data): + return json.loads(data.split('\n')[1]) |