summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- auto/capability 19
-rw-r--r-- auto/isolation 52
-rw-r--r-- auto/sources 7
-rw-r--r-- auto/summary 2
-rwxr-xr-x configure 2
-rw-r--r-- src/nxt_application.h 2
-rw-r--r-- src/nxt_capability.c 104
-rw-r--r-- src/nxt_capability.h 17
-rw-r--r-- src/nxt_clone.c 263
-rw-r--r-- src/nxt_clone.h 17
-rw-r--r-- src/nxt_conf_validation.c 303
-rw-r--r-- src/nxt_main.h 1
-rw-r--r-- src/nxt_main_process.c 207
-rw-r--r-- src/nxt_process.c 251
-rw-r--r-- src/nxt_process.h 31
-rw-r--r-- src/nxt_runtime.c 14
-rw-r--r-- src/nxt_runtime.h 1
-rw-r--r-- src/nxt_unit.c 2
-rw-r--r-- test/go/ns_inspect/app.go 79
-rw-r--r-- test/test_go_isolation.py 135
-rw-r--r-- test/unit/feature/isolation.py 87
21 files changed, 1431 insertions, 165 deletions
diff --git a/auto/capability b/auto/capability
new file mode 100644
index 00000000..48777665
--- /dev/null
+++ b/auto/capability
@@ -0,0 +1,19 @@
+
+# Copyright (C) Igor Sysoev
+# Copyright (C) NGINX, Inc.
+
+# Linux capability
+
+# Probes for the raw capget(2) interface declared in <linux/capability.h>.
+# The auto/feature inputs are all set here, so the probe does not depend on
+# values left behind by whichever configure script was sourced before it.
+
+nxt_feature="Linux capability"
+nxt_feature_name=NXT_HAVE_LINUX_CAPABILITY
+nxt_feature_run=no
+nxt_feature_incs=
+nxt_feature_libs=
+nxt_feature_test="#include <linux/capability.h>
+                  #include <unistd.h>
+                  #include <sys/syscall.h>
+
+                  int main() {
+                      struct __user_cap_header_struct hdr;
+                      hdr.version = _LINUX_CAPABILITY_VERSION;
+                      syscall(SYS_capget, &hdr, 0);
+                      return 0;
+                  }"
+. auto/feature
diff --git a/auto/isolation b/auto/isolation
new file mode 100644
index 00000000..c26a4991
--- /dev/null
+++ b/auto/isolation
@@ -0,0 +1,52 @@
+# Copyright (C) Igor Sysoev
+# Copyright (C) NGINX, Inc.
+
+# Linux clone syscall.
+
+# Results of this script:
+#   NXT_HAVE_CLONE - YES when the clone(2) probe below succeeds, else NO;
+#   NXT_ISOLATION  - space-separated list of the CLONE_NEW* suffixes that
+#                    were detected, or NO when none was.
+
+NXT_ISOLATION=NO
+NXT_HAVE_CLONE=NO
+
+# Suffixes of the CLONE_NEW* namespace flags probed one by one below.
+nsflags="USER NS PID NET UTS CGROUP"
+
+nxt_feature="clone(2)"
+nxt_feature_name=NXT_HAVE_CLONE
+nxt_feature_run=no
+nxt_feature_incs=
+nxt_feature_libs=
+nxt_feature_test="#include <sys/wait.h>
+ #include <sys/syscall.h>
+
+ int main() {
+ return __NR_clone | SIGCHLD;
+ }"
+. auto/feature
+
+# nxt_found is presumably set by auto/feature to the outcome of the probe.
+if [ $nxt_found = yes ]; then
+ NXT_HAVE_CLONE=YES
+
+ # Test all isolation flags
+ for flag in $nsflags; do
+ nxt_feature="CLONE_NEW${flag}"
+ nxt_feature_name=NXT_HAVE_CLONE_NEW${flag}
+ nxt_feature_run=no
+ nxt_feature_incs=
+ nxt_feature_libs=
+ nxt_feature_test="#define _GNU_SOURCE
+ #include <sys/wait.h>
+ #include <sys/syscall.h>
+ #include <sched.h>
+
+ int main() {
+ return CLONE_NEW$flag;
+ }"
+ . auto/feature
+
+ # Append the flag to NXT_ISOLATION, replacing the initial NO marker
+ # on the first hit.
+ if [ $nxt_found = yes ]; then
+ if [ "$NXT_ISOLATION" = "NO" ]; then
+ NXT_ISOLATION=$flag
+ else
+ NXT_ISOLATION="$NXT_ISOLATION $flag"
+ fi
+ fi
+ done
+fi
diff --git a/auto/sources b/auto/sources
index 8ac8fb19..858eaa8c 100644
--- a/auto/sources
+++ b/auto/sources
@@ -71,6 +71,7 @@ NXT_LIB_SRCS=" \
src/nxt_upstream_round_robin.c \
src/nxt_http_parse.c \
src/nxt_app_log.c \
+ src/nxt_capability.c \
src/nxt_runtime.c \
src/nxt_conf.c \
src/nxt_conf_validation.c \
@@ -132,6 +133,7 @@ NXT_LIB_SOLARIS_SENDFILEV_SRCS="src/nxt_solaris_sendfilev.c"
NXT_LIB_MACOSX_SENDFILE_SRCS="src/nxt_macosx_sendfile.c"
NXT_LIB_AIX_SEND_FILE_SRCS="src/nxt_aix_send_file.c"
NXT_LIB_HPUX_SENDFILE_SRCS="src/nxt_hpux_sendfile.c"
+NXT_LIB_CLONE_SRCS="src/nxt_clone.c"
NXT_TEST_BUILD_DEPS="src/nxt_test_build.h"
NXT_TEST_BUILD_SRCS="src/nxt_test_build.c"
@@ -257,6 +259,11 @@ if [ "$NXT_HAVE_HPUX_SENDFILE" = "YES" \
fi
+if [ "$NXT_HAVE_CLONE" = "YES" ]; then
+ NXT_LIB_SRCS="$NXT_LIB_SRCS $NXT_LIB_CLONE_SRCS"
+fi
+
+
if [ "$NXT_TEST_BUILD" = "YES" ]; then
NXT_LIB_SRCS="$NXT_LIB_SRCS $NXT_TEST_BUILD_SRCS"
fi
diff --git a/auto/summary b/auto/summary
index 1c9df4b1..59267f6c 100644
--- a/auto/summary
+++ b/auto/summary
@@ -26,6 +26,8 @@ Unit configuration summary:
Unix domain sockets support: $NXT_UNIX_DOMAIN
TLS support: ............... $NXT_OPENSSL
+ process isolation: ......... $NXT_ISOLATION
+
debug logging: ............. $NXT_DEBUG
END
diff --git a/configure b/configure
index 335a8c88..b6cd3087 100755
--- a/configure
+++ b/configure
@@ -126,6 +126,8 @@ NXT_LIBRT=
. auto/os/conf
. auto/ssltls
. auto/pcre
+. auto/isolation
+. auto/capability
case "$NXT_SYSTEM_PLATFORM" in
diff --git a/src/nxt_application.h b/src/nxt_application.h
index 7ff4bb11..2a1fa39e 100644
--- a/src/nxt_application.h
+++ b/src/nxt_application.h
@@ -88,6 +88,8 @@ struct nxt_common_app_conf_s {
char *working_directory;
nxt_conf_value_t *environment;
+ nxt_conf_value_t *isolation;
+
union {
nxt_external_app_conf_t external;
nxt_python_app_conf_t python;
diff --git a/src/nxt_capability.c b/src/nxt_capability.c
new file mode 100644
index 00000000..805faff6
--- /dev/null
+++ b/src/nxt_capability.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) NGINX, Inc.
+ */
+
+#include <nxt_main.h>
+
+#if (NXT_HAVE_LINUX_CAPABILITY)
+
+#include <linux/capability.h>
+#include <sys/syscall.h>
+
+#define nxt_capget(hdrp, datap) \
+ syscall(SYS_capget, hdrp, datap)
+#define nxt_capset(hdrp, datap) \
+ syscall(SYS_capset, hdrp, datap)
+
+#endif /* NXT_HAVE_LINUX_CAPABILITY */
+
+
+static nxt_int_t nxt_capability_specific_set(nxt_task_t *task,
+ nxt_capabilities_t *cap);
+
+
+/*
+ * Fills "cap" for the current process.  "cap->setid" must be 0 on entry.
+ *
+ * A process whose effective uid is 0 is marked as setid-capable right away;
+ * otherwise the decision is delegated to the platform-specific check.
+ */
+nxt_int_t
+nxt_capability_set(nxt_task_t *task, nxt_capabilities_t *cap)
+{
+    nxt_assert(cap->setid == 0);
+
+    if (geteuid() != 0) {
+        return nxt_capability_specific_set(task, cap);
+    }
+
+    cap->setid = 1;
+
+    return NXT_OK;
+}
+
+
+#if (NXT_HAVE_LINUX_CAPABILITY)
+
+/*
+ * Returns the capability version left in the header after a capget(2) call
+ * made with a NULL data pointer.
+ *
+ * NOTE(review): the header is seeded with _LINUX_CAPABILITY_VERSION, which
+ * the kernel may accept unchanged; confirm that this call really reports the
+ * kernel's preferred version and not simply the one from the build headers.
+ */
+static uint32_t
+nxt_capability_linux_get_version(void)
+{
+    struct __user_cap_header_struct  hdr;
+
+    hdr.version = _LINUX_CAPABILITY_VERSION;
+    hdr.pid = nxt_pid;
+
+    /* Only hdr.version is consumed; the call result is ignored on purpose. */
+    (void) nxt_capget(&hdr, NULL);
+
+    return hdr.version;
+}
+
+
+/*
+ * Linux implementation: sets "cap->setid" when both CAP_SETUID and
+ * CAP_SETGID are present in the effective set of the current process.
+ *
+ * Returns NXT_ERROR only when capget(2) itself fails.
+ */
+static nxt_int_t
+nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap)
+{
+    uint32_t                         setid_mask;
+    struct __user_cap_data_struct    data[2];
+    struct __user_cap_header_struct  hdr;
+
+    /*
+     * Capability v1 fills one 32-bit data struct, v2 and v3 fill two of
+     * them.  Two elements are always reserved, which wastes a few bytes
+     * on v1 but is safe for every version: CAP_SETUID and CAP_SETGID both
+     * live in the first 32-bit chunk, so only data[0] is inspected.
+     */
+
+    /*
+     * The version comes from the kernel rather than from the
+     * _LINUX_CAPABILITY_VERSION header constant, which is safer for
+     * a pre-compiled Unit binary.
+     */
+    hdr.version = nxt_capability_linux_get_version();
+    hdr.pid = nxt_pid;
+
+    if (nxt_slow_path(nxt_capget(&hdr, data) == -1)) {
+        nxt_alert(task, "failed to get process capabilities: %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    setid_mask = ((uint32_t) 1 << CAP_SETUID) | ((uint32_t) 1 << CAP_SETGID);
+
+    if ((data[0].effective & setid_mask) == setid_mask) {
+        cap->setid = 1;
+    }
+
+    return NXT_OK;
+}
+
+#else
+
+/*
+ * Fallback used when NXT_HAVE_LINUX_CAPABILITY is not set: there is nothing
+ * to query, so "cap" is left untouched and NXT_OK is returned.
+ */
+static nxt_int_t
+nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap)
+{
+ return NXT_OK;
+}
+
+#endif
diff --git a/src/nxt_capability.h b/src/nxt_capability.h
new file mode 100644
index 00000000..60bbd5f8
--- /dev/null
+++ b/src/nxt_capability.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) NGINX, Inc.
+ */
+
+#ifndef _NXT_CAPABILITY_INCLUDED_
+#define _NXT_CAPABILITY_INCLUDED_
+
+/*
+ * Process capabilities.
+ *
+ * setid is set by nxt_capability_set() when the effective uid is 0 or,
+ * on Linux, when both CAP_SETUID and CAP_SETGID are effective.
+ */
+typedef struct {
+ uint8_t setid; /* 1 bit */
+} nxt_capabilities_t;
+
+
+/* Fills "cap" for the current process; expects cap->setid to be 0 on entry. */
+NXT_EXPORT nxt_int_t nxt_capability_set(nxt_task_t *task,
+ nxt_capabilities_t *cap);
+
+#endif /* _NXT_CAPABILITY_INCLUDED_ */
diff --git a/src/nxt_clone.c b/src/nxt_clone.c
new file mode 100644
index 00000000..0fddd6c7
--- /dev/null
+++ b/src/nxt_clone.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) NGINX, Inc.
+ */
+
+#include <nxt_main.h>
+#include <sys/types.h>
+#include <nxt_conf.h>
+#include <nxt_clone.h>
+
+#if (NXT_HAVE_CLONE)
+
+/*
+ * Invokes the raw clone system call with "flags" passed through unchanged
+ * and returns the syscall() result.
+ */
+pid_t
+nxt_clone(nxt_int_t flags)
+{
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+ /* On these targets the two leading arguments are passed in swapped order. */
+ return syscall(__NR_clone, NULL, flags);
+#else
+ /* NULL is presumably the child stack argument - see clone(2). */
+ return syscall(__NR_clone, flags, NULL);
+#endif
+}
+
+#endif
+
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+
+/* map uid 65534 to unit pid */
+#define NXT_DEFAULT_UNPRIV_MAP "65534 %d 1"
+
+nxt_int_t nxt_clone_proc_setgroups(nxt_task_t *task, pid_t child_pid,
+ const char *str);
+nxt_int_t nxt_clone_proc_map_set(nxt_task_t *task, const char* mapfile,
+ pid_t pid, nxt_int_t defval, nxt_conf_value_t *mapobj);
+nxt_int_t nxt_clone_proc_map_write(nxt_task_t *task, const char *mapfile,
+ pid_t pid, u_char *mapinfo);
+
+
+typedef struct {
+ nxt_int_t container;
+ nxt_int_t host;
+ nxt_int_t size;
+} nxt_clone_procmap_t;
+
+
+/*
+ * Writes "str" into /proc/<child_pid>/setgroups.
+ *
+ * A missing setgroups file (ENOENT) is not an error: NXT_OK is returned
+ * without writing anything.  Any other open() failure, or a failed write(),
+ * is reported through nxt_alert() and yields NXT_ERROR.
+ */
+nxt_int_t
+nxt_clone_proc_setgroups(nxt_task_t *task, pid_t child_pid, const char *str)
+{
+    int      fd, err;
+    u_char   *p, *end;
+    ssize_t  n;
+    u_char   path[PATH_MAX];
+
+    /*
+     * The last byte is reserved for the terminator, so storing through "p"
+     * stays inside "path" even when the formatted name is truncated.
+     */
+    end = path + sizeof(path) - 1;
+
+    p = nxt_sprintf(path, end, "/proc/%d/setgroups", child_pid);
+    *p = '\0';
+
+    if (nxt_slow_path(p == end)) {
+        nxt_alert(task, "error write past the buffer: %s", path);
+        return NXT_ERROR;
+    }
+
+    fd = open((char *) path, O_RDWR);
+
+    if (fd == -1) {
+        /*
+         * If /proc/pid/setgroups does not exist, it is safe to set the
+         * uid/gid maps.  Any error other than ENOENT aborts the setup and
+         * is reported to the user.
+         */
+
+        if (nxt_errno != ENOENT) {
+            nxt_alert(task, "open(%s): %E", path, nxt_errno);
+            return NXT_ERROR;
+        }
+
+        return NXT_OK;
+    }
+
+    n = write(fd, str, strlen(str));
+
+    /* close() may overwrite errno, so the write() error is captured first. */
+    err = nxt_errno;
+
+    close(fd);
+
+    if (nxt_slow_path(n == -1)) {
+        nxt_alert(task, "write(%s): %E", path, err);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Writes the NUL-terminated "mapinfo" text into /proc/<pid>/<mapfile>.
+ *
+ * Returns NXT_OK when the whole string was written in a single write()
+ * and NXT_ERROR otherwise.  The descriptor is closed on every path.
+ */
+nxt_int_t
+nxt_clone_proc_map_write(nxt_task_t *task, const char *mapfile, pid_t pid,
+    u_char *mapinfo)
+{
+    int      len, mapfd;
+    u_char   *p, *end;
+    ssize_t  n;
+    u_char   buf[256];
+
+    end = buf + sizeof(buf);
+
+    p = nxt_sprintf(buf, end, "/proc/%d/%s", pid, mapfile);
+    if (nxt_slow_path(p == end)) {
+        nxt_alert(task, "writing past the buffer");
+        return NXT_ERROR;
+    }
+
+    *p = '\0';
+
+    mapfd = open((char *) buf, O_RDWR);
+    if (nxt_slow_path(mapfd == -1)) {
+        nxt_alert(task, "failed to open proc map (%s) %E", buf, nxt_errno);
+        return NXT_ERROR;
+    }
+
+    len = nxt_strlen(mapinfo);
+
+    n = write(mapfd, (char *) mapinfo, len);
+    if (nxt_slow_path(n != len)) {
+
+        /* Report first: close() below may overwrite errno. */
+
+        if (n == -1 && nxt_errno == EINVAL) {
+            nxt_alert(task, "failed to write %s: Check kernel maximum "
+                      "allowed lines %E", buf, nxt_errno);
+
+        } else {
+            nxt_alert(task, "failed to write proc map (%s) %E", buf,
+                      nxt_errno);
+        }
+
+        close(mapfd);
+
+        return NXT_ERROR;
+    }
+
+    close(mapfd);
+
+    return NXT_OK;
+}
+
+
+/*
+ * Builds the text of an id map and writes it to /proc/<pid>/<mapfile>.
+ *
+ * When "mapobj" is a non-empty array, each element contributes one
+ * "container host size" line.  When "mapobj" is NULL or empty, the single
+ * default entry NXT_DEFAULT_UNPRIV_MAP is used with "defval" as the host id.
+ *
+ * Returns the result of nxt_clone_proc_map_write(), or NXT_ERROR when the
+ * buffer cannot be allocated or filled.
+ */
+nxt_int_t
+nxt_clone_proc_map_set(nxt_task_t *task, const char* mapfile, pid_t pid,
+    nxt_int_t defval, nxt_conf_value_t *mapobj)
+{
+    u_char            *p, *end, *mapinfo;
+    nxt_int_t         container, host, size;
+    nxt_int_t         ret, len, count, i;
+    nxt_conf_value_t  *obj, *value;
+
+    static nxt_str_t  str_cont = nxt_string("container");
+    static nxt_str_t  str_host = nxt_string("host");
+    static nxt_str_t  str_size = nxt_string("size");
+
+    /*
+     * uid_map one-entry size:
+     * alloc space for 3 numbers (32bit) plus 2 spaces and \n.
+     */
+    len = sizeof(u_char) * (10 + 10 + 10 + 2 + 1);
+
+    if (mapobj != NULL) {
+        count = nxt_conf_array_elements_count(mapobj);
+
+    } else {
+        count = 0;
+    }
+
+    if (count != 0) {
+        /* One entry per element plus the final terminator. */
+        len = len * count + 1;
+
+        mapinfo = nxt_malloc(len);
+        if (nxt_slow_path(mapinfo == NULL)) {
+            nxt_alert(task, "failed to allocate uid_map buffer");
+            return NXT_ERROR;
+        }
+
+        p = mapinfo;
+        end = mapinfo + len;
+
+        for (i = 0; i < count; i++) {
+            obj = nxt_conf_get_array_element(mapobj, i);
+
+            value = nxt_conf_get_object_member(obj, &str_cont, NULL);
+            container = nxt_conf_get_integer(value);
+
+            value = nxt_conf_get_object_member(obj, &str_host, NULL);
+            host = nxt_conf_get_integer(value);
+
+            value = nxt_conf_get_object_member(obj, &str_size, NULL);
+            size = nxt_conf_get_integer(value);
+
+            /*
+             * NOTE(review): the arguments are nxt_int_t while the format
+             * uses "%d"; confirm this matches nxt_sprintf()'s conversions.
+             */
+            p = nxt_sprintf(p, end, "%d %d %d", container, host, size);
+            if (nxt_slow_path(p == end)) {
+                nxt_alert(task, "write past the uid_map buffer");
+                nxt_free(mapinfo);
+                return NXT_ERROR;
+            }
+
+            if (i+1 < count) {
+                *p++ = '\n';
+
+            } else {
+                *p = '\0';
+            }
+        }
+
+    } else {
+        mapinfo = nxt_malloc(len);
+        if (nxt_slow_path(mapinfo == NULL)) {
+            nxt_alert(task, "failed to allocate uid_map buffer");
+            return NXT_ERROR;
+        }
+
+        end = mapinfo + len;
+        p = nxt_sprintf(mapinfo, end, NXT_DEFAULT_UNPRIV_MAP, defval);
+
+        /* Check the bound first: "end" is one past the allocation. */
+        if (nxt_slow_path(p == end)) {
+            nxt_alert(task, "write past the %s buffer", mapfile);
+            nxt_free(mapinfo);
+            return NXT_ERROR;
+        }
+
+        *p = '\0';
+    }
+
+    ret = nxt_clone_proc_map_write(task, mapfile, pid, mapinfo);
+
+    nxt_free(mapinfo);
+
+    return ret;
+}
+
+
+/*
+ * Configures the user namespace mappings of process "pid": writes uid_map,
+ * then the setgroups policy, then gid_map.
+ *
+ * The effective uid/gid of the calling process are the defaults used when
+ * "clone" carries no explicit uidmap/gidmap.  setgroups is set to "allow"
+ * when the runtime has the setid capability and to "deny" otherwise.
+ *
+ * Returns NXT_OK when all three writes succeed, NXT_ERROR otherwise.
+ */
+nxt_int_t
+nxt_clone_proc_map(nxt_task_t *task, pid_t pid, nxt_process_clone_t *clone)
+{
+    nxt_int_t      euid, egid;
+    const char     *setgroups;
+    nxt_runtime_t  *rt;
+
+    rt = task->thread->runtime;
+    euid = geteuid();
+    egid = getegid();
+
+    if (rt->capabilities.setid) {
+        setgroups = "allow";
+
+    } else {
+        setgroups = "deny";
+    }
+
+    if (nxt_slow_path(nxt_clone_proc_map_set(task, "uid_map", pid, euid,
+                                             clone->uidmap)
+                      != NXT_OK))
+    {
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(nxt_clone_proc_setgroups(task, pid, setgroups)
+                      != NXT_OK))
+    {
+        nxt_alert(task, "failed to write /proc/%d/setgroups", pid);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(nxt_clone_proc_map_set(task, "gid_map", pid, egid,
+                                             clone->gidmap)
+                      != NXT_OK))
+    {
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+#endif
diff --git a/src/nxt_clone.h b/src/nxt_clone.h
new file mode 100644
index 00000000..50dec0b4
--- /dev/null
+++ b/src/nxt_clone.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) Igor Sysoev
+ * Copyright (C) NGINX, Inc.
+ */
+
+#ifndef _NXT_CLONE_INCLUDED_
+#define _NXT_CLONE_INCLUDED_
+
+
+/* Raw clone system call wrapper; "flags" is passed to the kernel as is. */
+pid_t nxt_clone(nxt_int_t flags);
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+/*
+ * Writes uid_map, setgroups and gid_map under /proc/<pid> using the maps
+ * stored in "clone".
+ */
+nxt_int_t nxt_clone_proc_map(nxt_task_t *task, pid_t pid,
+ nxt_process_clone_t *clone);
+#endif
+
+#endif /* _NXT_CLONE_INCLUDED_ */
diff --git a/src/nxt_conf_validation.c b/src/nxt_conf_validation.c
index ca8ec62e..078ddd17 100644
--- a/src/nxt_conf_validation.c
+++ b/src/nxt_conf_validation.c
@@ -39,9 +39,6 @@ typedef nxt_int_t (*nxt_conf_vldt_member_t)(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
typedef nxt_int_t (*nxt_conf_vldt_element_t)(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
-typedef nxt_int_t (*nxt_conf_vldt_system_t)(nxt_conf_validation_t *vldt,
- char *name);
-
static nxt_int_t nxt_conf_vldt_type(nxt_conf_validation_t *vldt,
nxt_str_t *name, nxt_conf_value_t *value, nxt_conf_vldt_type_t type);
@@ -86,10 +83,6 @@ static nxt_int_t nxt_conf_vldt_object_iterator(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value, void *data);
static nxt_int_t nxt_conf_vldt_array_iterator(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value, void *data);
-static nxt_int_t nxt_conf_vldt_system(nxt_conf_validation_t *vldt,
- nxt_conf_value_t *value, void *data);
-static nxt_int_t nxt_conf_vldt_user(nxt_conf_validation_t *vldt, char *name);
-static nxt_int_t nxt_conf_vldt_group(nxt_conf_validation_t *vldt, char *name);
static nxt_int_t nxt_conf_vldt_environment(nxt_conf_validation_t *vldt,
nxt_str_t *name, nxt_conf_value_t *value);
static nxt_int_t nxt_conf_vldt_argument(nxt_conf_validation_t *vldt,
@@ -101,6 +94,21 @@ static nxt_int_t nxt_conf_vldt_java_classpath(nxt_conf_validation_t *vldt,
static nxt_int_t nxt_conf_vldt_java_option(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
+static nxt_int_t
+nxt_conf_vldt_isolation(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
+ void *data);
+static nxt_int_t
+nxt_conf_vldt_clone_namespaces(nxt_conf_validation_t *vldt,
+ nxt_conf_value_t *value, void *data);
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+static nxt_int_t nxt_conf_vldt_clone_procmap(nxt_conf_validation_t *vldt,
+ const char* mapfile, nxt_conf_value_t *value);
+static nxt_int_t nxt_conf_vldt_clone_uidmap(nxt_conf_validation_t *vldt,
+ nxt_conf_value_t *value);
+static nxt_int_t nxt_conf_vldt_clone_gidmap(nxt_conf_validation_t *vldt,
+ nxt_conf_value_t *value);
+#endif
static nxt_conf_vldt_object_t nxt_conf_vldt_websocket_members[] = {
{ nxt_string("read_timeout"),
@@ -340,6 +348,100 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_app_processes_members[] = {
};
+/*
+ * Members accepted inside the "namespaces" object.  Every member is a
+ * boolean and is compiled in only when the matching NXT_HAVE_CLONE_NEW*
+ * feature macro is set.
+ */
+static nxt_conf_vldt_object_t nxt_conf_vldt_app_namespaces_members[] = {
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+ { nxt_string("credential"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+#endif
+
+#if (NXT_HAVE_CLONE_NEWPID)
+ { nxt_string("pid"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+#endif
+
+#if (NXT_HAVE_CLONE_NEWNET)
+ { nxt_string("network"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+#endif
+
+#if (NXT_HAVE_CLONE_NEWNS)
+ { nxt_string("mount"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+#endif
+
+#if (NXT_HAVE_CLONE_NEWUTS)
+ { nxt_string("uname"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+#endif
+
+#if (NXT_HAVE_CLONE_NEWCGROUP)
+ { nxt_string("cgroup"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+#endif
+
+ NXT_CONF_VLDT_END
+};
+
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+
+/*
+ * Members accepted in each element of the "uidmap" and "gidmap" arrays.
+ * The table is handed to nxt_conf_vldt_object(), so it ends with the
+ * NXT_CONF_VLDT_END sentinel like every other member table in this file.
+ */
+static nxt_conf_vldt_object_t  nxt_conf_vldt_app_procmap_members[] = {
+    { nxt_string("container"),
+      NXT_CONF_VLDT_INTEGER,
+      NULL,
+      NULL },
+
+    { nxt_string("host"),
+      NXT_CONF_VLDT_INTEGER,
+      NULL,
+      NULL },
+
+    { nxt_string("size"),
+      NXT_CONF_VLDT_INTEGER,
+      NULL,
+      NULL },
+
+    NXT_CONF_VLDT_END
+};
+
+#endif
+
+
+/*
+ * Members accepted inside an application's "isolation" object:
+ * "namespaces" is validated against nxt_conf_vldt_app_namespaces_members;
+ * "uidmap" and "gidmap" are arrays checked element by element and exist
+ * only when NXT_HAVE_CLONE_NEWUSER is set.
+ */
+static nxt_conf_vldt_object_t nxt_conf_vldt_app_isolation_members[] = {
+ { nxt_string("namespaces"),
+ NXT_CONF_VLDT_OBJECT,
+ &nxt_conf_vldt_clone_namespaces,
+ (void *) &nxt_conf_vldt_app_namespaces_members },
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+
+ { nxt_string("uidmap"),
+ NXT_CONF_VLDT_ARRAY,
+ &nxt_conf_vldt_array_iterator,
+ (void *) &nxt_conf_vldt_clone_uidmap },
+
+ { nxt_string("gidmap"),
+ NXT_CONF_VLDT_ARRAY,
+ &nxt_conf_vldt_array_iterator,
+ (void *) &nxt_conf_vldt_clone_gidmap },
+
+#endif
+
+ NXT_CONF_VLDT_END
+};
+
+
static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = {
{ nxt_string("type"),
NXT_CONF_VLDT_STRING,
@@ -358,13 +460,13 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = {
{ nxt_string("user"),
NXT_CONF_VLDT_STRING,
- nxt_conf_vldt_system,
- (void *) &nxt_conf_vldt_user },
+ NULL,
+ NULL },
{ nxt_string("group"),
NXT_CONF_VLDT_STRING,
- nxt_conf_vldt_system,
- (void *) &nxt_conf_vldt_group },
+ NULL,
+ NULL },
{ nxt_string("working_directory"),
NXT_CONF_VLDT_STRING,
@@ -376,6 +478,11 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = {
&nxt_conf_vldt_object_iterator,
(void *) &nxt_conf_vldt_environment },
+ { nxt_string("isolation"),
+ NXT_CONF_VLDT_OBJECT,
+ &nxt_conf_vldt_isolation,
+ (void *) &nxt_conf_vldt_app_isolation_members },
+
NXT_CONF_VLDT_END
};
@@ -1252,106 +1359,168 @@ nxt_conf_vldt_array_iterator(nxt_conf_validation_t *vldt,
static nxt_int_t
-nxt_conf_vldt_system(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
- void *data)
+nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name,
+ nxt_conf_value_t *value)
{
- size_t length;
- nxt_str_t name;
- nxt_conf_vldt_system_t validator;
- char string[32];
+ nxt_str_t str;
+
+ if (name->length == 0) {
+ return nxt_conf_vldt_error(vldt,
+ "The environment name must not be empty.");
+ }
- /* The cast is required by Sun C. */
- validator = (nxt_conf_vldt_system_t) data;
+ if (nxt_memchr(name->start, '\0', name->length) != NULL) {
+ return nxt_conf_vldt_error(vldt, "The environment name must not "
+ "contain null character.");
+ }
- nxt_conf_get_string(value, &name);
+ if (nxt_memchr(name->start, '=', name->length) != NULL) {
+ return nxt_conf_vldt_error(vldt, "The environment name must not "
+ "contain '=' character.");
+ }
+
+ if (nxt_conf_type(value) != NXT_CONF_STRING) {
+ return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must be "
+ "a string.", name);
+ }
- length = name.length + 1;
- length = nxt_min(length, sizeof(string));
+ nxt_conf_get_string(value, &str);
- nxt_cpystrn((u_char *) string, name.start, length);
+ if (nxt_memchr(str.start, '\0', str.length) != NULL) {
+ return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must "
+ "not contain null character.", name);
+ }
- return validator(vldt, string);
+ return NXT_OK;
}
static nxt_int_t
-nxt_conf_vldt_user(nxt_conf_validation_t *vldt, char *user)
+nxt_conf_vldt_clone_namespaces(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
+ void *data)
{
- struct passwd *pwd;
+ return nxt_conf_vldt_object(vldt, value, data);
+}
- nxt_errno = 0;
- pwd = getpwnam(user);
+static nxt_int_t
+nxt_conf_vldt_isolation(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
+ void *data)
+{
+ return nxt_conf_vldt_object(vldt, value, data);
+}
- if (pwd != NULL) {
- return NXT_OK;
- }
- if (nxt_errno == 0) {
- return nxt_conf_vldt_error(vldt, "User \"%s\" is not found.", user);
- }
+#if (NXT_HAVE_CLONE_NEWUSER)
- return NXT_ERROR;
-}
+typedef struct {
+ nxt_int_t container;
+ nxt_int_t host;
+ nxt_int_t size;
+} nxt_conf_vldt_clone_procmap_conf_t;
+
+
+static nxt_conf_map_t nxt_conf_vldt_clone_procmap_conf_map[] = {
+ {
+ nxt_string("container"),
+ NXT_CONF_MAP_INT32,
+ offsetof(nxt_conf_vldt_clone_procmap_conf_t, container),
+ },
+
+ {
+ nxt_string("host"),
+ NXT_CONF_MAP_INT32,
+ offsetof(nxt_conf_vldt_clone_procmap_conf_t, host),
+ },
+
+ {
+ nxt_string("size"),
+ NXT_CONF_MAP_INT32,
+ offsetof(nxt_conf_vldt_clone_procmap_conf_t, size),
+ },
+
+};
static nxt_int_t
-nxt_conf_vldt_group(nxt_conf_validation_t *vldt, char *group)
+nxt_conf_vldt_clone_procmap(nxt_conf_validation_t *vldt, const char *mapfile,
+ nxt_conf_value_t *value)
{
- struct group *grp;
+ nxt_int_t ret;
+ nxt_conf_vldt_clone_procmap_conf_t procmap;
- nxt_errno = 0;
+ procmap.container = -1;
+ procmap.host = -1;
+ procmap.size = -1;
- grp = getgrnam(group);
+ ret = nxt_conf_map_object(vldt->pool, value,
+ nxt_conf_vldt_clone_procmap_conf_map,
+ nxt_nitems(nxt_conf_vldt_clone_procmap_conf_map),
+ &procmap);
+ if (ret != NXT_OK) {
+ return ret;
+ }
- if (grp != NULL) {
- return NXT_OK;
+ if (procmap.container == -1) {
+ return nxt_conf_vldt_error(vldt, "The %s requires the "
+ "\"container\" field set.", mapfile);
}
- if (nxt_errno == 0) {
- return nxt_conf_vldt_error(vldt, "Group \"%s\" is not found.", group);
+ if (procmap.host == -1) {
+ return nxt_conf_vldt_error(vldt, "The %s requires the "
+ "\"host\" field set.", mapfile);
}
- return NXT_ERROR;
+ if (procmap.size == -1) {
+ return nxt_conf_vldt_error(vldt, "The %s requires the "
+ "\"size\" field set.", mapfile);
+ }
+
+ return NXT_OK;
}
static nxt_int_t
-nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name,
- nxt_conf_value_t *value)
+nxt_conf_vldt_clone_uidmap(nxt_conf_validation_t *vldt, nxt_conf_value_t *value)
{
- nxt_str_t str;
+ nxt_int_t ret;
- if (name->length == 0) {
- return nxt_conf_vldt_error(vldt,
- "The environment name must not be empty.");
+ if (nxt_conf_type(value) != NXT_CONF_OBJECT) {
+ return nxt_conf_vldt_error(vldt, "The \"uidmap\" array "
+ "must contain only object values.");
}
- if (nxt_memchr(name->start, '\0', name->length) != NULL) {
- return nxt_conf_vldt_error(vldt, "The environment name must not "
- "contain null character.");
+ ret = nxt_conf_vldt_object(vldt, value,
+ (void *) nxt_conf_vldt_app_procmap_members);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return ret;
}
- if (nxt_memchr(name->start, '=', name->length) != NULL) {
- return nxt_conf_vldt_error(vldt, "The environment name must not "
- "contain '=' character.");
- }
+ return nxt_conf_vldt_clone_procmap(vldt, "uid_map", value);
+}
- if (nxt_conf_type(value) != NXT_CONF_STRING) {
- return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must be "
- "a string.", name);
- }
- nxt_conf_get_string(value, &str);
+static nxt_int_t
+nxt_conf_vldt_clone_gidmap(nxt_conf_validation_t *vldt, nxt_conf_value_t *value)
+{
+ nxt_int_t ret;
- if (nxt_memchr(str.start, '\0', str.length) != NULL) {
- return nxt_conf_vldt_error(vldt, "The \"%V\" environment value must "
- "not contain null character.", name);
+ if (nxt_conf_type(value) != NXT_CONF_OBJECT) {
+ return nxt_conf_vldt_error(vldt, "The \"gidmap\" array "
+ "must contain only object values.");
}
- return NXT_OK;
+ ret = nxt_conf_vldt_object(vldt, value,
+ (void *) nxt_conf_vldt_app_procmap_members);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return ret;
+ }
+
+ return nxt_conf_vldt_clone_procmap(vldt, "gid_map", value);
}
+#endif
+
static nxt_int_t
nxt_conf_vldt_argument(nxt_conf_validation_t *vldt, nxt_conf_value_t *value)
diff --git a/src/nxt_main.h b/src/nxt_main.h
index 23c55002..0afebb96 100644
--- a/src/nxt_main.h
+++ b/src/nxt_main.h
@@ -57,6 +57,7 @@ typedef uint16_t nxt_port_id_t;
#include <nxt_fiber.h>
#include <nxt_thread.h>
#include <nxt_process_type.h>
+#include <nxt_capability.h>
#include <nxt_process.h>
#include <nxt_utf8.h>
#include <nxt_file_name.h>
diff --git a/src/nxt_main_process.c b/src/nxt_main_process.c
index 83c6d188..44deb272 100644
--- a/src/nxt_main_process.c
+++ b/src/nxt_main_process.c
@@ -14,6 +14,10 @@
#include <nxt_cert.h>
#endif
+#ifdef NXT_LINUX
+#include <linux/sched.h>
+#endif
+
typedef struct {
nxt_socket_t socket;
@@ -68,6 +72,10 @@ static void nxt_main_port_conf_store_handler(nxt_task_t *task,
static void nxt_main_port_access_log_handler(nxt_task_t *task,
nxt_port_recv_msg_t *msg);
+static nxt_int_t nxt_init_set_isolation(nxt_task_t *task,
+ nxt_process_init_t *init, nxt_conf_value_t *isolation);
+static nxt_int_t nxt_init_set_ns(nxt_task_t *task,
+ nxt_process_init_t *init, nxt_conf_value_t *ns);
const nxt_sig_event_t nxt_main_process_signals[] = {
nxt_event_signal(SIGHUP, nxt_main_process_signal_handler),
@@ -134,6 +142,12 @@ static nxt_conf_map_t nxt_common_app_conf[] = {
NXT_CONF_MAP_PTR,
offsetof(nxt_common_app_conf_t, environment),
},
+
+ {
+ nxt_string("isolation"),
+ NXT_CONF_MAP_PTR,
+ offsetof(nxt_common_app_conf_t, isolation),
+ }
};
@@ -271,12 +285,11 @@ nxt_port_main_start_worker_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
nxt_int_t ret;
nxt_buf_t *b;
nxt_port_t *port;
+ nxt_runtime_t *rt;
nxt_app_type_t idx;
nxt_conf_value_t *conf;
nxt_common_app_conf_t app_conf;
- static nxt_str_t nobody = nxt_string("nobody");
-
ret = NXT_ERROR;
mp = nxt_mp_create(1024, 128, 256, 32);
@@ -311,7 +324,10 @@ nxt_port_main_start_worker_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
goto failed;
}
- app_conf.user = nobody;
+ rt = task->thread->runtime;
+
+ app_conf.user.start = (u_char*)rt->user_cred.user;
+ app_conf.user.length = nxt_strlen(rt->user_cred.user);
ret = nxt_conf_map_object(mp, conf, nxt_common_app_conf,
nxt_nitems(nxt_common_app_conf), &app_conf);
@@ -458,6 +474,8 @@ nxt_main_start_controller_process(nxt_task_t *task, nxt_runtime_t *rt)
return NXT_ERROR;
}
+ nxt_memzero(init, sizeof(nxt_process_init_t));
+
init->start = nxt_controller_start;
init->name = "controller";
init->user_cred = &rt->user_cred;
@@ -552,6 +570,8 @@ nxt_main_start_discovery_process(nxt_task_t *task, nxt_runtime_t *rt)
return NXT_ERROR;
}
+ nxt_memzero(init, sizeof(nxt_process_init_t));
+
init->start = nxt_discovery_start;
init->name = "discovery";
init->user_cred = &rt->user_cred;
@@ -576,6 +596,8 @@ nxt_main_start_router_process(nxt_task_t *task, nxt_runtime_t *rt)
return NXT_ERROR;
}
+ nxt_memzero(init, sizeof(nxt_process_init_t));
+
init->start = nxt_router_start;
init->name = "router";
init->user_cred = &rt->user_cred;
@@ -589,7 +611,6 @@ nxt_main_start_router_process(nxt_task_t *task, nxt_runtime_t *rt)
return nxt_main_create_worker_process(task, rt, init);
}
-
static nxt_int_t
nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt,
nxt_common_app_conf_t *app_conf, uint32_t stream)
@@ -597,41 +618,72 @@ nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt,
char *user, *group;
u_char *title, *last, *end;
size_t size;
+ nxt_int_t ret;
nxt_process_init_t *init;
size = sizeof(nxt_process_init_t)
- + sizeof(nxt_user_cred_t)
- + app_conf->user.length + 1
- + app_conf->group.length + 1
- + app_conf->name.length + sizeof("\"\" application");
+ + app_conf->name.length
+ + sizeof("\"\" application");
+
+ if (rt->capabilities.setid) {
+ size += sizeof(nxt_user_cred_t)
+ + app_conf->user.length + 1
+ + app_conf->group.length + 1;
+ }
init = nxt_malloc(size);
if (nxt_slow_path(init == NULL)) {
return NXT_ERROR;
}
- init->user_cred = nxt_pointer_to(init, sizeof(nxt_process_init_t));
- user = nxt_pointer_to(init->user_cred, sizeof(nxt_user_cred_t));
+ nxt_memzero(init, sizeof(nxt_process_init_t));
- nxt_memcpy(user, app_conf->user.start, app_conf->user.length);
- last = nxt_pointer_to(user, app_conf->user.length);
- *last++ = '\0';
+ if (rt->capabilities.setid) {
+ init->user_cred = nxt_pointer_to(init, sizeof(nxt_process_init_t));
+ user = nxt_pointer_to(init->user_cred, sizeof(nxt_user_cred_t));
- init->user_cred->user = user;
+ nxt_memcpy(user, app_conf->user.start, app_conf->user.length);
+ last = nxt_pointer_to(user, app_conf->user.length);
+ *last++ = '\0';
- if (app_conf->group.start != NULL) {
- group = (char *) last;
+ init->user_cred->user = user;
- nxt_memcpy(group, app_conf->group.start, app_conf->group.length);
- last = nxt_pointer_to(group, app_conf->group.length);
- *last++ = '\0';
+ if (app_conf->group.start != NULL) {
+ group = (char *) last;
+
+ nxt_memcpy(group, app_conf->group.start, app_conf->group.length);
+ last = nxt_pointer_to(group, app_conf->group.length);
+ *last++ = '\0';
+
+ } else {
+ group = NULL;
+ }
+
+ ret = nxt_user_cred_get(task, init->user_cred, group);
+ if (ret != NXT_OK) {
+ return NXT_ERROR;
+ }
} else {
- group = NULL;
- }
+ if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
+ nxt_strlen(rt->user_cred.user)))
+ {
+ nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
+ "missing capabilities", &app_conf->user, &app_conf->name);
+ return NXT_ERROR;
+ }
- if (nxt_user_cred_get(task, init->user_cred, group) != NXT_OK) {
- return NXT_ERROR;
+ if (app_conf->group.length > 0
+ && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
+ nxt_strlen(rt->group)))
+ {
+ nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
+ "missing capabilities", &app_conf->group,
+ &app_conf->name);
+ return NXT_ERROR;
+ }
+
+ last = nxt_pointer_to(init, sizeof(nxt_process_init_t));
}
title = last;
@@ -648,6 +700,11 @@ nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt,
init->stream = stream;
init->restart = NULL;
+ ret = nxt_init_set_isolation(task, init, app_conf->isolation);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NXT_ERROR;
+ }
+
return nxt_main_create_worker_process(task, rt, init);
}
@@ -1246,7 +1303,7 @@ nxt_main_port_modules_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
nxt_conf_value_t *conf, *root, *value;
nxt_app_lang_module_t *lang;
- static nxt_str_t root_path = nxt_string("/");
+ static nxt_str_t root_path = nxt_string("/");
rt = task->thread->runtime;
@@ -1438,3 +1495,105 @@ nxt_main_port_access_log_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
msg->port_msg.stream, 0, NULL);
}
}
+
+
+/*
+ * Copies the application's "isolation" settings into "init".
+ *
+ * A NULL "isolation" leaves "init" untouched.  The "namespaces" member, if
+ * present, is turned into clone flags by nxt_init_set_ns(); the "uidmap"
+ * and "gidmap" members, if present, are stored by reference.
+ *
+ * Returns NXT_OK, or the error returned by nxt_init_set_ns().
+ */
+static nxt_int_t
+nxt_init_set_isolation(nxt_task_t *task, nxt_process_init_t *init,
+    nxt_conf_value_t *isolation)
+{
+    nxt_int_t         rc;
+    nxt_conf_value_t  *member;
+
+    static nxt_str_t  namespaces_key = nxt_string("namespaces");
+    static nxt_str_t  uidmap_key = nxt_string("uidmap");
+    static nxt_str_t  gidmap_key = nxt_string("gidmap");
+
+    if (isolation == NULL) {
+        return NXT_OK;
+    }
+
+    member = nxt_conf_get_object_member(isolation, &namespaces_key, NULL);
+
+    if (member != NULL) {
+        rc = nxt_init_set_ns(task, init, member);
+
+        if (rc != NXT_OK) {
+            return rc;
+        }
+    }
+
+    member = nxt_conf_get_object_member(isolation, &uidmap_key, NULL);
+
+    if (member != NULL) {
+        init->isolation.clone.uidmap = member;
+    }
+
+    member = nxt_conf_get_object_member(isolation, &gidmap_key, NULL);
+
+    if (member != NULL) {
+        init->isolation.clone.gidmap = member;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Translates the members of the "namespaces" object into CLONE_NEW* bits
+ * of init->isolation.clone.flags.
+ *
+ * Each member name is matched against the namespaces compiled in; a name
+ * that matches none of them is reported and yields NXT_ERROR.  A member
+ * whose value reads as 0 leaves its flag unset.
+ *
+ * NOTE(review): the members are validated as booleans, yet the value is
+ * read with nxt_conf_get_integer(); confirm that this accessor is valid
+ * for boolean values.
+ */
+static nxt_int_t
+nxt_init_set_ns(nxt_task_t *task, nxt_process_init_t *init,
+    nxt_conf_value_t *namespaces)
+{
+    uint32_t          next;
+    nxt_str_t         key;
+    nxt_int_t         clone_flag;
+    nxt_conf_value_t  *member;
+
+    next = 0;
+
+    for ( ;; ) {
+        member = nxt_conf_next_object_member(namespaces, &key, &next);
+        if (member == NULL) {
+            break;
+        }
+
+        clone_flag = 0;
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+        if (nxt_str_eq(&key, "credential", 10)) {
+            clone_flag = CLONE_NEWUSER;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWPID)
+        if (nxt_str_eq(&key, "pid", 3)) {
+            clone_flag = CLONE_NEWPID;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWNET)
+        if (nxt_str_eq(&key, "network", 7)) {
+            clone_flag = CLONE_NEWNET;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWUTS)
+        if (nxt_str_eq(&key, "uname", 5)) {
+            clone_flag = CLONE_NEWUTS;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWNS)
+        if (nxt_str_eq(&key, "mount", 5)) {
+            clone_flag = CLONE_NEWNS;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWCGROUP)
+        if (nxt_str_eq(&key, "cgroup", 6)) {
+            clone_flag = CLONE_NEWCGROUP;
+        }
+#endif
+
+        if (clone_flag == 0) {
+            nxt_alert(task, "unknown namespace flag: \"%V\"", &key);
+            return NXT_ERROR;
+        }
+
+        /* The process shares everything by default. */
+        if (nxt_conf_get_integer(member) != 0) {
+            init->isolation.clone.flags |= clone_flag;
+        }
+    }
+
+    return NXT_OK;
+}
diff --git a/src/nxt_process.c b/src/nxt_process.c
index c4aef21c..638765a4 100644
--- a/src/nxt_process.c
+++ b/src/nxt_process.c
@@ -7,10 +7,16 @@
#include <nxt_main.h>
#include <nxt_main_process.h>
+#if (NXT_HAVE_CLONE)
+#include <nxt_clone.h>
+#endif
+
+#include <signal.h>
static void nxt_process_start(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_user_groups_get(nxt_task_t *task, nxt_user_cred_t *uc);
-
+static nxt_int_t nxt_process_worker_setup(nxt_task_t *task,
+ nxt_process_t *process, int parentfd);
/* A cached process pid. */
nxt_pid_t nxt_pid;
@@ -34,84 +40,217 @@ nxt_bool_t nxt_proc_remove_notify_matrix[NXT_PROCESS_MAX][NXT_PROCESS_MAX] = {
{ 0, 0, 0, 1, 0 },
};
-nxt_pid_t
-nxt_process_create(nxt_task_t *task, nxt_process_t *process)
-{
- nxt_pid_t pid;
+
+/*
+ * Finishes process creation on the child side; nxt_process_create() calls it
+ * in the child with the read end of the pipe as parentfd.
+ *
+ * Reads from parentfd, in order, the pid the parent sees for this process and
+ * a status word, then closes parentfd.  The received pid becomes nxt_pid and
+ * process->pid.  If the status is NXT_OK, the ports of processes that are not
+ * connected to this process type (per nxt_proc_conn_matrix) or are not ready
+ * are closed, the port mmaps of the remaining ones are destroyed, and the
+ * process is added to the runtime and started.
+ *
+ * Returns NXT_OK on success, NXT_ERROR on a failed read or close or when the
+ * received pid is zero, or the non-NXT_OK status sent by the parent.
+ */
+static nxt_int_t
+nxt_process_worker_setup(nxt_task_t *task, nxt_process_t *process, int parentfd) {
+ pid_t rpid, pid;
+ ssize_t n;
+ nxt_int_t parent_status;
nxt_process_t *p;
nxt_runtime_t *rt;
+ nxt_process_init_t *init;
nxt_process_type_t ptype;
- rt = task->thread->runtime;
+ pid = getpid();
+ rpid = 0;
+ rt = task->thread->runtime;
+ init = process->init;
- pid = fork();
+ /* Setup the worker process. */
- switch (pid) {
+ /* First message from the parent: the pid it got from clone()/fork(). */
+ n = read(parentfd, &rpid, sizeof(rpid));
+ if (nxt_slow_path(n == -1 || n != sizeof(rpid))) {
+ nxt_alert(task, "failed to read real pid");
+ return NXT_ERROR;
+ }
- case -1:
- nxt_alert(task, "fork() failed while creating \"%s\" %E",
- process->init->name, nxt_errno);
- break;
+ if (nxt_slow_path(rpid == 0)) {
+ nxt_alert(task, "failed to get real pid from parent");
+ return NXT_ERROR;
+ }
- case 0:
- /* A child. */
- nxt_pid = getpid();
+ nxt_pid = rpid;
+
+ /* Clean inherited cached thread tid. */
+ task->thread->tid = 0;
+
+ process->pid = nxt_pid;
+
+ /* getpid() disagrees with the pid sent by the parent: log both. */
+ if (nxt_pid != pid) {
+ nxt_debug(task, "app \"%s\" real pid %d", init->name, nxt_pid);
+ nxt_debug(task, "app \"%s\" isolated pid: %d", init->name, pid);
+ }
- /* Clean inherited cached thread tid. */
- task->thread->tid = 0;
+ /* Second message from the parent: whether its own setup succeeded. */
+ n = read(parentfd, &parent_status, sizeof(parent_status));
+ if (nxt_slow_path(n == -1 || n != sizeof(parent_status))) {
+ nxt_alert(task, "failed to read parent status");
+ return NXT_ERROR;
+ }
- process->pid = nxt_pid;
+ if (nxt_slow_path(close(parentfd) == -1)) {
+ nxt_alert(task, "failed to close reader pipe fd");
+ return NXT_ERROR;
+ }
- ptype = process->init->type;
+ if (nxt_slow_path(parent_status != NXT_OK)) {
+ return parent_status;
+ }
- nxt_port_reset_next_id();
+ ptype = init->type;
- nxt_event_engine_thread_adopt(task->thread->engine);
+ nxt_port_reset_next_id();
- /* Remove not ready processes */
- nxt_runtime_process_each(rt, p) {
+ nxt_event_engine_thread_adopt(task->thread->engine);
- if (nxt_proc_conn_matrix[ptype][nxt_process_type(p)] == 0) {
- nxt_debug(task, "remove not required process %PI", p->pid);
+ /* Remove not ready processes. */
+ nxt_runtime_process_each(rt, p) {
- nxt_process_close_ports(task, p);
+ if (nxt_proc_conn_matrix[ptype][nxt_process_type(p)] == 0) {
+ nxt_debug(task, "remove not required process %PI", p->pid);
- continue;
- }
+ nxt_process_close_ports(task, p);
- if (!p->ready) {
- nxt_debug(task, "remove not ready process %PI", p->pid);
+ continue;
+ }
- nxt_process_close_ports(task, p);
+ if (!p->ready) {
+ nxt_debug(task, "remove not ready process %PI", p->pid);
- continue;
- }
+ nxt_process_close_ports(task, p);
- nxt_port_mmaps_destroy(&p->incoming, 0);
- nxt_port_mmaps_destroy(&p->outgoing, 0);
+ continue;
+ }
- } nxt_runtime_process_loop;
+ nxt_port_mmaps_destroy(&p->incoming, 0);
+ nxt_port_mmaps_destroy(&p->outgoing, 0);
- nxt_runtime_process_add(task, process);
+ } nxt_runtime_process_loop;
- nxt_process_start(task, process);
+ nxt_runtime_process_add(task, process);
- process->ready = 1;
+ nxt_process_start(task, process);
- break;
+ process->ready = 1;
- default:
- /* A parent. */
- nxt_debug(task, "fork(\"%s\"): %PI", process->init->name, pid);
+ return NXT_OK;
+}
- process->pid = pid;
- nxt_runtime_process_add(task, process);
+/*
+ * Creates a new process for "process" with nxt_clone() (when NXT_HAVE_CLONE
+ * is set) or fork(), and synchronizes with it over a pipe: the parent sends
+ * the child's pid, calls nxt_clone_proc_map() if CLONE_NEWUSER was requested,
+ * and then sends a status word that either lets the child proceed or makes
+ * it exit.
+ *
+ * Returns the child's pid in the parent, 0 in the child, and a negative
+ * value in the parent if the process could not be created.  The child never
+ * returns a non-zero value; it exits instead.
+ *
+ * Neither pipe descriptor stays open in the parent once the function
+ * returns.
+ */
+nxt_pid_t
+nxt_process_create(nxt_task_t *task, nxt_process_t *process)
+{
+    int                 pipefd[2];
+    nxt_int_t           ret;
+    nxt_pid_t           pid;
+    nxt_process_init_t  *init;
-        break;
+    if (nxt_slow_path(pipe(pipefd) == -1)) {
+        nxt_alert(task, "failed to create process pipe for passing rpid");
+        return -1;
+    }
+
+    init = process->init;
+
+#if (NXT_HAVE_CLONE)
+    pid = nxt_clone(SIGCHLD|init->isolation.clone.flags);
+#else
+    pid = fork();
+#endif
+
+    if (nxt_slow_path(pid < 0)) {
+#if (NXT_HAVE_CLONE)
+        nxt_alert(task, "clone() failed while creating \"%s\" %E",
+                  init->name, nxt_errno);
+#else
+        nxt_alert(task, "fork() failed while creating \"%s\" %E",
+                  init->name, nxt_errno);
+#endif
+
+        /* No child exists, so nobody else will release the pipe. */
+        (void) close(pipefd[0]);
+        (void) close(pipefd[1]);
+
+        return pid;
+    }
+
+    if (pid == 0) {
+        /* Child. */
+
+        if (nxt_slow_path(close(pipefd[1]) == -1)) {
+            nxt_alert(task, "failed to close writer pipe fd");
+
+            /*
+             * Do not return to the caller: any non-zero return value would
+             * make the child carry on as if it were the parent.
+             */
+            exit(1);
+        }
+
+        ret = nxt_process_worker_setup(task, process, pipefd[0]);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            exit(1);
+        }
+
+        /*
+         * Explicitly return 0 to tell the caller that this is the child.
+         * The caller must return to the event engine work queue loop.
+         */
+        return 0;
+    }
+
+    /* Parent. */
+
+    if (nxt_slow_path(close(pipefd[0]) != 0)) {
+        nxt_alert(task, "failed to close pipe: %E", nxt_errno);
+    }
+
+    /*
+     * At this point, the child process is blocked reading the
+     * pipe fd to get its real pid (rpid).
+     *
+     * If anything goes wrong now, we need to terminate the child
+     * process by sending a NXT_ERROR in the pipe.
+     */
+
+#if (NXT_HAVE_CLONE)
+    nxt_debug(task, "clone(\"%s\"): %PI", init->name, pid);
+#else
+    nxt_debug(task, "fork(\"%s\"): %PI", init->name, pid);
+#endif
+
+    if (nxt_slow_path(write(pipefd[1], &pid, sizeof(pid)) == -1)) {
+        nxt_alert(task, "failed to write real pid");
+        goto fail_cleanup;
+    }
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+    if ((init->isolation.clone.flags & CLONE_NEWUSER) == CLONE_NEWUSER) {
+        ret = nxt_clone_proc_map(task, pid, &init->isolation.clone);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            goto fail_cleanup;
+        }
+    }
+#endif
+
+    ret = NXT_OK;
+
+    if (nxt_slow_path(write(pipefd[1], &ret, sizeof(ret)) == -1)) {
+        nxt_alert(task, "failed to write status");
+        goto fail_cleanup;
     }
+
+    /* The handshake is over: do not leak the write end in the parent. */
+    if (nxt_slow_path(close(pipefd[1]) != 0)) {
+        nxt_alert(task, "failed to close pipe: %E", nxt_errno);
+    }
+
+    process->pid = pid;
+
+    nxt_runtime_process_add(task, process);
+
     return pid;
+
+fail_cleanup:
+
+    ret = NXT_ERROR;
+
+    if (nxt_slow_path(write(pipefd[1], &ret, sizeof(ret)) == -1)) {
+        nxt_alert(task, "failed to write status");
+    }
+
+    if (nxt_slow_path(close(pipefd[1]) != 0)) {
+        nxt_alert(task, "failed to close pipe: %E", nxt_errno);
+    }
+
+    /* Reap the child so that it does not linger as a zombie. */
+    waitpid(pid, NULL, 0);
+
+    return -1;
 }
@@ -133,22 +272,17 @@ nxt_process_start(nxt_task_t *task, nxt_process_t *process)
nxt_process_title(task, "unit: %s", init->name);
thread = task->thread;
+ rt = thread->runtime;
nxt_random_init(&thread->random);
- if (init->user_cred != NULL) {
- /*
- * Changing user credentials requires either root privileges
- * or CAP_SETUID and CAP_SETGID capabilities on Linux.
- */
+ if (rt->capabilities.setid && init->user_cred != NULL) {
ret = nxt_user_cred_set(task, init->user_cred);
if (ret != NXT_OK) {
goto fail;
}
}
- rt = thread->runtime;
-
rt->type = init->type;
engine = thread->engine;
@@ -592,15 +726,8 @@ nxt_user_cred_set(nxt_task_t *task, nxt_user_cred_t *uc)
uc->user, (uint64_t) uc->uid, (uint64_t) uc->base_gid);
if (setgid(uc->base_gid) != 0) {
- if (nxt_errno == NXT_EPERM) {
- nxt_log(task, NXT_LOG_NOTICE, "setgid(%d) failed %E, ignored",
- uc->base_gid, nxt_errno);
- return NXT_OK;
-
- } else {
- nxt_alert(task, "setgid(%d) failed %E", uc->base_gid, nxt_errno);
- return NXT_ERROR;
- }
+ nxt_alert(task, "setgid(%d) failed %E", uc->base_gid, nxt_errno);
+ return NXT_ERROR;
}
if (uc->gids != NULL) {
diff --git a/src/nxt_process.h b/src/nxt_process.h
index c6e19f97..df9ca038 100644
--- a/src/nxt_process.h
+++ b/src/nxt_process.h
@@ -7,6 +7,8 @@
#ifndef _NXT_PROCESS_H_INCLUDED_
#define _NXT_PROCESS_H_INCLUDED_
+#include <nxt_conf.h>
+
typedef pid_t nxt_pid_t;
typedef uid_t nxt_uid_t;
@@ -21,26 +23,35 @@ typedef struct {
nxt_gid_t *gids;
} nxt_user_cred_t;
+/* Isolation parameters used when a process is created with nxt_clone(). */
+typedef struct {
+ /* CLONE_NEW* bits; OR'ed with SIGCHLD and passed to nxt_clone(). */
+ nxt_int_t flags;
+ /* "uidmap" member of the isolation configuration object, if present. */
+ nxt_conf_value_t *uidmap;
+ /* "gidmap" member of the isolation configuration object, if present. */
+ nxt_conf_value_t *gidmap;
+} nxt_process_clone_t;
+
typedef struct nxt_process_init_s nxt_process_init_t;
typedef nxt_int_t (*nxt_process_start_t)(nxt_task_t *task, void *data);
typedef nxt_int_t (*nxt_process_restart_t)(nxt_task_t *task, nxt_runtime_t *rt,
nxt_process_init_t *init);
-
+/* Initialization parameters of a process, reachable as process->init. */
struct nxt_process_init_s {
- nxt_process_start_t start;
- const char *name;
- nxt_user_cred_t *user_cred;
+ nxt_process_start_t start;
+ const char *name;
+ nxt_user_cred_t *user_cred;
+
+ nxt_port_handlers_t *port_handlers;
+ const nxt_sig_event_t *signals;
- nxt_port_handlers_t *port_handlers;
- const nxt_sig_event_t *signals;
+ nxt_process_type_t type;
- nxt_process_type_t type;
+ void *data;
+ uint32_t stream;
- void *data;
- uint32_t stream;
+ nxt_process_restart_t restart;
- nxt_process_restart_t restart;
+ /* Isolation settings; the union currently has the single member clone. */
+ union {
+ nxt_process_clone_t clone;
+ } isolation;
};
diff --git a/src/nxt_runtime.c b/src/nxt_runtime.c
index 06478f72..de41ba4d 100644
--- a/src/nxt_runtime.c
+++ b/src/nxt_runtime.c
@@ -692,14 +692,26 @@ nxt_runtime_conf_init(nxt_task_t *task, nxt_runtime_t *rt)
rt->state = NXT_STATE;
rt->control = NXT_CONTROL_SOCK;
+ nxt_memzero(&rt->capabilities, sizeof(nxt_capabilities_t));
+
if (nxt_runtime_conf_read_cmd(task, rt) != NXT_OK) {
return NXT_ERROR;
}
- if (nxt_user_cred_get(task, &rt->user_cred, rt->group) != NXT_OK) {
+ if (nxt_capability_set(task, &rt->capabilities) != NXT_OK) {
return NXT_ERROR;
}
+ if (rt->capabilities.setid) {
+ if (nxt_user_cred_get(task, &rt->user_cred, rt->group) != NXT_OK) {
+ return NXT_ERROR;
+ }
+
+ } else {
+ nxt_log(task, NXT_LOG_WARN, "Unit is running unprivileged, then it "
+ "cannot use arbitrary user and group.");
+ }
+
/* An engine's parameters. */
interface = nxt_service_get(rt->services, "engine", rt->engine);
diff --git a/src/nxt_runtime.h b/src/nxt_runtime.h
index 496ae478..0791f8e7 100644
--- a/src/nxt_runtime.h
+++ b/src/nxt_runtime.h
@@ -59,6 +59,7 @@ struct nxt_runtime_s {
uint32_t engine_connections;
uint32_t auxiliary_threads;
nxt_user_cred_t user_cred;
+ nxt_capabilities_t capabilities;
const char *group;
const char *pid;
const char *log;
diff --git a/src/nxt_unit.c b/src/nxt_unit.c
index 4497d09d..9ccd1fd9 100644
--- a/src/nxt_unit.c
+++ b/src/nxt_unit.c
@@ -333,6 +333,7 @@ nxt_unit_init(nxt_unit_init_t *init)
}
}
+ lib->pid = read_port.id.pid;
ctx = &lib->main_ctx.ctx;
rc = lib->callbacks.add_port(ctx, &ready_port);
@@ -398,7 +399,6 @@ nxt_unit_create(nxt_unit_init_t *init)
lib->processes.slot = NULL;
lib->ports.slot = NULL;
- lib->pid = getpid();
lib->log_fd = STDERR_FILENO;
lib->online = 1;
diff --git a/test/go/ns_inspect/app.go b/test/go/ns_inspect/app.go
new file mode 100644
index 00000000..ebecbb00
--- /dev/null
+++ b/test/go/ns_inspect/app.go
@@ -0,0 +1,79 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "nginx/unit"
+ "os"
+ "strconv"
+)
+
+type (
+ // NS holds one namespace ID per namespace type, as returned by getns
+ // (0 when the corresponding /proc/self/ns link cannot be read).
+ NS struct {
+ USER uint64
+ PID uint64
+ IPC uint64
+ CGROUP uint64
+ UTS uint64
+ MNT uint64
+ NET uint64
+ }
+
+ // Output is the document that handler encodes as JSON: the pid, uid and
+ // gid of this process together with its namespace IDs.
+ Output struct {
+ PID int
+ UID int
+ GID int
+ NS NS
+ }
+)
+
+// abortonerr panics with err unless err is nil.
+func abortonerr(err error) {
+	if err == nil {
+		return
+	}
+
+	panic(err)
+}
+
+// getns returns the numeric ID of the nstype namespace of this process.
+//
+// The ID is parsed from the target of the /proc/self/ns/<nstype> symlink,
+// which has the form "<nstype>:[<id>]", e.g. "pid:[4026531836]". It returns
+// 0 when the link cannot be read or its target is too short to contain an
+// ID, and panics when the bracketed text is not an unsigned decimal number.
+func getns(nstype string) uint64 {
+	link, err := os.Readlink(fmt.Sprintf("/proc/self/ns/%s", nstype))
+	if err != nil {
+		return 0
+	}
+
+	// Skip "<nstype>:[" in front and "]" at the end. Bail out rather than
+	// slice out of range when the target is shorter than that.
+	start, end := len(nstype)+2, len(link)-1
+	if end <= start {
+		return 0
+	}
+
+	val, err := strconv.ParseUint(link[start:end], 10, 64)
+	abortonerr(err)
+	return val
+}
+
+// handler replies with a JSON-encoded Output describing this process: its
+// pid, uid, gid and the namespace IDs reported by getns. If encoding fails
+// it answers with status 500 and no body.
+func handler(w http.ResponseWriter, r *http.Request) {
+	report := &Output{
+		PID: os.Getpid(),
+		UID: os.Getuid(),
+		GID: os.Getgid(),
+	}
+
+	report.NS = NS{
+		PID:    getns("pid"),
+		USER:   getns("user"),
+		MNT:    getns("mnt"),
+		IPC:    getns("ipc"),
+		UTS:    getns("uts"),
+		NET:    getns("net"),
+		CGROUP: getns("cgroup"),
+	}
+
+	body, err := json.Marshal(report)
+	if err == nil {
+		w.Write(body)
+		return
+	}
+
+	w.WriteHeader(http.StatusInternalServerError)
+}
+
+// main registers handler under "/" on the default ServeMux and then calls
+// unit.ListenAndServe with a nil handler.
+func main() {
+	http.Handle("/", http.HandlerFunc(handler))
+	unit.ListenAndServe(":7080", nil)
+}
diff --git a/test/test_go_isolation.py b/test/test_go_isolation.py
new file mode 100644
index 00000000..780c2b03
--- /dev/null
+++ b/test/test_go_isolation.py
@@ -0,0 +1,135 @@
+import os
+import json
+import unittest
+from unit.applications.lang.go import TestApplicationGo
+from unit.feature.isolation import TestFeatureIsolation
+
+
+# Isolation tests driven through the Go 'ns_inspect' application, which
+# reports its pid, uid, gid and namespace IDs as JSON.
+class TestGoIsolation(TestApplicationGo):
+ prerequisites = {'modules': ['go'], 'features': ['isolation']}
+
+ # Helper instance used for parsejson() and getns().
+ isolation = TestFeatureIsolation()
+
+ # Runs the base setUpClass without the completion check, lets
+ # TestFeatureIsolation.check() update cls.available, and then calls
+ # unit.complete() unless complete_check is false.
+ @classmethod
+ def setUpClass(cls, complete_check=True):
+ unit = super().setUpClass(complete_check=False)
+
+ TestFeatureIsolation().check(cls.available, unit.testdir)
+
+ return unit if not complete_check else unit.complete()
+
+ # True when key is present in available['features']['isolation'].
+ def isolation_key(self, key):
+ return key in self.available['features']['isolation'].keys()
+
+ # Applies the given 'isolation' object to the ns_inspect application and
+ # asserts that the configuration request reports success.
+ def conf_isolation(self, isolation):
+ self.assertIn(
+ 'success',
+ self.conf(isolation, 'applications/ns_inspect/isolation'),
+ 'configure isolation',
+ )
+
+ # With no isolation configured, every namespace ID reported by the app
+ # must equal the one recorded in available['features']['isolation'].
+ def test_isolation_values(self):
+ self.load('ns_inspect')
+
+ obj = self.isolation.parsejson(self.get()['body'])
+
+ for ns, ns_value in self.available['features']['isolation'].items():
+ if ns.upper() in obj['NS']:
+ self.assertEqual(
+ obj['NS'][ns.upper()], ns_value, '%s match' % ns
+ )
+
+ def test_isolation_user(self):
+ if not self.isolation_key('unprivileged_userns_clone'):
+ print('unprivileged clone is not available')
+ raise unittest.SkipTest()
+
+ self.load('ns_inspect')
+ obj = self.isolation.parsejson(self.get()['body'])
+
+ # without isolation the app reports the uid/gid of the test process,
+ # which is expected to be non-root
+ self.assertTrue(obj['UID'] != 0, 'uid not zero')
+ self.assertTrue(obj['GID'] != 0, 'gid not zero')
+ self.assertEqual(obj['UID'], os.getuid(), 'uid match')
+ self.assertEqual(obj['GID'], os.getgid(), 'gid match')
+
+ self.conf_isolation({"namespaces": {"credential": True}})
+
+ obj = self.isolation.parsejson(self.get()['body'])
+
+ # with no uidmap/gidmap given, the app is expected to see itself as
+ # nobody (65534)
+ self.assertEqual(obj['UID'], 65534, 'uid nobody')
+ self.assertEqual(obj['GID'], 65534, 'gid nobody')
+
+ self.conf_isolation(
+ {
+ "namespaces": {"credential": True},
+ "uidmap": [
+ {"container": 1000, "host": os.geteuid(), "size": 1}
+ ],
+ "gidmap": [
+ {"container": 1000, "host": os.getegid(), "size": 1}
+ ],
+ }
+ )
+
+ obj = self.isolation.parsejson(self.get()['body'])
+
+ # explicit maps: the host euid/egid are mapped to 1000 in the
+ # container (the 'uid root'/'gid root' messages below are stale; the
+ # expected id is 1000, not 0)
+ self.assertEqual(obj['UID'], 1000, 'uid root')
+ self.assertEqual(obj['GID'], 1000, 'gid root')
+
+ def test_isolation_mnt(self):
+ if not self.isolation_key('mnt'):
+ print('mnt namespace is not supported')
+ raise unittest.SkipTest()
+
+ if not self.isolation_key('unprivileged_userns_clone'):
+ print('unprivileged clone is not available')
+ raise unittest.SkipTest()
+
+ self.load('ns_inspect')
+ self.conf_isolation(
+ {"namespaces": {"mount": True, "credential": True}}
+ )
+
+ obj = self.isolation.parsejson(self.get()['body'])
+
+ # every recorded key except user and mnt must be unchanged; keys that
+ # the app does not report under 'NS' are ignored
+ allns = list(self.available['features']['isolation'].keys())
+ allns.remove('user')
+ allns.remove('mnt')
+
+ for ns in allns:
+ if ns.upper() in obj['NS']:
+ self.assertEqual(
+ obj['NS'][ns.upper()],
+ self.available['features']['isolation'][ns],
+ '%s match' % ns,
+ )
+
+ # the two requested namespaces must differ from those of the test
+ # process itself
+ self.assertNotEqual(
+ obj['NS']['MNT'], self.isolation.getns('mnt'), 'mnt set'
+ )
+ self.assertNotEqual(
+ obj['NS']['USER'], self.isolation.getns('user'), 'user set'
+ )
+
+ def test_isolation_pid(self):
+ if not self.isolation_key('pid'):
+ print('pid namespace is not supported')
+ raise unittest.SkipTest()
+
+ if not self.isolation_key('unprivileged_userns_clone'):
+ print('unprivileged clone is not available')
+ raise unittest.SkipTest()
+
+ self.load('ns_inspect')
+ self.conf_isolation({"namespaces": {"pid": True, "credential": True}})
+
+ obj = self.isolation.parsejson(self.get()['body'])
+
+ # the pid the app obtains from os.Getpid() in its new pid namespace
+ self.assertEqual(obj['PID'], 1, 'pid of container is 1')
+
+
+if __name__ == '__main__':
+ TestGoIsolation.main()
diff --git a/test/unit/feature/isolation.py b/test/unit/feature/isolation.py
new file mode 100644
index 00000000..9b06ab3c
--- /dev/null
+++ b/test/unit/feature/isolation.py
@@ -0,0 +1,87 @@
+import os
+import json
+from unit.applications.proto import TestApplicationProto
+from unit.applications.lang.go import TestApplicationGo
+from unit.applications.lang.java import TestApplicationJava
+from unit.applications.lang.node import TestApplicationNode
+from unit.applications.lang.perl import TestApplicationPerl
+from unit.applications.lang.php import TestApplicationPHP
+from unit.applications.lang.python import TestApplicationPython
+from unit.applications.lang.ruby import TestApplicationRuby
+
+
+# Probes whether isolation can be configured and records the namespace IDs
+# of the test process in available['features']['isolation'].
+class TestFeatureIsolation(TestApplicationProto):
+ # namespace types probed in addition to 'user'
+ allns = ['pid', 'mnt', 'ipc', 'uts', 'cgroup', 'net']
+
+ # Loads an application of the first available language module and tries
+ # to configure a credential namespace for it. Returns without touching
+ # 'available' when no module is available, when the configuration
+ # response does not contain 'success', or when the user namespace link
+ # of the test process is missing. Otherwise it stores the user namespace
+ # ID, the unprivileged_userns_clone sysctl (only if it reads '1') and the
+ # ID of each namespace in allns that exists.
+ def check(self, available, testdir):
+ test_conf = {"namespaces": {"credential": True}}
+
+ module = ''
+ app = 'empty'
+ if 'go' in available['modules']:
+ module = TestApplicationGo()
+
+ elif 'java' in available['modules']:
+ module = TestApplicationJava()
+
+ elif 'node' in available['modules']:
+ module = TestApplicationNode()
+ app = 'basic'
+
+ elif 'perl' in available['modules']:
+ module = TestApplicationPerl()
+ app = 'body_empty'
+
+ elif 'php' in available['modules']:
+ module = TestApplicationPHP()
+ app = 'phpinfo'
+
+ elif 'python' in available['modules']:
+ module = TestApplicationPython()
+
+ elif 'ruby' in available['modules']:
+ module = TestApplicationRuby()
+
+ if not module:
+ return
+
+ module.testdir = testdir
+ module.load(app)
+
+ resp = module.conf(test_conf, 'applications/' + app + '/isolation')
+ if 'success' not in resp:
+ return
+
+ userns = self.getns('user')
+ if not userns:
+ return
+
+ available['features']['isolation'] = {'user': userns}
+
+ unp_clone_path = '/proc/sys/kernel/unprivileged_userns_clone'
+ if os.path.exists(unp_clone_path):
+ with open(unp_clone_path, 'r') as f:
+ if str(f.read()).rstrip() == '1':
+ available['features']['isolation'][
+ 'unprivileged_userns_clone'
+ ] = True
+
+ for ns in self.allns:
+ ns_value = self.getns(ns)
+ if ns_value:
+ available['features']['isolation'][ns] = ns_value
+
+ # Returns the ID of the nstype namespace of the current process as an
+ # int, or None when /proc/self/ns/<nstype> does not exist.
+ def getns(self, nstype):
+ # read namespace id from symlink file:
+ # it points to: '<nstype>:[<ns id>]'
+ # e.g.: 'pid:[4026531836]'
+ nspath = '/proc/self/ns/' + nstype
+ data = None
+
+ if os.path.exists(nspath):
+ data = int(os.readlink(nspath)[len(nstype) + 2 : -1])
+
+ return data
+
+ # Decodes the second '\n'-separated line of data as JSON.
+ def parsejson(self, data):
+ return json.loads(data.split('\n')[1])