summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Tiago Natel de Moura <t.nateldemoura@f5.com> 2020-08-20 15:22:58 +0100
committer Tiago Natel de Moura <t.nateldemoura@f5.com> 2020-08-20 15:22:58 +0100
commit a8a7eeb1fc7aada17d0d8fe8e15d325525986937 (patch)
tree eb438829169ed66ced4c7c2a793a19a04fa2bc59
parent 9bf6efc55ac9678ad386fd1a9d420a3b75e9ab70 (diff)
download unit-a8a7eeb1fc7aada17d0d8fe8e15d325525986937.tar.gz
unit-a8a7eeb1fc7aada17d0d8fe8e15d325525986937.tar.bz2
Moved isolation related code to "nxt_isolation.c".
-rw-r--r-- auto/sources 1
-rw-r--r-- src/nxt_application.c 585
-rw-r--r-- src/nxt_isolation.c 958
-rw-r--r-- src/nxt_isolation.h 18
-rw-r--r-- src/nxt_main_process.c 6
-rw-r--r-- src/nxt_process.c 382
-rw-r--r-- src/nxt_process.h 20
7 files changed, 990 insertions, 980 deletions
diff --git a/auto/sources b/auto/sources
index a61577dc..e44dc4bb 100644
--- a/auto/sources
+++ b/auto/sources
@@ -14,6 +14,7 @@ NXT_LIB_SRCS=" \
src/nxt_socket.c \
src/nxt_socketpair.c \
src/nxt_credential.c \
+ src/nxt_isolation.c \
src/nxt_process.c \
src/nxt_process_title.c \
src/nxt_signal.c \
diff --git a/src/nxt_application.c b/src/nxt_application.c
index 57e4615e..6935346c 100644
--- a/src/nxt_application.c
+++ b/src/nxt_application.c
@@ -14,6 +14,7 @@
#include <nxt_application.h>
#include <nxt_unit.h>
#include <nxt_port_memory_int.h>
+#include <nxt_isolation.h>
#include <glob.h>
@@ -41,45 +42,10 @@ static void nxt_discovery_quit(nxt_task_t *task, nxt_port_recv_msg_t *msg,
void *data);
static nxt_app_module_t *nxt_app_module_load(nxt_task_t *task,
const char *name);
-static nxt_int_t nxt_app_main_prefork(nxt_task_t *task, nxt_process_t *process,
- nxt_mp_t *mp);
static nxt_int_t nxt_app_setup(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_app_set_environment(nxt_conf_value_t *environment);
static u_char *nxt_cstr_dup(nxt_mp_t *mp, u_char *dst, u_char *src);
-#if (NXT_HAVE_ISOLATION_ROOTFS)
-static nxt_int_t nxt_app_set_isolation_mounts(nxt_task_t *task,
- nxt_process_t *process, nxt_str_t *app_type);
-static nxt_int_t nxt_app_set_lang_mounts(nxt_task_t *task,
- nxt_process_t *process, nxt_array_t *syspaths);
-static nxt_int_t nxt_app_set_isolation_rootfs(nxt_task_t *task,
- nxt_conf_value_t *isolation, nxt_process_t *process);
-static nxt_int_t nxt_app_prepare_rootfs(nxt_task_t *task,
- nxt_process_t *process);
-#endif
-
-static nxt_int_t nxt_app_set_isolation(nxt_task_t *task,
- nxt_conf_value_t *isolation, nxt_process_t *process);
-
-#if (NXT_HAVE_CLONE)
-static nxt_int_t nxt_app_set_isolation_namespaces(nxt_task_t *task,
- nxt_conf_value_t *isolation, nxt_process_t *process);
-static nxt_int_t nxt_app_clone_flags(nxt_task_t *task,
- nxt_conf_value_t *namespaces, nxt_clone_t *clone);
-#endif
-
-#if (NXT_HAVE_CLONE_NEWUSER)
-static nxt_int_t nxt_app_set_isolation_creds(nxt_task_t *task,
- nxt_conf_value_t *isolation, nxt_process_t *process);
-static nxt_int_t nxt_app_isolation_credential_map(nxt_task_t *task,
- nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
- nxt_clone_credential_map_t *map);
-#endif
-
-#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
-static nxt_int_t nxt_app_set_isolation_new_privs(nxt_task_t *task,
- nxt_conf_value_t *isolation, nxt_process_t *process);
-#endif
nxt_str_t nxt_server = nxt_string(NXT_SERVER);
@@ -126,7 +92,7 @@ const nxt_process_init_t nxt_discovery_process = {
const nxt_process_init_t nxt_app_process = {
.type = NXT_PROCESS_APP,
.setup = nxt_app_setup,
- .prefork = nxt_app_main_prefork,
+ .prefork = nxt_isolation_main_prefork,
.restart = 0,
.start = NULL, /* set to module->start */
.port_handlers = &nxt_app_process_port_handlers,
@@ -474,81 +440,6 @@ nxt_discovery_quit(nxt_task_t *task, nxt_port_recv_msg_t *msg, void *data)
static nxt_int_t
-nxt_app_main_prefork(nxt_task_t *task, nxt_process_t *process, nxt_mp_t *mp)
-{
- nxt_int_t cap_setid;
- nxt_int_t ret;
- nxt_runtime_t *rt;
- nxt_common_app_conf_t *app_conf;
-
- rt = task->thread->runtime;
- app_conf = process->data.app;
- cap_setid = rt->capabilities.setid;
-
- if (app_conf->isolation != NULL) {
- ret = nxt_app_set_isolation(task, app_conf->isolation, process);
- if (nxt_slow_path(ret != NXT_OK)) {
- return ret;
- }
- }
-
-#if (NXT_HAVE_CLONE_NEWUSER)
- if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
- cap_setid = 1;
- }
-#endif
-
-#if (NXT_HAVE_ISOLATION_ROOTFS)
- if (process->isolation.rootfs != NULL) {
- ret = nxt_app_set_isolation_mounts(task, process, &app_conf->type);
- if (nxt_slow_path(ret != NXT_OK)) {
- return ret;
- }
- }
-#endif
-
- if (cap_setid) {
- ret = nxt_process_creds_set(task, process, &app_conf->user,
- &app_conf->group);
-
- if (nxt_slow_path(ret != NXT_OK)) {
- return ret;
- }
-
- } else {
- if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
- nxt_strlen(rt->user_cred.user)))
- {
- nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
- "missing capabilities", &app_conf->user, &app_conf->name);
-
- return NXT_ERROR;
- }
-
- if (app_conf->group.length > 0
- && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
- nxt_strlen(rt->group)))
- {
- nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
- "missing capabilities", &app_conf->group,
- &app_conf->name);
-
- return NXT_ERROR;
- }
- }
-
-#if (NXT_HAVE_CLONE_NEWUSER)
- ret = nxt_process_vldt_isolation_creds(task, process);
- if (nxt_slow_path(ret != NXT_OK)) {
- return ret;
- }
-#endif
-
- return NXT_OK;
-}
-
-
-static nxt_int_t
nxt_app_setup(nxt_task_t *task, nxt_process_t *process)
{
nxt_int_t ret;
@@ -594,13 +485,13 @@ nxt_app_setup(nxt_task_t *task, nxt_process_t *process)
#if (NXT_HAVE_ISOLATION_ROOTFS)
if (process->isolation.rootfs != NULL) {
if (process->isolation.mounts != NULL) {
- ret = nxt_app_prepare_rootfs(task, process);
+ ret = nxt_isolation_prepare_rootfs(task, process);
if (nxt_slow_path(ret != NXT_OK)) {
return ret;
}
}
- ret = nxt_process_change_root(task, process);
+ ret = nxt_isolation_change_root(task, process);
if (nxt_slow_path(ret != NXT_OK)) {
return NXT_ERROR;
}
@@ -686,474 +577,6 @@ nxt_app_set_environment(nxt_conf_value_t *environment)
}
-static nxt_int_t
-nxt_app_set_isolation(nxt_task_t *task, nxt_conf_value_t *isolation,
- nxt_process_t *process)
-{
-#if (NXT_HAVE_CLONE)
- if (nxt_slow_path(nxt_app_set_isolation_namespaces(task, isolation, process)
- != NXT_OK))
- {
- return NXT_ERROR;
- }
-#endif
-
-#if (NXT_HAVE_CLONE_NEWUSER)
- if (nxt_slow_path(nxt_app_set_isolation_creds(task, isolation, process)
- != NXT_OK))
- {
- return NXT_ERROR;
- }
-#endif
-
-#if (NXT_HAVE_ISOLATION_ROOTFS)
- if (nxt_slow_path(nxt_app_set_isolation_rootfs(task, isolation, process)
- != NXT_OK))
- {
- return NXT_ERROR;
- }
-#endif
-
-#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
- if (nxt_slow_path(nxt_app_set_isolation_new_privs(task, isolation, process)
- != NXT_OK))
- {
- return NXT_ERROR;
- }
-#endif
-
- return NXT_OK;
-}
-
-
-#if (NXT_HAVE_CLONE)
-
-static nxt_int_t
-nxt_app_set_isolation_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
- nxt_process_t *process)
-{
- nxt_int_t ret;
- nxt_conf_value_t *obj;
-
- static nxt_str_t nsname = nxt_string("namespaces");
-
- obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
- if (obj != NULL) {
- ret = nxt_app_clone_flags(task, obj, &process->isolation.clone);
- if (nxt_slow_path(ret != NXT_OK)) {
- return NXT_ERROR;
- }
- }
-
- return NXT_OK;
-}
-
-#endif
-
-
-#if (NXT_HAVE_CLONE_NEWUSER)
-
-static nxt_int_t
-nxt_app_set_isolation_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
- nxt_process_t *process)
-{
- nxt_int_t ret;
- nxt_clone_t *clone;
- nxt_conf_value_t *array;
-
- static nxt_str_t uidname = nxt_string("uidmap");
- static nxt_str_t gidname = nxt_string("gidmap");
-
- clone = &process->isolation.clone;
-
- array = nxt_conf_get_object_member(isolation, &uidname, NULL);
- if (array != NULL) {
- ret = nxt_app_isolation_credential_map(task, process->mem_pool, array,
- &clone->uidmap);
-
- if (nxt_slow_path(ret != NXT_OK)) {
- return NXT_ERROR;
- }
- }
-
- array = nxt_conf_get_object_member(isolation, &gidname, NULL);
- if (array != NULL) {
- ret = nxt_app_isolation_credential_map(task, process->mem_pool, array,
- &clone->gidmap);
-
- if (nxt_slow_path(ret != NXT_OK)) {
- return NXT_ERROR;
- }
- }
-
- return NXT_OK;
-}
-
-
-static nxt_int_t
-nxt_app_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
- nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
-{
- nxt_int_t ret;
- nxt_uint_t i;
- nxt_conf_value_t *obj;
-
- static nxt_conf_map_t nxt_clone_map_entry_conf[] = {
- {
- nxt_string("container"),
- NXT_CONF_MAP_INT,
- offsetof(nxt_clone_map_entry_t, container),
- },
-
- {
- nxt_string("host"),
- NXT_CONF_MAP_INT,
- offsetof(nxt_clone_map_entry_t, host),
- },
-
- {
- nxt_string("size"),
- NXT_CONF_MAP_INT,
- offsetof(nxt_clone_map_entry_t, size),
- },
- };
-
- map->size = nxt_conf_array_elements_count(map_array);
-
- if (map->size == 0) {
- return NXT_OK;
- }
-
- map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
- if (nxt_slow_path(map->map == NULL)) {
- return NXT_ERROR;
- }
-
- for (i = 0; i < map->size; i++) {
- obj = nxt_conf_get_array_element(map_array, i);
-
- ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
- nxt_nitems(nxt_clone_map_entry_conf),
- map->map + i);
- if (nxt_slow_path(ret != NXT_OK)) {
- nxt_alert(task, "clone map entry map error");
- return NXT_ERROR;
- }
- }
-
- return NXT_OK;
-}
-
-#endif
-
-#if (NXT_HAVE_CLONE)
-
-static nxt_int_t
-nxt_app_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
- nxt_clone_t *clone)
-{
- uint32_t index;
- nxt_str_t name;
- nxt_int_t flag;
- nxt_conf_value_t *value;
-
- index = 0;
-
- for ( ;; ) {
- value = nxt_conf_next_object_member(namespaces, &name, &index);
-
- if (value == NULL) {
- break;
- }
-
- flag = 0;
-
-#if (NXT_HAVE_CLONE_NEWUSER)
- if (nxt_str_eq(&name, "credential", 10)) {
- flag = CLONE_NEWUSER;
- }
-#endif
-
-#if (NXT_HAVE_CLONE_NEWPID)
- if (nxt_str_eq(&name, "pid", 3)) {
- flag = CLONE_NEWPID;
- }
-#endif
-
-#if (NXT_HAVE_CLONE_NEWNET)
- if (nxt_str_eq(&name, "network", 7)) {
- flag = CLONE_NEWNET;
- }
-#endif
-
-#if (NXT_HAVE_CLONE_NEWUTS)
- if (nxt_str_eq(&name, "uname", 5)) {
- flag = CLONE_NEWUTS;
- }
-#endif
-
-#if (NXT_HAVE_CLONE_NEWNS)
- if (nxt_str_eq(&name, "mount", 5)) {
- flag = CLONE_NEWNS;
- }
-#endif
-
-#if (NXT_HAVE_CLONE_NEWCGROUP)
- if (nxt_str_eq(&name, "cgroup", 6)) {
- flag = CLONE_NEWCGROUP;
- }
-#endif
-
- if (!flag) {
- nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
- return NXT_ERROR;
- }
-
- if (nxt_conf_get_boolean(value)) {
- clone->flags |= flag;
- }
- }
-
- return NXT_OK;
-}
-
-#endif
-
-
-#if (NXT_HAVE_ISOLATION_ROOTFS)
-
-static nxt_int_t
-nxt_app_set_isolation_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
- nxt_process_t *process)
-{
- nxt_str_t str;
- nxt_conf_value_t *obj;
-
- static nxt_str_t rootfs_name = nxt_string("rootfs");
-
- obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
- if (obj != NULL) {
- nxt_conf_get_string(obj, &str);
-
- if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
- nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
- "than \"/\" but given \"%V\"", &str);
-
- return NXT_ERROR;
- }
-
- if (str.start[str.length - 1] == '/') {
- str.length--;
- }
-
- process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
- str.length + 1);
-
- if (nxt_slow_path(process->isolation.rootfs == NULL)) {
- return NXT_ERROR;
- }
-
- nxt_memcpy(process->isolation.rootfs, str.start, str.length);
-
- process->isolation.rootfs[str.length] = '\0';
- }
-
- return NXT_OK;
-}
-
-
-static nxt_int_t
-nxt_app_set_isolation_mounts(nxt_task_t *task, nxt_process_t *process,
- nxt_str_t *app_type)
-{
- nxt_int_t ret, cap_chroot;
- nxt_runtime_t *rt;
- nxt_app_lang_module_t *lang;
-
- rt = task->thread->runtime;
- cap_chroot = rt->capabilities.chroot;
- lang = nxt_app_lang_module(rt, app_type);
-
- nxt_assert(lang != NULL);
-
-#if (NXT_HAVE_CLONE_NEWUSER)
- if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
- cap_chroot = 1;
- }
-#endif
-
- if (!cap_chroot) {
- nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
- return NXT_ERROR;
- }
-
- if (lang->mounts != NULL && lang->mounts->nelts > 0) {
- ret = nxt_app_set_lang_mounts(task, process, lang->mounts);
- if (nxt_slow_path(ret != NXT_OK)) {
- return NXT_ERROR;
- }
- }
-
- return NXT_OK;
-}
-
-
-static nxt_int_t
-nxt_app_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
- nxt_array_t *lang_mounts)
-{
- u_char *p;
- size_t i, n, rootfs_len, len;
- nxt_mp_t *mp;
- nxt_array_t *mounts;
- const u_char *rootfs;
- nxt_fs_mount_t *mnt, *lang_mnt;
-
- rootfs = process->isolation.rootfs;
- rootfs_len = nxt_strlen(rootfs);
- mp = process->mem_pool;
-
- /* copy to init mem pool */
- mounts = nxt_array_copy(mp, NULL, lang_mounts);
- if (mounts == NULL) {
- return NXT_ERROR;
- }
-
- n = mounts->nelts;
- mnt = mounts->elts;
- lang_mnt = lang_mounts->elts;
-
- for (i = 0; i < n; i++) {
- len = nxt_strlen(lang_mnt[i].dst);
-
- mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
- if (mnt[i].dst == NULL) {
- return NXT_ERROR;
- }
-
- p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
- p = nxt_cpymem(p, lang_mnt[i].dst, len);
- *p = '\0';
- }
-
- process->isolation.mounts = mounts;
-
- return NXT_OK;
-}
-
-
-static nxt_int_t
-nxt_app_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
-{
- size_t i, n;
- nxt_int_t ret, hasproc;
- struct stat st;
- nxt_array_t *mounts;
- const u_char *dst;
- nxt_fs_mount_t *mnt;
-
- hasproc = 0;
-
-#if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS)
- nxt_fs_mount_t mount;
-
- if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID)
- && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS))
- {
- /*
- * This mount point will automatically be gone when the namespace is
- * destroyed.
- */
-
- mount.fstype = (u_char *) "proc";
- mount.src = (u_char *) "proc";
- mount.dst = (u_char *) "/proc";
- mount.data = (u_char *) "";
- mount.flags = 0;
-
- ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO);
- if (nxt_fast_path(ret == NXT_OK)) {
- ret = nxt_fs_mount(task, &mount);
- if (nxt_fast_path(ret == NXT_OK)) {
- hasproc = 1;
- }
-
- } else {
- nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno);
- }
- }
-#endif
-
- mounts = process->isolation.mounts;
-
- n = mounts->nelts;
- mnt = mounts->elts;
-
- for (i = 0; i < n; i++) {
- dst = mnt[i].dst;
-
- if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0
- && stat((const char *) mnt[i].src, &st) != 0))
- {
- nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
- continue;
- }
-
- if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0
- && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0)
- {
- continue;
- }
-
- ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
- if (nxt_slow_path(ret != NXT_OK)) {
- nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
- goto undo;
- }
-
- ret = nxt_fs_mount(task, &mnt[i]);
- if (nxt_slow_path(ret != NXT_OK)) {
- goto undo;
- }
- }
-
- return NXT_OK;
-
-undo:
-
- n = i + 1;
-
- for (i = 0; i < n; i++) {
- nxt_fs_unmount(mnt[i].dst);
- }
-
- return NXT_ERROR;
-}
-
-#endif
-
-
-#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
-
-static nxt_int_t
-nxt_app_set_isolation_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
- nxt_process_t *process)
-{
- nxt_conf_value_t *obj;
-
- static nxt_str_t new_privs_name = nxt_string("new_privs");
-
- obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
- if (obj != NULL) {
- process->isolation.new_privs = nxt_conf_get_boolean(obj);
- }
-
- return NXT_OK;
-}
-
-#endif
-
-
static u_char *
nxt_cstr_dup(nxt_mp_t *mp, u_char *dst, u_char *src)
{
diff --git a/src/nxt_isolation.c b/src/nxt_isolation.c
new file mode 100644
index 00000000..60de4324
--- /dev/null
+++ b/src/nxt_isolation.c
@@ -0,0 +1,958 @@
+/*
+ * Copyright (C) NGINX, Inc.
+ */
+
+#include <nxt_main.h>
+#include <nxt_application.h>
+#include <nxt_process.h>
+#include <nxt_isolation.h>
+
+#if (NXT_HAVE_PIVOT_ROOT)
+#include <mntent.h>
+#endif
+
+/* Forward declarations of the file-local helpers defined below. */
+static nxt_int_t nxt_isolation_set(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+/* Parsing of the "namespaces" member into clone flags. */
+#if (NXT_HAVE_CLONE)
+static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
+ nxt_conf_value_t *namespaces, nxt_clone_t *clone);
+#endif
+/* Parsing and validation of the "uidmap" and "gidmap" members. */
+#if (NXT_HAVE_CLONE_NEWUSER)
+static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
+ nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
+ nxt_clone_credential_map_t *map);
+static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
+ nxt_process_t *process);
+#endif
+/* Handling of the "rootfs" member: mount list setup and root switching. */
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
+ nxt_process_t *process, nxt_str_t *app_type);
+static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
+ nxt_process_t *process, nxt_array_t *syspaths);
+static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);
+
+#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
+static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
+static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
+ const char *rootfs);
+nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
+#endif
+
+static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
+#endif
+/* Parsing of the "new_privs" member. */
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+#endif
+
+
+/*
+ * Prefork hook for application processes.  Applies the application's
+ * "isolation" settings to "process", builds the language module mount list
+ * when a rootfs is configured, and then either resolves the configured
+ * user/group or, without the setid capability, checks that they match the
+ * ones the runtime already uses.
+ *
+ * Returns NXT_OK on success, otherwise the status of the failing helper or
+ * NXT_ERROR.  The "mp" argument is not used.
+ */
+nxt_int_t
+nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
+    nxt_mp_t *mp)
+{
+    nxt_int_t              rc, can_setid;
+    nxt_runtime_t          *runtime;
+    nxt_common_app_conf_t  *conf;
+
+    runtime = task->thread->runtime;
+    conf = process->data.app;
+    can_setid = runtime->capabilities.setid;
+
+    if (conf->isolation != NULL) {
+        rc = nxt_isolation_set(task, conf->isolation, process);
+        if (nxt_slow_path(rc != NXT_OK)) {
+            return rc;
+        }
+    }
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+    /* A new credential namespace is treated as granting setid. */
+    if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
+        can_setid = 1;
+    }
+#endif
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+    if (process->isolation.rootfs != NULL) {
+        rc = nxt_isolation_set_mounts(task, process, &conf->type);
+        if (nxt_slow_path(rc != NXT_OK)) {
+            return rc;
+        }
+    }
+#endif
+
+    if (!can_setid) {
+        /* Credentials cannot be changed: they must match the runtime's. */
+
+        if (!nxt_str_eq(&conf->user, (u_char *) runtime->user_cred.user,
+                        nxt_strlen(runtime->user_cred.user)))
+        {
+            nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
+                      "missing capabilities", &conf->user, &conf->name);
+
+            return NXT_ERROR;
+        }
+
+        if (conf->group.length > 0
+            && !nxt_str_eq(&conf->group, (u_char *) runtime->group,
+                           nxt_strlen(runtime->group)))
+        {
+            nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
+                      "missing capabilities", &conf->group,
+                      &conf->name);
+
+            return NXT_ERROR;
+        }
+
+    } else {
+        rc = nxt_process_creds_set(task, process, &conf->user, &conf->group);
+
+        if (nxt_slow_path(rc != NXT_OK)) {
+            return rc;
+        }
+    }
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+    rc = nxt_isolation_vldt_creds(task, process);
+    if (nxt_slow_path(rc != NXT_OK)) {
+        return rc;
+    }
+#endif
+
+    return NXT_OK;
+}
+
+/*
+ * Parses the "isolation" configuration object into "process" by running
+ * the namespaces, credential map, rootfs and new_privs steps in that order.
+ * Each step is compiled in only when its feature macro is set.
+ *
+ * Returns NXT_OK when every compiled-in step succeeds and NXT_ERROR as
+ * soon as one of them fails.
+ */
+static nxt_int_t
+nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
+ nxt_process_t *process)
+{
+#if (NXT_HAVE_CLONE)
+ if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+ if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+ if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+ return NXT_OK;
+}
+
+
+#if (NXT_HAVE_CLONE)
+
+/*
+ * Looks up the "namespaces" member of the isolation object and, when it is
+ * present, converts it into clone flags stored in process->isolation.clone.
+ *
+ * Returns NXT_OK when the member is absent or was converted successfully,
+ * NXT_ERROR otherwise.
+ */
+static nxt_int_t
+nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
+    nxt_process_t *process)
+{
+    nxt_int_t         rc;
+    nxt_conf_value_t  *namespaces;
+
+    static nxt_str_t  member = nxt_string("namespaces");
+
+    namespaces = nxt_conf_get_object_member(isolation, &member, NULL);
+    if (namespaces == NULL) {
+        return NXT_OK;
+    }
+
+    rc = nxt_isolation_clone_flags(task, namespaces,
+                                   &process->isolation.clone);
+    if (nxt_slow_path(rc != NXT_OK)) {
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+#endif
+
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+
+/*
+ * Reads the optional "uidmap" and "gidmap" members of the isolation object
+ * into process->isolation.clone.  Map entries are allocated from
+ * process->mem_pool.
+ *
+ * Returns NXT_OK when both members are absent or parsed successfully,
+ * NXT_ERROR otherwise.
+ */
+static nxt_int_t
+nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
+    nxt_process_t *process)
+{
+    nxt_int_t         rc;
+    nxt_clone_t       *target;
+    nxt_conf_value_t  *conf_map;
+
+    static nxt_str_t  uid_member = nxt_string("uidmap");
+    static nxt_str_t  gid_member = nxt_string("gidmap");
+
+    target = &process->isolation.clone;
+
+    conf_map = nxt_conf_get_object_member(isolation, &uid_member, NULL);
+
+    if (conf_map != NULL) {
+        rc = nxt_isolation_credential_map(task, process->mem_pool, conf_map,
+                                          &target->uidmap);
+        if (nxt_slow_path(rc != NXT_OK)) {
+            return NXT_ERROR;
+        }
+    }
+
+    conf_map = nxt_conf_get_object_member(isolation, &gid_member, NULL);
+
+    if (conf_map != NULL) {
+        rc = nxt_isolation_credential_map(task, process->mem_pool, conf_map,
+                                          &target->gidmap);
+        if (nxt_slow_path(rc != NXT_OK)) {
+            return NXT_ERROR;
+        }
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Converts a configuration array of { "container", "host", "size" }
+ * objects into "map".  map->size is set to the number of array elements
+ * and, when that is non-zero, map->map is allocated from "mp" and filled
+ * in element by element.
+ *
+ * Returns NXT_OK on success (including an empty array) and NXT_ERROR when
+ * the allocation or the mapping of any element fails.
+ */
+static nxt_int_t
+nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
+    nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
+{
+    nxt_int_t         rc;
+    nxt_uint_t        pos;
+    nxt_conf_value_t  *element;
+
+    static nxt_conf_map_t  nxt_clone_map_entry_conf[] = {
+        {
+            nxt_string("container"),
+            NXT_CONF_MAP_INT,
+            offsetof(nxt_clone_map_entry_t, container),
+        },
+
+        {
+            nxt_string("host"),
+            NXT_CONF_MAP_INT,
+            offsetof(nxt_clone_map_entry_t, host),
+        },
+
+        {
+            nxt_string("size"),
+            NXT_CONF_MAP_INT,
+            offsetof(nxt_clone_map_entry_t, size),
+        },
+    };
+
+    map->size = nxt_conf_array_elements_count(map_array);
+
+    /* Nothing to allocate or convert for an empty array. */
+    if (map->size == 0) {
+        return NXT_OK;
+    }
+
+    map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
+
+    if (nxt_slow_path(map->map == NULL)) {
+        return NXT_ERROR;
+    }
+
+    pos = 0;
+
+    while (pos < map->size) {
+        element = nxt_conf_get_array_element(map_array, pos);
+
+        rc = nxt_conf_map_object(mp, element, nxt_clone_map_entry_conf,
+                                 nxt_nitems(nxt_clone_map_entry_conf),
+                                 map->map + pos);
+
+        if (nxt_slow_path(rc != NXT_OK)) {
+            nxt_alert(task, "clone map entry map error");
+            return NXT_ERROR;
+        }
+
+        pos++;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Validates the configured uid/gid maps of "process".
+ *
+ * With no maps configured there is nothing to check.  Maps configured
+ * without the credential (NEWUSER) namespace are rejected.  Otherwise both
+ * maps are checked against process->user_cred by the nxt_clone_vldt_*()
+ * helpers.
+ *
+ * Returns NXT_OK when the configuration is acceptable, NXT_ERROR (or the
+ * gidmap validator's status) otherwise.
+ */
+static nxt_int_t
+nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
+{
+    nxt_int_t         rc;
+    nxt_clone_t       *conf;
+    nxt_credential_t  *cred;
+
+    conf = &process->isolation.clone;
+    cred = process->user_cred;
+
+    if (conf->uidmap.size == 0 && conf->gidmap.size == 0) {
+        return NXT_OK;
+    }
+
+    if (nxt_is_clone_flag_set(conf->flags, NEWUSER)) {
+        rc = nxt_clone_vldt_credential_uidmap(task, &conf->uidmap, cred);
+        if (nxt_slow_path(rc != NXT_OK)) {
+            return NXT_ERROR;
+        }
+
+        return nxt_clone_vldt_credential_gidmap(task, &conf->gidmap, cred);
+    }
+
+    /* Maps are meaningless without a credential namespace. */
+
+    if (nxt_slow_path(conf->uidmap.size > 0)) {
+        nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
+                "\"isolation.namespaces.credential\" is false or unset");
+
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(conf->gidmap.size > 0)) {
+        nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
+                "\"isolation.namespaces.credential\" is false or unset");
+
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+#endif
+
+
+#if (NXT_HAVE_CLONE)
+
+/*
+ * Walks the members of the "namespaces" configuration object and ORs the
+ * CLONE_NEW* flag of every member whose value is true into clone->flags.
+ * Only the namespace names whose feature macro is set are recognized.
+ *
+ * Returns NXT_OK on success and NXT_ERROR, after logging an alert, when a
+ * member name is not recognized.
+ */
+static nxt_int_t
+nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
+    nxt_clone_t *clone)
+{
+    uint32_t          cursor;
+    nxt_str_t         name;
+    nxt_int_t         ns_flag;
+    nxt_conf_value_t  *enabled;
+
+    cursor = 0;
+
+    for (enabled = nxt_conf_next_object_member(namespaces, &name, &cursor);
+         enabled != NULL;
+         enabled = nxt_conf_next_object_member(namespaces, &name, &cursor))
+    {
+        ns_flag = 0;
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+        if (nxt_str_eq(&name, "credential", 10)) {
+            ns_flag = CLONE_NEWUSER;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWPID)
+        if (nxt_str_eq(&name, "pid", 3)) {
+            ns_flag = CLONE_NEWPID;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWNET)
+        if (nxt_str_eq(&name, "network", 7)) {
+            ns_flag = CLONE_NEWNET;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWUTS)
+        if (nxt_str_eq(&name, "uname", 5)) {
+            ns_flag = CLONE_NEWUTS;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWNS)
+        if (nxt_str_eq(&name, "mount", 5)) {
+            ns_flag = CLONE_NEWNS;
+        }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWCGROUP)
+        if (nxt_str_eq(&name, "cgroup", 6)) {
+            ns_flag = CLONE_NEWCGROUP;
+        }
+#endif
+
+        if (ns_flag == 0) {
+            nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
+            return NXT_ERROR;
+        }
+
+        /* A member set to false leaves the flag untouched. */
+        if (nxt_conf_get_boolean(enabled)) {
+            clone->flags |= ns_flag;
+        }
+    }
+
+    return NXT_OK;
+}
+
+#endif
+
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+
+/*
+ * Reads the optional "rootfs" member of the isolation object and stores a
+ * NUL-terminated copy, allocated from process->mem_pool, in
+ * process->isolation.rootfs.
+ *
+ * The value must be an absolute path other than "/".  All trailing slashes
+ * are removed before the check, so spellings of the root directory such as
+ * "//" are rejected as well instead of being reduced to "/" after
+ * validation.
+ *
+ * Returns NXT_OK when the member is absent or valid, NXT_ERROR when the
+ * path is rejected or the allocation fails.
+ */
+static nxt_int_t
+nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
+    nxt_process_t *process)
+{
+    nxt_str_t         str, path;
+    nxt_conf_value_t  *obj;
+
+    static nxt_str_t  rootfs_name = nxt_string("rootfs");
+
+    obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
+    if (obj == NULL) {
+        return NXT_OK;
+    }
+
+    nxt_conf_get_string(obj, &str);
+
+    /* "str" is kept untouched so the error message shows the given value. */
+    path = str;
+
+    if (path.length > 0 && path.start[0] == '/') {
+        /* Drop every trailing slash, but never the leading one. */
+        while (path.length > 1 && path.start[path.length - 1] == '/') {
+            path.length--;
+        }
+    }
+
+    if (nxt_slow_path(path.length <= 1 || path.start[0] != '/')) {
+        nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
+                "than \"/\" but given \"%V\"", &str);
+
+        return NXT_ERROR;
+    }
+
+    process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
+                                             path.length + 1);
+
+    if (nxt_slow_path(process->isolation.rootfs == NULL)) {
+        return NXT_ERROR;
+    }
+
+    nxt_memcpy(process->isolation.rootfs, path.start, path.length);
+
+    process->isolation.rootfs[path.length] = '\0';
+
+    return NXT_OK;
+}
+
+
+/*
+ * Prepares the mount list for a process that has a rootfs configured.
+ *
+ * Fails unless the runtime has the chroot capability or the process gets
+ * a new credential (NEWUSER) namespace.  When the language module of
+ * "app_type" declares mounts, they are copied into the process and
+ * nxt_isolation_unmount_all() is registered as its isolation cleanup
+ * handler.
+ *
+ * Returns NXT_OK on success, NXT_ERROR otherwise.
+ */
+static nxt_int_t
+nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
+    nxt_str_t *app_type)
+{
+    nxt_int_t              rc, can_chroot;
+    nxt_runtime_t          *runtime;
+    nxt_app_lang_module_t  *module;
+
+    runtime = task->thread->runtime;
+    can_chroot = runtime->capabilities.chroot;
+    module = nxt_app_lang_module(runtime, app_type);
+
+    nxt_assert(module != NULL);
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+    if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
+        can_chroot = 1;
+    }
+#endif
+
+    if (!can_chroot) {
+        nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
+        return NXT_ERROR;
+    }
+
+    /* Modules without mounts need neither a mount list nor a cleanup. */
+    if (module->mounts == NULL || module->mounts->nelts == 0) {
+        return NXT_OK;
+    }
+
+    rc = nxt_isolation_set_lang_mounts(task, process, module->mounts);
+    if (nxt_slow_path(rc != NXT_OK)) {
+        return NXT_ERROR;
+    }
+
+    process->isolation.cleanup = nxt_isolation_unmount_all;
+
+    return NXT_OK;
+}
+
+
+/*
+ * Copies the language module mount array into process->mem_pool and
+ * rewrites the destination of every copied entry to the concatenation of
+ * process->isolation.rootfs and the module's destination path.
+ *
+ * process->isolation.mounts is assigned only after every entry has been
+ * rewritten.  Returns NXT_OK on success, NXT_ERROR when an allocation
+ * fails.
+ */
+static nxt_int_t
+nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
+    nxt_array_t *lang_mounts)
+{
+    u_char          *end;
+    size_t          idx, count, prefix_len, dst_len;
+    nxt_mp_t        *pool;
+    nxt_array_t     *copy;
+    const u_char    *prefix;
+    nxt_fs_mount_t  *out, *in;
+
+    pool = process->mem_pool;
+    prefix = process->isolation.rootfs;
+    prefix_len = nxt_strlen(prefix);
+
+    /* copy to init mem pool */
+    copy = nxt_array_copy(pool, NULL, lang_mounts);
+    if (copy == NULL) {
+        return NXT_ERROR;
+    }
+
+    count = copy->nelts;
+    out = copy->elts;
+    in = lang_mounts->elts;
+
+    idx = 0;
+
+    while (idx < count) {
+        dst_len = nxt_strlen(in[idx].dst);
+
+        out[idx].dst = nxt_mp_alloc(pool, prefix_len + dst_len + 1);
+        if (out[idx].dst == NULL) {
+            return NXT_ERROR;
+        }
+
+        /* <rootfs><module destination>, NUL-terminated. */
+        end = nxt_cpymem(out[idx].dst, prefix, prefix_len);
+        end = nxt_cpymem(end, in[idx].dst, dst_len);
+        *end = '\0';
+
+        idx++;
+    }
+
+    process->isolation.mounts = copy;
+
+    return NXT_OK;
+}
+
+
+/*
+ * Isolation cleanup handler: calls nxt_fs_unmount() on the destination of
+ * every entry in process->isolation.mounts.
+ *
+ * Defined "static" to match its forward declaration at the top of the
+ * file.  It is only installed by nxt_isolation_set_mounts(), after the
+ * mount array has been assigned.
+ */
+static void
+nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
+{
+    size_t          i, n;
+    nxt_array_t     *mounts;
+    nxt_fs_mount_t  *mnt;
+
+    nxt_debug(task, "unmount all (%s)", process->name);
+
+    mounts = process->isolation.mounts;
+    n = mounts->nelts;
+    mnt = mounts->elts;
+
+    for (i = 0; i < n; i++) {
+        nxt_fs_unmount(mnt[i].dst);
+    }
+}
+
+/*
+ * Mounts every entry of process->isolation.mounts, creating each
+ * destination directory first.
+ *
+ * When both the NEWPID and NEWNS clone flags are set, "proc" is first
+ * mounted on "/proc"; a failure of that step is not fatal.  "bind" entries
+ * whose source path cannot be stat()ed are skipped with a warning.
+ *
+ * Returns NXT_OK on success.  When creating a directory or mounting an
+ * entry fails, nxt_fs_unmount() is called for entries 0..i and NXT_ERROR
+ * is returned.
+ *
+ * NOTE(review): the undo loop also covers entries that were skipped and
+ * the entry that failed - confirm nxt_fs_unmount() tolerates paths that
+ * are not mounted.
+ */
+nxt_int_t
+nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
+{
+ size_t i, n;
+ nxt_int_t ret, hasproc;
+ struct stat st;
+ nxt_array_t *mounts;
+ const u_char *dst;
+ nxt_fs_mount_t *mnt;
+
+ hasproc = 0; /* becomes 1 once "proc" is mounted on "/proc" below */
+
+#if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS)
+ nxt_fs_mount_t mount;
+
+ if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID)
+ && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS))
+ {
+ /*
+ * This mount point will automatically be gone when the namespace is
+ * destroyed.
+ */
+
+ mount.fstype = (u_char *) "proc";
+ mount.src = (u_char *) "proc";
+ mount.dst = (u_char *) "/proc";
+ mount.data = (u_char *) "";
+ mount.flags = 0;
+
+ ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO);
+ if (nxt_fast_path(ret == NXT_OK)) {
+ ret = nxt_fs_mount(task, &mount);
+ if (nxt_fast_path(ret == NXT_OK)) {
+ hasproc = 1;
+ }
+
+ } else {
+ nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno);
+ }
+ }
+#endif
+
+ mounts = process->isolation.mounts;
+
+ n = mounts->nelts;
+ mnt = mounts->elts;
+
+ for (i = 0; i < n; i++) {
+ dst = mnt[i].dst;
+
+ if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0
+ && stat((const char *) mnt[i].src, &st) != 0))
+ {
+ nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
+ continue;
+ }
+
+ if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0
+ && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0) /* NOTE(review): dst is built with the rootfs prefix - confirm this can match */
+ {
+ continue;
+ }
+
+ ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
+ goto undo;
+ }
+
+ ret = nxt_fs_mount(task, &mnt[i]);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ goto undo;
+ }
+ }
+
+ return NXT_OK;
+
+undo:
+
+ n = i + 1; /* the range to unmount includes entry i, the one that failed */
+
+ for (i = 0; i < n; i++) {
+ nxt_fs_unmount(mnt[i].dst);
+ }
+
+ return NXT_ERROR;
+}
+
+
+#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
+
+/*
+ * Switches the root directory of the calling process to
+ * process->isolation.rootfs: through nxt_isolation_pivot_root() when
+ * NXT_CLONE_MNT() holds for the process clone flags, through
+ * nxt_isolation_chroot() otherwise.  On success the working directory is
+ * changed to "/".
+ *
+ * Returns NXT_OK on success, the status of the root switch when it fails,
+ * or NXT_ERROR when chdir("/") fails.
+ */
+nxt_int_t
+nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
+{
+    char       *new_root;
+    nxt_int_t  rc;
+
+    new_root = (char *) process->isolation.rootfs;
+
+    nxt_debug(task, "change root: %s", new_root);
+
+    rc = (NXT_CLONE_MNT(process->isolation.clone.flags))
+         ? nxt_isolation_pivot_root(task, new_root)
+         : nxt_isolation_chroot(task, new_root);
+
+    if (nxt_slow_path(rc != NXT_OK)) {
+        return rc;
+    }
+
+    if (nxt_slow_path(chdir("/") < 0)) {
+        nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    return rc;
+}
+
+
+/*
+ * pivot_root(2) can only be safely used with containers, otherwise it can
+ * umount(2) the global root filesystem and screw up the machine.
+ *
+ * Makes "path" the root filesystem of the calling process: "/" is made a
+ * recursive slave mount, the mount holding "path" is made private, "path"
+ * is bind mounted onto itself and entered, the root is pivoted in place,
+ * and the old root is finally detached.
+ *
+ * Returns NXT_OK on success.  On the first failing step an alert is logged
+ * and NXT_ERROR is returned; steps already done are not rolled back.
+ */
+
+static nxt_int_t
+nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
+{
+    /*
+     * This implementation makes use of a kernel trick that has worked for
+     * ages and is now documented as of Linux kernel 5.
+     * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
+     */
+
+    if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
+        nxt_alert(task, "failed to make / a slave mount %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
+        nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(chdir(path) != 0)) {
+        nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
+        return NXT_ERROR;
+    }
+
+    /* Old and new root share "."; the old root is detached below. */
+    if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
+        nxt_alert(task, "failed to pivot_root %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    /*
+     * Make oldroot a slave mount to avoid unmounts getting propagated to the
+     * host.
+     */
+    if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
+        nxt_alert(task, "failed to make old root a slave mount %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
+        nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Finds the mount point that contains "rootfs" and, if it is listed with the
+ * "shared" option, remounts it as MS_PRIVATE.
+ *
+ * The mount table is read from /proc/self/mounts.  The mount point is either
+ * "rootfs" itself or the closest ancestor directory that appears in the
+ * table; ancestors are tried by stripping one trailing path component at a
+ * time.
+ * NOTE(review): presumably needed because pivot_root(2) rejects a shared
+ * parent mount - confirm against the man page.
+ *
+ * Returns NXT_OK on success (including when nothing had to be changed) and
+ * NXT_ERROR on allocation failure, when the mount table cannot be opened,
+ * when no containing mount is found, or when mount(2) fails.
+ */
+static nxt_int_t
+nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
+{
+    char           *parent_mnt;
+    FILE           *procfile;
+    void           *p;
+    u_char         **mounts;
+    size_t         len;
+    uint8_t        *shared;
+    nxt_int_t      ret, index, nmounts, capacity;
+    struct mntent  *ent;
+
+    static const char  *mount_path = "/proc/self/mounts";
+
+    ret = NXT_ERROR;
+    shared = NULL;
+    procfile = NULL;
+    parent_mnt = NULL;
+
+    /*
+     * "nmounts" counts only the entries that have been filled in, so the
+     * cleanup code below never frees an uninitialized pointer.
+     */
+    nmounts = 0;
+    capacity = 256;
+
+    mounts = nxt_malloc(capacity * sizeof(u_char *));
+    if (nxt_slow_path(mounts == NULL)) {
+        goto fail;
+    }
+
+    shared = nxt_malloc(capacity);
+    if (nxt_slow_path(shared == NULL)) {
+        goto fail;
+    }
+
+    procfile = setmntent(mount_path, "r");
+    if (nxt_slow_path(procfile == NULL)) {
+        nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
+
+        goto fail;
+    }
+
+    for ( ;; ) {
+        ent = getmntent(procfile);
+        if (ent == NULL) {
+            break;
+        }
+
+        if (nmounts == capacity) {
+            capacity *= 2;
+
+            /* Keep the old pointers valid if the reallocation fails. */
+            p = nxt_realloc(mounts, capacity * sizeof(u_char *));
+            if (nxt_slow_path(p == NULL)) {
+                goto fail;
+            }
+
+            mounts = p;
+
+            p = nxt_realloc(shared, capacity);
+            if (nxt_slow_path(p == NULL)) {
+                goto fail;
+            }
+
+            shared = p;
+        }
+
+        /* getmntent() reuses its buffer, so the path has to be copied. */
+        mounts[nmounts] = (u_char *) strdup(ent->mnt_dir);
+        if (nxt_slow_path(mounts[nmounts] == NULL)) {
+            goto fail;
+        }
+
+        shared[nmounts] = hasmntopt(ent, "shared") != NULL;
+        nmounts++;
+    }
+
+    /* Fast case: rootfs is a mount point itself. */
+    for (index = 0; index < nmounts; index++) {
+        if (nxt_strcmp(mounts[index], rootfs) == 0) {
+            parent_mnt = (char *) rootfs;
+            break;
+        }
+    }
+
+    if (parent_mnt == NULL) {
+        len = nxt_strlen(rootfs);
+
+        parent_mnt = nxt_malloc(len + 1);
+        if (parent_mnt == NULL) {
+            goto fail;
+        }
+
+        nxt_memcpy(parent_mnt, rootfs, len);
+        parent_mnt[len] = '\0';
+
+        /* Drop one trailing slash, but never reduce "/" to "". */
+        if (len > 1 && parent_mnt[len - 1] == '/') {
+            parent_mnt[len - 1] = '\0';
+            len--;
+        }
+
+        for ( ;; ) {
+            for (index = 0; index < nmounts; index++) {
+                if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
+                    goto found;
+                }
+            }
+
+            if (len == 1 && parent_mnt[0] == '/') {
+                nxt_alert(task, "parent mount not found");
+                goto fail;
+            }
+
+            /* parent dir */
+            while (len > 0 && parent_mnt[len - 1] != '/') {
+                len--;
+            }
+
+            if (nxt_slow_path(len == 0)) {
+                nxt_alert(task, "parent mount not found");
+                goto fail;
+            }
+
+            if (len == 1) {
+                parent_mnt[len] = '\0';        /* / */
+            } else {
+                parent_mnt[len - 1] = '\0';    /* /<path> */
+            }
+        }
+    }
+
+found:
+
+    if (shared[index]) {
+        if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
+            nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
+                      nxt_errno);
+
+            goto fail;
+        }
+    }
+
+    ret = NXT_OK;
+
+fail:
+
+    if (procfile != NULL) {
+        endmntent(procfile);
+    }
+
+    if (mounts != NULL) {
+        for (index = 0; index < nmounts; index++) {
+            nxt_free(mounts[index]);
+        }
+
+        nxt_free(mounts);
+    }
+
+    if (shared != NULL) {
+        nxt_free(shared);
+    }
+
+    /* parent_mnt aliases rootfs when rootfs was found directly. */
+    if (parent_mnt != NULL && parent_mnt != rootfs) {
+        nxt_free(parent_mnt);
+    }
+
+    return ret;
+}
+
+
+/*
+ * Invokes the pivot_root system call directly through syscall() and
+ * returns its result.
+ */
+nxt_inline int
+nxt_pivot_root(const char *new_root, const char *old_root)
+{
+    long  rc;
+
+    rc = syscall(__NR_pivot_root, new_root, old_root);
+
+    return rc;
+}
+
+
+#else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
+
+
+/*
+ * Variant for builds where the pivot_root branch is compiled out: changes
+ * the root to process->isolation.rootfs with nxt_isolation_chroot() and
+ * then moves the working directory to the new "/".
+ *
+ * Returns NXT_OK on success and NXT_ERROR if either step fails.
+ */
+nxt_int_t
+nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
+{
+    char  *new_root;
+
+    new_root = (char *) process->isolation.rootfs;
+
+    nxt_debug(task, "change root: %s", new_root);
+
+    if (nxt_slow_path(nxt_isolation_chroot(task, new_root) != NXT_OK)) {
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(chdir("/") < 0)) {
+        nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+#endif
+
+
+/*
+ * Calls chroot(2) on "path".  Returns NXT_OK on success; on failure logs an
+ * alert with the path and errno and returns NXT_ERROR.
+ */
+static nxt_int_t
+nxt_isolation_chroot(nxt_task_t *task, const char *path)
+{
+    if (nxt_fast_path(chroot(path) >= 0)) {
+        return NXT_OK;
+    }
+
+    nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
+
+    return NXT_ERROR;
+}
+
+#endif /* NXT_HAVE_ISOLATION_ROOTFS */
+
+
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+
+/*
+ * Copies the boolean "new_privs" member of the "isolation" configuration
+ * object, when present, into process->isolation.new_privs.  A missing
+ * member leaves the field untouched.  Always returns NXT_OK.
+ */
+static nxt_int_t
+nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
+    nxt_process_t *process)
+{
+    nxt_conf_value_t  *member;
+
+    static nxt_str_t  name = nxt_string("new_privs");
+
+    member = nxt_conf_get_object_member(isolation, &name, NULL);
+    if (member == NULL) {
+        return NXT_OK;
+    }
+
+    process->isolation.new_privs = nxt_conf_get_boolean(member);
+
+    return NXT_OK;
+}
+
+#endif
diff --git a/src/nxt_isolation.h b/src/nxt_isolation.h
new file mode 100644
index 00000000..88a5f9e1
--- /dev/null
+++ b/src/nxt_isolation.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) NGINX, Inc.
+ */
+
+#ifndef _NXT_ISOLATION_H_
+#define _NXT_ISOLATION_H_
+
+
+/*
+ * Public entry points of nxt_isolation.c.
+ * NOTE(review): nxt_isolation_main_prefork() appears to replace the former
+ * static nxt_app_main_prefork() of nxt_application.c - confirm.
+ */
+nxt_int_t nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
+ nxt_mp_t *mp);
+
+/* Root filesystem helpers; declared only with NXT_HAVE_ISOLATION_ROOTFS. */
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+nxt_int_t nxt_isolation_prepare_rootfs(nxt_task_t *task,
+ nxt_process_t *process);
+/*
+ * Changes the root of the calling process to process->isolation.rootfs and
+ * then chdir()s to "/"; returns NXT_OK on success.
+ */
+nxt_int_t nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process);
+#endif
+
+#endif /* _NXT_ISOLATION_H_ */
diff --git a/src/nxt_main_process.c b/src/nxt_main_process.c
index 48eb2abb..a7b84b73 100644
--- a/src/nxt_main_process.c
+++ b/src/nxt_main_process.c
@@ -878,11 +878,9 @@ nxt_main_cleanup_process(nxt_task_t *task, nxt_pid_t pid)
return;
}
-#if (NXT_HAVE_ISOLATION_ROOTFS)
- if (process->isolation.rootfs != NULL && process->isolation.mounts) {
- (void) nxt_process_unmount_all(task, process);
+ if (process->isolation.cleanup != NULL) {
+ process->isolation.cleanup(task, process);
}
-#endif
name = process->name;
stream = process->stream;
diff --git a/src/nxt_process.c b/src/nxt_process.c
index 9bfae395..9be7974f 100644
--- a/src/nxt_process.c
+++ b/src/nxt_process.c
@@ -17,10 +17,6 @@
#include <sys/prctl.h>
#endif
-#if (NXT_HAVE_PIVOT_ROOT)
-#include <mntent.h>
-#endif
-
static nxt_int_t nxt_process_setup(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_process_child_fixup(nxt_task_t *task,
nxt_process_t *process);
@@ -33,16 +29,6 @@ static void nxt_process_created_ok(nxt_task_t *task, nxt_port_recv_msg_t *msg,
static void nxt_process_created_error(nxt_task_t *task,
nxt_port_recv_msg_t *msg, void *data);
-#if (NXT_HAVE_ISOLATION_ROOTFS)
-static nxt_int_t nxt_process_chroot(nxt_task_t *task, const char *path);
-
-#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
-static nxt_int_t nxt_process_pivot_root(nxt_task_t *task, const char *rootfs);
-static nxt_int_t nxt_process_private_mount(nxt_task_t *task,
- const char *rootfs);
-static int nxt_pivot_root(const char *new_root, const char *old_root);
-#endif
-#endif
/* A cached process pid. */
nxt_pid_t nxt_pid;
@@ -398,51 +384,6 @@ nxt_process_core_setup(nxt_task_t *task, nxt_process_t *process)
}
-#if (NXT_HAVE_CLONE_NEWUSER)
-
-nxt_int_t
-nxt_process_vldt_isolation_creds(nxt_task_t *task, nxt_process_t *process)
-{
- nxt_int_t ret;
- nxt_clone_t *clone;
- nxt_credential_t *creds;
-
- clone = &process->isolation.clone;
- creds = process->user_cred;
-
- if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
- return NXT_OK;
- }
-
- if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
- if (nxt_slow_path(clone->uidmap.size > 0)) {
- nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
- "\"isolation.namespaces.credential\" is false or unset");
-
- return NXT_ERROR;
- }
-
- if (nxt_slow_path(clone->gidmap.size > 0)) {
- nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
- "\"isolation.namespaces.credential\" is false or unset");
-
- return NXT_ERROR;
- }
-
- return NXT_OK;
- }
-
- ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
- if (nxt_slow_path(ret != NXT_OK)) {
- return NXT_ERROR;
- }
-
- return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
-}
-
-#endif
-
-
nxt_int_t
nxt_process_creds_set(nxt_task_t *task, nxt_process_t *process, nxt_str_t *user,
nxt_str_t *group)
@@ -525,329 +466,6 @@ nxt_process_apply_creds(nxt_task_t *task, nxt_process_t *process)
}
-#if (NXT_HAVE_ISOLATION_ROOTFS)
-
-
-#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
-
-
-nxt_int_t
-nxt_process_change_root(nxt_task_t *task, nxt_process_t *process)
-{
- char *rootfs;
- nxt_int_t ret;
-
- rootfs = (char *) process->isolation.rootfs;
-
- nxt_debug(task, "change root: %s", rootfs);
-
- if (NXT_CLONE_MNT(process->isolation.clone.flags)) {
- ret = nxt_process_pivot_root(task, rootfs);
- } else {
- ret = nxt_process_chroot(task, rootfs);
- }
-
- if (nxt_fast_path(ret == NXT_OK)) {
- if (nxt_slow_path(chdir("/") < 0)) {
- nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
- return NXT_ERROR;
- }
- }
-
- return ret;
-}
-
-
-#else
-
-
-nxt_int_t
-nxt_process_change_root(nxt_task_t *task, nxt_process_t *process)
-{
- char *rootfs;
-
- rootfs = (char *) process->isolation.rootfs;
-
- nxt_debug(task, "change root: %s", rootfs);
-
- if (nxt_fast_path(nxt_process_chroot(task, rootfs) == NXT_OK)) {
- if (nxt_slow_path(chdir("/") < 0)) {
- nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
- return NXT_ERROR;
- }
-
- return NXT_OK;
- }
-
- return NXT_ERROR;
-}
-
-
-#endif
-
-
-static nxt_int_t
-nxt_process_chroot(nxt_task_t *task, const char *path)
-{
- if (nxt_slow_path(chroot(path) < 0)) {
- nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
- return NXT_ERROR;
- }
-
- return NXT_OK;
-}
-
-
-void
-nxt_process_unmount_all(nxt_task_t *task, nxt_process_t *process)
-{
- size_t i, n;
- nxt_array_t *mounts;
- nxt_fs_mount_t *mnt;
-
- nxt_debug(task, "unmount all (%s)", process->name);
-
- mounts = process->isolation.mounts;
- n = mounts->nelts;
- mnt = mounts->elts;
-
- for (i = 0; i < n; i++) {
- nxt_fs_unmount(mnt[i].dst);
- }
-}
-
-
-#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
-
-/*
- * pivot_root(2) can only be safely used with containers, otherwise it can
- * umount(2) the global root filesystem and screw up the machine.
- */
-
-static nxt_int_t
-nxt_process_pivot_root(nxt_task_t *task, const char *path)
-{
- /*
- * This implementation makes use of a kernel trick that works for ages
- * and now documented in Linux kernel 5.
- * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
- */
-
- if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
- nxt_alert(task, "failed to make / a slave mount %E", nxt_errno);
- return NXT_ERROR;
- }
-
- if (nxt_slow_path(nxt_process_private_mount(task, path) != NXT_OK)) {
- return NXT_ERROR;
- }
-
- if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
- nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
- return NXT_ERROR;
- }
-
- if (nxt_slow_path(chdir(path) != 0)) {
- nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
- return NXT_ERROR;
- }
-
- if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
- nxt_alert(task, "failed to pivot_root %E", nxt_errno);
- return NXT_ERROR;
- }
-
- /*
- * Make oldroot a slave mount to avoid unmounts getting propagated to the
- * host.
- */
- if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
- nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
- return NXT_ERROR;
- }
-
- if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
- nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
- return NXT_ERROR;
- }
-
- return NXT_OK;
-}
-
-
-static nxt_int_t
-nxt_process_private_mount(nxt_task_t *task, const char *rootfs)
-{
- char *parent_mnt;
- FILE *procfile;
- u_char **mounts;
- size_t len;
- uint8_t *shared;
- nxt_int_t ret, index, nmounts;
- struct mntent *ent;
-
- static const char *mount_path = "/proc/self/mounts";
-
- ret = NXT_ERROR;
- ent = NULL;
- shared = NULL;
- procfile = NULL;
- parent_mnt = NULL;
-
- nmounts = 256;
-
- mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
- if (nxt_slow_path(mounts == NULL)) {
- goto fail;
- }
-
- shared = nxt_malloc(nmounts);
- if (nxt_slow_path(shared == NULL)) {
- goto fail;
- }
-
- procfile = setmntent(mount_path, "r");
- if (nxt_slow_path(procfile == NULL)) {
- nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
-
- goto fail;
- }
-
- index = 0;
-
-again:
-
- for ( ; index < nmounts; index++) {
- ent = getmntent(procfile);
- if (ent == NULL) {
- nmounts = index;
- break;
- }
-
- mounts[index] = (u_char *) strdup(ent->mnt_dir);
- shared[index] = hasmntopt(ent, "shared") != NULL;
- }
-
- if (ent != NULL) {
- /* there are still entries to be read */
-
- nmounts *= 2;
- mounts = nxt_realloc(mounts, nmounts);
- if (nxt_slow_path(mounts == NULL)) {
- goto fail;
- }
-
- shared = nxt_realloc(shared, nmounts);
- if (nxt_slow_path(shared == NULL)) {
- goto fail;
- }
-
- goto again;
- }
-
- for (index = 0; index < nmounts; index++) {
- if (nxt_strcmp(mounts[index], rootfs) == 0) {
- parent_mnt = (char *) rootfs;
- break;
- }
- }
-
- if (parent_mnt == NULL) {
- len = nxt_strlen(rootfs);
-
- parent_mnt = nxt_malloc(len + 1);
- if (parent_mnt == NULL) {
- goto fail;
- }
-
- nxt_memcpy(parent_mnt, rootfs, len);
- parent_mnt[len] = '\0';
-
- if (parent_mnt[len - 1] == '/') {
- parent_mnt[len - 1] = '\0';
- len--;
- }
-
- for ( ;; ) {
- for (index = 0; index < nmounts; index++) {
- if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
- goto found;
- }
- }
-
- if (len == 1 && parent_mnt[0] == '/') {
- nxt_alert(task, "parent mount not found");
- goto fail;
- }
-
- /* parent dir */
- while (parent_mnt[len - 1] != '/' && len > 0) {
- len--;
- }
-
- if (nxt_slow_path(len == 0)) {
- nxt_alert(task, "parent mount not found");
- goto fail;
- }
-
- if (len == 1) {
- parent_mnt[len] = '\0'; /* / */
- } else {
- parent_mnt[len - 1] = '\0'; /* /<path> */
- }
- }
- }
-
-found:
-
- if (shared[index]) {
- if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
- nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
- nxt_errno);
-
- goto fail;
- }
- }
-
- ret = NXT_OK;
-
-fail:
-
- if (procfile != NULL) {
- endmntent(procfile);
- }
-
- if (mounts != NULL) {
- for (index = 0; index < nmounts; index++) {
- nxt_free(mounts[index]);
- }
-
- nxt_free(mounts);
- }
-
- if (shared != NULL) {
- nxt_free(shared);
- }
-
- if (parent_mnt != NULL && parent_mnt != rootfs) {
- nxt_free(parent_mnt);
- }
-
- return ret;
-}
-
-
-static int
-nxt_pivot_root(const char *new_root, const char *old_root)
-{
- return syscall(__NR_pivot_root, new_root, old_root);
-}
-
-#endif
-
-#endif
-
-
static nxt_int_t
nxt_process_send_ready(nxt_task_t *task, nxt_process_t *process)
{
diff --git a/src/nxt_process.h b/src/nxt_process.h
index ecd813e2..f2383687 100644
--- a/src/nxt_process.h
+++ b/src/nxt_process.h
@@ -60,6 +60,9 @@ typedef enum {
typedef struct nxt_port_mmap_s nxt_port_mmap_t;
+typedef struct nxt_process_s nxt_process_t;
+typedef void (*nxt_isolation_cleanup_t)(nxt_task_t *task,
+ nxt_process_t *process);
typedef struct {
@@ -73,6 +76,8 @@ typedef struct {
u_char *rootfs;
nxt_array_t *mounts; /* of nxt_mount_t */
+ nxt_isolation_cleanup_t cleanup;
+
#if (NXT_HAVE_CLONE)
nxt_clone_t clone;
#endif
@@ -83,7 +88,7 @@ typedef struct {
} nxt_process_isolation_t;
-typedef struct {
+struct nxt_process_s {
nxt_pid_t pid;
const char *name;
nxt_queue_t ports; /* of nxt_port_t */
@@ -103,7 +108,7 @@ typedef struct {
nxt_process_data_t data;
nxt_process_isolation_t isolation;
-} nxt_process_t;
+};
typedef nxt_int_t (*nxt_process_prefork_t)(nxt_task_t *task,
@@ -178,17 +183,6 @@ nxt_int_t nxt_process_creds_set(nxt_task_t *task, nxt_process_t *process,
nxt_str_t *user, nxt_str_t *group);
nxt_int_t nxt_process_apply_creds(nxt_task_t *task, nxt_process_t *process);
-#if (NXT_HAVE_CLONE_NEWUSER)
-nxt_int_t nxt_process_vldt_isolation_creds(nxt_task_t *task,
- nxt_process_t *process);
-#endif
-
-nxt_int_t nxt_process_change_root(nxt_task_t *task, nxt_process_t *process);
-
-#if (NXT_HAVE_ISOLATION_ROOTFS)
-void nxt_process_unmount_all(nxt_task_t *task, nxt_process_t *process);
-#endif
-
#if (NXT_HAVE_SETPROCTITLE)
#define nxt_process_title(task, fmt, ...) \