diff options
author | Tiago Natel de Moura <t.nateldemoura@f5.com> | 2020-08-20 15:22:58 +0100 |
---|---|---|
committer | Tiago Natel de Moura <t.nateldemoura@f5.com> | 2020-08-20 15:22:58 +0100 |
commit | a8a7eeb1fc7aada17d0d8fe8e15d325525986937 (patch) | |
tree | eb438829169ed66ced4c7c2a793a19a04fa2bc59 | |
parent | 9bf6efc55ac9678ad386fd1a9d420a3b75e9ab70 (diff) | |
download | unit-a8a7eeb1fc7aada17d0d8fe8e15d325525986937.tar.gz unit-a8a7eeb1fc7aada17d0d8fe8e15d325525986937.tar.bz2 |
Moved isolation related code to "nxt_isolation.c".
-rw-r--r-- | auto/sources | 1 | ||||
-rw-r--r-- | src/nxt_application.c | 585 | ||||
-rw-r--r-- | src/nxt_isolation.c | 958 | ||||
-rw-r--r-- | src/nxt_isolation.h | 18 | ||||
-rw-r--r-- | src/nxt_main_process.c | 6 | ||||
-rw-r--r-- | src/nxt_process.c | 382 | ||||
-rw-r--r-- | src/nxt_process.h | 20 |
7 files changed, 990 insertions, 980 deletions
diff --git a/auto/sources b/auto/sources index a61577dc..e44dc4bb 100644 --- a/auto/sources +++ b/auto/sources @@ -14,6 +14,7 @@ NXT_LIB_SRCS=" \ src/nxt_socket.c \ src/nxt_socketpair.c \ src/nxt_credential.c \ + src/nxt_isolation.c \ src/nxt_process.c \ src/nxt_process_title.c \ src/nxt_signal.c \ diff --git a/src/nxt_application.c b/src/nxt_application.c index 57e4615e..6935346c 100644 --- a/src/nxt_application.c +++ b/src/nxt_application.c @@ -14,6 +14,7 @@ #include <nxt_application.h> #include <nxt_unit.h> #include <nxt_port_memory_int.h> +#include <nxt_isolation.h> #include <glob.h> @@ -41,45 +42,10 @@ static void nxt_discovery_quit(nxt_task_t *task, nxt_port_recv_msg_t *msg, void *data); static nxt_app_module_t *nxt_app_module_load(nxt_task_t *task, const char *name); -static nxt_int_t nxt_app_main_prefork(nxt_task_t *task, nxt_process_t *process, - nxt_mp_t *mp); static nxt_int_t nxt_app_setup(nxt_task_t *task, nxt_process_t *process); static nxt_int_t nxt_app_set_environment(nxt_conf_value_t *environment); static u_char *nxt_cstr_dup(nxt_mp_t *mp, u_char *dst, u_char *src); -#if (NXT_HAVE_ISOLATION_ROOTFS) -static nxt_int_t nxt_app_set_isolation_mounts(nxt_task_t *task, - nxt_process_t *process, nxt_str_t *app_type); -static nxt_int_t nxt_app_set_lang_mounts(nxt_task_t *task, - nxt_process_t *process, nxt_array_t *syspaths); -static nxt_int_t nxt_app_set_isolation_rootfs(nxt_task_t *task, - nxt_conf_value_t *isolation, nxt_process_t *process); -static nxt_int_t nxt_app_prepare_rootfs(nxt_task_t *task, - nxt_process_t *process); -#endif - -static nxt_int_t nxt_app_set_isolation(nxt_task_t *task, - nxt_conf_value_t *isolation, nxt_process_t *process); - -#if (NXT_HAVE_CLONE) -static nxt_int_t nxt_app_set_isolation_namespaces(nxt_task_t *task, - nxt_conf_value_t *isolation, nxt_process_t *process); -static nxt_int_t nxt_app_clone_flags(nxt_task_t *task, - nxt_conf_value_t *namespaces, nxt_clone_t *clone); -#endif - -#if (NXT_HAVE_CLONE_NEWUSER) -static nxt_int_t nxt_app_set_isolation_creds(nxt_task_t *task, - nxt_conf_value_t *isolation, nxt_process_t *process); -static nxt_int_t nxt_app_isolation_credential_map(nxt_task_t *task, - nxt_mp_t *mem_pool, nxt_conf_value_t *map_array, - nxt_clone_credential_map_t *map); -#endif - -#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) -static nxt_int_t nxt_app_set_isolation_new_privs(nxt_task_t *task, - nxt_conf_value_t *isolation, nxt_process_t *process); -#endif nxt_str_t nxt_server = nxt_string(NXT_SERVER); @@ -126,7 +92,7 @@ const nxt_process_init_t nxt_discovery_process = { const nxt_process_init_t nxt_app_process = { .type = NXT_PROCESS_APP, .setup = nxt_app_setup, - .prefork = nxt_app_main_prefork, + .prefork = nxt_isolation_main_prefork, .restart = 0, .start = NULL, /* set to module->start */ .port_handlers = &nxt_app_process_port_handlers, @@ -474,81 +440,6 @@ nxt_discovery_quit(nxt_task_t *task, nxt_port_recv_msg_t *msg, void *data) static nxt_int_t -nxt_app_main_prefork(nxt_task_t *task, nxt_process_t *process, nxt_mp_t *mp) -{ - nxt_int_t cap_setid; - nxt_int_t ret; - nxt_runtime_t *rt; - nxt_common_app_conf_t *app_conf; - - rt = task->thread->runtime; - app_conf = process->data.app; - cap_setid = rt->capabilities.setid; - - if (app_conf->isolation != NULL) { - ret = nxt_app_set_isolation(task, app_conf->isolation, process); - if (nxt_slow_path(ret != NXT_OK)) { - return ret; - } - } - -#if (NXT_HAVE_CLONE_NEWUSER) - if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { - cap_setid = 1; - } -#endif - -#if (NXT_HAVE_ISOLATION_ROOTFS) - if (process->isolation.rootfs != NULL) { - ret = nxt_app_set_isolation_mounts(task, process, &app_conf->type); - if (nxt_slow_path(ret != NXT_OK)) { - return ret; - } - } -#endif - - if (cap_setid) { - ret = nxt_process_creds_set(task, process, &app_conf->user, - &app_conf->group); - - if (nxt_slow_path(ret != NXT_OK)) { - return ret; - } - - } else { - if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, - nxt_strlen(rt->user_cred.user))) - { - nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " - "missing capabilities", &app_conf->user, &app_conf->name); - - return NXT_ERROR; - } - - if (app_conf->group.length > 0 - && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, - nxt_strlen(rt->group))) - { - nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " - "missing capabilities", &app_conf->group, - &app_conf->name); - - return NXT_ERROR; - } - } - -#if (NXT_HAVE_CLONE_NEWUSER) - ret = nxt_process_vldt_isolation_creds(task, process); - if (nxt_slow_path(ret != NXT_OK)) { - return ret; - } -#endif - - return NXT_OK; -} - - -static nxt_int_t nxt_app_setup(nxt_task_t *task, nxt_process_t *process) { nxt_int_t ret; @@ -594,13 +485,13 @@ nxt_app_setup(nxt_task_t *task, nxt_process_t *process) #if (NXT_HAVE_ISOLATION_ROOTFS) if (process->isolation.rootfs != NULL) { if (process->isolation.mounts != NULL) { - ret = nxt_app_prepare_rootfs(task, process); + ret = nxt_isolation_prepare_rootfs(task, process); if (nxt_slow_path(ret != NXT_OK)) { return ret; } } - ret = nxt_process_change_root(task, process); + ret = nxt_isolation_change_root(task, process); if (nxt_slow_path(ret != NXT_OK)) { return NXT_ERROR; } @@ -686,474 +577,6 @@ nxt_app_set_environment(nxt_conf_value_t *environment) } -static nxt_int_t -nxt_app_set_isolation(nxt_task_t *task, nxt_conf_value_t *isolation, - nxt_process_t *process) -{ -#if (NXT_HAVE_CLONE) - if (nxt_slow_path(nxt_app_set_isolation_namespaces(task, isolation, process) - != NXT_OK)) - { - return NXT_ERROR; - } -#endif - -#if (NXT_HAVE_CLONE_NEWUSER) - if (nxt_slow_path(nxt_app_set_isolation_creds(task, isolation, process) - != NXT_OK)) - { - return NXT_ERROR; - } -#endif - -#if (NXT_HAVE_ISOLATION_ROOTFS) - if (nxt_slow_path(nxt_app_set_isolation_rootfs(task, isolation, process) - != NXT_OK)) - { - return NXT_ERROR; - } -#endif - -#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) - if (nxt_slow_path(nxt_app_set_isolation_new_privs(task, isolation, process) - != NXT_OK)) - { - return NXT_ERROR; - } -#endif - - return NXT_OK; -} - - -#if (NXT_HAVE_CLONE) - -static nxt_int_t -nxt_app_set_isolation_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation, - nxt_process_t *process) -{ - nxt_int_t ret; - nxt_conf_value_t *obj; - - static nxt_str_t nsname = nxt_string("namespaces"); - - obj = nxt_conf_get_object_member(isolation, &nsname, NULL); - if (obj != NULL) { - ret = nxt_app_clone_flags(task, obj, &process->isolation.clone); - if (nxt_slow_path(ret != NXT_OK)) { - return NXT_ERROR; - } - } - - return NXT_OK; -} - -#endif - - -#if (NXT_HAVE_CLONE_NEWUSER) - -static nxt_int_t -nxt_app_set_isolation_creds(nxt_task_t *task, nxt_conf_value_t *isolation, - nxt_process_t *process) -{ - nxt_int_t ret; - nxt_clone_t *clone; - nxt_conf_value_t *array; - - static nxt_str_t uidname = nxt_string("uidmap"); - static nxt_str_t gidname = nxt_string("gidmap"); - - clone = &process->isolation.clone; - - array = nxt_conf_get_object_member(isolation, &uidname, NULL); - if (array != NULL) { - ret = nxt_app_isolation_credential_map(task, process->mem_pool, array, - &clone->uidmap); - - if (nxt_slow_path(ret != NXT_OK)) { - return NXT_ERROR; - } - } - - array = nxt_conf_get_object_member(isolation, &gidname, NULL); - if (array != NULL) { - ret = nxt_app_isolation_credential_map(task, process->mem_pool, array, - &clone->gidmap); - - if (nxt_slow_path(ret != NXT_OK)) { - return NXT_ERROR; - } - } - - return NXT_OK; -} - - -static nxt_int_t -nxt_app_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp, - nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map) -{ - nxt_int_t ret; - nxt_uint_t i; - nxt_conf_value_t *obj; - - static nxt_conf_map_t nxt_clone_map_entry_conf[] = { - { - nxt_string("container"), - NXT_CONF_MAP_INT, - offsetof(nxt_clone_map_entry_t, container), - }, - - { - nxt_string("host"), - NXT_CONF_MAP_INT, - offsetof(nxt_clone_map_entry_t, host), - }, - - { - nxt_string("size"), - NXT_CONF_MAP_INT, - offsetof(nxt_clone_map_entry_t, size), - }, - }; - - map->size = nxt_conf_array_elements_count(map_array); - - if (map->size == 0) { - return NXT_OK; - } - - map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t)); - if (nxt_slow_path(map->map == NULL)) { - return NXT_ERROR; - } - - for (i = 0; i < map->size; i++) { - obj = nxt_conf_get_array_element(map_array, i); - - ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf, - nxt_nitems(nxt_clone_map_entry_conf), - map->map + i); - if (nxt_slow_path(ret != NXT_OK)) { - nxt_alert(task, "clone map entry map error"); - return NXT_ERROR; - } - } - - return NXT_OK; -} - -#endif - -#if (NXT_HAVE_CLONE) - -static nxt_int_t -nxt_app_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces, - nxt_clone_t *clone) -{ - uint32_t index; - nxt_str_t name; - nxt_int_t flag; - nxt_conf_value_t *value; - - index = 0; - - for ( ;; ) { - value = nxt_conf_next_object_member(namespaces, &name, &index); - - if (value == NULL) { - break; - } - - flag = 0; - -#if (NXT_HAVE_CLONE_NEWUSER) - if (nxt_str_eq(&name, "credential", 10)) { - flag = CLONE_NEWUSER; - } -#endif - -#if (NXT_HAVE_CLONE_NEWPID) - if (nxt_str_eq(&name, "pid", 3)) { - flag = CLONE_NEWPID; - } -#endif - -#if (NXT_HAVE_CLONE_NEWNET) - if (nxt_str_eq(&name, "network", 7)) { - flag = CLONE_NEWNET; - } -#endif - -#if (NXT_HAVE_CLONE_NEWUTS) - if (nxt_str_eq(&name, "uname", 5)) { - flag = CLONE_NEWUTS; - } -#endif - -#if (NXT_HAVE_CLONE_NEWNS) - if (nxt_str_eq(&name, "mount", 5)) { - flag = CLONE_NEWNS; - } -#endif - -#if (NXT_HAVE_CLONE_NEWCGROUP) - if (nxt_str_eq(&name, "cgroup", 6)) { - flag = CLONE_NEWCGROUP; - } -#endif - - if (!flag) { - nxt_alert(task, "unknown namespace flag: \"%V\"", &name); - return NXT_ERROR; - } - - if (nxt_conf_get_boolean(value)) { - clone->flags |= flag; - } - } - - return NXT_OK; -} - -#endif - - -#if (NXT_HAVE_ISOLATION_ROOTFS) - -static nxt_int_t -nxt_app_set_isolation_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation, - nxt_process_t *process) -{ - nxt_str_t str; - nxt_conf_value_t *obj; - - static nxt_str_t rootfs_name = nxt_string("rootfs"); - - obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL); - if (obj != NULL) { - nxt_conf_get_string(obj, &str); - - if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) { - nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other " - "than \"/\" but given \"%V\"", &str); - - return NXT_ERROR; - } - - if (str.start[str.length - 1] == '/') { - str.length--; - } - - process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, - str.length + 1); - - if (nxt_slow_path(process->isolation.rootfs == NULL)) { - return NXT_ERROR; - } - - nxt_memcpy(process->isolation.rootfs, str.start, str.length); - - process->isolation.rootfs[str.length] = '\0'; - } - - return NXT_OK; -} - - -static nxt_int_t -nxt_app_set_isolation_mounts(nxt_task_t *task, nxt_process_t *process, - nxt_str_t *app_type) -{ - nxt_int_t ret, cap_chroot; - nxt_runtime_t *rt; - nxt_app_lang_module_t *lang; - - rt = task->thread->runtime; - cap_chroot = rt->capabilities.chroot; - lang = nxt_app_lang_module(rt, app_type); - - nxt_assert(lang != NULL); - -#if (NXT_HAVE_CLONE_NEWUSER) - if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { - cap_chroot = 1; - } -#endif - - if (!cap_chroot) { - nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges"); - return NXT_ERROR; - } - - if (lang->mounts != NULL && lang->mounts->nelts > 0) { - ret = nxt_app_set_lang_mounts(task, process, lang->mounts); - if (nxt_slow_path(ret != NXT_OK)) { - return NXT_ERROR; - } - } - - return NXT_OK; -} - - -static nxt_int_t -nxt_app_set_lang_mounts(nxt_task_t *task, nxt_process_t *process, - nxt_array_t *lang_mounts) -{ - u_char *p; - size_t i, n, rootfs_len, len; - nxt_mp_t *mp; - nxt_array_t *mounts; - const u_char *rootfs; - nxt_fs_mount_t *mnt, *lang_mnt; - - rootfs = process->isolation.rootfs; - rootfs_len = nxt_strlen(rootfs); - mp = process->mem_pool; - - /* copy to init mem pool */ - mounts = nxt_array_copy(mp, NULL, lang_mounts); - if (mounts == NULL) { - return NXT_ERROR; - } - - n = mounts->nelts; - mnt = mounts->elts; - lang_mnt = lang_mounts->elts; - - for (i = 0; i < n; i++) { - len = nxt_strlen(lang_mnt[i].dst); - - mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1); - if (mnt[i].dst == NULL) { - return NXT_ERROR; - } - - p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len); - p = nxt_cpymem(p, lang_mnt[i].dst, len); - *p = '\0'; - } - - process->isolation.mounts = mounts; - - return NXT_OK; -} - - -static nxt_int_t -nxt_app_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) -{ - size_t i, n; - nxt_int_t ret, hasproc; - struct stat st; - nxt_array_t *mounts; - const u_char *dst; - nxt_fs_mount_t *mnt; - - hasproc = 0; - -#if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS) - nxt_fs_mount_t mount; - - if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID) - && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) - { - /* - * This mount point will automatically be gone when the namespace is - * destroyed. - */ - - mount.fstype = (u_char *) "proc"; - mount.src = (u_char *) "proc"; - mount.dst = (u_char *) "/proc"; - mount.data = (u_char *) ""; - mount.flags = 0; - - ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO); - if (nxt_fast_path(ret == NXT_OK)) { - ret = nxt_fs_mount(task, &mount); - if (nxt_fast_path(ret == NXT_OK)) { - hasproc = 1; - } - - } else { - nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno); - } - } -#endif - - mounts = process->isolation.mounts; - - n = mounts->nelts; - mnt = mounts->elts; - - for (i = 0; i < n; i++) { - dst = mnt[i].dst; - - if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0 - && stat((const char *) mnt[i].src, &st) != 0)) - { - nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src); - continue; - } - - if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0 - && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0) - { - continue; - } - - ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO); - if (nxt_slow_path(ret != NXT_OK)) { - nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno); - goto undo; - } - - ret = nxt_fs_mount(task, &mnt[i]); - if (nxt_slow_path(ret != NXT_OK)) { - goto undo; - } - } - - return NXT_OK; - -undo: - - n = i + 1; - - for (i = 0; i < n; i++) { - nxt_fs_unmount(mnt[i].dst); - } - - return NXT_ERROR; -} - -#endif - - -#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) - -static nxt_int_t -nxt_app_set_isolation_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation, - nxt_process_t *process) -{ - nxt_conf_value_t *obj; - - static nxt_str_t new_privs_name = nxt_string("new_privs"); - - obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL); - if (obj != NULL) { - process->isolation.new_privs = nxt_conf_get_boolean(obj); - } - - return NXT_OK; -} - -#endif - - static u_char * nxt_cstr_dup(nxt_mp_t *mp, u_char *dst, u_char *src) { diff --git a/src/nxt_isolation.c b/src/nxt_isolation.c new file mode 100644 index 00000000..60de4324 --- /dev/null +++ b/src/nxt_isolation.c @@ -0,0 +1,958 @@ +/* + * Copyright (C) NGINX, Inc. + */ + +#include <nxt_main.h> +#include <nxt_application.h> +#include <nxt_process.h> +#include <nxt_isolation.h> + +#if (NXT_HAVE_PIVOT_ROOT) +#include <mntent.h> +#endif + + +static nxt_int_t nxt_isolation_set(nxt_task_t *task, + nxt_conf_value_t *isolation, nxt_process_t *process); + +#if (NXT_HAVE_CLONE) +static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task, + nxt_conf_value_t *isolation, nxt_process_t *process); +static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task, + nxt_conf_value_t *namespaces, nxt_clone_t *clone); +#endif + +#if (NXT_HAVE_CLONE_NEWUSER) +static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task, + nxt_conf_value_t *isolation, nxt_process_t *process); +static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task, + nxt_mp_t *mem_pool, nxt_conf_value_t *map_array, + nxt_clone_credential_map_t *map); +static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task, + nxt_process_t *process); +#endif + +#if (NXT_HAVE_ISOLATION_ROOTFS) +static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task, + nxt_conf_value_t *isolation, nxt_process_t *process); +static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task, + nxt_process_t *process, nxt_str_t *app_type); +static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task, + nxt_process_t *process, nxt_array_t *syspaths); +static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process); + +#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) +static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs); +static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task, + const char *rootfs); +nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root); +#endif + +static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path); +#endif + +#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) +static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task, + nxt_conf_value_t *isolation, nxt_process_t *process); +#endif + + +nxt_int_t +nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process, + nxt_mp_t *mp) +{ + nxt_int_t cap_setid; + nxt_int_t ret; + nxt_runtime_t *rt; + nxt_common_app_conf_t *app_conf; + + rt = task->thread->runtime; + app_conf = process->data.app; + cap_setid = rt->capabilities.setid; + + if (app_conf->isolation != NULL) { + ret = nxt_isolation_set(task, app_conf->isolation, process); + if (nxt_slow_path(ret != NXT_OK)) { + return ret; + } + } + +#if (NXT_HAVE_CLONE_NEWUSER) + if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { + cap_setid = 1; + } +#endif + +#if (NXT_HAVE_ISOLATION_ROOTFS) + if (process->isolation.rootfs != NULL) { + ret = nxt_isolation_set_mounts(task, process, &app_conf->type); + if (nxt_slow_path(ret != NXT_OK)) { + return ret; + } + } +#endif + + if (cap_setid) { + ret = nxt_process_creds_set(task, process, &app_conf->user, + &app_conf->group); + + if (nxt_slow_path(ret != NXT_OK)) { + return ret; + } + + } else { + if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, + nxt_strlen(rt->user_cred.user))) + { + nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " + "missing capabilities", &app_conf->user, &app_conf->name); + + return NXT_ERROR; + } + + if (app_conf->group.length > 0 + && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, + nxt_strlen(rt->group))) + { + nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " + "missing capabilities", &app_conf->group, + &app_conf->name); + + return NXT_ERROR; + } + } + +#if (NXT_HAVE_CLONE_NEWUSER) + ret = nxt_isolation_vldt_creds(task, process); + if (nxt_slow_path(ret != NXT_OK)) { + return ret; + } +#endif + + return NXT_OK; +} + + +static nxt_int_t +nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation, + nxt_process_t *process) +{ +#if (NXT_HAVE_CLONE) + if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process) + != NXT_OK)) + { + return NXT_ERROR; + } +#endif + +#if (NXT_HAVE_CLONE_NEWUSER) + if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process) + != NXT_OK)) + { + return NXT_ERROR; + } +#endif + +#if (NXT_HAVE_ISOLATION_ROOTFS) + if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process) + != NXT_OK)) + { + return NXT_ERROR; + } +#endif + +#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) + if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process) + != NXT_OK)) + { + return NXT_ERROR; + } +#endif + + return NXT_OK; +} + + +#if (NXT_HAVE_CLONE) + +static nxt_int_t +nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation, + nxt_process_t *process) +{ + nxt_int_t ret; + nxt_conf_value_t *obj; + + static nxt_str_t nsname = nxt_string("namespaces"); + + obj = nxt_conf_get_object_member(isolation, &nsname, NULL); + if (obj != NULL) { + ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone); + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + } + + return NXT_OK; +} + +#endif + + +#if (NXT_HAVE_CLONE_NEWUSER) + +static nxt_int_t +nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation, + nxt_process_t *process) +{ + nxt_int_t ret; + nxt_clone_t *clone; + nxt_conf_value_t *array; + + static nxt_str_t uidname = nxt_string("uidmap"); + static nxt_str_t gidname = nxt_string("gidmap"); + + clone = &process->isolation.clone; + + array = nxt_conf_get_object_member(isolation, &uidname, NULL); + if (array != NULL) { + ret = nxt_isolation_credential_map(task, process->mem_pool, array, + &clone->uidmap); + + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + } + + array = nxt_conf_get_object_member(isolation, &gidname, NULL); + if (array != NULL) { + ret = nxt_isolation_credential_map(task, process->mem_pool, array, + &clone->gidmap); + + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + } + + return NXT_OK; +} + + +static nxt_int_t +nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp, + nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map) +{ + nxt_int_t ret; + nxt_uint_t i; + nxt_conf_value_t *obj; + + static nxt_conf_map_t nxt_clone_map_entry_conf[] = { + { + nxt_string("container"), + NXT_CONF_MAP_INT, + offsetof(nxt_clone_map_entry_t, container), + }, + + { + nxt_string("host"), + NXT_CONF_MAP_INT, + offsetof(nxt_clone_map_entry_t, host), + }, + + { + nxt_string("size"), + NXT_CONF_MAP_INT, + offsetof(nxt_clone_map_entry_t, size), + }, + }; + + map->size = nxt_conf_array_elements_count(map_array); + + if (map->size == 0) { + return NXT_OK; + } + + map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t)); + if (nxt_slow_path(map->map == NULL)) { + return NXT_ERROR; + } + + for (i = 0; i < map->size; i++) { + obj = nxt_conf_get_array_element(map_array, i); + + ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf, + nxt_nitems(nxt_clone_map_entry_conf), + map->map + i); + if (nxt_slow_path(ret != NXT_OK)) { + nxt_alert(task, "clone map entry map error"); + return NXT_ERROR; + } + } + + return NXT_OK; +} + + +static nxt_int_t +nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process) +{ + nxt_int_t ret; + nxt_clone_t *clone; + nxt_credential_t *creds; + + clone = &process->isolation.clone; + creds = process->user_cred; + + if (clone->uidmap.size == 0 && clone->gidmap.size == 0) { + return NXT_OK; + } + + if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) { + if (nxt_slow_path(clone->uidmap.size > 0)) { + nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but " + "\"isolation.namespaces.credential\" is false or unset"); + + return NXT_ERROR; + } + + if (nxt_slow_path(clone->gidmap.size > 0)) { + nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but " + "\"isolation.namespaces.credential\" is false or unset"); + + return NXT_ERROR; + } + + return NXT_OK; + } + + ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds); + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + + return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds); +} + +#endif + + +#if (NXT_HAVE_CLONE) + +static nxt_int_t +nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces, + nxt_clone_t *clone) +{ + uint32_t index; + nxt_str_t name; + nxt_int_t flag; + nxt_conf_value_t *value; + + index = 0; + + for ( ;; ) { + value = nxt_conf_next_object_member(namespaces, &name, &index); + + if (value == NULL) { + break; + } + + flag = 0; + +#if (NXT_HAVE_CLONE_NEWUSER) + if (nxt_str_eq(&name, "credential", 10)) { + flag = CLONE_NEWUSER; + } +#endif + +#if (NXT_HAVE_CLONE_NEWPID) + if (nxt_str_eq(&name, "pid", 3)) { + flag = CLONE_NEWPID; + } +#endif + +#if (NXT_HAVE_CLONE_NEWNET) + if (nxt_str_eq(&name, "network", 7)) { + flag = CLONE_NEWNET; + } +#endif + +#if (NXT_HAVE_CLONE_NEWUTS) + if (nxt_str_eq(&name, "uname", 5)) { + flag = CLONE_NEWUTS; + } +#endif + +#if (NXT_HAVE_CLONE_NEWNS) + if (nxt_str_eq(&name, "mount", 5)) { + flag = CLONE_NEWNS; + } +#endif + +#if (NXT_HAVE_CLONE_NEWCGROUP) + if (nxt_str_eq(&name, "cgroup", 6)) { + flag = CLONE_NEWCGROUP; + } +#endif + + if (!flag) { + nxt_alert(task, "unknown namespace flag: \"%V\"", &name); + return NXT_ERROR; + } + + if (nxt_conf_get_boolean(value)) { + clone->flags |= flag; + } + } + + return NXT_OK; +} + +#endif + + +#if (NXT_HAVE_ISOLATION_ROOTFS) + +static nxt_int_t +nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation, + nxt_process_t *process) +{ + nxt_str_t str; + nxt_conf_value_t *obj; + + static nxt_str_t rootfs_name = nxt_string("rootfs"); + + obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL); + if (obj != NULL) { + nxt_conf_get_string(obj, &str); + + if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) { + nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other " + "than \"/\" but given \"%V\"", &str); + + return NXT_ERROR; + } + + if (str.start[str.length - 1] == '/') { + str.length--; + } + + process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, + str.length + 1); + + if (nxt_slow_path(process->isolation.rootfs == NULL)) { + return NXT_ERROR; + } + + nxt_memcpy(process->isolation.rootfs, str.start, str.length); + + process->isolation.rootfs[str.length] = '\0'; + } + + return NXT_OK; +} + + +static nxt_int_t +nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process, + nxt_str_t *app_type) +{ + nxt_int_t ret, cap_chroot; + nxt_runtime_t *rt; + nxt_app_lang_module_t *lang; + + rt = task->thread->runtime; + cap_chroot = rt->capabilities.chroot; + lang = nxt_app_lang_module(rt, app_type); + + nxt_assert(lang != NULL); + +#if (NXT_HAVE_CLONE_NEWUSER) + if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { + cap_chroot = 1; + } +#endif + + if (!cap_chroot) { + nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges"); + return NXT_ERROR; + } + + if (lang->mounts != NULL && lang->mounts->nelts > 0) { + ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts); + if (nxt_slow_path(ret != NXT_OK)) { + return NXT_ERROR; + } + + process->isolation.cleanup = nxt_isolation_unmount_all; + } + + return NXT_OK; +} + + +static nxt_int_t +nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process, + nxt_array_t *lang_mounts) +{ + u_char *p; + size_t i, n, rootfs_len, len; + nxt_mp_t *mp; + nxt_array_t *mounts; + const u_char *rootfs; + nxt_fs_mount_t *mnt, *lang_mnt; + + rootfs = process->isolation.rootfs; + rootfs_len = nxt_strlen(rootfs); + mp = process->mem_pool; + + /* copy to init mem pool */ + mounts = nxt_array_copy(mp, NULL, lang_mounts); + if (mounts == NULL) { + return NXT_ERROR; + } + + n = mounts->nelts; + mnt = mounts->elts; + lang_mnt = lang_mounts->elts; + + for (i = 0; i < n; i++) { + len = nxt_strlen(lang_mnt[i].dst); + + mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1); + if (mnt[i].dst == NULL) { + return NXT_ERROR; + } + + p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len); + p = nxt_cpymem(p, lang_mnt[i].dst, len); + *p = '\0'; + } + + process->isolation.mounts = mounts; + + return NXT_OK; +} + + +void +nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process) +{ + size_t i, n; + nxt_array_t *mounts; + nxt_fs_mount_t *mnt; + + nxt_debug(task, "unmount all (%s)", process->name); + + mounts = process->isolation.mounts; + n = mounts->nelts; + mnt = mounts->elts; + + for (i = 0; i < n; i++) { + nxt_fs_unmount(mnt[i].dst); + } +} + + +nxt_int_t +nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) +{ + size_t i, n; + nxt_int_t ret, hasproc; + struct stat st; + nxt_array_t *mounts; + const u_char *dst; + nxt_fs_mount_t *mnt; + + hasproc = 0; + +#if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS) + nxt_fs_mount_t mount; + + if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID) + && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) + { + /* + * This mount point will automatically be gone when the namespace is + * destroyed. + */ + + mount.fstype = (u_char *) "proc"; + mount.src = (u_char *) "proc"; + mount.dst = (u_char *) "/proc"; + mount.data = (u_char *) ""; + mount.flags = 0; + + ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO); + if (nxt_fast_path(ret == NXT_OK)) { + ret = nxt_fs_mount(task, &mount); + if (nxt_fast_path(ret == NXT_OK)) { + hasproc = 1; + } + + } else { + nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno); + } + } +#endif + + mounts = process->isolation.mounts; + + n = mounts->nelts; + mnt = mounts->elts; + + for (i = 0; i < n; i++) { + dst = mnt[i].dst; + + if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0 + && stat((const char *) mnt[i].src, &st) != 0)) + { + nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src); + continue; + } + + if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0 + && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0) + { + continue; + } + + ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO); + if (nxt_slow_path(ret != NXT_OK)) { + nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno); + goto undo; + } + + ret = nxt_fs_mount(task, &mnt[i]); + if (nxt_slow_path(ret != NXT_OK)) { + goto undo; + } + } + + return NXT_OK; + +undo: + + n = i + 1; + + for (i = 0; i < n; i++) { + nxt_fs_unmount(mnt[i].dst); + } + + return NXT_ERROR; +} + + +#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) + +nxt_int_t +nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) +{ + char *rootfs; + nxt_int_t ret; + + rootfs = (char *) process->isolation.rootfs; + + nxt_debug(task, "change root: %s", rootfs); + + if (NXT_CLONE_MNT(process->isolation.clone.flags)) { + ret = nxt_isolation_pivot_root(task, rootfs); + + } else { + ret = nxt_isolation_chroot(task, rootfs); + } + + if (nxt_fast_path(ret == NXT_OK)) { + if (nxt_slow_path(chdir("/") < 0)) { + nxt_alert(task, "chdir(\"/\") %E", nxt_errno); + return NXT_ERROR; + } + } + + return ret; +} + + +/* + * pivot_root(2) can only be safely used with containers, otherwise it can + * umount(2) the global root filesystem and screw up the machine. + */ + +static nxt_int_t +nxt_isolation_pivot_root(nxt_task_t *task, const char *path) +{ + /* + * This implementation makes use of a kernel trick that works for ages + * and now documented in Linux kernel 5. + * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/ + */ + + if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) { + nxt_alert(task, "failed to make / a slave mount %E", nxt_errno); + return NXT_ERROR; + } + + if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) { + return NXT_ERROR; + } + + if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) { + nxt_alert(task, "error bind mounting rootfs %E", nxt_errno); + return NXT_ERROR; + } + + if (nxt_slow_path(chdir(path) != 0)) { + nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno); + return NXT_ERROR; + } + + if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) { + nxt_alert(task, "failed to pivot_root %E", nxt_errno); + return NXT_ERROR; + } + + /* + * Make oldroot a slave mount to avoid unmounts getting propagated to the + * host. + */ + if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) { + nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno); + return NXT_ERROR; + } + + if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) { + nxt_alert(task, "failed to umount old root directory %E", nxt_errno); + return NXT_ERROR; + } + + return NXT_OK; +} + + +static nxt_int_t +nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs) +{ + char *parent_mnt; + FILE *procfile; + u_char **mounts; + size_t len; + uint8_t *shared; + nxt_int_t ret, index, nmounts; + struct mntent *ent; + + static const char *mount_path = "/proc/self/mounts"; + + ret = NXT_ERROR; + ent = NULL; + shared = NULL; + procfile = NULL; + parent_mnt = NULL; + + nmounts = 256; + + mounts = nxt_malloc(nmounts * sizeof(uintptr_t)); + if (nxt_slow_path(mounts == NULL)) { + goto fail; + } + + shared = nxt_malloc(nmounts); + if (nxt_slow_path(shared == NULL)) { + goto fail; + } + + procfile = setmntent(mount_path, "r"); + if (nxt_slow_path(procfile == NULL)) { + nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno); + + goto fail; + } + + index = 0; + +again: + + for ( ; index < nmounts; index++) { + ent = getmntent(procfile); + if (ent == NULL) { + nmounts = index; + break; + } + + mounts[index] = (u_char *) strdup(ent->mnt_dir); + shared[index] = hasmntopt(ent, "shared") != NULL; + } + + if (ent != NULL) { + /* there are still entries to be read */ + + nmounts *= 2; + mounts = nxt_realloc(mounts, nmounts); + if (nxt_slow_path(mounts == NULL)) { + goto fail; + } + + shared = nxt_realloc(shared, nmounts); + if (nxt_slow_path(shared == NULL)) { + goto fail; + } + + goto again; + } + + for (index = 0; index < nmounts; index++) { + if (nxt_strcmp(mounts[index], rootfs) == 0) { + parent_mnt = (char *) rootfs; + break; + } + } + + if (parent_mnt == NULL) { + len = nxt_strlen(rootfs); + + parent_mnt = nxt_malloc(len + 1); + if (parent_mnt == NULL) { + goto fail; + } + + nxt_memcpy(parent_mnt, rootfs, len); + parent_mnt[len] = '\0'; + + if (parent_mnt[len - 1] == '/') { + parent_mnt[len - 1] = '\0'; + len--; + } + + for ( ;; ) { + for (index = 0; index < nmounts; index++) { + if (nxt_strcmp(mounts[index], parent_mnt) == 0) { + goto found; + } + } + + if (len == 1 && parent_mnt[0] == '/') { + nxt_alert(task, "parent mount not found"); + goto fail; + } + + /* parent dir */ + while (parent_mnt[len - 1] != '/' && len > 0) { + len--; + } + + if (nxt_slow_path(len == 0)) { + nxt_alert(task, "parent mount not found"); + goto fail; + } + + if (len == 1) { + parent_mnt[len] = '\0'; /* / */ + } else { + parent_mnt[len - 1] = '\0'; /* /<path> */ + } + } + } + +found: + + if (shared[index]) { + if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) { + nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt, + nxt_errno); + + goto fail; + } + } + + ret = NXT_OK; + +fail: + + if (procfile != NULL) { + endmntent(procfile); + } + + if (mounts != NULL) { + for (index = 0; index < nmounts; index++) { + nxt_free(mounts[index]); + } + + nxt_free(mounts); + } + + if (shared != NULL) { + nxt_free(shared); + } + + if (parent_mnt != NULL && parent_mnt != rootfs) { + nxt_free(parent_mnt); + } + + return ret; +} + + +nxt_inline int +nxt_pivot_root(const char *new_root, const char *old_root) +{ + return syscall(__NR_pivot_root, new_root, old_root); +} + + +#else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */ + + +nxt_int_t +nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) +{ + char *rootfs; + + rootfs = (char *) process->isolation.rootfs; + + nxt_debug(task, "change root: %s", rootfs); + + if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) { + if (nxt_slow_path(chdir("/") < 0)) { + nxt_alert(task, "chdir(\"/\") %E", nxt_errno); + return NXT_ERROR; + } + + return NXT_OK; + } + + return NXT_ERROR; +} + +#endif + + +static nxt_int_t +nxt_isolation_chroot(nxt_task_t *task, const char *path) +{ + if (nxt_slow_path(chroot(path) < 0)) { + nxt_alert(task, "chroot(%s) %E", path, nxt_errno); + return NXT_ERROR; + } + + return NXT_OK; +} + +#endif /* NXT_HAVE_ISOLATION_ROOTFS */ + + +#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) + +static nxt_int_t +nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation, + nxt_process_t *process) +{ + nxt_conf_value_t *obj; + + static nxt_str_t new_privs_name = nxt_string("new_privs"); + + obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL); + if (obj != NULL) { + process->isolation.new_privs = nxt_conf_get_boolean(obj); + } + + return NXT_OK; +} + +#endif diff --git a/src/nxt_isolation.h b/src/nxt_isolation.h new file mode 100644 index 00000000..88a5f9e1 --- /dev/null +++ b/src/nxt_isolation.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) NGINX, Inc. + */ + +#ifndef _NXT_ISOLATION_H_ +#define _NXT_ISOLATION_H_ + + +nxt_int_t nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process, + nxt_mp_t *mp); + +#if (NXT_HAVE_ISOLATION_ROOTFS) +nxt_int_t nxt_isolation_prepare_rootfs(nxt_task_t *task, + nxt_process_t *process); +nxt_int_t nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process); +#endif + +#endif /* _NXT_ISOLATION_H_ */ diff --git a/src/nxt_main_process.c b/src/nxt_main_process.c index 48eb2abb..a7b84b73 100644 --- a/src/nxt_main_process.c +++ b/src/nxt_main_process.c @@ -878,11 +878,9 @@ nxt_main_cleanup_process(nxt_task_t *task, nxt_pid_t pid) return; } -#if (NXT_HAVE_ISOLATION_ROOTFS) - if (process->isolation.rootfs != NULL && process->isolation.mounts) { - (void) nxt_process_unmount_all(task, process); + if (process->isolation.cleanup != NULL) { + process->isolation.cleanup(task, process); } -#endif name = process->name; stream = process->stream; diff --git a/src/nxt_process.c b/src/nxt_process.c index 9bfae395..9be7974f 100644 --- a/src/nxt_process.c +++ b/src/nxt_process.c @@ -17,10 +17,6 @@ #include <sys/prctl.h> #endif -#if (NXT_HAVE_PIVOT_ROOT) -#include <mntent.h> -#endif - static nxt_int_t nxt_process_setup(nxt_task_t *task, nxt_process_t *process); static nxt_int_t nxt_process_child_fixup(nxt_task_t *task, nxt_process_t *process); @@ -33,16 +29,6 @@ static void nxt_process_created_ok(nxt_task_t *task, nxt_port_recv_msg_t *msg, static void nxt_process_created_error(nxt_task_t *task, nxt_port_recv_msg_t *msg, void *data); -#if (NXT_HAVE_ISOLATION_ROOTFS) -static nxt_int_t nxt_process_chroot(nxt_task_t *task, const char *path); - -#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) -static nxt_int_t nxt_process_pivot_root(nxt_task_t *task, const char *rootfs); -static nxt_int_t nxt_process_private_mount(nxt_task_t *task, - const char *rootfs); -static int nxt_pivot_root(const char *new_root, const char *old_root); -#endif -#endif /* A cached process pid. */ nxt_pid_t nxt_pid; @@ -398,51 +384,6 @@ nxt_process_core_setup(nxt_task_t *task, nxt_process_t *process) } -#if (NXT_HAVE_CLONE_NEWUSER) - -nxt_int_t -nxt_process_vldt_isolation_creds(nxt_task_t *task, nxt_process_t *process) -{ - nxt_int_t ret; - nxt_clone_t *clone; - nxt_credential_t *creds; - - clone = &process->isolation.clone; - creds = process->user_cred; - - if (clone->uidmap.size == 0 && clone->gidmap.size == 0) { - return NXT_OK; - } - - if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) { - if (nxt_slow_path(clone->uidmap.size > 0)) { - nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but " - "\"isolation.namespaces.credential\" is false or unset"); - - return NXT_ERROR; - } - - if (nxt_slow_path(clone->gidmap.size > 0)) { - nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but " - "\"isolation.namespaces.credential\" is false or unset"); - - return NXT_ERROR; - } - - return NXT_OK; - } - - ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds); - if (nxt_slow_path(ret != NXT_OK)) { - return NXT_ERROR; - } - - return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds); -} - -#endif - - nxt_int_t nxt_process_creds_set(nxt_task_t *task, nxt_process_t *process, nxt_str_t *user, nxt_str_t *group) @@ -525,329 +466,6 @@ nxt_process_apply_creds(nxt_task_t *task, nxt_process_t *process) } -#if (NXT_HAVE_ISOLATION_ROOTFS) - - -#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) - - -nxt_int_t -nxt_process_change_root(nxt_task_t *task, nxt_process_t *process) -{ - char *rootfs; - nxt_int_t ret; - - rootfs = (char *) process->isolation.rootfs; - - nxt_debug(task, "change root: %s", rootfs); - - if (NXT_CLONE_MNT(process->isolation.clone.flags)) { - ret = nxt_process_pivot_root(task, rootfs); - } else { - ret = nxt_process_chroot(task, rootfs); - } - - if (nxt_fast_path(ret == NXT_OK)) { - if (nxt_slow_path(chdir("/") < 0)) { - nxt_alert(task, "chdir(\"/\") %E", nxt_errno); - return NXT_ERROR; - } - } - - return ret; -} - - -#else - - -nxt_int_t -nxt_process_change_root(nxt_task_t *task, nxt_process_t *process) -{ - char *rootfs; - - rootfs = (char *) process->isolation.rootfs; - - nxt_debug(task, "change root: %s", rootfs); - - if (nxt_fast_path(nxt_process_chroot(task, rootfs) == NXT_OK)) { - if (nxt_slow_path(chdir("/") < 0)) { - nxt_alert(task, "chdir(\"/\") %E", nxt_errno); - return NXT_ERROR; - } - - return NXT_OK; - } - - return NXT_ERROR; -} - - -#endif - - -static nxt_int_t -nxt_process_chroot(nxt_task_t *task, const char *path) -{ - if (nxt_slow_path(chroot(path) < 0)) { - nxt_alert(task, "chroot(%s) %E", path, nxt_errno); - return NXT_ERROR; - } - - return NXT_OK; -} - - -void -nxt_process_unmount_all(nxt_task_t *task, nxt_process_t *process) -{ - size_t i, n; - nxt_array_t *mounts; - nxt_fs_mount_t *mnt; - - nxt_debug(task, "unmount all (%s)", process->name); - - mounts = process->isolation.mounts; - n = mounts->nelts; - mnt = mounts->elts; - - for (i = 0; i < n; i++) { - nxt_fs_unmount(mnt[i].dst); - } -} - - -#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) - -/* - * pivot_root(2) can only be safely used with containers, otherwise it can - * umount(2) the global root filesystem and screw up the machine. - */ - -static nxt_int_t -nxt_process_pivot_root(nxt_task_t *task, const char *path) -{ - /* - * This implementation makes use of a kernel trick that works for ages - * and now documented in Linux kernel 5. - * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/ - */ - - if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) { - nxt_alert(task, "failed to make / a slave mount %E", nxt_errno); - return NXT_ERROR; - } - - if (nxt_slow_path(nxt_process_private_mount(task, path) != NXT_OK)) { - return NXT_ERROR; - } - - if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) { - nxt_alert(task, "error bind mounting rootfs %E", nxt_errno); - return NXT_ERROR; - } - - if (nxt_slow_path(chdir(path) != 0)) { - nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno); - return NXT_ERROR; - } - - if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) { - nxt_alert(task, "failed to pivot_root %E", nxt_errno); - return NXT_ERROR; - } - - /* - * Make oldroot a slave mount to avoid unmounts getting propagated to the - * host. - */ - if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) { - nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno); - return NXT_ERROR; - } - - if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) { - nxt_alert(task, "failed to umount old root directory %E", nxt_errno); - return NXT_ERROR; - } - - return NXT_OK; -} - - -static nxt_int_t -nxt_process_private_mount(nxt_task_t *task, const char *rootfs) -{ - char *parent_mnt; - FILE *procfile; - u_char **mounts; - size_t len; - uint8_t *shared; - nxt_int_t ret, index, nmounts; - struct mntent *ent; - - static const char *mount_path = "/proc/self/mounts"; - - ret = NXT_ERROR; - ent = NULL; - shared = NULL; - procfile = NULL; - parent_mnt = NULL; - - nmounts = 256; - - mounts = nxt_malloc(nmounts * sizeof(uintptr_t)); - if (nxt_slow_path(mounts == NULL)) { - goto fail; - } - - shared = nxt_malloc(nmounts); - if (nxt_slow_path(shared == NULL)) { - goto fail; - } - - procfile = setmntent(mount_path, "r"); - if (nxt_slow_path(procfile == NULL)) { - nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno); - - goto fail; - } - - index = 0; - -again: - - for ( ; index < nmounts; index++) { - ent = getmntent(procfile); - if (ent == NULL) { - nmounts = index; - break; - } - - mounts[index] = (u_char *) strdup(ent->mnt_dir); - shared[index] = hasmntopt(ent, "shared") != NULL; - } - - if (ent != NULL) { - /* there are still entries to be read */ - - nmounts *= 2; - mounts = nxt_realloc(mounts, nmounts); - if (nxt_slow_path(mounts == NULL)) { - goto fail; - } - - shared = nxt_realloc(shared, nmounts); - if (nxt_slow_path(shared == NULL)) { - goto fail; - } - - goto again; - } - - for (index = 0; index < nmounts; index++) { - if (nxt_strcmp(mounts[index], rootfs) == 0) { - parent_mnt = (char *) rootfs; - break; - } - } - - if (parent_mnt == NULL) { - len = nxt_strlen(rootfs); - - parent_mnt = nxt_malloc(len + 1); - if (parent_mnt == NULL) { - goto fail; - } - - nxt_memcpy(parent_mnt, rootfs, len); - parent_mnt[len] = '\0'; - - if (parent_mnt[len - 1] == '/') { - parent_mnt[len - 1] = '\0'; - len--; - } - - for ( ;; ) { - for (index = 0; index < nmounts; index++) { - if (nxt_strcmp(mounts[index], parent_mnt) == 0) { - goto found; - } - } - - if (len == 1 && parent_mnt[0] == '/') { - nxt_alert(task, "parent mount not found"); - goto fail; - } - - /* parent dir */ - while (parent_mnt[len - 1] != '/' && len > 0) { - len--; - } - - if (nxt_slow_path(len == 0)) { - nxt_alert(task, "parent mount not found"); - goto fail; - } - - if (len == 1) { - parent_mnt[len] = '\0'; /* / */ - } else { - parent_mnt[len - 1] = '\0'; /* /<path> */ - } - } - } - -found: - - if (shared[index]) { - if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) { - nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt, - nxt_errno); - - goto fail; - } - } - - ret = NXT_OK; - -fail: - - if (procfile != NULL) { - endmntent(procfile); - } - - if (mounts != NULL) { - for (index = 0; index < nmounts; index++) { - nxt_free(mounts[index]); - } - - nxt_free(mounts); - } - - if (shared != NULL) { - nxt_free(shared); - } - - if (parent_mnt != NULL && parent_mnt != rootfs) { - nxt_free(parent_mnt); - } - - return ret; -} - - -static int -nxt_pivot_root(const char *new_root, const char *old_root) -{ - return syscall(__NR_pivot_root, new_root, old_root); -} - -#endif - -#endif - - static nxt_int_t nxt_process_send_ready(nxt_task_t *task, nxt_process_t *process) { diff --git a/src/nxt_process.h b/src/nxt_process.h index ecd813e2..f2383687 100644 --- a/src/nxt_process.h +++ b/src/nxt_process.h @@ -60,6 +60,9 @@ typedef enum { typedef struct nxt_port_mmap_s nxt_port_mmap_t; +typedef struct nxt_process_s nxt_process_t; +typedef void (*nxt_isolation_cleanup_t)(nxt_task_t *task, + nxt_process_t *process); typedef struct { @@ -73,6 +76,8 @@ typedef struct { u_char *rootfs; nxt_array_t *mounts; /* of nxt_mount_t */ + nxt_isolation_cleanup_t cleanup; + #if (NXT_HAVE_CLONE) nxt_clone_t clone; #endif @@ -83,7 +88,7 @@ typedef struct { } nxt_process_isolation_t; -typedef struct { +struct nxt_process_s { nxt_pid_t pid; const char *name; nxt_queue_t ports; /* of nxt_port_t */ @@ -103,7 +108,7 @@ typedef struct { nxt_process_data_t data; nxt_process_isolation_t isolation; -} nxt_process_t; +}; typedef nxt_int_t (*nxt_process_prefork_t)(nxt_task_t *task, @@ -178,17 +183,6 @@ nxt_int_t nxt_process_creds_set(nxt_task_t *task, nxt_process_t *process, nxt_str_t *user, nxt_str_t *group); nxt_int_t nxt_process_apply_creds(nxt_task_t *task, nxt_process_t *process); -#if (NXT_HAVE_CLONE_NEWUSER) -nxt_int_t nxt_process_vldt_isolation_creds(nxt_task_t *task, - nxt_process_t *process); -#endif - -nxt_int_t nxt_process_change_root(nxt_task_t *task, nxt_process_t *process); - -#if (NXT_HAVE_ISOLATION_ROOTFS) -void nxt_process_unmount_all(nxt_task_t *task, nxt_process_t *process); -#endif - #if (NXT_HAVE_SETPROCTITLE) #define nxt_process_title(task, fmt, ...) \ |