summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/nxt_application.c481
-rw-r--r--src/nxt_application.h4
-rw-r--r--src/nxt_array.c39
-rw-r--r--src/nxt_array.h3
-rw-r--r--src/nxt_capability.c5
-rw-r--r--src/nxt_capability.h3
-rw-r--r--src/nxt_clone.h3
-rw-r--r--src/nxt_conf_validation.c18
-rw-r--r--src/nxt_external.c2
-rw-r--r--src/nxt_fs.c163
-rw-r--r--src/nxt_fs.h36
-rw-r--r--src/nxt_java.c72
-rw-r--r--src/nxt_main.h1
-rw-r--r--src/nxt_main_process.c89
-rw-r--r--src/nxt_php_sapi.c2
-rw-r--r--src/nxt_process.c358
-rw-r--r--src/nxt_process.h53
-rw-r--r--src/nxt_python_wsgi.c9
-rw-r--r--src/nxt_runtime.c1
-rw-r--r--src/nxt_unix.h3
-rw-r--r--src/perl/nxt_perl_psgi.c6
-rw-r--r--src/ruby/nxt_ruby.c5
22 files changed, 1285 insertions, 71 deletions
diff --git a/src/nxt_application.c b/src/nxt_application.c
index 6de82257..566bf256 100644
--- a/src/nxt_application.c
+++ b/src/nxt_application.c
@@ -17,11 +17,16 @@
#include <glob.h>
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+#include <sys/prctl.h>
+#endif
+
typedef struct {
nxt_app_type_t type;
nxt_str_t version;
nxt_str_t file;
+ nxt_array_t *mounts; /* of nxt_fs_mount_t */
} nxt_module_t;
@@ -40,22 +45,40 @@ static nxt_int_t nxt_app_prefork(nxt_task_t *task, nxt_process_t *process,
nxt_mp_t *mp);
static nxt_int_t nxt_app_setup(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_app_set_environment(nxt_conf_value_t *environment);
-static nxt_int_t nxt_app_isolation(nxt_task_t *task,
+static u_char *nxt_cstr_dup(nxt_mp_t *mp, u_char *dst, u_char *src);
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+static nxt_int_t nxt_app_prepare_rootfs(nxt_task_t *task,
+ nxt_process_t *process);
+static nxt_int_t nxt_app_prepare_lang_mounts(nxt_task_t *task,
+ nxt_process_t *process, nxt_array_t *syspaths);
+static nxt_int_t nxt_app_set_isolation_rootfs(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+#endif
+
+static nxt_int_t nxt_app_set_isolation(nxt_task_t *task,
nxt_conf_value_t *isolation, nxt_process_t *process);
#if (NXT_HAVE_CLONE)
+static nxt_int_t nxt_app_set_isolation_namespaces(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_app_clone_flags(nxt_task_t *task,
nxt_conf_value_t *namespaces, nxt_clone_t *clone);
#endif
#if (NXT_HAVE_CLONE_NEWUSER)
-static nxt_int_t nxt_app_isolation_creds(nxt_task_t *task,
+static nxt_int_t nxt_app_set_isolation_creds(nxt_task_t *task,
nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_app_isolation_credential_map(nxt_task_t *task,
nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
nxt_clone_credential_map_t *map);
#endif
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+static nxt_int_t nxt_app_set_isolation_new_privs(nxt_task_t *task,
+ nxt_conf_value_t *isolation, nxt_process_t *process);
+#endif
+
nxt_str_t nxt_server = nxt_string(NXT_SERVER);
@@ -154,16 +177,17 @@ nxt_discovery_start(nxt_task_t *task, nxt_process_data_t *data)
static nxt_buf_t *
nxt_discovery_modules(nxt_task_t *task, const char *path)
{
- char *name;
- u_char *p, *end;
- size_t size;
- glob_t glb;
- nxt_mp_t *mp;
- nxt_buf_t *b;
- nxt_int_t ret;
- nxt_uint_t i, n;
- nxt_array_t *modules;
- nxt_module_t *module;
+ char *name;
+ u_char *p, *end;
+ size_t size;
+ glob_t glb;
+ nxt_mp_t *mp;
+ nxt_buf_t *b;
+ nxt_int_t ret;
+ nxt_uint_t i, n, j;
+ nxt_array_t *modules, *mounts;
+ nxt_module_t *module;
+ nxt_fs_mount_t *mnt;
b = NULL;
@@ -206,11 +230,26 @@ nxt_discovery_modules(nxt_task_t *task, const char *path)
size += nxt_length("{\"type\": ,");
size += nxt_length(" \"version\": \"\",");
- size += nxt_length(" \"file\": \"\"},");
+ size += nxt_length(" \"file\": \"\",");
+ size += nxt_length(" \"mounts\": []},");
size += NXT_INT_T_LEN
+ module[i].version.length
+ module[i].file.length;
+
+ mounts = module[i].mounts;
+
+ size += mounts->nelts * nxt_length("{\"src\": \"\", \"dst\": \"\", "
+ "\"fstype\": \"\", \"flags\": , "
+ "\"data\": \"\"},");
+
+ mnt = mounts->elts;
+
+ for (j = 0; j < mounts->nelts; j++) {
+ size += nxt_strlen(mnt[j].src) + nxt_strlen(mnt[j].dst)
+ + nxt_strlen(mnt[j].fstype) + NXT_INT_T_LEN
+ + (mnt[j].data == NULL ? 0 : nxt_strlen(mnt[j].data));
+ }
}
b = nxt_buf_mem_alloc(mp, size, 0);
@@ -225,12 +264,34 @@ nxt_discovery_modules(nxt_task_t *task, const char *path)
*p++ = '[';
for (i = 0; i < n; i++) {
- p = nxt_sprintf(p, end,
- "{\"type\": %d, \"version\": \"%V\", \"file\": \"%V\"},",
- module[i].type, &module[i].version, &module[i].file);
+ mounts = module[i].mounts;
+
+ p = nxt_sprintf(p, end, "{\"type\": %d, \"version\": \"%V\", "
+ "\"file\": \"%V\", \"mounts\": [",
+ module[i].type, &module[i].version, &module[i].file);
+
+ mnt = mounts->elts;
+ for (j = 0; j < mounts->nelts; j++) {
+ p = nxt_sprintf(p, end,
+ "{\"src\": \"%s\", \"dst\": \"%s\", "
+ "\"fstype\": \"%s\", \"flags\": %d, "
+ "\"data\": \"%s\"},",
+ mnt[j].src, mnt[j].dst, mnt[j].fstype, mnt[j].flags,
+ mnt[j].data == NULL ? (u_char *) "" : mnt[j].data);
+ }
+
+ *p++ = ']';
+ *p++ = '}';
+ *p++ = ',';
}
*p++ = ']';
+
+ if (nxt_slow_path(p >= end)) {
+ nxt_alert(task, "discovery write past the buffer");
+ goto fail;
+ }
+
b->mem.free = p;
fail:
@@ -245,13 +306,16 @@ static nxt_int_t
nxt_discovery_module(nxt_task_t *task, nxt_mp_t *mp, nxt_array_t *modules,
const char *name)
{
- void *dl;
- nxt_str_t version;
- nxt_int_t ret;
- nxt_uint_t i, n;
- nxt_module_t *module;
- nxt_app_type_t type;
- nxt_app_module_t *app;
+ void *dl;
+ nxt_str_t version;
+ nxt_int_t ret;
+ nxt_uint_t i, j, n;
+ nxt_array_t *mounts;
+ nxt_module_t *module;
+ nxt_app_type_t type;
+ nxt_fs_mount_t *to;
+ nxt_app_module_t *app;
+ const nxt_fs_mount_t *from;
/*
* Only memory allocation failure should return NXT_ERROR.
@@ -328,6 +392,47 @@ nxt_discovery_module(nxt_task_t *task, nxt_mp_t *mp, nxt_array_t *modules,
nxt_memcpy(module->file.start, name, module->file.length);
+ module->mounts = nxt_array_create(mp, app->nmounts,
+ sizeof(nxt_fs_mount_t));
+
+ if (nxt_slow_path(module->mounts == NULL)) {
+ goto fail;
+ }
+
+ mounts = module->mounts;
+
+ for (j = 0; j < app->nmounts; j++) {
+ from = &app->mounts[j];
+ to = nxt_array_zero_add(mounts);
+ if (nxt_slow_path(to == NULL)) {
+ goto fail;
+ }
+
+ to->src = nxt_cstr_dup(mp, to->src, from->src);
+ if (nxt_slow_path(to->src == NULL)) {
+ goto fail;
+ }
+
+ to->dst = nxt_cstr_dup(mp, to->dst, from->dst);
+ if (nxt_slow_path(to->dst == NULL)) {
+ goto fail;
+ }
+
+ to->fstype = nxt_cstr_dup(mp, to->fstype, from->fstype);
+ if (nxt_slow_path(to->fstype == NULL)) {
+ goto fail;
+ }
+
+ if (from->data != NULL) {
+ to->data = nxt_cstr_dup(mp, to->data, from->data);
+ if (nxt_slow_path(to->data == NULL)) {
+ goto fail;
+ }
+ }
+
+ to->flags = from->flags;
+ }
+
} else {
nxt_alert(task, "dlsym(\"%s\"), failed: \"%s\"", name, dlerror());
}
@@ -369,17 +474,23 @@ nxt_discovery_quit(nxt_task_t *task, nxt_port_recv_msg_t *msg, void *data)
static nxt_int_t
nxt_app_prefork(nxt_task_t *task, nxt_process_t *process, nxt_mp_t *mp)
{
- nxt_int_t cap_setid;
+ nxt_int_t cap_setid, cap_chroot;
nxt_int_t ret;
nxt_runtime_t *rt;
nxt_common_app_conf_t *app_conf;
+ nxt_app_lang_module_t *lang;
rt = task->thread->runtime;
app_conf = process->data.app;
cap_setid = rt->capabilities.setid;
+ cap_chroot = rt->capabilities.chroot;
+
+ lang = nxt_app_lang_module(rt, &app_conf->type);
+
+ nxt_assert(lang != NULL);
if (app_conf->isolation != NULL) {
- ret = nxt_app_isolation(task, app_conf->isolation, process);
+ ret = nxt_app_set_isolation(task, app_conf->isolation, process);
if (nxt_slow_path(ret != NXT_OK)) {
return ret;
}
@@ -388,6 +499,25 @@ nxt_app_prefork(nxt_task_t *task, nxt_process_t *process, nxt_mp_t *mp)
#if (NXT_HAVE_CLONE_NEWUSER)
if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
cap_setid = 1;
+ cap_chroot = 1;
+ }
+#endif
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ if (process->isolation.rootfs != NULL) {
+ if (!cap_chroot) {
+ nxt_log(task, NXT_LOG_ERR,
+ "The \"rootfs\" field requires privileges");
+
+ return NXT_ERROR;
+ }
+
+ if (lang->mounts != NULL && lang->mounts->nelts > 0) {
+ ret = nxt_app_prepare_lang_mounts(task, process, lang->mounts);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NXT_ERROR;
+ }
+ }
}
#endif
@@ -460,6 +590,13 @@ nxt_app_setup(nxt_task_t *task, nxt_process_t *process)
}
}
+ if (nxt_slow_path(nxt_app_set_environment(app_conf->environment)
+ != NXT_OK))
+ {
+ nxt_alert(task, "failed to set environment");
+ return NXT_ERROR;
+ }
+
if (nxt_app->setup != NULL) {
ret = nxt_app->setup(task, process, app_conf);
@@ -468,6 +605,22 @@ nxt_app_setup(nxt_task_t *task, nxt_process_t *process)
}
}
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ if (process->isolation.rootfs != NULL) {
+ if (process->isolation.mounts != NULL) {
+ ret = nxt_app_prepare_rootfs(task, process);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return ret;
+ }
+ }
+
+ ret = nxt_process_change_root(task, process);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NXT_ERROR;
+ }
+ }
+#endif
+
if (app_conf->working_directory != NULL
&& app_conf->working_directory[0] != 0)
{
@@ -481,13 +634,6 @@ nxt_app_setup(nxt_task_t *task, nxt_process_t *process)
}
}
- if (nxt_slow_path(nxt_app_set_environment(app_conf->environment)
- != NXT_OK))
- {
- nxt_alert(task, "failed to set environment");
- return NXT_ERROR;
- }
-
init = nxt_process_init(process);
init->start = nxt_app->start;
@@ -555,10 +701,51 @@ nxt_app_set_environment(nxt_conf_value_t *environment)
+/*
+ * Applies every isolation setting that this build supports to the process,
+ * in a fixed order: namespaces, credential maps, rootfs, new_privs.
+ * Each step is compiled in only when its NXT_HAVE_* feature macro is set.
+ *
+ * Returns NXT_OK when all enabled steps succeed, NXT_ERROR as soon as one
+ * of them fails.
+ */
static nxt_int_t
-nxt_app_isolation(nxt_task_t *task, nxt_conf_value_t *isolation,
+nxt_app_set_isolation(nxt_task_t *task, nxt_conf_value_t *isolation,
nxt_process_t *process)
{
#if (NXT_HAVE_CLONE)
+ if (nxt_slow_path(nxt_app_set_isolation_namespaces(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+#if (NXT_HAVE_CLONE_NEWUSER)
+ if (nxt_slow_path(nxt_app_set_isolation_creds(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ if (nxt_slow_path(nxt_app_set_isolation_rootfs(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+ if (nxt_slow_path(nxt_app_set_isolation_new_privs(task, isolation, process)
+ != NXT_OK))
+ {
+ return NXT_ERROR;
+ }
+#endif
+
+ return NXT_OK;
+}
+
+
+#if (NXT_HAVE_CLONE)
+
+static nxt_int_t
+nxt_app_set_isolation_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
+ nxt_process_t *process)
+{
nxt_int_t ret;
nxt_conf_value_t *obj;
@@ -571,23 +758,82 @@ nxt_app_isolation(nxt_task_t *task, nxt_conf_value_t *isolation,
return NXT_ERROR;
}
}
+
+ return NXT_OK;
+}
+
#endif
-#if (NXT_HAVE_CLONE_NEWUSER)
- ret = nxt_app_isolation_creds(task, isolation, process);
- if (nxt_slow_path(ret != NXT_OK)) {
- return NXT_ERROR;
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+
+/*
+ * Reads the optional "rootfs" member of the isolation object and stores a
+ * NUL-terminated copy, with trailing slashes removed, in
+ * process->isolation.rootfs (allocated from process->mem_pool).
+ *
+ * Returns NXT_OK when the member is absent or valid.  Returns NXT_ERROR when
+ * the value is not an absolute path, when it names "/" itself (including
+ * spellings such as "//"), or when the copy cannot be allocated.
+ */
+static nxt_int_t
+nxt_app_set_isolation_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
+    nxt_process_t *process)
+{
+    size_t            len;
+    nxt_str_t         str;
+    nxt_conf_value_t  *obj;
+
+    static nxt_str_t  rootfs_name = nxt_string("rootfs");
+
+    obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
+    if (obj == NULL) {
+        return NXT_OK;
+    }
+
+    nxt_conf_get_string(obj, &str);
+
+    len = str.length;
+
+    /*
+     * Strip every trailing slash before validating.  Stripping a single
+     * slash after the check would let "//" through and reduce it to "/",
+     * which is exactly the value that must be rejected.
+     */
+    while (len > 1 && str.start[len - 1] == '/') {
+        len--;
+    }
+
+    if (nxt_slow_path(len <= 1 || str.start[0] != '/')) {
+        /* str is left untouched so the message shows what the user wrote. */
+        nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
+                "than \"/\" but given \"%V\"", &str);
+
+        return NXT_ERROR;
+    }
+
+    process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, len + 1);
+
+    if (nxt_slow_path(process->isolation.rootfs == NULL)) {
+        return NXT_ERROR;
+    }
+
+    nxt_memcpy(process->isolation.rootfs, str.start, len);
+
+    process->isolation.rootfs[len] = '\0';
+
+    return NXT_OK;
+}
+
#endif
+
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+
+/*
+ * Copies the optional boolean "new_privs" member of the isolation object
+ * into process->isolation.new_privs.  When the member is absent the field
+ * is left as it was.  Always returns NXT_OK.
+ */
+static nxt_int_t
+nxt_app_set_isolation_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
+ nxt_process_t *process)
+{
+ nxt_conf_value_t *obj;
+
+ static nxt_str_t new_privs_name = nxt_string("new_privs");
+
+ obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
+ if (obj != NULL) {
+ process->isolation.new_privs = nxt_conf_get_boolean(obj);
+ }
+
return NXT_OK;
}
+#endif
+
#if (NXT_HAVE_CLONE_NEWUSER)
static nxt_int_t
-nxt_app_isolation_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
+nxt_app_set_isolation_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
nxt_process_t *process)
{
nxt_int_t ret;
@@ -753,6 +999,165 @@ nxt_app_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
#endif
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+
+/*
+ * Builds the per-process mount list from the language module's mounts.
+ *
+ * The array is duplicated into the process memory pool and the destination
+ * of every entry is replaced by "<rootfs><dst>".  The remaining fields keep
+ * pointing at the strings owned by lang_mounts.  On success the new array is
+ * published as process->isolation.mounts.
+ *
+ * Returns NXT_OK, or NXT_ERROR if an allocation fails.
+ */
+static nxt_int_t
+nxt_app_prepare_lang_mounts(nxt_task_t *task, nxt_process_t *process,
+    nxt_array_t *lang_mounts)
+{
+    u_char          *path, *tail;
+    size_t          idx, total, dst_len, prefix_len;
+    nxt_mp_t        *pool;
+    nxt_array_t     *copy;
+    const u_char    *prefix;
+    nxt_fs_mount_t  *out, *in;
+
+    pool = process->mem_pool;
+    prefix = process->isolation.rootfs;
+    prefix_len = nxt_strlen(prefix);
+
+    /* The copy has to live in the process pool. */
+    copy = nxt_array_copy(pool, NULL, lang_mounts);
+    if (copy == NULL) {
+        return NXT_ERROR;
+    }
+
+    total = copy->nelts;
+    out = copy->elts;
+    in = lang_mounts->elts;
+
+    for (idx = 0; idx < total; idx++) {
+        dst_len = nxt_strlen(in[idx].dst);
+
+        path = nxt_mp_alloc(pool, prefix_len + dst_len + 1);
+        out[idx].dst = path;
+
+        if (path == NULL) {
+            return NXT_ERROR;
+        }
+
+        tail = nxt_cpymem(path, prefix, prefix_len);
+        tail = nxt_cpymem(tail, in[idx].dst, dst_len);
+        *tail = '\0';
+    }
+
+    process->isolation.mounts = copy;
+
+    return NXT_OK;
+}
+
+
+
+/*
+ * Creates the mount points listed in process->isolation.mounts and mounts
+ * each of them.
+ *
+ * When the process is cloned with both a new PID and a new mount namespace,
+ * proc is mounted on "/proc" first; a failure there is not fatal.
+ * "bind" entries whose source path does not exist are skipped with a
+ * warning.
+ *
+ * Returns NXT_OK on success.  If creating a directory or mounting an entry
+ * fails, the entries processed before the failing one are unmounted again
+ * and NXT_ERROR is returned.
+ */
+static nxt_int_t
+nxt_app_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
+{
+    size_t          i, n;
+    nxt_int_t       ret, hasproc;
+    struct stat     st;
+    nxt_array_t     *mounts;
+    const u_char    *dst;
+    nxt_fs_mount_t  *mnt;
+
+    hasproc = 0;
+
+#if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS)
+    nxt_fs_mount_t  mount;
+
+    if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID)
+        && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS))
+    {
+        /*
+         * This mount point will automatically be gone when the namespace is
+         * destroyed.
+         */
+
+        mount.fstype = (u_char *) "proc";
+        mount.src = (u_char *) "proc";
+        mount.dst = (u_char *) "/proc";
+        mount.data = (u_char *) "";
+        mount.flags = 0;
+
+        ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO);
+        if (nxt_fast_path(ret == NXT_OK)) {
+            ret = nxt_fs_mount(task, &mount);
+            if (nxt_fast_path(ret == NXT_OK)) {
+                hasproc = 1;
+            }
+
+        } else {
+            nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno);
+        }
+    }
+#endif
+
+    mounts = process->isolation.mounts;
+
+    n = mounts->nelts;
+    mnt = mounts->elts;
+
+    for (i = 0; i < n; i++) {
+        dst = mnt[i].dst;
+
+        if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0
+                          && stat((const char *) mnt[i].src, &st) != 0))
+        {
+            nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
+            continue;
+        }
+
+        /*
+         * proc has already been mounted above; do not mount it twice.
+         * NOTE(review): destinations built by nxt_app_prepare_lang_mounts()
+         * carry the rootfs prefix, so this comparison with "/proc" may never
+         * match for them - confirm the intended path.
+         */
+        if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0
+            && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0)
+        {
+            continue;
+        }
+
+        ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
+            goto undo;
+        }
+
+        ret = nxt_fs_mount(task, &mnt[i]);
+        if (nxt_slow_path(ret != NXT_OK)) {
+            goto undo;
+        }
+    }
+
+    return NXT_OK;
+
+undo:
+
+    /*
+     * Entry i is the one that failed, so nothing was mounted for it.  Only
+     * the entries before it can have been mounted; unmounting entry i as
+     * well would at best log a bogus warning and at worst detach an
+     * unrelated mount that already existed at that path.
+     */
+    n = i;
+
+    for (i = 0; i < n; i++) {
+        nxt_fs_unmount(mnt[i].dst);
+    }
+
+    return NXT_ERROR;
+}
+
+#endif
+
+
+/*
+ * Copies the NUL-terminated string src, including its terminator, into dst.
+ * When dst is NULL a buffer of the required size is taken from mp first.
+ * The caller must make sure a non-NULL dst is large enough.
+ *
+ * Returns the destination buffer, or NULL if the allocation fails.
+ */
+static u_char *
+nxt_cstr_dup(nxt_mp_t *mp, u_char *dst, u_char *src)
+{
+    size_t  n;
+
+    n = nxt_strlen(src);
+
+    if (dst == NULL) {
+        dst = nxt_mp_alloc(mp, n + 1);
+
+        if (nxt_slow_path(dst == NULL)) {
+            return NULL;
+        }
+    }
+
+    nxt_memcpy(dst, src, n);
+    dst[n] = '\0';
+
+    return dst;
+}
+
nxt_app_lang_module_t *
nxt_app_lang_module(nxt_runtime_t *rt, nxt_str_t *name)
diff --git a/src/nxt_application.h b/src/nxt_application.h
index b4231e3b..3144dc3f 100644
--- a/src/nxt_application.h
+++ b/src/nxt_application.h
@@ -36,6 +36,7 @@ typedef struct {
u_char *version;
char *file;
nxt_app_module_t *module;
+ nxt_array_t *mounts; /* of nxt_fs_mount_t */
} nxt_app_lang_module_t;
@@ -110,6 +111,9 @@ struct nxt_app_module_s {
nxt_str_t type;
const char *version;
+ const nxt_fs_mount_t *mounts;
+ nxt_uint_t nmounts;
+
nxt_application_setup_t setup;
nxt_process_start_t start;
};
diff --git a/src/nxt_array.c b/src/nxt_array.c
index 82019f92..6fe9ad6a 100644
--- a/src/nxt_array.c
+++ b/src/nxt_array.c
@@ -109,3 +109,42 @@ nxt_array_remove(nxt_array_t *array, void *elt)
array->nelts--;
}
+
+
+/*
+ * Copies every element of src into dst and sets dst->nelts to src->nelts.
+ *
+ * When dst is NULL a new array with room for src->nelts elements is created
+ * from mp.  Otherwise dst must have the same element size as src; it is
+ * grown with nxt_array_add() if its allocation is too small.  Elements are
+ * copied bytewise.
+ *
+ * Returns dst (or the newly created array), or NULL if an allocation fails.
+ */
+nxt_array_t *
+nxt_array_copy(nxt_mp_t *mp, nxt_array_t *dst, nxt_array_t *src)
+{
+    void      *data;
+    uint32_t  i, size;
+
+    size = src->size;
+
+    if (dst == NULL) {
+        dst = nxt_array_create(mp, src->nelts, size);
+        if (nxt_slow_path(dst == NULL)) {
+            return NULL;
+        }
+    }
+
+    nxt_assert(size == dst->size);
+
+    if (dst->nalloc >= src->nelts) {
+        nxt_memcpy(dst->elts, src->elts, src->nelts * size);
+
+    } else {
+        /* Overwrite what is already there, then append the remainder. */
+        nxt_memcpy(dst->elts, src->elts, dst->nelts * size);
+
+        for (i = dst->nelts; i < src->nelts; i++) {
+            data = nxt_array_add(dst);
+            if (nxt_slow_path(data == NULL)) {
+                return NULL;
+            }
+
+            /*
+             * Step through the source by bytes: arithmetic on a "void *"
+             * is a compiler extension, not standard C.
+             */
+            nxt_memcpy(data, (char *) src->elts + (i * size), size);
+        }
+    }
+
+    dst->nelts = src->nelts;
+
+    return dst;
+}
diff --git a/src/nxt_array.h b/src/nxt_array.h
index 8d2b14f1..5762ec27 100644
--- a/src/nxt_array.h
+++ b/src/nxt_array.h
@@ -24,7 +24,8 @@ NXT_EXPORT void nxt_array_destroy(nxt_array_t *array);
NXT_EXPORT void *nxt_array_add(nxt_array_t *array);
NXT_EXPORT void *nxt_array_zero_add(nxt_array_t *array);
NXT_EXPORT void nxt_array_remove(nxt_array_t *array, void *elt);
-
+NXT_EXPORT nxt_array_t *nxt_array_copy(nxt_mp_t *mp, nxt_array_t *dst,
+ nxt_array_t *src);
#define \
nxt_array_last(array) \
diff --git a/src/nxt_capability.c b/src/nxt_capability.c
index dfa7a834..24fd55d0 100644
--- a/src/nxt_capability.c
+++ b/src/nxt_capability.c
@@ -39,6 +39,7 @@ nxt_capability_set(nxt_task_t *task, nxt_capabilities_t *cap)
if (geteuid() == 0) {
cap->setid = 1;
+ cap->chroot = 1;
return NXT_OK;
}
@@ -91,6 +92,10 @@ nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap)
return NXT_ERROR;
}
+ if ((val->effective & (1 << CAP_SYS_CHROOT)) != 0) {
+ cap->chroot = 1;
+ }
+
if ((val->effective & (1 << CAP_SETUID)) == 0) {
return NXT_OK;
}
diff --git a/src/nxt_capability.h b/src/nxt_capability.h
index 60bbd5f8..1575d409 100644
--- a/src/nxt_capability.h
+++ b/src/nxt_capability.h
@@ -7,7 +7,8 @@
#define _NXT_CAPABILITY_INCLUDED_
typedef struct {
- uint8_t setid; /* 1 bit */
+ uint8_t setid; /* 1 bit */
+ uint8_t chroot; /* 1 bit */
} nxt_capabilities_t;
diff --git a/src/nxt_clone.h b/src/nxt_clone.h
index c2066ce6..e89fd82d 100644
--- a/src/nxt_clone.h
+++ b/src/nxt_clone.h
@@ -42,6 +42,9 @@ pid_t nxt_clone(nxt_int_t flags);
#if (NXT_HAVE_CLONE_NEWUSER)
+/* True when the clone flags request a new mount namespace (CLONE_NEWNS). */
+#define NXT_CLONE_MNT(flags)                                                  \
+    (((flags) & CLONE_NEWNS) == CLONE_NEWNS)
+
NXT_EXPORT nxt_int_t nxt_clone_credential_map(nxt_task_t *task, pid_t pid,
nxt_credential_t *creds, nxt_clone_t *clone);
NXT_EXPORT nxt_int_t nxt_clone_vldt_credential_uidmap(nxt_task_t *task,
diff --git a/src/nxt_conf_validation.c b/src/nxt_conf_validation.c
index a7a8d139..c4f78608 100644
--- a/src/nxt_conf_validation.c
+++ b/src/nxt_conf_validation.c
@@ -575,6 +575,24 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_app_isolation_members[] = {
#endif
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+
+ { nxt_string("rootfs"),
+ NXT_CONF_VLDT_STRING,
+ NULL,
+ NULL },
+
+#endif
+
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+
+ { nxt_string("new_privs"),
+ NXT_CONF_VLDT_BOOLEAN,
+ NULL,
+ NULL },
+
+#endif
+
NXT_CONF_VLDT_END
};
diff --git a/src/nxt_external.c b/src/nxt_external.c
index 58523525..6370a9c4 100644
--- a/src/nxt_external.c
+++ b/src/nxt_external.c
@@ -18,6 +18,8 @@ nxt_app_module_t nxt_external_module = {
nxt_string("external"),
"*",
NULL,
+ 0,
+ NULL,
nxt_external_start,
};
diff --git a/src/nxt_fs.c b/src/nxt_fs.c
new file mode 100644
index 00000000..fe271802
--- /dev/null
+++ b/src/nxt_fs.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) NGINX, Inc.
+ */
+
+#include <nxt_main.h>
+
+#if (NXT_HAVE_FREEBSD_NMOUNT)
+#include <sys/param.h>
+#include <sys/uio.h>
+#endif
+
+
+static nxt_int_t nxt_fs_mkdir(const u_char *dir, mode_t mode);
+
+
+#if (NXT_HAVE_LINUX_MOUNT)
+
+/*
+ * Linux implementation: hands the fields of mnt to mount(2) unchanged.
+ *
+ * Returns NXT_OK on success; on failure raises an alert that includes the
+ * arguments and the error, and returns NXT_ERROR.
+ */
+nxt_int_t
+nxt_fs_mount(nxt_task_t *task, nxt_fs_mount_t *mnt)
+{
+    const char  *source, *target, *type;
+
+    source = (const char *) mnt->src;
+    target = (const char *) mnt->dst;
+    type = (const char *) mnt->fstype;
+
+    if (nxt_fast_path(mount(source, target, type, mnt->flags, mnt->data)
+                      >= 0))
+    {
+        return NXT_OK;
+    }
+
+    nxt_alert(task, "mount(\"%s\", \"%s\", \"%s\", %d, \"%s\") %E",
+              mnt->src, mnt->dst, mnt->fstype, mnt->flags, mnt->data,
+              nxt_errno);
+
+    return NXT_ERROR;
+}
+
+
+#elif (NXT_HAVE_FREEBSD_NMOUNT)
+
+/*
+ * FreeBSD implementation built on nmount(2).
+ *
+ * Only two mount types are handled: "bind" is mapped to nullfs and "proc"
+ * to procfs; anything else is rejected.  mnt->flags and mnt->data are not
+ * used here.
+ *
+ * Returns NXT_OK on success; on failure raises an alert and returns
+ * NXT_ERROR.
+ */
+nxt_int_t
+nxt_fs_mount(nxt_task_t *task, nxt_fs_mount_t *mnt)
+{
+    const char    *fstype;
+    uint8_t       is_bind, is_proc;
+    struct iovec  iov[8];
+    char          errmsg[256];
+
+    is_bind = nxt_strncmp(mnt->fstype, "bind", 4) == 0;
+    is_proc = nxt_strncmp(mnt->fstype, "proc", 4) == 0;
+
+    if (nxt_slow_path(!is_bind && !is_proc)) {
+        nxt_alert(task, "mount type \"%s\" not implemented.", mnt->fstype);
+        return NXT_ERROR;
+    }
+
+    if (is_bind) {
+        fstype = "nullfs";
+
+    } else {
+        fstype = "procfs";
+    }
+
+    /*
+     * The buffer is printed below even if the call never writes to it, so
+     * it must hold a valid (empty) string up front.
+     */
+    errmsg[0] = '\0';
+
+    /* Name/value pairs; every length includes the terminating NUL. */
+    iov[0].iov_base = (void *) "fstype";
+    iov[0].iov_len = 7;
+    iov[1].iov_base = (void *) fstype;
+    iov[1].iov_len = strlen(fstype) + 1;
+    iov[2].iov_base = (void *) "fspath";
+    iov[2].iov_len = 7;
+    iov[3].iov_base = (void *) mnt->dst;
+    iov[3].iov_len = nxt_strlen(mnt->dst) + 1;
+    iov[4].iov_base = (void *) "target";
+    iov[4].iov_len = 7;
+    iov[5].iov_base = (void *) mnt->src;
+    iov[5].iov_len = nxt_strlen(mnt->src) + 1;
+    iov[6].iov_base = (void *) "errmsg";
+    iov[6].iov_len = 7;
+    iov[7].iov_base = (void *) errmsg;
+    iov[7].iov_len = sizeof(errmsg);
+
+    if (nxt_slow_path(nmount(iov, 8, 0) < 0)) {
+        /* The format takes two arguments: the vector (%p) and the text. */
+        nxt_alert(task, "nmount(%p, 8, 0) %s", iov, errmsg);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+#endif
+
+
+#if (NXT_HAVE_LINUX_UMOUNT2)
+
+/*
+ * Detaches the filesystem mounted at path using umount2(MNT_DETACH).
+ * A failure is only logged as a warning; nothing is reported to the caller.
+ */
+void
+nxt_fs_unmount(const u_char *path)
+{
+    int  rc;
+
+    rc = umount2((const char *) path, MNT_DETACH);
+
+    if (nxt_fast_path(rc >= 0)) {
+        return;
+    }
+
+    nxt_thread_log_error(NXT_LOG_WARN, "umount2(%s, MNT_DETACH) %E",
+                         path, nxt_errno);
+}
+
+#elif (NXT_HAVE_UNMOUNT)
+
+/*
+ * Unmounts the filesystem mounted at path using unmount(MNT_FORCE).
+ * A failure is only logged as a warning; nothing is reported to the caller.
+ */
+void
+nxt_fs_unmount(const u_char *path)
+{
+    int  rc;
+
+    rc = unmount((const char *) path, MNT_FORCE);
+
+    if (nxt_fast_path(rc >= 0)) {
+        return;
+    }
+
+    nxt_thread_log_error(NXT_LOG_WARN, "unmount(%s) %E", path, nxt_errno);
+}
+
+#endif
+
+
+/*
+ * Creates the directory dir together with all missing parent directories,
+ * each with the given mode.  Components that already exist are not treated
+ * as an error.
+ *
+ * dir is expected to be an absolute path longer than "/" and shorter than
+ * PATH_MAX.
+ *
+ * Returns NXT_OK on success.  Returns NXT_ERROR if a mkdir() call fails for
+ * a reason other than EEXIST, or (with errno set to ENAMETOOLONG) if dir
+ * does not fit into PATH_MAX.
+ */
+nxt_int_t
+nxt_fs_mkdir_all(const u_char *dir, mode_t mode)
+{
+    char    *start, *end, *dst;
+    size_t  dirlen;
+    char    path[PATH_MAX];
+
+    dirlen = nxt_strlen(dir);
+
+    nxt_assert(dirlen < PATH_MAX && dirlen > 1 && dir[0] == '/');
+
+    /*
+     * path[] receives up to dirlen bytes plus the terminator.  The bound is
+     * checked at run time too, because the assertion may be compiled out of
+     * non-debug builds (TODO confirm) and dir is derived from configuration.
+     */
+    if (nxt_slow_path(dirlen >= PATH_MAX)) {
+        errno = ENAMETOOLONG;
+        return NXT_ERROR;
+    }
+
+    dst = path;
+    start = end = (char *) dir;
+
+    /* Extend path by one component per iteration and create it. */
+    while (*start != '\0') {
+        if (*start == '/') {
+            *dst++ = *start++;
+        }
+
+        end = strchr(start, '/');
+        if (end == NULL) {
+            end = ((char *) dir + dirlen);
+        }
+
+        dst = nxt_cpymem(dst, start, end - start);
+        *dst = '\0';
+
+        if (nxt_slow_path(nxt_fs_mkdir((u_char *) path, mode) != NXT_OK
+                          && nxt_errno != EEXIST))
+        {
+            return NXT_ERROR;
+        }
+
+        start = end;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Thin wrapper around mkdir(2) that reports the result as NXT_OK or
+ * NXT_ERROR.  errno is left as set by mkdir().
+ */
+static nxt_int_t
+nxt_fs_mkdir(const u_char *dir, mode_t mode)
+{
+    int  rc;
+
+    rc = mkdir((const char *) dir, mode);
+
+    if (nxt_slow_path(rc != 0)) {
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
diff --git a/src/nxt_fs.h b/src/nxt_fs.h
new file mode 100644
index 00000000..85c78b27
--- /dev/null
+++ b/src/nxt_fs.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) NGINX, Inc.
+ */
+
+#ifndef _NXT_FS_H_INCLUDED_
+#define _NXT_FS_H_INCLUDED_
+
+
+/*
+ * Aliases for the MS_BIND and MS_REC mount flags.  Each one expands to 0
+ * when the system headers do not define the corresponding constant.
+ */
+#ifdef MS_BIND
+#define NXT_MS_BIND  MS_BIND
+#else
+#define NXT_MS_BIND  0
+#endif
+
+#ifdef MS_REC
+#define NXT_MS_REC  MS_REC
+#else
+#define NXT_MS_REC  0
+#endif
+
+
+/* Description of one mount, as consumed by nxt_fs_mount(). */
+typedef struct {
+ u_char *src; /* mount source */
+ u_char *dst; /* mount point */
+ u_char *fstype; /* filesystem type, e.g. "bind" or "proc" */
+ nxt_int_t flags; /* mount flags */
+ u_char *data; /* filesystem specific data; may be NULL */
+} nxt_fs_mount_t;
+
+
+nxt_int_t nxt_fs_mkdir_all(const u_char *dir, mode_t mode);
+nxt_int_t nxt_fs_mount(nxt_task_t *task, nxt_fs_mount_t *mnt);
+void nxt_fs_unmount(const u_char *path);
+
+
+#endif /* _NXT_FS_H_INCLUDED_ */
diff --git a/src/nxt_java.c b/src/nxt_java.c
index c4145c1d..c7471509 100644
--- a/src/nxt_java.c
+++ b/src/nxt_java.c
@@ -26,6 +26,7 @@
#include "java/nxt_jni_URLClassLoader.h"
#include "nxt_jars.h"
+#include "nxt_java_mounts.h"
static nxt_int_t nxt_java_setup(nxt_task_t *task, nxt_process_t *process,
nxt_common_app_conf_t *conf);
@@ -50,6 +51,8 @@ NXT_EXPORT nxt_app_module_t nxt_app_module = {
compat,
nxt_string("java"),
NXT_STRING(NXT_JAVA_VERSION),
+ nxt_java_mounts,
+ nxt_nitems(nxt_java_mounts),
nxt_java_setup,
nxt_java_start,
};
@@ -64,20 +67,66 @@ static nxt_int_t
nxt_java_setup(nxt_task_t *task, nxt_process_t *process,
nxt_common_app_conf_t *conf)
{
+ char *path, *relpath, *p, *rootfs;
+ size_t jars_dir_len, rootfs_len;
const char *unit_jars;
+ rootfs = (char *) process->isolation.rootfs;
+ rootfs_len = 0;
+
unit_jars = conf->u.java.unit_jars;
if (unit_jars == NULL) {
- unit_jars = NXT_JARS;
+ if (rootfs != NULL) {
+ unit_jars = "/";
+ } else {
+ unit_jars = NXT_JARS;
+ }
}
- nxt_java_modules = realpath(unit_jars, NULL);
- if (nxt_java_modules == NULL) {
- nxt_alert(task, "realpath(%s) failed: %E", unit_jars, nxt_errno);
+ relpath = strdup(unit_jars);
+ if (nxt_slow_path(relpath == NULL)) {
return NXT_ERROR;
}
+ if (rootfs != NULL) {
+ jars_dir_len = strlen(unit_jars);
+ rootfs_len = strlen(rootfs);
+
+ path = nxt_malloc(jars_dir_len + rootfs_len + 1);
+ if (nxt_slow_path(path == NULL)) {
+ free(relpath);
+ return NXT_ERROR;
+ }
+
+ p = nxt_cpymem(path, process->isolation.rootfs, rootfs_len);
+ p = nxt_cpymem(p, relpath, jars_dir_len);
+ *p = '\0';
+
+ free(relpath);
+
+ } else {
+ path = relpath;
+ }
+
+ nxt_java_modules = realpath(path, NULL);
+ if (nxt_java_modules == NULL) {
+ nxt_alert(task, "realpath(\"%s\") failed %E", path, nxt_errno);
+ goto free;
+ }
+
+ if (rootfs != NULL && strlen(path) > rootfs_len) {
+ nxt_java_modules = path + rootfs_len;
+ }
+
+ nxt_debug(task, "JAVA MODULES: %s", nxt_java_modules);
+
return NXT_OK;
+
+free:
+
+ nxt_free(path);
+
+ return NXT_ERROR;
}
@@ -85,6 +134,7 @@ static char **
nxt_java_module_jars(const char *jars[], int jar_count)
{
char **res, *jurl;
+ uint8_t pathsep;
nxt_int_t modules_len, jlen, i;
const char **jar;
@@ -95,9 +145,13 @@ nxt_java_module_jars(const char *jars[], int jar_count)
modules_len = nxt_strlen(nxt_java_modules);
+ pathsep = nxt_java_modules[modules_len - 1] == '/';
+
for (i = 0, jar = jars; *jar != NULL; jar++) {
- jlen = nxt_length("file:") + modules_len + nxt_length("/")
- + nxt_strlen(*jar) + 1;
+ jlen = nxt_length("file:") + modules_len
+ + (!pathsep ? nxt_length("/") : 0)
+ + nxt_strlen(*jar) + 1;
+
jurl = nxt_malloc(jlen);
if (jurl == NULL) {
return NULL;
@@ -107,7 +161,11 @@ nxt_java_module_jars(const char *jars[], int jar_count)
jurl = nxt_cpymem(jurl, "file:", nxt_length("file:"));
jurl = nxt_cpymem(jurl, nxt_java_modules, modules_len);
- *jurl++ = '/';
+
+ if (!pathsep) {
+ *jurl++ = '/';
+ }
+
jurl = nxt_cpymem(jurl, *jar, nxt_strlen(*jar));
*jurl++ = '\0';
}
diff --git a/src/nxt_main.h b/src/nxt_main.h
index b310c4fa..5914fbd1 100644
--- a/src/nxt_main.h
+++ b/src/nxt_main.h
@@ -59,6 +59,7 @@ typedef uint16_t nxt_port_id_t;
#include <nxt_process_type.h>
#include <nxt_capability.h>
#include <nxt_credential.h>
+#include <nxt_fs.h>
#include <nxt_process.h>
#include <nxt_utf8.h>
#include <nxt_file_name.h>
diff --git a/src/nxt_main_process.c b/src/nxt_main_process.c
index 0dff050b..a16e44d3 100644
--- a/src/nxt_main_process.c
+++ b/src/nxt_main_process.c
@@ -14,6 +14,8 @@
#include <nxt_cert.h>
#endif
+#include <sys/mount.h>
+
typedef struct {
nxt_socket_t socket;
@@ -869,6 +871,12 @@ nxt_main_cleanup_process(nxt_task_t *task, nxt_pid_t pid)
return;
}
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ if (process->isolation.rootfs != NULL && process->isolation.mounts) {
+ (void) nxt_process_unmount_all(task, process);
+ }
+#endif
+
name = process->name;
stream = process->stream;
init = *((nxt_process_init_t *) nxt_process_init(process));
@@ -1132,19 +1140,50 @@ static nxt_conf_map_t nxt_app_lang_module_map[] = {
};
+/*
+ * Maps the members of one mount object from the discovery message onto the
+ * fields of nxt_fs_mount_t.
+ *
+ * NOTE(review): "flags" is declared as nxt_int_t in nxt_fs_mount_t; confirm
+ * that NXT_CONF_MAP_INT stores a value of that width.
+ */
+static nxt_conf_map_t nxt_app_lang_mounts_map[] = {
+ {
+ nxt_string("src"),
+ NXT_CONF_MAP_CSTRZ,
+ offsetof(nxt_fs_mount_t, src),
+ },
+ {
+ nxt_string("dst"),
+ NXT_CONF_MAP_CSTRZ,
+ offsetof(nxt_fs_mount_t, dst),
+ },
+ {
+ nxt_string("fstype"),
+ NXT_CONF_MAP_CSTRZ,
+ offsetof(nxt_fs_mount_t, fstype),
+ },
+ {
+ nxt_string("flags"),
+ NXT_CONF_MAP_INT,
+ offsetof(nxt_fs_mount_t, flags),
+ },
+ {
+ nxt_string("data"),
+ NXT_CONF_MAP_CSTRZ,
+ offsetof(nxt_fs_mount_t, data),
+ },
+};
+
+
static void
nxt_main_port_modules_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
{
- uint32_t index;
+ uint32_t index, jindex, nmounts;
nxt_mp_t *mp;
nxt_int_t ret;
nxt_buf_t *b;
nxt_port_t *port;
nxt_runtime_t *rt;
- nxt_conf_value_t *conf, *root, *value;
+ nxt_fs_mount_t *mnt;
+ nxt_conf_value_t *conf, *root, *value, *mounts;
nxt_app_lang_module_t *lang;
static nxt_str_t root_path = nxt_string("/");
+ static nxt_str_t mounts_name = nxt_string("mounts");
rt = task->thread->runtime;
@@ -1201,7 +1240,7 @@ nxt_main_port_modules_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
break;
}
- lang = nxt_array_add(rt->languages);
+ lang = nxt_array_zero_add(rt->languages);
if (lang == NULL) {
goto fail;
}
@@ -1215,8 +1254,48 @@ nxt_main_port_modules_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
goto fail;
}
- nxt_debug(task, "lang %d %s \"%s\"",
- lang->type, lang->version, lang->file);
+ mounts = nxt_conf_get_object_member(value, &mounts_name, NULL);
+ if (mounts == NULL) {
+ nxt_alert(task, "missing mounts from discovery message.");
+ goto fail;
+ }
+
+ if (nxt_conf_type(mounts) != NXT_CONF_ARRAY) {
+ nxt_alert(task, "invalid mounts type from discovery message.");
+ goto fail;
+ }
+
+ nmounts = nxt_conf_array_elements_count(mounts);
+
+ lang->mounts = nxt_array_create(rt->mem_pool, nmounts,
+ sizeof(nxt_fs_mount_t));
+
+ if (lang->mounts == NULL) {
+ goto fail;
+ }
+
+ for (jindex = 0; /* */; jindex++) {
+ value = nxt_conf_get_array_element(mounts, jindex);
+ if (value == NULL) {
+ break;
+ }
+
+ mnt = nxt_array_zero_add(lang->mounts);
+ if (mnt == NULL) {
+ goto fail;
+ }
+
+ ret = nxt_conf_map_object(rt->mem_pool, value,
+ nxt_app_lang_mounts_map,
+ nxt_nitems(nxt_app_lang_mounts_map), mnt);
+
+ if (ret != NXT_OK) {
+ goto fail;
+ }
+ }
+
+ nxt_debug(task, "lang %d %s \"%s\" (%d mounts)",
+ lang->type, lang->version, lang->file, lang->mounts->nelts);
}
qsort(rt->languages->elts, rt->languages->nelts,
diff --git a/src/nxt_php_sapi.c b/src/nxt_php_sapi.c
index ddad5761..7ae8484d 100644
--- a/src/nxt_php_sapi.c
+++ b/src/nxt_php_sapi.c
@@ -242,6 +242,8 @@ NXT_EXPORT nxt_app_module_t nxt_app_module = {
nxt_string("php"),
PHP_VERSION,
NULL,
+ 0,
+ NULL,
nxt_php_start,
};
diff --git a/src/nxt_process.c b/src/nxt_process.c
index e84549b3..c4c44d14 100644
--- a/src/nxt_process.c
+++ b/src/nxt_process.c
@@ -13,6 +13,14 @@
#include <signal.h>
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+#include <sys/prctl.h>
+#endif
+
+#if (NXT_HAVE_PIVOT_ROOT)
+#include <mntent.h>
+#endif
+
static nxt_int_t nxt_process_setup(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_process_child_fixup(nxt_task_t *task,
nxt_process_t *process);
@@ -25,6 +33,19 @@ static void nxt_process_created_ok(nxt_task_t *task, nxt_port_recv_msg_t *msg,
static void nxt_process_created_error(nxt_task_t *task,
nxt_port_recv_msg_t *msg, void *data);
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+static nxt_int_t nxt_process_chroot(nxt_task_t *task, const char *path);
+#endif
+
+#if (NXT_HAVE_PIVOT_ROOT)
+static nxt_int_t nxt_process_pivot_root(nxt_task_t *task, const char *rootfs);
+static nxt_int_t nxt_process_private_mount(nxt_task_t *task,
+ const char *rootfs);
+#endif
+
+#if (NXT_HAVE_PIVOT_ROOT)
+static int nxt_pivot_root(const char *new_root, const char *old_root);
+#endif
/* A cached process pid. */
nxt_pid_t nxt_pid;
@@ -495,10 +516,347 @@ nxt_process_apply_creds(nxt_task_t *task, nxt_process_t *process)
}
}
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+ if (nxt_slow_path(process->isolation.new_privs == 0
+ && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0))
+ {
+ nxt_alert(task, "failed to set no_new_privs %E", nxt_errno);
+ return NXT_ERROR;
+ }
+#endif
+
+ return NXT_OK;
+}
+
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+
+
+#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
+
+
+/*
+ * Switches the process to the root filesystem stored in
+ * process->isolation.rootfs and then changes the working directory to "/".
+ *
+ * nxt_process_pivot_root() is used when NXT_CLONE_MNT() is true for the
+ * process clone flags, nxt_process_chroot() otherwise.
+ *
+ * Returns NXT_OK on success; otherwise the failing helper's status or
+ * NXT_ERROR if chdir("/") fails.
+ */
+nxt_int_t
+nxt_process_change_root(nxt_task_t *task, nxt_process_t *process)
+{
+    char       *root;
+    nxt_int_t  rc;
+
+    root = (char *) process->isolation.rootfs;
+
+    nxt_debug(task, "change root: %s", root);
+
+    rc = NXT_CLONE_MNT(process->isolation.clone.flags)
+             ? nxt_process_pivot_root(task, root)
+             : nxt_process_chroot(task, root);
+
+    if (rc != NXT_OK) {
+        return rc;
+    }
+
+    if (nxt_slow_path(chdir("/") < 0)) {
+        nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+
+#else
+
+
+/*
+ * chroot-only variant: switches the process to the root filesystem stored in
+ * process->isolation.rootfs with nxt_process_chroot() and then changes the
+ * working directory to "/".
+ *
+ * Returns NXT_OK on success and NXT_ERROR if either step fails.
+ */
+nxt_int_t
+nxt_process_change_root(nxt_task_t *task, nxt_process_t *process)
+{
+    char  *root;
+
+    root = (char *) process->isolation.rootfs;
+
+    nxt_debug(task, "change root: %s", root);
+
+    if (nxt_process_chroot(task, root) != NXT_OK) {
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(chdir("/") < 0)) {
+        nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+
+#endif
+
+
+#endif
+
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+
+/*
+ * Changes the root directory of the calling process to "path" with
+ * chroot(2).  The working directory is not touched here; callers change
+ * it themselves afterwards.
+ *
+ * Returns NXT_OK on success; on failure logs an alert with errno and
+ * returns NXT_ERROR.
+ */
+static nxt_int_t
+nxt_process_chroot(nxt_task_t *task, const char *path)
+{
+ if (nxt_slow_path(chroot(path) < 0)) {
+ nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
+ return NXT_ERROR;
+ }
+
return NXT_OK;
}
+
+
+/*
+ * Calls nxt_fs_unmount() on the destination (dst) of every nxt_fs_mount_t
+ * entry stored in process->isolation.mounts, in array order.
+ *
+ * Does nothing beyond the debug log line when no mounts array is attached
+ * to the process.
+ */
+void
+nxt_process_unmount_all(nxt_task_t *task, nxt_process_t *process)
+{
+    size_t          i, n;
+    nxt_array_t     *mounts;
+    nxt_fs_mount_t  *mnt;
+
+    nxt_debug(task, "unmount all (%s)", process->name);
+
+    mounts = process->isolation.mounts;
+
+    /* The mounts pointer is optional; there is nothing to undo without it. */
+    if (mounts == NULL) {
+        return;
+    }
+
+    n = mounts->nelts;
+    mnt = mounts->elts;
+
+    for (i = 0; i < n; i++) {
+        nxt_fs_unmount(mnt[i].dst);
+    }
+}
+
+#endif
+
+
+#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
+
+/*
+ * pivot_root(2) can only be safely used with containers, otherwise it can
+ * umount(2) the global root filesystem and screw up the machine.
+ */
+
+/*
+ * Makes "path" the root filesystem of the calling process using
+ * pivot_root(2).  The steps, in order:
+ *
+ *   1. remount "/" recursively as a slave mount;
+ *   2. run nxt_process_private_mount() for "path";
+ *   3. recursively bind mount "path" onto itself;
+ *   4. chdir() into "path";
+ *   5. pivot_root(".", "."), which stacks the old root on top of the new one;
+ *   6. remount "." (the old root) recursively as a slave mount;
+ *   7. lazily detach the old root with umount2(".", MNT_DETACH).
+ *
+ * Returns NXT_OK on success; on the first failing step logs an alert and
+ * returns NXT_ERROR without undoing the previous steps.
+ */
+static nxt_int_t
+nxt_process_pivot_root(nxt_task_t *task, const char *path)
+{
+    /*
+     * This implementation makes use of a kernel trick that has worked for
+     * ages and is now documented in Linux kernel 5.
+     * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
+     */
+
+    if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
+        nxt_alert(task, "failed to make / a slave mount %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(nxt_process_private_mount(task, path) != NXT_OK)) {
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
+        nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(chdir(path) != 0)) {
+        nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
+        nxt_alert(task, "failed to pivot_root %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    /*
+     * Make oldroot a slave mount to avoid unmounts getting propagated to the
+     * host.
+     */
+    if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
+        /* The message names the operation that actually failed. */
+        nxt_alert(task, "failed to make old root a slave mount %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
+        nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Finds the mount point that holds "rootfs" and, if its entry in
+ * /proc/self/mounts carries the "shared" option, remounts it MS_PRIVATE.
+ *
+ * The mount table is read into memory first.  "rootfs" itself is tried as
+ * a mount point; if it is not one, a copy of the path is shortened one
+ * component at a time (".../a/b" -> ".../a" -> ... -> "/") until a mount
+ * point matches.
+ *
+ * Returns NXT_OK on success (including when no remount is needed) and
+ * NXT_ERROR on allocation failure, when the mount table cannot be read,
+ * when no parent mount is found, or when the remount fails.
+ */
+static nxt_int_t
+nxt_process_private_mount(nxt_task_t *task, const char *rootfs)
+{
+    char           *parent_mnt;
+    FILE           *procfile;
+    void           *p;
+    u_char         **mounts;
+    size_t         len;
+    uint8_t        *shared;
+    nxt_int_t      ret, index, nmounts, capacity;
+    struct mntent  *ent;
+
+    static const char  *mount_path = "/proc/self/mounts";
+
+    ret = NXT_ERROR;
+    shared = NULL;
+    procfile = NULL;
+    parent_mnt = NULL;
+
+    /*
+     * nmounts counts the entries actually stored, so the cleanup code below
+     * never touches an uninitialized slot; capacity is the allocated size.
+     */
+    nmounts = 0;
+    capacity = 256;
+
+    mounts = nxt_malloc(capacity * sizeof(*mounts));
+    if (nxt_slow_path(mounts == NULL)) {
+        goto fail;
+    }
+
+    shared = nxt_malloc(capacity);
+    if (nxt_slow_path(shared == NULL)) {
+        goto fail;
+    }
+
+    procfile = setmntent(mount_path, "r");
+    if (nxt_slow_path(procfile == NULL)) {
+        nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
+
+        goto fail;
+    }
+
+    for ( ;; ) {
+        ent = getmntent(procfile);
+        if (ent == NULL) {
+            break;
+        }
+
+        if (nmounts == capacity) {
+            capacity *= 2;
+
+            /*
+             * Grow through a temporary so the old block is still owned
+             * (and released at "fail") when realloc() fails.
+             */
+            p = nxt_realloc(mounts, capacity * sizeof(*mounts));
+            if (nxt_slow_path(p == NULL)) {
+                goto fail;
+            }
+
+            mounts = p;
+
+            p = nxt_realloc(shared, capacity);
+            if (nxt_slow_path(p == NULL)) {
+                goto fail;
+            }
+
+            shared = p;
+        }
+
+        mounts[nmounts] = (u_char *) strdup(ent->mnt_dir);
+        if (nxt_slow_path(mounts[nmounts] == NULL)) {
+            goto fail;
+        }
+
+        shared[nmounts] = hasmntopt(ent, "shared") != NULL;
+        nmounts++;
+    }
+
+    /* Fast path: rootfs is itself a mount point. */
+
+    for (index = 0; index < nmounts; index++) {
+        if (nxt_strcmp(mounts[index], rootfs) == 0) {
+            parent_mnt = (char *) rootfs;
+            break;
+        }
+    }
+
+    if (parent_mnt == NULL) {
+        len = nxt_strlen(rootfs);
+
+        if (nxt_slow_path(len == 0)) {
+            nxt_alert(task, "parent mount not found");
+            goto fail;
+        }
+
+        parent_mnt = nxt_malloc(len + 1);
+        if (parent_mnt == NULL) {
+            goto fail;
+        }
+
+        nxt_memcpy(parent_mnt, rootfs, len);
+        parent_mnt[len] = '\0';
+
+        /* Drop one trailing slash, but keep a bare "/" intact. */
+        if (len > 1 && parent_mnt[len - 1] == '/') {
+            parent_mnt[len - 1] = '\0';
+            len--;
+        }
+
+        for ( ;; ) {
+            for (index = 0; index < nmounts; index++) {
+                if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
+                    goto found;
+                }
+            }
+
+            if (len == 1 && parent_mnt[0] == '/') {
+                nxt_alert(task, "parent mount not found");
+                goto fail;
+            }
+
+            /* parent dir: the bound is tested before the byte is read */
+            while (len > 0 && parent_mnt[len - 1] != '/') {
+                len--;
+            }
+
+            if (nxt_slow_path(len == 0)) {
+                nxt_alert(task, "parent mount not found");
+                goto fail;
+            }
+
+            if (len == 1) {
+                parent_mnt[len] = '\0';     /* / */
+            } else {
+                parent_mnt[len - 1] = '\0'; /* /<path> */
+            }
+        }
+    }
+
+found:
+
+    if (shared[index]) {
+        if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
+            nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
+                      nxt_errno);
+
+            goto fail;
+        }
+    }
+
+    ret = NXT_OK;
+
+fail:
+
+    if (procfile != NULL) {
+        endmntent(procfile);
+    }
+
+    if (mounts != NULL) {
+        for (index = 0; index < nmounts; index++) {
+            nxt_free(mounts[index]);
+        }
+
+        nxt_free(mounts);
+    }
+
+    if (shared != NULL) {
+        nxt_free(shared);
+    }
+
+    if (parent_mnt != NULL && parent_mnt != rootfs) {
+        nxt_free(parent_mnt);
+    }
+
+    return ret;
+}
+
+
+/*
+ * Thin wrapper that issues the pivot_root system call directly through
+ * syscall() with the given new_root and old_root paths.
+ *
+ * Returns whatever syscall() returns; callers in this file treat any
+ * non-zero value as failure and read errno.
+ */
+static int
+nxt_pivot_root(const char *new_root, const char *old_root)
+{
+ return syscall(__NR_pivot_root, new_root, old_root);
+}
+
+#endif
+
+
static nxt_int_t
nxt_process_send_ready(nxt_task_t *task, nxt_process_t *process)
{
diff --git a/src/nxt_process.h b/src/nxt_process.h
index 45bab25e..d3311722 100644
--- a/src/nxt_process.h
+++ b/src/nxt_process.h
@@ -69,33 +69,42 @@ typedef struct {
nxt_port_mmap_t *elts;
} nxt_port_mmaps_t;
+/*
+ * Per-process isolation settings.
+ *
+ * rootfs is the path handed to chroot()/pivot_root() by
+ * nxt_process_change_root(); mounts is the list walked by
+ * nxt_process_unmount_all().
+ */
+typedef struct {
+ u_char *rootfs;
+ nxt_array_t *mounts; /* of nxt_fs_mount_t */
+
+#if (NXT_HAVE_CLONE)
+ nxt_clone_t clone;
+#endif
+
+#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
+ /* When 0, nxt_process_apply_creds() sets PR_SET_NO_NEW_PRIVS. */
+ uint8_t new_privs; /* 1 bit */
+#endif
+} nxt_process_isolation_t;
+
typedef struct {
- nxt_pid_t pid;
- const char *name;
- nxt_queue_t ports; /* of nxt_port_t */
- nxt_process_state_t state;
- nxt_bool_t registered;
- nxt_int_t use_count;
+ nxt_pid_t pid;
+ const char *name;
+ nxt_queue_t ports; /* of nxt_port_t */
+ nxt_process_state_t state;
+ nxt_bool_t registered;
+ nxt_int_t use_count;
- nxt_port_mmaps_t incoming;
- nxt_port_mmaps_t outgoing;
+ nxt_port_mmaps_t incoming;
+ nxt_port_mmaps_t outgoing;
- nxt_thread_mutex_t cp_mutex;
- nxt_lvlhsh_t connected_ports; /* of nxt_port_t */
+ nxt_thread_mutex_t cp_mutex;
+ nxt_lvlhsh_t connected_ports; /* of nxt_port_t */
- uint32_t stream;
+ uint32_t stream;
- nxt_mp_t *mem_pool;
- nxt_credential_t *user_cred;
+ nxt_mp_t *mem_pool;
+ nxt_credential_t *user_cred;
- nxt_process_data_t data;
+ nxt_process_data_t data;
- union {
-#if (NXT_HAVE_CLONE)
- nxt_clone_t clone;
-#endif
- } isolation;
+ nxt_process_isolation_t isolation;
} nxt_process_t;
@@ -184,6 +193,12 @@ nxt_int_t nxt_process_vldt_isolation_creds(nxt_task_t *task,
nxt_process_t *process);
#endif
+nxt_int_t nxt_process_change_root(nxt_task_t *task, nxt_process_t *process);
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+void nxt_process_unmount_all(nxt_task_t *task, nxt_process_t *process);
+#endif
+
#if (NXT_HAVE_SETPROCTITLE)
#define nxt_process_title(task, fmt, ...) \
diff --git a/src/nxt_python_wsgi.c b/src/nxt_python_wsgi.c
index 089d15c0..b9033a75 100644
--- a/src/nxt_python_wsgi.c
+++ b/src/nxt_python_wsgi.c
@@ -18,6 +18,7 @@
#include <nxt_unit_field.h>
#include <nxt_unit_request.h>
#include <nxt_unit_response.h>
+#include <nxt_python_mounts.h>
/*
* According to "PEP 3333 / A Note On String Types"
@@ -38,11 +39,17 @@
*/
+/*
+ * NXT_PYTHON_MOUNTS(major, minor) expands to the identifier
+ * nxt_python<major><minor>_mounts (presumably declared in
+ * nxt_python_mounts.h -- confirm).
+ *
+ * The extra macro level is required: arguments that are themselves macros
+ * (PY_MAJOR_VERSION, PY_MINOR_VERSION) must be expanded before the ##
+ * operator pastes them, which only happens when they pass through an
+ * intermediate macro call.
+ */
+#define _NXT_PYTHON_MOUNTS(major, minor) \
+ nxt_python ## major ## minor ## _mounts
+
+#define NXT_PYTHON_MOUNTS(major, minor) _NXT_PYTHON_MOUNTS(major, minor)
+
#if PY_MAJOR_VERSION == 3
#define NXT_PYTHON_BYTES_TYPE "bytestring"
#define PyString_FromStringAndSize(str, size) \
PyUnicode_DecodeLatin1((str), (size), "strict")
+
#else
#define NXT_PYTHON_BYTES_TYPE "string"
@@ -116,6 +123,8 @@ NXT_EXPORT nxt_app_module_t nxt_app_module = {
compat,
nxt_string("python"),
PY_VERSION,
+ NXT_PYTHON_MOUNTS(PY_MAJOR_VERSION, PY_MINOR_VERSION),
+ nxt_nitems(NXT_PYTHON_MOUNTS(PY_MAJOR_VERSION, PY_MINOR_VERSION)),
NULL,
nxt_python_start,
};
diff --git a/src/nxt_runtime.c b/src/nxt_runtime.c
index d7e35dec..5aa061dd 100644
--- a/src/nxt_runtime.c
+++ b/src/nxt_runtime.c
@@ -84,6 +84,7 @@ nxt_runtime_create(nxt_task_t *task)
lang->version = (u_char *) "";
lang->file = NULL;
lang->module = &nxt_external_module;
+ lang->mounts = NULL;
listen_sockets = nxt_array_create(mp, 1, sizeof(nxt_listen_socket_t));
if (nxt_slow_path(listen_sockets == NULL)) {
diff --git a/src/nxt_unix.h b/src/nxt_unix.h
index 151dd555..609f7e95 100644
--- a/src/nxt_unix.h
+++ b/src/nxt_unix.h
@@ -238,6 +238,9 @@
#include <sys/random.h> /* getentropy(). */
#endif
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+#include <sys/mount.h>
+#endif
#if (NXT_TEST_BUILD)
#include <nxt_test_build.h>
diff --git a/src/perl/nxt_perl_psgi.c b/src/perl/nxt_perl_psgi.c
index 5e9200dc..14e107e4 100644
--- a/src/perl/nxt_perl_psgi.c
+++ b/src/perl/nxt_perl_psgi.c
@@ -118,6 +118,12 @@ NXT_EXPORT nxt_app_module_t nxt_app_module = {
nxt_perl_psgi_compat,
nxt_string("perl"),
PERL_VERSION_STRING,
+
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ NULL,
+ 0,
+#endif
+
NULL,
nxt_perl_psgi_start,
};
diff --git a/src/ruby/nxt_ruby.c b/src/ruby/nxt_ruby.c
index 40f72f51..489ddcf4 100644
--- a/src/ruby/nxt_ruby.c
+++ b/src/ruby/nxt_ruby.c
@@ -7,6 +7,7 @@
#include <nxt_unit.h>
#include <nxt_unit_request.h>
+#include <nxt_ruby_mounts.h>
#define NXT_RUBY_RACK_API_VERSION_MAJOR 1
@@ -78,6 +79,10 @@ NXT_EXPORT nxt_app_module_t nxt_app_module = {
compat,
nxt_string("ruby"),
ruby_version,
+#if (NXT_HAVE_ISOLATION_ROOTFS)
+ nxt_ruby_mounts,
+ nxt_nitems(nxt_ruby_mounts),
+#endif
NULL,
nxt_ruby_start,
};