summaryrefslogtreecommitdiffhomepage
path: root/src/nxt_process.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/nxt_process.c272
1 files changed, 255 insertions, 17 deletions
diff --git a/src/nxt_process.c b/src/nxt_process.c
index d8836ad2..025efe70 100644
--- a/src/nxt_process.c
+++ b/src/nxt_process.c
@@ -7,7 +7,7 @@
#include <nxt_main.h>
#include <nxt_cgroup.h>
-#if (NXT_HAVE_CLONE)
+#if (NXT_HAVE_LINUX_NS)
#include <nxt_clone.h>
#endif
@@ -18,7 +18,7 @@
#endif
-#if (NXT_HAVE_CLONE) && (NXT_HAVE_CLONE_NEWPID)
+#if (NXT_HAVE_LINUX_NS) && (NXT_HAVE_CLONE_NEWPID)
#define nxt_is_pid_isolated(process) \
nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID)
#else
@@ -27,6 +27,19 @@
#endif
+#if (NXT_HAVE_LINUX_NS)
+static nxt_int_t nxt_process_pipe_timer(nxt_fd_t fd, short event);
+static nxt_int_t nxt_process_check_pid_status(const nxt_fd_t *gc_pipe);
+static nxt_pid_t nxt_process_recv_pid(const nxt_fd_t *pid_pipe,
+ const nxt_fd_t *gc_pipe);
+static void nxt_process_send_pid(const nxt_fd_t *pid_pipe, nxt_pid_t pid);
+static nxt_int_t nxt_process_unshare(nxt_task_t *task, nxt_process_t *process,
+ nxt_fd_t *pid_pipe, nxt_fd_t *gc_pipe, nxt_bool_t use_pidns);
+static nxt_int_t nxt_process_init_pidns(nxt_task_t *task,
+ const nxt_process_t *process, nxt_fd_t *pid_pipe, nxt_fd_t *gc_pipe,
+ nxt_bool_t *use_pidns);
+#endif
+
static nxt_pid_t nxt_process_create(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_process_do_start(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_process_whoami(nxt_task_t *task, nxt_process_t *process);
@@ -256,7 +269,7 @@ nxt_process_child_fixup(nxt_task_t *task, nxt_process_t *process)
nxt_ppid = nxt_pid;
- nxt_pid = nxt_getpid();
+ nxt_pid = getpid();
process->pid = nxt_pid;
process->isolated_pid = nxt_pid;
@@ -311,6 +324,217 @@ nxt_process_child_fixup(nxt_task_t *task, nxt_process_t *process)
}
+#if (NXT_HAVE_LINUX_NS)
+
+static nxt_int_t
+nxt_process_pipe_timer(nxt_fd_t fd, short event)
+{
+ int ret;
+ sigset_t mask;
+ struct pollfd pfd;
+
+ static const struct timespec ts = { .tv_sec = 5 };
+
+ /*
+ * Temporarily block the signals we are handling, (except
+ * for SIGINT & SIGTERM) so that ppoll(2) doesn't get
+ * interrupted. After ppoll(2) returns, our old sigmask
+ * will be back in effect and any pending signals will be
+ * delivered.
+ *
+ * This is because while the kernel ppoll syscall updates
+ * the struct timespec with the time remaining if it got
+ * interrupted with EINTR, the glibc wrapper hides this
+ * from us so we have no way of knowing how long to retry
+ * the ppoll(2) for and if we just retry with the same
+ * timeout we could find ourselves in an infinite loop.
+ */
+ pthread_sigmask(SIG_SETMASK, NULL, &mask);
+ sigdelset(&mask, SIGINT);
+ sigdelset(&mask, SIGTERM);
+
+ pfd.fd = fd;
+ pfd.events = event;
+
+ ret = ppoll(&pfd, 1, &ts, &mask);
+ if (ret <= 0 || (ret == 1 && pfd.revents & POLLERR)) {
+ return NXT_ERROR;
+ }
+
+ return NXT_OK;
+}
+
+
+static nxt_int_t
+nxt_process_check_pid_status(const nxt_fd_t *gc_pipe)
+{
+ int8_t status;
+ ssize_t ret;
+
+ close(gc_pipe[1]);
+
+ ret = nxt_process_pipe_timer(gc_pipe[0], POLLIN);
+ if (ret == NXT_OK) {
+ ret = read(gc_pipe[0], &status, sizeof(int8_t));
+ }
+
+ if (ret <= 0) {
+ status = -1;
+ }
+
+ close(gc_pipe[0]);
+
+ return status;
+}
+
+
+static nxt_pid_t
+nxt_process_recv_pid(const nxt_fd_t *pid_pipe, const nxt_fd_t *gc_pipe)
+{
+ int8_t status;
+ ssize_t ret;
+ nxt_pid_t pid;
+
+ close(pid_pipe[1]);
+ close(gc_pipe[0]);
+
+ status = 0;
+
+ ret = nxt_process_pipe_timer(pid_pipe[0], POLLIN);
+ if (ret == NXT_OK) {
+ ret = read(pid_pipe[0], &pid, sizeof(nxt_pid_t));
+ }
+
+ if (ret <= 0) {
+ status = -1;
+ pid = -1;
+ }
+
+ write(gc_pipe[1], &status, sizeof(int8_t));
+
+ close(pid_pipe[0]);
+ close(gc_pipe[1]);
+
+ return pid;
+}
+
+
+static void
+nxt_process_send_pid(const nxt_fd_t *pid_pipe, nxt_pid_t pid)
+{
+ nxt_int_t ret;
+
+ close(pid_pipe[0]);
+
+ ret = nxt_process_pipe_timer(pid_pipe[1], POLLOUT);
+ if (ret == NXT_OK) {
+ write(pid_pipe[1], &pid, sizeof(nxt_pid_t));
+ }
+
+ close(pid_pipe[1]);
+}
+
+
+static nxt_int_t
+nxt_process_unshare(nxt_task_t *task, nxt_process_t *process,
+ nxt_fd_t *pid_pipe, nxt_fd_t *gc_pipe,
+ nxt_bool_t use_pidns)
+{
+ int ret;
+ nxt_pid_t pid;
+
+ if (process->isolation.clone.flags == 0) {
+ return NXT_OK;
+ }
+
+ ret = unshare(process->isolation.clone.flags);
+ if (nxt_slow_path(ret == -1)) {
+ nxt_alert(task, "unshare() failed for %s %E", process->name,
+ nxt_errno);
+
+ if (use_pidns) {
+ nxt_pipe_close(task, gc_pipe);
+ nxt_pipe_close(task, pid_pipe);
+ }
+
+ return NXT_ERROR;
+ }
+
+ if (!use_pidns) {
+ return NXT_OK;
+ }
+
+ /*
+ * PID namespace requested. Employ a double fork(2) technique
+ * so that the prototype process will be placed into the new
+ * namespace and end up with PID 1 (as before with clone).
+ */
+ pid = fork();
+ if (nxt_slow_path(pid < 0)) {
+ nxt_alert(task, "fork() failed for %s %E", process->name, nxt_errno);
+ nxt_pipe_close(task, gc_pipe);
+ nxt_pipe_close(task, pid_pipe);
+
+ return NXT_ERROR;
+
+ } else if (pid > 0) {
+ nxt_pipe_close(task, gc_pipe);
+ nxt_process_send_pid(pid_pipe, pid);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ nxt_pipe_close(task, pid_pipe);
+ ret = nxt_process_check_pid_status(gc_pipe);
+ if (ret == -1) {
+ return NXT_ERROR;
+ }
+
+ return NXT_OK;
+}
+
+
+static nxt_int_t
+nxt_process_init_pidns(nxt_task_t *task, const nxt_process_t *process,
+ nxt_fd_t *pid_pipe, nxt_fd_t *gc_pipe,
+ nxt_bool_t *use_pidns)
+{
+ int ret;
+
+ *use_pidns = 0;
+
+#if (NXT_HAVE_CLONE_NEWPID)
+ *use_pidns = nxt_is_pid_isolated(process);
+#endif
+
+ if (!*use_pidns) {
+ return NXT_OK;
+ }
+
+ ret = nxt_pipe_create(task, pid_pipe, 0, 0);
+ if (nxt_slow_path(ret == NXT_ERROR)) {
+ return NXT_ERROR;
+ }
+
+ ret = nxt_pipe_create(task, gc_pipe, 0, 0);
+ if (nxt_slow_path(ret == NXT_ERROR)) {
+ return NXT_ERROR;
+ }
+
+#if (NXT_HAVE_PR_SET_CHILD_SUBREAPER)
+ ret = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+ if (nxt_slow_path(ret == -1)) {
+ nxt_alert(task, "prctl(PR_SET_CHILD_SUBREAPER) failed for %s %E",
+ process->name, nxt_errno);
+ }
+#endif
+
+ return NXT_OK;
+}
+
+#endif /* NXT_HAVE_LINUX_NS */
+
+
static nxt_pid_t
nxt_process_create(nxt_task_t *task, nxt_process_t *process)
{
@@ -318,23 +542,32 @@ nxt_process_create(nxt_task_t *task, nxt_process_t *process)
nxt_pid_t pid;
nxt_runtime_t *rt;
-#if (NXT_HAVE_CLONE)
- pid = nxt_clone(SIGCHLD | process->isolation.clone.flags);
- if (nxt_slow_path(pid < 0)) {
- nxt_alert(task, "clone() failed for %s %E", process->name, nxt_errno);
- return pid;
+#if (NXT_HAVE_LINUX_NS)
+ nxt_fd_t pid_pipe[2], gc_pipe[2];
+ nxt_bool_t use_pidns;
+
+ ret = nxt_process_init_pidns(task, process, pid_pipe, gc_pipe, &use_pidns);
+ if (ret == NXT_ERROR) {
+ return -1;
}
-#else
+#endif
+
pid = fork();
if (nxt_slow_path(pid < 0)) {
nxt_alert(task, "fork() failed for %s %E", process->name, nxt_errno);
return pid;
}
-#endif
if (pid == 0) {
/* Child. */
+#if (NXT_HAVE_LINUX_NS)
+ ret = nxt_process_unshare(task, process, pid_pipe, gc_pipe, use_pidns);
+ if (ret == NXT_ERROR) {
+ _exit(EXIT_FAILURE);
+ }
+#endif
+
ret = nxt_process_child_fixup(task, process);
if (nxt_slow_path(ret != NXT_OK)) {
nxt_process_quit(task, 1);
@@ -355,10 +588,15 @@ nxt_process_create(nxt_task_t *task, nxt_process_t *process)
/* Parent. */
-#if (NXT_HAVE_CLONE)
- nxt_debug(task, "clone(%s): %PI", process->name, pid);
-#else
nxt_debug(task, "fork(%s): %PI", process->name, pid);
+
+#if (NXT_HAVE_LINUX_NS)
+ if (use_pidns) {
+ pid = nxt_process_recv_pid(pid_pipe, gc_pipe);
+ if (pid == -1) {
+ return pid;
+ }
+ }
#endif
process->pid = pid;
@@ -781,7 +1019,7 @@ nxt_process_apply_creds(nxt_task_t *task, nxt_process_t *process)
cap_setid = rt->capabilities.setid;
-#if (NXT_HAVE_CLONE && NXT_HAVE_CLONE_NEWUSER)
+#if (NXT_HAVE_LINUX_NS && NXT_HAVE_CLONE_NEWUSER)
if (!cap_setid
&& nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER))
{
@@ -918,10 +1156,10 @@ nxt_process_daemon(nxt_task_t *task)
}
/*
- * Reset file mode creation mask: any access
- * rights can be set on file creation.
+ * Set a sefe umask to give at most 755/644 permissions on
+ * directories/files.
*/
- umask(0);
+ umask(0022);
/* Redirect STDIN and STDOUT to the "/dev/null". */