summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrew Clayton <a.clayton@nginx.com>2022-10-24 17:33:23 +0100
committerAndrew Clayton <a.clayton@nginx.com>2022-12-10 14:00:20 +0000
commit7d177faf3b8a483fd7ef958e884ec5625e058ca0 (patch)
tree231384c7eec073a9ad45a72c7714424d7e0d3bab
parent9466daf9bdafa3e00f521a47f4ce218353bf7f86 (diff)
downloadunit-7d177faf3b8a483fd7ef958e884ec5625e058ca0.tar.gz
unit-7d177faf3b8a483fd7ef958e884ec5625e058ca0.tar.bz2
Isolation: added core cgroup infrastructure.
Firstly, this is not to be confused with CLONE_NEWCGROUP, which unit already supports and which is related to namespaces. To recap, namespaces allow processes to have different views of various parts of the system such as filesystem mounts, networking, hostname, etc., whereas cgroup[0] is a Linux kernel facility for collecting a bunch of processes together to perform some task on the group as a whole, for example to implement resource limits. There are two parts to cgroup: the core part, which organises processes into a hierarchy, and the controllers, which are responsible for enforcing resource limits etc. There are currently two versions of the cgroup sub-system: the original cgroup and a version 2[1], introduced in Linux 3.16 (August 2014) and marked stable in 4.5 (March 2016). This commit supports the cgroup V2 API and implements the ability to place applications into their own cgroup on a per-application basis. You can put them each into their own cgroup or you can group some together. The ability to set resource limits can easily be added in future. The initial use case of this would be to aid in observability of unit applications, which becomes much easier if you can just monitor them on a per-cgroup basis. One thing to note about cgroup is that, unlike namespaces, which are controlled via system calls such as clone(2) and unshare(2), cgroups are set up and controlled through the cgroupfs pseudo-filesystem. cgroup is Linux only, and this support will only be enabled if configure finds the cgroup2 filesystem mount, e.g.: cgroup2 on /sys/fs/cgroup type cgroup2 (rw,nosuid,nodev,noexec,relatime,seclabel,nsdelegate,memory_recursiveprot). The cgroups are removed on shutdown or as required on reconfiguration. This commit just adds the basic infrastructure for using cgroups within unit. Subsequent commits will wire up this support. It supports creating cgroups relative to the main cgroup root and also below the cgroup of the main unit process.
[0]: <https://man7.org/linux/man-pages/man7/cgroups.7.html> [1]: <https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html> Cc: Alejandro Colomar <alx@nginx.com> Signed-off-by: Andrew Clayton <a.clayton@nginx.com>
Diffstat (limited to '')
-rw-r--r--src/nxt_cgroup.c174
-rw-r--r--src/nxt_cgroup.h14
2 files changed, 188 insertions, 0 deletions
diff --git a/src/nxt_cgroup.c b/src/nxt_cgroup.c
new file mode 100644
index 00000000..2c404acc
--- /dev/null
+++ b/src/nxt_cgroup.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) Andrew Clayton
+ * Copyright (C) F5, Inc.
+ */
+
+#include <nxt_main.h>
+
+#include <nxt_cgroup.h>
+
+
+/*
+ * File-local helpers that format a cgroupfs path for 'dir' into the
+ * caller-supplied 'cgpath' buffer (formatted with a NXT_MAX_PATH_LEN bound).
+ *
+ * nxt_mk_cgpath_relative() returns the snprintf() length or -1 on error;
+ * nxt_mk_cgpath() returns NXT_OK or NXT_ERROR.
+ */
+static int nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir,
+ char *cgpath);
+static nxt_int_t nxt_mk_cgpath(nxt_task_t *task, const char *dir,
+ char *cgpath);
+
+
+/*
+ * Adds 'process' to the cgroup named by process->isolation.cgroup.path.
+ *
+ * The cgroup directory is created through nxt_fs_mkdir_all() (mode 0777)
+ * and the pid of 'process' is then written to its "cgroup.procs" file.
+ *
+ * The function does nothing and returns NXT_OK unless all of these hold:
+ *   - the calling runtime's type is NXT_PROCESS_MAIN,
+ *   - the type of 'process' is NXT_PROCESS_PROTOTYPE,
+ *   - a cgroup path is set for 'process'.
+ *
+ * Returns NXT_OK on success, NXT_ERROR otherwise.  nxt_errno is set to
+ * ENAMETOOLONG when the resulting path does not fit in NXT_MAX_PATH_LEN.
+ */
+nxt_int_t
+nxt_cgroup_proc_add(nxt_task_t *task, nxt_process_t *process)
+{
+    int        len, n;
+    char       cgprocs[NXT_MAX_PATH_LEN];
+    FILE       *fp;
+    nxt_int_t  ret;
+
+    if (task->thread->runtime->type != NXT_PROCESS_MAIN
+        || nxt_process_type(process) != NXT_PROCESS_PROTOTYPE
+        || process->isolation.cgroup.path == NULL)
+    {
+        return NXT_OK;
+    }
+
+    ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgprocs);
+    if (nxt_slow_path(ret == NXT_ERROR)) {
+        return NXT_ERROR;
+    }
+
+    ret = nxt_fs_mkdir_all((const u_char *) cgprocs, 0777);
+    if (nxt_slow_path(ret == NXT_ERROR)) {
+        return NXT_ERROR;
+    }
+
+    len = strlen(cgprocs);
+
+    /*
+     * The length of the directory part and the snprintf() result are
+     * kept in separate variables: the truncation check has to compare
+     * what snprintf() wanted to write against the space that was really
+     * left in the buffer.
+     */
+    n = snprintf(cgprocs + len, NXT_MAX_PATH_LEN - len, "/cgroup.procs");
+    if (nxt_slow_path(n < 0)) {
+        return NXT_ERROR;
+    }
+
+    if (nxt_slow_path(n >= NXT_MAX_PATH_LEN - len)) {
+        nxt_errno = ENAMETOOLONG;
+        return NXT_ERROR;
+    }
+
+    fp = nxt_file_fopen(task, cgprocs, "we");
+    if (nxt_slow_path(fp == NULL)) {
+        return NXT_ERROR;
+    }
+
+    /*
+     * Unbuffered, so the write is issued by fprintf() itself and a
+     * failure is visible in its return value instead of being deferred
+     * to the close.
+     */
+    setvbuf(fp, NULL, _IONBF, 0);
+    n = fprintf(fp, "%d\n", process->pid);
+    nxt_file_fclose(task, fp);
+
+    if (nxt_slow_path(n < 0)) {
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
+
+
+/*
+ * Best-effort removal of the cgroup directories created for 'process'.
+ *
+ * Starting at the directory for process->isolation.cgroup.path, rmdir()
+ * is attempted on it and then on each parent in turn.  The walk stops
+ * when it reaches either the directory computed for an empty relative
+ * path (NXT_CGROUP_ROOT plus the cgroup read from /proc/self/cgroup) or
+ * NXT_CGROUP_ROOT itself.
+ *
+ * rmdir() failures are deliberately ignored.  Nothing is done when no
+ * cgroup path is set or when a path cannot be built.
+ */
+void
+nxt_cgroup_cleanup(nxt_task_t *task, const nxt_process_t *process)
+{
+    char       *ptr;
+    char       cgroot[NXT_MAX_PATH_LEN], cgpath[NXT_MAX_PATH_LEN];
+    size_t     len;
+    nxt_int_t  ret;
+
+    if (process->isolation.cgroup.path == NULL) {
+        return;
+    }
+
+    ret = nxt_mk_cgpath(task, "", cgroot);
+    if (nxt_slow_path(ret == NXT_ERROR)) {
+        return;
+    }
+
+    ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgpath);
+    if (nxt_slow_path(ret == NXT_ERROR)) {
+        return;
+    }
+
+    /*
+     * An empty relative path is formatted as "<root><cgroup>/", so
+     * cgroot ends with '/'.  Components stripped from cgpath below never
+     * end with '/', hence the trailing separators have to go or the
+     * comparison in the loop could never match.
+     */
+    len = strlen(cgroot);
+    while (len > 1 && cgroot[len - 1] == '/') {
+        cgroot[--len] = '\0';
+    }
+
+    while (*cgpath != '\0'
+           && strcmp(cgroot, cgpath) != 0
+           && strcmp(NXT_CGROUP_ROOT, cgpath) != 0)
+    {
+        /* Best effort; a directory still in use is simply left behind. */
+        rmdir(cgpath);
+
+        ptr = strrchr(cgpath, '/');
+        if (ptr == NULL) {
+            break;
+        }
+
+        *ptr = '\0';
+    }
+}
+
+
+/*
+ * Builds the cgroupfs path for a 'dir' that is relative to the cgroup of
+ * the calling process:
+ *
+ *   NXT_CGROUP_ROOT <cgroup from /proc/self/cgroup> "/" <dir>
+ *
+ * The cgroup is taken from the line of /proc/self/cgroup that starts
+ * with "0::"; everything after the second ':' is used.
+ *
+ * The result is written to 'cgpath' with a NXT_MAX_PATH_LEN bound.
+ *
+ * Returns the snprintf() result (which the caller must compare against
+ * NXT_MAX_PATH_LEN to detect truncation), or -1 on error.  nxt_errno is
+ * set to ENODATA when no usable "0::" line is found.
+ */
+static int
+nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, char *cgpath)
+{
+    int         i, len;
+    char        *buf, *ptr;
+    FILE        *fp;
+    size_t      size;
+    ssize_t     nread;
+    nxt_bool_t  found;
+
+    fp = nxt_file_fopen(task, "/proc/self/cgroup", "re");
+    if (nxt_slow_path(fp == NULL)) {
+        return -1;
+    }
+
+    len = -1;
+    buf = NULL;
+    size = 0;    /* getline() allocates the buffer; never pass garbage */
+    found = 0;
+
+    while ((nread = getline(&buf, &size, fp)) != -1) {
+        if (strncmp(buf, "0::", 3) == 0) {
+            found = 1;
+            break;
+        }
+    }
+
+    nxt_file_fclose(task, fp);
+
+    if (!found) {
+        nxt_errno = ENODATA;
+        goto out_free_buf;
+    }
+
+    /* Lose the trailing '\n', but only if the line really has one. */
+    if (buf[nread - 1] == '\n') {
+        buf[nread - 1] = '\0';
+    }
+
+    /* Step over the first two ':' separated fields. */
+    ptr = buf;
+    for (i = 0; i < 2; i++) {
+        ptr = strchr(ptr, ':');
+        if (ptr == NULL) {
+            nxt_errno = ENODATA;
+            goto out_free_buf;
+        }
+
+        ptr++;
+    }
+
+    len = snprintf(cgpath, NXT_MAX_PATH_LEN, NXT_CGROUP_ROOT "%s/%s",
+                   ptr, dir);
+
+out_free_buf:
+
+    /* buf may still be NULL here; it is released in either case. */
+    nxt_free(buf);
+
+    return len;
+}
+
+
+/*
+ * Builds the cgroupfs path for 'dir' in 'cgpath', a buffer that is
+ * formatted with a NXT_MAX_PATH_LEN bound.
+ *
+ * A 'dir' starting with '/' is placed directly below NXT_CGROUP_ROOT;
+ * any other 'dir' is handed to nxt_mk_cgpath_relative().
+ *
+ * Returns NXT_OK on success or NXT_ERROR on failure; nxt_errno is set to
+ * ENAMETOOLONG when the path does not fit in the buffer.
+ */
+static nxt_int_t
+nxt_mk_cgpath(nxt_task_t *task, const char *dir, char *cgpath)
+{
+    int  len;
+
+    /*
+     * If the path from the config is relative, we need to make
+     * the cgroup path include the main unit process's cgroup, i.e.
+     *
+     *   NXT_CGROUP_ROOT/<main process cgroup>/<cgroup path>
+     */
+    if (dir[0] != '/') {
+        len = nxt_mk_cgpath_relative(task, dir, cgpath);
+    } else {
+        len = snprintf(cgpath, NXT_MAX_PATH_LEN, NXT_CGROUP_ROOT "%s", dir);
+    }
+
+    /* snprintf() may report failure with any negative value. */
+    if (len < 0) {
+        return NXT_ERROR;
+    }
+
+    if (len >= NXT_MAX_PATH_LEN) {
+        nxt_errno = ENAMETOOLONG;
+        return NXT_ERROR;
+    }
+
+    return NXT_OK;
+}
diff --git a/src/nxt_cgroup.h b/src/nxt_cgroup.h
new file mode 100644
index 00000000..0b9055d2
--- /dev/null
+++ b/src/nxt_cgroup.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) Andrew Clayton
+ * Copyright (C) F5, Inc.
+ */
+
+#ifndef _NXT_CGROUP_H_INCLUDED_
+#define _NXT_CGROUP_H_INCLUDED_
+
+
+/*
+ * Writes the pid of 'process' to the cgroup.procs file of the cgroup set
+ * in process->isolation.cgroup.path, creating the cgroup directory first.
+ * Returns NXT_OK or NXT_ERROR.
+ */
+nxt_int_t nxt_cgroup_proc_add(nxt_task_t *task, nxt_process_t *process);
+/*
+ * Attempts to rmdir() the cgroup directory of 'process' and then its
+ * parent directories in turn; failures are ignored.
+ */
+void nxt_cgroup_cleanup(nxt_task_t *task, const nxt_process_t *process);
+
+
+#endif /* _NXT_CGROUP_H_INCLUDED_ */