diff options
author | Andrew Clayton <a.clayton@nginx.com> | 2022-10-24 17:33:23 +0100 |
---|---|---|
committer | Andrew Clayton <a.clayton@nginx.com> | 2022-12-10 14:00:20 +0000 |
commit | 7d177faf3b8a483fd7ef958e884ec5625e058ca0 (patch) | |
tree | 231384c7eec073a9ad45a72c7714424d7e0d3bab /src | |
parent | 9466daf9bdafa3e00f521a47f4ce218353bf7f86 (diff) | |
download | unit-7d177faf3b8a483fd7ef958e884ec5625e058ca0.tar.gz unit-7d177faf3b8a483fd7ef958e884ec5625e058ca0.tar.bz2 |
Isolation: added core cgroup infrastructure.
Firstly, this is not to be confused with CLONE_NEWCGROUP which unit
already supports and is related to namespaces. To re-cap, namespaces
allow processes to have different views of various parts of the system
such as filesystem mounts, networking, hostname etc.
Whereas cgroup[0] is a Linux kernel facility for collecting a bunch of
processes together to perform some task on the group as a whole, for
example to implement resource limits.
There are two parts to cgroup, the core part of organising processes
into a hierarchy and the controllers which are responsible for enforcing
resource limits etc.
There are currently two versions of the cgroup sub-system, the original
cgroup and a version 2[1] introduced in 3.16 (August 2014) and marked
stable in 4.5 (March 2016).
This commit supports the cgroup V2 API and implements the ability to
place applications into their own cgroup on a per-application basis.
You can put them each into their own cgroup or you can group some
together. The ability to set resource limits can easily be added in
future.
The initial use case of this would be to aid in observability of unit
applications which becomes much easier if you can just monitor them on a
per cgroup basis.
One thing to note about cgroup, is that unlike namespaces which are
controlled via system calls such as clone(2) and unshare(2), cgroups are
setup and controlled through the cgroupfs pseudo-filesystem.
cgroup is Linux only and this support will only be enabled if configure
finds the cgroup2 filesystem mount, e.g
cgroup2 on /sys/fs/cgroup type cgroup2 (rw,nosuid,nodev,noexec,relatime,seclabel,nsdelegate,memory_recursiveprot)
The cgroups are removed on shutdown or as required on reconfiguration.
This commit just adds the basic infrastructure for using cgroups within
unit. Subsequent commits will wire up this support.
It supports creating cgroups relative to the main cgroup root and also
below the cgroup of the main unit process.
[0]: <https://man7.org/linux/man-pages/man7/cgroups.7.html>
[1]: <https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html>
Cc: Alejandro Colomar <alx@nginx.com>
Signed-off-by: Andrew Clayton <a.clayton@nginx.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/nxt_cgroup.c | 174 | ||||
-rw-r--r-- | src/nxt_cgroup.h | 14 |
2 files changed, 188 insertions, 0 deletions
diff --git a/src/nxt_cgroup.c b/src/nxt_cgroup.c new file mode 100644 index 00000000..2c404acc --- /dev/null +++ b/src/nxt_cgroup.c @@ -0,0 +1,174 @@ +/* + * Copyright (C) Andrew Clayton + * Copyright (C) F5, Inc. + */ + +#include <nxt_main.h> + +#include <nxt_cgroup.h> + + +static int nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, + char *cgpath); +static nxt_int_t nxt_mk_cgpath(nxt_task_t *task, const char *dir, + char *cgpath); + + +nxt_int_t +nxt_cgroup_proc_add(nxt_task_t *task, nxt_process_t *process) +{ + int len; + char cgprocs[NXT_MAX_PATH_LEN]; + FILE *fp; + nxt_int_t ret; + + if (task->thread->runtime->type != NXT_PROCESS_MAIN + || nxt_process_type(process) != NXT_PROCESS_PROTOTYPE + || process->isolation.cgroup.path == NULL) + { + return NXT_OK; + } + + ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgprocs); + if (nxt_slow_path(ret == NXT_ERROR)) { + return NXT_ERROR; + } + + ret = nxt_fs_mkdir_all((const u_char *) cgprocs, 0777); + if (nxt_slow_path(ret == NXT_ERROR)) { + return NXT_ERROR; + } + + len = strlen(cgprocs); + + len = snprintf(cgprocs + len, NXT_MAX_PATH_LEN - len, "/cgroup.procs"); + if (nxt_slow_path(len >= NXT_MAX_PATH_LEN - len)) { + nxt_errno = ENAMETOOLONG; + return NXT_ERROR; + } + + fp = nxt_file_fopen(task, cgprocs, "we"); + if (nxt_slow_path(fp == NULL)) { + return NXT_ERROR; + } + + setvbuf(fp, NULL, _IONBF, 0); + len = fprintf(fp, "%d\n", process->pid); + nxt_file_fclose(task, fp); + + if (nxt_slow_path(len < 0)) { + return NXT_ERROR; + } + + return NXT_OK; +} + + +void +nxt_cgroup_cleanup(nxt_task_t *task, const nxt_process_t *process) +{ + char *ptr; + char cgroot[NXT_MAX_PATH_LEN], cgpath[NXT_MAX_PATH_LEN]; + nxt_int_t ret; + + ret = nxt_mk_cgpath(task, "", cgroot); + if (nxt_slow_path(ret == NXT_ERROR)) { + return; + } + + ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgpath); + if (nxt_slow_path(ret == NXT_ERROR)) { + return; + } + + while (*cgpath != '\0' && strcmp(cgroot, cgpath) != 0) { + rmdir(cgpath); + ptr = strrchr(cgpath, '/'); + *ptr = '\0'; + } +} + + +static int +nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, char *cgpath) +{ + int i, len; + char *buf, *ptr; + FILE *fp; + size_t size; + ssize_t nread; + nxt_bool_t found; + + fp = nxt_file_fopen(task, "/proc/self/cgroup", "re"); + if (nxt_slow_path(fp == NULL)) { + return -1; + } + + len = -1; + buf = NULL; + found = 0; + while ((nread = getline(&buf, &size, fp)) != -1) { + if (strncmp(buf, "0::", 3) == 0) { + found = 1; + break; + } + } + + nxt_file_fclose(task, fp); + + if (!found) { + nxt_errno = ENODATA; + goto out_free_buf; + } + + buf[nread - 1] = '\0'; /* lose the trailing '\n' */ + ptr = buf; + for (i = 0; i < 2; i++) { + ptr = strchr(ptr, ':'); + if (ptr == NULL) { + nxt_errno = ENODATA; + goto out_free_buf; + } + + ptr++; + } + + len = snprintf(cgpath, NXT_MAX_PATH_LEN, NXT_CGROUP_ROOT "%s/%s", + ptr, dir); + +out_free_buf: + + nxt_free(buf); + + return len; +} + + +static nxt_int_t +nxt_mk_cgpath(nxt_task_t *task, const char *dir, char *cgpath) +{ + int len; + + /* + * If the path from the config is relative, we need to make + * the cgroup path include the main unit processes cgroup. I.e + * + * NXT_CGROUP_ROOT/<main process cgroup>/<cgroup path> + */ + if (dir[0] != '/') { + len = nxt_mk_cgpath_relative(task, dir, cgpath); + } else { + len = snprintf(cgpath, NXT_MAX_PATH_LEN, NXT_CGROUP_ROOT "%s", dir); + } + + if (len == -1) { + return NXT_ERROR; + } + + if (len >= NXT_MAX_PATH_LEN) { + nxt_errno = ENAMETOOLONG; + return NXT_ERROR; + } + + return NXT_OK; +} diff --git a/src/nxt_cgroup.h b/src/nxt_cgroup.h new file mode 100644 index 00000000..0b9055d2 --- /dev/null +++ b/src/nxt_cgroup.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) Andrew Clayton + * Copyright (C) F5, Inc. + */ + +#ifndef _NXT_CGROUP_H_INCLUDED_ +#define _NXT_CGROUP_H_INCLUDED_ + + +nxt_int_t nxt_cgroup_proc_add(nxt_task_t *task, nxt_process_t *process); +void nxt_cgroup_cleanup(nxt_task_t *task, const nxt_process_t *process); + + +#endif /* _NXT_CGROUP_H_INCLUDED_ */ |