From e0d3bc292caaa2eec3f8360494b6c17618a36f12 Mon Sep 17 00:00:00 2001 From: Nikolay Amiantov Date: Fri, 10 May 2019 14:59:16 +0300 Subject: [PATCH 1/3] chrootenv: make stackable The problem with stacking chrootenv before was that CLONE_NEWUSER cannot be used when a child uses chroot. So instead of that we use pivot_root which replaces root in the whole namespace. This requires our new root to be an actual fs so we mount tmpfs. --- .../build-fhs-userenv/chrootenv/chrootenv.c | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c b/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c index 34e050dde4fb..e57949cc44cb 100644 --- a/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c +++ b/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c @@ -18,6 +18,7 @@ #include #include #include +#include int min(int a, int b) { return a > b ? b : a; @@ -25,14 +26,23 @@ int min(int a, int b) { const gchar *bind_blacklist[] = {"bin", "etc", "host", "usr", "lib", "lib64", "lib32", "sbin", NULL}; +int pivot_root(const char *new_root, const char *put_old) { + return syscall(SYS_pivot_root, new_root, put_old); +} + +void mount_tmpfs(const gchar *target) { + fail_if(mount("none", target, "tmpfs", 0, NULL)); +} + void bind_mount(const gchar *source, const gchar *target) { fail_if(g_mkdir(target, 0755)); fail_if(mount(source, target, "bind", MS_BIND | MS_REC, NULL)); } -void bind_mount_host(const gchar *host, const gchar *guest) { +void pivot_host(const gchar *host, const gchar *guest) { g_autofree gchar *point = g_build_filename(guest, "host", NULL); - bind_mount(host, point); + fail_if(g_mkdir(point, 0755)); + fail_if(pivot_root(guest, point)); } void bind_mount_item(const gchar *host, const gchar *guest, const gchar *name) { @@ -44,6 +54,8 @@ void bind_mount_item(const gchar *host, const gchar *guest, const gchar *name) { } void bind(const gchar *host, const gchar *guest) { + 
mount_tmpfs(guest); + g_autoptr(GError) err = NULL; g_autoptr(GDir) dir = g_dir_open(host, 0, &err); @@ -52,11 +64,11 @@ void bind(const gchar *host, const gchar *guest) { const gchar *item; - while (item = g_dir_read_name(dir)) + while ((item = g_dir_read_name(dir))) if (!g_strv_contains(bind_blacklist, item)) bind_mount_item(host, guest, item); - bind_mount_host(host, guest); + pivot_host(host, guest); } void spit(const char *path, char *fmt, ...) { @@ -85,11 +97,6 @@ int main(gint argc, gchar **argv) { return 1; } - if (g_getenv("NIX_CHROOTENV")) - g_warning("chrootenv doesn't stack!"); - else - g_setenv("NIX_CHROOTENV", "", TRUE); - g_autofree gchar *prefix = g_build_filename(g_get_tmp_dir(), "chrootenvXXXXXX", NULL); @@ -121,7 +128,6 @@ int main(gint argc, gchar **argv) { bind("/", prefix); - fail_if(chroot(prefix)); fail_if(chdir("/")); fail_if(execvp(*argv, argv)); } From 7664ffbbafbe1275529fcca7936a321cb1c9e90c Mon Sep 17 00:00:00 2001 From: Nikolay Amiantov Date: Sat, 11 May 2019 11:32:47 +0300 Subject: [PATCH 2/3] chrootenv: small improvements * Remove unused argument from pivot_host; * Factor out tmpdir creation into a separate function; * Remove unused fstype from bind mount; * Use rmdir instead of a treewalk to remove empty temporary directory. --- .../build-fhs-userenv/chrootenv/chrootenv.c | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c b/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c index e57949cc44cb..9a7c76b1c078 100644 --- a/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c +++ b/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c @@ -7,22 +7,16 @@ #include #include -#define fail(s, err) g_error("%s: %s: %s", __func__, s, g_strerror(err)) -#define fail_if(expr) \ - if (expr) \ - fail(#expr, errno); - -#include - #include #include #include #include #include -int min(int a, int b) { - return a > b ? 
b : a; -} +#define fail(s, err) g_error("%s: %s: %s", __func__, s, g_strerror(err)) +#define fail_if(expr) \ + if (expr) \ + fail(#expr, errno); const gchar *bind_blacklist[] = {"bin", "etc", "host", "usr", "lib", "lib64", "lib32", "sbin", NULL}; @@ -36,10 +30,17 @@ void mount_tmpfs(const gchar *target) { void bind_mount(const gchar *source, const gchar *target) { fail_if(g_mkdir(target, 0755)); - fail_if(mount(source, target, "bind", MS_BIND | MS_REC, NULL)); + fail_if(mount(source, target, NULL, MS_BIND | MS_REC, NULL)); } -void pivot_host(const gchar *host, const gchar *guest) { +const gchar *create_tmpdir() { + gchar *prefix = + g_build_filename(g_get_tmp_dir(), "chrootenvXXXXXX", NULL); + fail_if(!g_mkdtemp_full(prefix, 0755)); + return prefix; +} + +void pivot_host(const gchar *guest) { g_autofree gchar *point = g_build_filename(guest, "host", NULL); fail_if(g_mkdir(point, 0755)); fail_if(pivot_root(guest, point)); @@ -68,7 +69,7 @@ void bind(const gchar *host, const gchar *guest) { if (!g_strv_contains(bind_blacklist, item)) bind_mount_item(host, guest, item); - pivot_host(host, guest); + pivot_host(guest); } void spit(const char *path, char *fmt, ...) { @@ -84,11 +85,6 @@ void spit(const char *path, char *fmt, ...) 
{ fclose(f); } -int nftw_remove(const char *path, const struct stat *sb, int type, - struct FTW *ftw) { - return remove(path); -} - int main(gint argc, gchar **argv) { const gchar *self = *argv++; @@ -97,10 +93,7 @@ int main(gint argc, gchar **argv) { return 1; } - g_autofree gchar *prefix = - g_build_filename(g_get_tmp_dir(), "chrootenvXXXXXX", NULL); - - fail_if(!g_mkdtemp_full(prefix, 0755)); + g_autofree const gchar *prefix = create_tmpdir(); pid_t cpid = fork(); @@ -136,10 +129,7 @@ int main(gint argc, gchar **argv) { int status; fail_if(waitpid(cpid, &status, 0) != cpid); - // glibc 2.27 (and possibly other versions) can't handle - // an nopenfd value larger than 2^19 - fail_if(nftw(prefix, nftw_remove, min(getdtablesize(), 1<<19), - FTW_DEPTH | FTW_MOUNT | FTW_PHYS)); + fail_if(rmdir(prefix)); if (WIFEXITED(status)) return WEXITSTATUS(status); From 06f27dc2e932778cf1d2ce6b0bf30b32575dd16a Mon Sep 17 00:00:00 2001 From: Nikolay Amiantov Date: Sun, 12 May 2019 09:35:12 +0300 Subject: [PATCH 3/3] chrootenv: propagate nested chrootenv /host To avoid symlink loops to /host in nested chrootenvs we need to remove one level of indirection. This is also what's generally expected of /host contents. 
--- .../build-fhs-userenv/chrootenv/chrootenv.c | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c b/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c index 9a7c76b1c078..dcb2e97aa932 100644 --- a/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c +++ b/pkgs/build-support/build-fhs-userenv/chrootenv/chrootenv.c @@ -18,7 +18,7 @@ if (expr) \ fail(#expr, errno); -const gchar *bind_blacklist[] = {"bin", "etc", "host", "usr", "lib", "lib64", "lib32", "sbin", NULL}; +const gchar *bind_blacklist[] = {"bin", "etc", "host", "real-host", "usr", "lib", "lib64", "lib32", "sbin", NULL}; int pivot_root(const char *new_root, const char *put_old) { return syscall(SYS_pivot_root, new_root, put_old); @@ -56,9 +56,12 @@ void bind_mount_item(const gchar *host, const gchar *guest, const gchar *name) { void bind(const gchar *host, const gchar *guest) { mount_tmpfs(guest); + pivot_host(guest); + + g_autofree gchar *host_dir = g_build_filename("/host", host, NULL); g_autoptr(GError) err = NULL; - g_autoptr(GDir) dir = g_dir_open(host, 0, &err); + g_autoptr(GDir) dir = g_dir_open(host_dir, 0, &err); if (err != NULL) fail("g_dir_open", errno); @@ -67,9 +70,7 @@ void bind(const gchar *host, const gchar *guest) { while ((item = g_dir_read_name(dir))) if (!g_strv_contains(bind_blacklist, item)) - bind_mount_item(host, guest, item); - - pivot_host(guest); + bind_mount_item(host_dir, "/", item); } void spit(const char *path, char *fmt, ...) { @@ -119,7 +120,23 @@ int main(gint argc, gchar **argv) { spit("/proc/self/uid_map", "%d %d 1", uid, uid); spit("/proc/self/gid_map", "%d %d 1", gid, gid); - bind("/", prefix); + // If there is a /host directory, assume this is nested chrootenv and use it as host instead. + gboolean nested_host = g_file_test("/host", G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR); + g_autofree const gchar *host = nested_host ? 
"/host" : "/"; + + bind(host, prefix); + + // Replace /host by an actual (inner) /host. + if (nested_host) { + fail_if(g_mkdir("/real-host", 0755)); + fail_if(mount("/host/host", "/real-host", NULL, MS_BIND | MS_REC, NULL)); + // For some reason umount("/host") returns EBUSY even immediately after + // pivot_root. We detach it at least to keep `/proc/mounts` from blowing + // up in nested cases. + fail_if(umount2("/host", MNT_DETACH)); + fail_if(mount("/real-host", "/host", NULL, MS_MOVE, NULL)); + fail_if(rmdir("/real-host")); + } fail_if(chdir("/")); fail_if(execvp(*argv, argv));