From 7336138eedf1c9b09b432428c4cccc2da25ab9e0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Apr 2016 22:53:39 +0200 Subject: [PATCH] nspawn: optionally fix up OS tree uid/gids for userns This adds a new --private-userns-chown switch that may be used in combination with --private-userns. If it is passed a recursive chmod() operation is run on the OS tree, fixing all file owner UID/GIDs to the right ranges. This should make user namespacing pretty workable, as the OS trees don't need to be prepared manually anymore. --- .gitignore | 1 + Makefile.am | 13 ++ src/nspawn/nspawn-patch-uid.c | 417 ++++++++++++++++++++++++++++++++++ src/nspawn/nspawn-patch-uid.h | 23 ++ src/nspawn/nspawn.c | 47 +++- src/nspawn/test-patch-uid.c | 61 +++++ 6 files changed, 560 insertions(+), 2 deletions(-) create mode 100644 src/nspawn/nspawn-patch-uid.c create mode 100644 src/nspawn/nspawn-patch-uid.h create mode 100644 src/nspawn/test-patch-uid.c diff --git a/.gitignore b/.gitignore index 02ba86ef6f..c7eb14452d 100644 --- a/.gitignore +++ b/.gitignore @@ -240,6 +240,7 @@ /test-ns /test-nss /test-parse-util +/test-patch-uid /test-path /test-path-lookup /test-path-util diff --git a/Makefile.am b/Makefile.am index 0f475c6d09..b323de55c6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3021,6 +3021,8 @@ systemd_nspawn_SOURCES = \ src/nspawn/nspawn-setuid.h \ src/nspawn/nspawn-stub-pid1.c \ src/nspawn/nspawn-stub-pid1.h \ + src/nspawn/nspawn-patch-uid.c \ + src/nspawn/nspawn-patch-uid.h \ src/core/mount-setup.c \ src/core/mount-setup.h \ src/core/loopback-setup.c \ @@ -3048,6 +3050,17 @@ systemd_nspawn_LDADD += \ libfirewall.la endif +test_patch_uid_SOURCES = \ + src/nspawn/nspawn-patch-uid.c \ + src/nspawn/nspawn-patch-uid.h \ + src/nspawn/test-patch-uid.c + +test_patch_uid_LDADD = \ + libshared.la + +manual_tests += \ + test-patch-uid + # ------------------------------------------------------------------------------ systemd_run_SOURCES = \ src/run/run.c diff --git a/src/nspawn/nspawn-patch-uid.c b/src/nspawn/nspawn-patch-uid.c new file mode 100644 index 0000000000..f53164fbbf --- /dev/null +++ b/src/nspawn/nspawn-patch-uid.c @@ -0,0 +1,417 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#ifdef HAVE_ACL +#include +#endif +#include +#include + +#include "acl-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "nspawn-patch-uid.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +#ifdef HAVE_ACL + +static int get_acl(int fd, const char *name, acl_type_t type, acl_t *ret) { + char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + acl_t acl; + + assert(fd >= 0); + assert(ret); + + if (name) { + _cleanup_close_ int child_fd = -1; + + child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (child_fd < 0) + return -errno; + + xsprintf(procfs_path, "/proc/self/fd/%i", child_fd); + acl = acl_get_file(procfs_path, type); + } else if (type == ACL_TYPE_ACCESS) + acl = acl_get_fd(fd); + else { + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + acl = acl_get_file(procfs_path, type); + } + if (!acl) + return -errno; + + *ret = acl; + return 0; +} + +static int set_acl(int fd, const char *name, acl_type_t type, acl_t acl) { + char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + int r; + + assert(fd >= 0); + assert(acl); + + if (name) { + _cleanup_close_ int child_fd = -1; + + child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (child_fd < 0) + return -errno; + + xsprintf(procfs_path, "/proc/self/fd/%i", child_fd); + r = acl_set_file(procfs_path, type, acl); + } else if (type == ACL_TYPE_ACCESS) + r = acl_set_fd(fd, acl); + else { + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + r = acl_set_file(procfs_path, type, acl); + } + if (r < 0) + return -errno; + + return 0; +} + +static int shift_acl(acl_t acl, uid_t shift, acl_t *ret) { + _cleanup_(acl_freep) acl_t copy = NULL; + acl_entry_t i; + int r; + + assert(acl); + assert(ret); + + r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + if (r < 0) + return -errno; + while (r > 0) { + uid_t *old_uid, new_uid; + bool modify = false; + acl_tag_t tag; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (IN_SET(tag, ACL_USER, ACL_GROUP)) { + + /* We don't distuingish here between uid_t and gid_t, let's make sure the compiler checks that + * this is actually OK */ + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + + old_uid = acl_get_qualifier(i); + if (!old_uid) + return -errno; + + new_uid = shift | (*old_uid & UINT32_C(0xFFFF)); + if (!uid_is_valid(new_uid)) + return -EINVAL; + + modify = new_uid != *old_uid; + if (modify && !copy) { + int n; + + /* There's no copy of the ACL yet? if so, let's create one, and start the loop from the + * beginning, so that we copy all entries, starting from the first, this time. */ + + n = acl_entries(acl); + if (n < 0) + return -errno; + + copy = acl_init(n); + if (!copy) + return -errno; + + /* Seek back to the beginning */ + r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + if (r < 0) + return -errno; + continue; + } + } + + if (copy) { + acl_entry_t new_entry; + + if (acl_create_entry(©, &new_entry) < 0) + return -errno; + + if (acl_copy_entry(new_entry, i) < 0) + return -errno; + + if (modify) + if (acl_set_qualifier(new_entry, &new_uid) < 0) + return -errno; + } + + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i); + if (r < 0) + return -errno; + } + + *ret = copy; + copy = NULL; + + return !!*ret; +} + +static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) { + _cleanup_(acl_freep) acl_t acl = NULL, shifted = NULL; + bool changed = false; + int r; + + assert(fd >= 0); + assert(st); + + /* ACLs are not supported on symlinks, there's no point in trying */ + if (S_ISLNK(st->st_mode)) + return 0; + + r = get_acl(fd, name, ACL_TYPE_ACCESS, &acl); + if (r == -EOPNOTSUPP) + return 0; + if (r < 0) + return r; + + r = shift_acl(acl, shift, &shifted); + if (r < 0) + return r; + if (r > 0) { + r = set_acl(fd, name, ACL_TYPE_ACCESS, shifted); + if (r < 0) + return r; + + changed = true; + } + + if (S_ISDIR(st->st_mode)) { + acl_free(acl); + acl_free(shifted); + + acl = shifted = NULL; + + r = get_acl(fd, name, ACL_TYPE_DEFAULT, &acl); + if (r < 0) + return r; + + r = shift_acl(acl, shift, &shifted); + if (r < 0) + return r; + if (r > 0) { + r = set_acl(fd, name, ACL_TYPE_DEFAULT, shifted); + if (r < 0) + return r; + + changed = true; + } + } + + return changed; +} + +#else + +static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) { + return 0; +} + +#endif + +static int patch_fd(int fd, const char *name, const struct stat *st, uid_t shift) { + uid_t new_uid; + gid_t new_gid; + bool changed = false; + int r; + + assert(fd >= 0); + assert(st); + + new_uid = shift | (st->st_uid & UINT32_C(0xFFFF)); + new_gid = (gid_t) shift | (st->st_gid & UINT32_C(0xFFFF)); + + if (!uid_is_valid(new_uid) || !gid_is_valid(new_gid)) + return -EINVAL; + + if (st->st_uid != new_uid || st->st_gid != new_gid) { + if (name) + r = fchownat(fd, name, new_uid, new_gid, AT_SYMLINK_NOFOLLOW); + else + r = fchown(fd, new_uid, new_gid); + if (r < 0) + return -errno; + + /* The Linux kernel alters the mode in some cases of chown(). Let's undo this. */ + if (name && !S_ISLNK(st->st_mode)) + r = fchmodat(fd, name, st->st_mode, 0); + else + r = fchmod(fd, st->st_mode); + if (r < 0) + return -errno; + + changed = true; + } + + r = patch_acls(fd, name, st, shift); + if (r < 0) + return r; + + return r > 0 || changed; +} + +static int recurse_fd(int fd, bool donate_fd, const struct stat *st, uid_t shift) { + bool changed = false; + int r; + + assert(fd >= 0); + + r = patch_fd(fd, NULL, st, shift); + if (r < 0) + goto finish; + + if (S_ISDIR(st->st_mode)) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + if (!donate_fd) { + int copy; + + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + fd = copy; + donate_fd = true; + } + + d = fdopendir(fd); + if (!d) { + r = -errno; + goto finish; + } + fd = -1; + + FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) { + struct stat fst; + + if (STR_IN_SET(de->d_name, ".", "..")) + continue; + + if (fstatat(dirfd(d), de->d_name, &fst, AT_SYMLINK_NOFOLLOW) < 0) { + r = -errno; + goto finish; + } + + if (S_ISDIR(fst.st_mode)) { + int subdir_fd; + + subdir_fd = openat(dirfd(d), de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (subdir_fd < 0) { + r = -errno; + goto finish; + + } + + r = recurse_fd(subdir_fd, true, &fst, shift); + if (r < 0) + goto finish; + if (r > 0) + changed = true; + + } else { + r = patch_fd(dirfd(d), de->d_name, &fst, shift); + if (r < 0) + goto finish; + if (r > 0) + changed = true; + } + } + } + + r = changed; + +finish: + if (donate_fd) + safe_close(fd); + + return r; +} + +static int fd_patch_uid_internal(int fd, bool donate_fd, uid_t shift, uid_t range) { + struct stat st; + int r; + + assert(fd >= 0); + + /* Recursively adjusts the UID/GIDs of all files of a directory tree. This is used to automatically fix up an + * OS tree to the used user namespace UID range. Note that this automatic adjustment only works for UID ranges + * following the concept that the upper 16bit of a UID identify the container, and the lower 16bit are the actual + * UID within the container. */ + + if ((shift & 0xFFFF) != 0) { + /* We only support containers where the shift starts at a 2^16 boundary */ + r = -EOPNOTSUPP; + goto finish; + } + + if (range != 0x10000) { + /* We only support containers with 16bit UID ranges for the patching logic */ + r = -EOPNOTSUPP; + goto finish; + } + + if (fstat(fd, &st) < 0) { + r = -errno; + goto finish; + } + + if ((uint32_t) st.st_uid >> 16 != (uint32_t) st.st_gid >> 16) { + /* We only support containers where the uid/gid container ID match */ + r = -EBADE; + goto finish; + } + + /* Try to detect if the range is already right. Of course, this a pretty drastic optimization, as we assume + * that if the top-level dir has the right upper 16bit assigned, then everything below will have too... */ + if (((uint32_t) (st.st_uid ^ shift) >> 16) == 0) + return 0; + + return recurse_fd(fd, donate_fd, &st, shift); + +finish: + if (donate_fd) + safe_close(fd); + + return r; +} + +int fd_patch_uid(int fd, uid_t shift, uid_t range) { + return fd_patch_uid_internal(fd, false, shift, range); +} + +int path_patch_uid(const char *path, uid_t shift, uid_t range) { + int fd; + + fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (fd < 0) + return -errno; + + return fd_patch_uid_internal(fd, true, shift, range); +} diff --git a/src/nspawn/nspawn-patch-uid.h b/src/nspawn/nspawn-patch-uid.h new file mode 100644 index 0000000000..55d0990016 --- /dev/null +++ b/src/nspawn/nspawn-patch-uid.h @@ -0,0 +1,23 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include + +int fd_patch_uid(int fd, uid_t shift, uid_t range); +int path_patch_uid(const char *path, uid_t shift, uid_t range); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index e1d37d383a..a3190545fa 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -75,6 +75,7 @@ #include "nspawn-expose-ports.h" #include "nspawn-mount.h" #include "nspawn-network.h" +#include "nspawn-patch-uid.h" #include "nspawn-register.h" #include "nspawn-settings.h" #include "nspawn-setuid.h" @@ -175,6 +176,7 @@ static ExposePort *arg_expose_ports = NULL; static char **arg_property = NULL; static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; static bool arg_userns = false; +static bool arg_userns_chown = false; static int arg_kill_signal = 0; static bool arg_unified_cgroup_hierarchy = false; static SettingsMask arg_settings_mask = 0; @@ -204,6 +206,7 @@ static void help(void) { " --property=NAME=VALUE Set scope unit property\n" " --private-users[=UIDBASE[:NUIDS]]\n" " Run within user namespace\n" + " --private-user-chown Adjust OS tree file ownership for private user range\n" " --private-network Disable network in container\n" " --network-interface=INTERFACE\n" " Assign an existing network interface to the\n" @@ -349,6 +352,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_KILL_SIGNAL, ARG_SETTINGS, ARG_CHDIR, + ARG_PRIVATE_USERS_CHOWN, }; static const struct option options[] = { @@ -392,6 +396,7 @@ static int parse_argv(int argc, char *argv[]) { { "port", required_argument, NULL, 'p' }, { "property", required_argument, NULL, ARG_PROPERTY }, { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS }, + { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN}, { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, { "settings", required_argument, NULL, ARG_SETTINGS }, { "chdir", required_argument, NULL, ARG_CHDIR }, @@ -825,6 +830,10 @@ static int parse_argv(int argc, char *argv[]) { arg_userns = true; break; + case ARG_PRIVATE_USERS_CHOWN: + arg_userns_chown = true; + break; + case ARG_KILL_SIGNAL: arg_kill_signal = signal_from_string_try_harder(optarg); if (arg_kill_signal < 0) { @@ -933,8 +942,15 @@ static int parse_argv(int argc, char *argv[]) { return -EINVAL; } - if (arg_userns && access("/proc/self/uid_map", F_OK) < 0) - return log_error_errno(EOPNOTSUPP, "--private-users= is not supported, kernel compiled without user namespace support."); + if (arg_userns && access("/proc/self/uid_map", F_OK) < 0) { + log_error("--private-users= is not supported, kernel compiled without user namespace support."); + return -EOPNOTSUPP; + } + + if (arg_userns_chown && arg_read_only) { + log_error("--read-only and --private-users-chown may not be combined."); + return -EINVAL; + } if (argc > optind) { arg_parameters = strv_copy(argv + optind); @@ -2218,6 +2234,29 @@ static int setup_machine_id(const char *directory) { return 0; } +static int recursive_chown(const char *directory, uid_t shift, uid_t range) { + int r; + + assert(directory); + + if (!arg_userns || !arg_userns_chown) + return 0; + + r = path_patch_uid(directory, arg_uid_shift, arg_uid_range); + if (r == -EOPNOTSUPP) + return log_error_errno(r, "Automatic UID/GID adjusting is only supported for UID/GID ranges starting at multiples of 2^16 with a range of 2^16."); + if (r == -EBADE) + return log_error_errno(r, "Upper 16 bits of root directory UID and GID do not match."); + if (r < 0) + return log_error_errno(r, "Failed to adjust UID/GID shift of OS tree: %m"); + if (r == 0) + log_debug("Root directory of image is already owned by the right UID/GID range, skipping recursive chown operation."); + else + log_debug("Patched directory tree to match UID/GID range."); + + return r; +} + static int mount_devices( const char *where, const char *root_device, bool root_device_rw, @@ -2763,6 +2802,10 @@ static int outer_child( if (mount(directory, directory, NULL, MS_BIND|MS_REC, NULL) < 0) return log_error_errno(errno, "Failed to make bind mount: %m"); + r = recursive_chown(directory, arg_uid_shift, arg_uid_range); + if (r < 0) + return r; + r = setup_volatile(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context); if (r < 0) return r; diff --git a/src/nspawn/test-patch-uid.c b/src/nspawn/test-patch-uid.c new file mode 100644 index 0000000000..11c5321788 --- /dev/null +++ b/src/nspawn/test-patch-uid.c @@ -0,0 +1,61 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include + +#include "log.h" +#include "nspawn-patch-uid.h" +#include "user-util.h" +#include "util.h" + +int main(int argc, char *argv[]) { + uid_t shift, range; + int r; + + log_set_max_level(LOG_DEBUG); + log_parse_environment(); + log_open(); + + if (argc != 4) { + log_error("Expected PATH SHIFT RANGE parameters."); + return EXIT_FAILURE; + } + + r = parse_uid(argv[2], &shift); + if (r < 0) { + log_error_errno(r, "Failed to parse UID shift %s.", argv[2]); + return EXIT_FAILURE; + } + + r = parse_gid(argv[3], &range); + if (r < 0) { + log_error_errno(r, "Failed to parse UID range %s.", argv[3]); + return EXIT_FAILURE; + } + + r = path_patch_uid(argv[1], shift, range); + if (r < 0) { + log_error_errno(r, "Failed to patch directory tree: %m"); + return EXIT_FAILURE; + } + + log_info("Changed: %s", yes_no(r)); + + return EXIT_SUCCESS; +}