diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c index 52b17f27abef..a8f4166e91cd 100644 --- a/module/os/linux/zfs/zpl_xattr.c +++ b/module/os/linux/zfs/zpl_xattr.c @@ -1462,6 +1462,14 @@ xattr_handler_t zpl_xattr_acl_default_handler = #endif /* CONFIG_FS_POSIX_ACL */ +/* + * zpl_permission() gets called by linux kernel whenever it checks + * inode_permission via inode->i_op->permission. The general preference + * is to defer to the standard in-kernel permission check (generic_permission) + * wherever possible. + * + * https://www.kernel.org/doc/Documentation/filesystems/vfs.txt + */ int #if defined(HAVE_IOPS_PERMISSION_USERNS) zpl_permission(struct user_namespace *userns, struct inode *ip, int mask) @@ -1506,23 +1514,40 @@ zpl_permission(struct inode *ip, int mask) } /* - * Avoid potentially blocking in RCU walk. + * Fast path for execute checks. Do not use zfs_fastaccesschk_execute + * since it may end up granting execute access in presence of explicit + * deny entry for user / group, and it also read the ZFS ACL + * (non-cached) which we wish to avoid in RCU. */ - if (mask & MAY_NOT_BLOCK) { - return (-ECHILD); - } + if ((to_check == ACE_EXECUTE) && + (ITOZ(ip)->z_pflags & ZFS_NO_EXECS_DENIED)) + return (0); + /* + * inode permission operation may be called in rcu-walk mode + * (mask & MAY_NOT_BLOCK). If in rcu-walk mode, the filesystem must + * check the permission without blocking or storing to the inode. + * + * If a situation is encountered that rcu-walk cannot handle, + * return -ECHILD and it will be called again in ref-walk mode. + */ cr = CRED(); crhold(cr); - ret = -zfs_access(ITOZ(ip), to_check, V_ACE_MASK, cr); - if (ret != -EPERM && ret != -EACCES) { - crfree(cr); - return (ret); - } /* - * There are some situations in which capabilities - * may allow overriding the DACL. + * There are some situations in which capabilities may allow overriding + * the DACL. Skip reading ACL if requested permissions are fully + * satisfied by capabilities. + */ + + /* + * CAP_DAC_OVERRIDE may override RWX on directories, and RW on other + * files. Execute may also be overriden if at least one execute bit is + * set. This behavior is not formally documented, but is covered in + * commit messages and code comments in namei.c. + * + * CAP_DAC_READ_SEARCH may bypass file read permission checks and + * directory read and execute permission checks. */ if (S_ISDIR(ip->i_mode)) { #ifdef SB_NFSV4ACL @@ -1539,25 +1564,27 @@ zpl_permission(struct inode *ip, int mask) crfree(cr); return (0); } - crfree(cr); - return (ret); } - if (to_check == ACE_READ_DATA) { - if (capable(CAP_DAC_READ_SEARCH)) { + if (!(mask & MAY_EXEC) || (ip->i_mode & S_IXUGO)) { + if (capable(CAP_DAC_OVERRIDE)) { crfree(cr); return (0); } } - if (!(mask & MAY_EXEC) || - (zfs_fastaccesschk_execute(ITOZ(ip), cr) == 0)) { - if (capable(CAP_DAC_OVERRIDE)) { - crfree(cr); - return (0); - } + if ((to_check == ACE_READ_DATA) && + capable(CAP_DAC_READ_SEARCH)) { + crfree(cr); + return (0); + } + + if (mask & MAY_NOT_BLOCK) { + crfree(cr); + return (-ECHILD); } + ret = -zfs_access(ITOZ(ip), to_check, V_ACE_MASK, cr); crfree(cr); return (ret); }