diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2e9e5bdd5629..9783723e8ffe 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -504,7 +504,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; INIT_LIST_HEAD(&inode->i_mapping->private_list); info = HUGETLBFS_I(inode); - mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0, NULL); + mpol_shared_policy_init(&info->policy, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index dcc17378c952..3a39570b81b8 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -182,8 +182,7 @@ struct shared_policy { spinlock_t lock; }; -void mpol_shared_policy_init(struct shared_policy *info, unsigned short mode, - unsigned short flags, nodemask_t *nodes); +void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, struct mempolicy *new); @@ -216,10 +215,10 @@ int do_migrate_pages(struct mm_struct *mm, #ifdef CONFIG_TMPFS -extern int mpol_parse_str(char *str, unsigned short *mode, - unsigned short *mode_flags, nodemask_t *policy_nodes); +extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); -extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); +extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, + int no_context); #endif #else @@ -262,8 +261,8 @@ static inline int mpol_set_shared_policy(struct shared_policy *info, return -EINVAL; } -static inline void mpol_shared_policy_init(struct shared_policy *info, - unsigned short mode, unsigned short flags, nodemask_t *nodes) +static inline void mpol_shared_policy_init(struct shared_policy *sp, + struct mempolicy *mpol) { } @@ -322,13 +321,14 @@ static inline void check_highest_zone(int k) } #ifdef 
CONFIG_TMPFS -static inline int mpol_parse_str(char *value, unsigned short *policy, - unsigned short flags, nodemask_t *policy_nodes) +static inline int mpol_parse_str(char *str, struct mempolicy **mpol, + int no_context) { - return 1; + return 1; /* error */ } -static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, + int no_context) { return 0; } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d7699a628d78..f2d12d5a21b8 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -34,9 +34,7 @@ struct shmem_sb_info { uid_t uid; /* Mount uid for root directory */ gid_t gid; /* Mount gid for root directory */ mode_t mode; /* Mount mode for root directory */ - unsigned short policy; /* Default NUMA memory alloc policy */ - unsigned short flags; /* Optional mempolicy flags */ - nodemask_t policy_nodes; /* nodemask for preferred and bind */ + struct mempolicy *mpol; /* default memory policy for mappings */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6b751565eed1..a37a5034f63d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1828,27 +1828,35 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, return 0; } -void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, - unsigned short flags, nodemask_t *policy_nodes) +/** + * mpol_shared_policy_init - initialize shared policy for inode + * @sp: pointer to inode shared policy + * @mpol: struct mempolicy to install + * + * Install non-NULL @mpol in inode's shared policy rb-tree. + * On entry, the current task has a reference on a non-NULL @mpol. + * This must be released on exit. 
+ */ +void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { - info->root = RB_ROOT; - spin_lock_init(&info->lock); + sp->root = RB_ROOT; /* empty tree == default mempolicy */ + spin_lock_init(&sp->lock); - if (policy != MPOL_DEFAULT) { - struct mempolicy *newpol; + if (mpol) { + struct vm_area_struct pvma; + struct mempolicy *new; - /* Falls back to NULL policy [MPOL_DEFAULT] on any error */ - newpol = mpol_new(policy, flags, policy_nodes); - if (!IS_ERR(newpol)) { - /* Create pseudo-vma that contains just the policy */ - struct vm_area_struct pvma; + /* contextualize the tmpfs mount point mempolicy */ + new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); + mpol_put(mpol); /* drop our ref on sb mpol */ + if (IS_ERR(new)) + return; /* no valid nodemask intersection */ - memset(&pvma, 0, sizeof(struct vm_area_struct)); - /* Policy covers entire file */ - pvma.vm_end = TASK_SIZE; - mpol_set_shared_policy(info, &pvma, newpol); - mpol_put(newpol); - } + /* Create pseudo-vma that contains just the policy */ + memset(&pvma, 0, sizeof(struct vm_area_struct)); + pvma.vm_end = TASK_SIZE; /* policy covers entire file */ + mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ + mpol_put(new); /* drop initial ref */ } } @@ -1962,18 +1970,27 @@ static const char * const policy_types[] = /** * mpol_parse_str - parse string to mempolicy * @str: string containing mempolicy to parse - * @mode: pointer to returned policy mode - * @mode_flags: pointer to returned flags - * @policy_nodes: pointer to returned nodemask + * @mpol: pointer to struct mempolicy pointer, returned on success. + * @no_context: flag whether to "contextualize" the mempolicy * * Format of input: * <mode>[=<flags>][:<nodelist>] * - * Currently only used for tmpfs/shmem mount options + * if @no_context is true, save the input nodemask in w.user_nodemask in + * the returned mempolicy. This will be used to "clone" the mempolicy in + * a specific context [cpuset] at a later time.
Used to parse tmpfs mpol + * mount option. Note that if 'static' or 'relative' mode flags were + * specified, the input nodemask will already have been saved. Saving + * it again is redundant, but safe. + * + * On success, returns 0, else 1 */ -int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags, - nodemask_t *policy_nodes) +int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) { + struct mempolicy *new = NULL; + unsigned short uninitialized_var(mode); + unsigned short uninitialized_var(mode_flags); + nodemask_t nodes; char *nodelist = strchr(str, ':'); char *flags = strchr(str, '='); int i; @@ -1982,26 +1999,30 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags, if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; - if (nodelist_parse(nodelist, *policy_nodes)) + if (nodelist_parse(nodelist, nodes)) goto out; - if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY])) + if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY])) goto out; - } + } else + nodes_clear(nodes); + if (flags) *flags++ = '\0'; /* terminate mode string */ for (i = 0; i <= MPOL_LOCAL; i++) { if (!strcmp(str, policy_types[i])) { - *mode = i; + mode = i; break; } } if (i > MPOL_LOCAL) goto out; - switch (*mode) { + switch (mode) { case MPOL_PREFERRED: - /* Insist on a nodelist of one node only */ + /* + * Insist on a nodelist of one node only + */ if (nodelist) { char *rest = nodelist; while (isdigit(*rest)) @@ -2010,63 +2031,73 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags, err = 0; } break; - case MPOL_BIND: - /* Insist on a nodelist */ - if (nodelist) - err = 0; - break; case MPOL_INTERLEAVE: /* * Default to online nodes with memory if no nodelist */ if (!nodelist) - *policy_nodes = node_states[N_HIGH_MEMORY]; + nodes = node_states[N_HIGH_MEMORY]; err = 0; break; - default: + case MPOL_LOCAL: /* - * MPOL_DEFAULT or MPOL_LOCAL - * Don't allow a nodelist nor 
flags + * Don't allow a nodelist; mpol_new() checks flags */ - if (!nodelist && !flags) - err = 0; - if (*mode == MPOL_DEFAULT) + if (nodelist) goto out; - /* else MPOL_LOCAL */ - *mode = MPOL_PREFERRED; - nodes_clear(*policy_nodes); + mode = MPOL_PREFERRED; break; + + /* + * case MPOL_BIND: mpol_new() enforces non-empty nodemask. + * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags. + */ } - *mode_flags = 0; + mode_flags = 0; if (flags) { /* * Currently, we only support two mutually exclusive * mode flags. */ if (!strcmp(flags, "static")) - *mode_flags |= MPOL_F_STATIC_NODES; + mode_flags |= MPOL_F_STATIC_NODES; else if (!strcmp(flags, "relative")) - *mode_flags |= MPOL_F_RELATIVE_NODES; + mode_flags |= MPOL_F_RELATIVE_NODES; else err = 1; } + + new = mpol_new(mode, mode_flags, &nodes); + if (IS_ERR(new)) + err = 1; + else if (no_context) + new->w.user_nodemask = nodes; /* save for contextualization */ + out: /* Restore string for error message */ if (nodelist) *--nodelist = ':'; if (flags) *--flags = '='; + if (!err) + *mpol = new; return err; } #endif /* CONFIG_TMPFS */ -/* +/** + * mpol_to_str - format a mempolicy structure for printing + * @buffer: to contain formatted mempolicy string + * @maxlen: length of @buffer + * @pol: pointer to mempolicy to be formatted + * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask + * * Convert a mempolicy into a string. 
* Returns the number of characters in buffer (if positive) * or an error (negative) */ -int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) +int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) { char *p = buffer; int l; @@ -2100,7 +2131,10 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) case MPOL_BIND: /* Fall through */ case MPOL_INTERLEAVE: - nodes = pol->v.nodes; + if (no_context) + nodes = pol->w.user_nodemask; + else + nodes = pol->v.nodes; break; default: @@ -2231,7 +2265,7 @@ int show_numa_map(struct seq_file *m, void *v) return 0; pol = get_vma_policy(priv->task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol); + mpol_to_str(buffer, sizeof(buffer), pol, 0); mpol_cond_put(pol); seq_printf(m, "%08lx %s", vma->vm_start, buffer); diff --git a/mm/shmem.c b/mm/shmem.c index 3c620dc10135..e6d9298aa22a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1079,23 +1079,29 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) #ifdef CONFIG_NUMA #ifdef CONFIG_TMPFS -static void shmem_show_mpol(struct seq_file *seq, unsigned short mode, - unsigned short flags, const nodemask_t policy_nodes) +static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { - struct mempolicy temp; char buffer[64]; - if (mode == MPOL_DEFAULT) + if (!mpol || mpol->mode == MPOL_DEFAULT) return; /* show nothing */ - temp.mode = mode; - temp.flags = flags; - temp.v.nodes = policy_nodes; - - mpol_to_str(buffer, sizeof(buffer), &temp); + mpol_to_str(buffer, sizeof(buffer), mpol, 1); seq_printf(seq, ",mpol=%s", buffer); } + +static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) +{ + struct mempolicy *mpol = NULL; + if (sbinfo->mpol) { + spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ + mpol = sbinfo->mpol; + mpol_get(mpol); + spin_unlock(&sbinfo->stat_lock); + } + return mpol; +} #endif /* CONFIG_TMPFS */ static struct page *shmem_swapin(swp_entry_t entry, 
gfp_t gfp, @@ -1135,8 +1141,7 @@ static struct page *shmem_alloc_page(gfp_t gfp, } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS -static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy, - unsigned short flags, const nodemask_t policy_nodes) +static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) { } #endif /* CONFIG_TMPFS */ @@ -1154,6 +1159,13 @@ static inline struct page *shmem_alloc_page(gfp_t gfp, } #endif /* CONFIG_NUMA */ +#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS) +static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) +{ + return NULL; +} +#endif + /* * shmem_getpage - either get the page from swap or allocate a new one * @@ -1508,8 +1520,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) case S_IFREG: inode->i_op = &shmem_inode_operations; inode->i_fop = &shmem_file_operations; - mpol_shared_policy_init(&info->policy, sbinfo->policy, - sbinfo->flags, &sbinfo->policy_nodes); + mpol_shared_policy_init(&info->policy, + shmem_get_sbmpol(sbinfo)); break; case S_IFDIR: inc_nlink(inode); @@ -1523,8 +1535,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) * Must not load anything in the rbtree, * mpol_free_shared_policy will not be called. 
*/ - mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0, - NULL); + mpol_shared_policy_init(&info->policy, NULL); break; } } else @@ -2139,8 +2150,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, if (*rest) goto bad_val; } else if (!strcmp(this_char,"mpol")) { - if (mpol_parse_str(value, &sbinfo->policy, - &sbinfo->flags, &sbinfo->policy_nodes)) + if (mpol_parse_str(value, &sbinfo->mpol, 1)) goto bad_val; } else { printk(KERN_ERR "tmpfs: Bad mount option %s\n", @@ -2191,9 +2201,9 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) sbinfo->free_blocks = config.max_blocks - blocks; sbinfo->max_inodes = config.max_inodes; sbinfo->free_inodes = config.max_inodes - inodes; - sbinfo->policy = config.policy; - sbinfo->flags = config.flags; - sbinfo->policy_nodes = config.policy_nodes; + + mpol_put(sbinfo->mpol); + sbinfo->mpol = config.mpol; /* transfers initial ref */ out: spin_unlock(&sbinfo->stat_lock); return error; @@ -2214,8 +2224,7 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",uid=%u", sbinfo->uid); if (sbinfo->gid != 0) seq_printf(seq, ",gid=%u", sbinfo->gid); - shmem_show_mpol(seq, sbinfo->policy, sbinfo->flags, - sbinfo->policy_nodes); + shmem_show_mpol(seq, sbinfo->mpol); return 0; } #endif /* CONFIG_TMPFS */ @@ -2245,9 +2254,7 @@ static int shmem_fill_super(struct super_block *sb, sbinfo->mode = S_IRWXUGO | S_ISVTX; sbinfo->uid = current->fsuid; sbinfo->gid = current->fsgid; - sbinfo->policy = MPOL_DEFAULT; - sbinfo->flags = 0; - sbinfo->policy_nodes = node_states[N_HIGH_MEMORY]; + sbinfo->mpol = NULL; sb->s_fs_info = sbinfo; #ifdef CONFIG_TMPFS