This implements the ability to remove inodes in a particular slab
from inode caches. In order to remove an inode we may have to write out
the pages of an inode, the inode itself and remove the dentries referring
to the node.
Provide generic functionality that can be used by filesystems that have
their own inode caches to also tie into the defragmentation functions
that are made available here.
Cc: Alexander Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
fs/inode.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 6 ++
2 files changed, 129 insertions(+)
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c 2008-05-07 20:27:10.563908386 -0700
+++ linux-2.6/fs/inode.c 2008-05-07 20:48:14.473081107 -0700
@@ -1363,6 +1363,128 @@ static int __init set_ihash_entries(char
__setup("ihash_entries=", set_ihash_entries);
/*
+ * Obtain a refcount on a list of struct inodes pointed to by v. If the
+ * inode is in the process of being freed then zap the v[] entry so that
+ * we skip the freeing attempts later.
+ *
+ * This is a generic function for the ->get slab defrag callback.
+ */
+void *get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+ int i;
+
+ spin_lock(&inode_lock);
+ for (i = 0; i < nr; i++) {
+ struct inode *inode = v[i];
+
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+ v[i] = NULL;
+ else
+ __iget(inode);
+ }
+ spin_unlock(&inode_lock);
+ return NULL;
+}
+EXPORT_SYMBOL(get_inodes);
+
+/*
+ * Function for filesystems that embedd struct inode into their own
+ * fs inode. The offset is the offset of the struct inode in the fs inode.
+ *
+ * The function adds to the pointers in v[] in order to make them point to
+ * struct inode. Then get_inodes() is used to get the refcount.
+ * The converted v[] pointers can then also be passed to the kick() callback
+ * without further processing.
+ */
+void *fs_get_inodes(struct kmem_cache *s, int nr, void **v,
+ unsigned long offset)
+{
+ int i;
+
+ for (i = 0; i < nr; i++)
+ v[i] += offset;
+
+ return get_inodes(s, nr, v);
+}
+EXPORT_SYMBOL(fs_get_inodes);
+
+/*
+ * Generic callback function slab defrag ->kick methods. Takes the
+ * array with inodes where we obtained refcounts using fs_get_inodes()
+ * or get_inodes() and tries to free them.
+ */
+void kick_inodes(struct kmem_cache *s, int nr, void **v, void *private)
+{
+ struct inode *inode;
+ int i;
+ int abort = 0;
+ LIST_HEAD(freeable);
+ int active;
+
+ for (i = 0; i < nr; i++) {
+ inode = v[i];
+ if (!inode)
+ continue;
+
+ if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+ if (remove_inode_buffers(inode))
+ /*
+ * Should be really be doing this? Or
+ * limit this if there are only a few pages?
+ *
+ * Possibly an expensive operation but we
+ * cannot reclaim the inode if the pages
+ * are still present.
+ */
+ invalidate_mapping_pages(&inode->i_data,
+ 0, -1);
+ }
+
+ /* Invalidate children and dentry */
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *d = d_find_alias(inode);
+
+ if (d) {
+ d_invalidate(d);
+ dput(d);
+ }
+ }
+
+ if (inode->i_state & I_DIRTY)
+ write_inode_now(inode, 1);
+
+ d_prune_aliases(inode);
+ }
+
+ mutex_lock(&iprune_mutex);
+ for (i = 0; i < nr; i++) {
+ inode = v[i];
+
+ if (!inode)
+ /* inode is alrady being freed */
+ continue;
+
+ active = inode->i_sb->s_flags & MS_ACTIVE;
+ iput(inode);
+ if (abort || !active)
+ continue;
+
+ spin_lock(&inode_lock);
+ abort = !can_unuse(inode);
+
+ if (!abort) {
+ list_move(&inode->i_list, &freeable);
+ inode->i_state |= I_FREEING;
+ inodes_stat.nr_unused--;
+ }
+ spin_unlock(&inode_lock);
+ }
+ dispose_list(&freeable);
+ mutex_unlock(&iprune_mutex);
+}
+EXPORT_SYMBOL(kick_inodes);
+
+/*
* Initialize the waitqueues and inode hash table.
*/
void __init inode_init_early(void)
@@ -1401,6 +1523,7 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
+ kmem_cache_setup_defrag(inode_cachep, get_inodes, kick_inodes);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2008-05-07 20:27:10.573910205 -0700
+++ linux-2.6/include/linux/fs.h 2008-05-07 20:30:15.153909886 -0700
@@ -1826,6 +1826,12 @@ static inline void insert_inode_hash(str
__insert_inode_hash(inode, inode->i_ino);
}
+/* Helper functions for inode defragmentation support in filesystems */
+extern void kick_inodes(struct kmem_cache *, int, void **, void *);
+extern void *get_inodes(struct kmem_cache *, int nr, void **);
+extern void *fs_get_inodes(struct kmem_cache *, int nr, void **,
+ unsigned long offset);
+
extern struct file * get_empty_filp(void);
extern void file_move(struct file *f, struct list_head *list);
extern void file_kill(struct file *f);
--
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
| Ingo Molnar | Re: [patch] paravirt: VDSO page is essential |
| Johannes Weiner | Re: Versioning file system |
| Matt Mackall | [PATCH 1/13] maps: Uninline some functions in the page walker |
| Greg KH | [patch 00/49] 2.6.25-stable review |
git: | |
| Johannes Schindelin | Re: [PATCH 1/4] Move redo merge code in a function |
| Dmitry Potapov | Re: [RFC] Git User's Survey 2008 |
| Johannes Schindelin | Re: [PATCH] Teach 'git apply' to look at $GIT_DIR/config |
| Shawn O. Pearce | Re: [kernel.org users] [RFD] On deprecating "git-foo" for builtins |
| Brian A. Seklecki | sshd_config(5) PermitRootLogin yes |
| Richard Stallman | Real men don't attack straw men |
| ropers | Re: low-MHz server |
| Diego Fernando Nieto Moreno | Intel DG33 Support |
| Holger Schurig | Re: Linux Wireless Mini-Summit -- Ottawa -- July 22, 2008 |
| Tilman Schmidt | Re: 2.6.25-rc8: FTP transfer errors |
| Eric Dumazet | Re: [rfc][patch 3/3] use SLAB_ALIGN_SMP |
| Lennert Buytenhek | [PATCH 21/39] mv643xx_eth: move port_receive() into its only caller |
| high memory | 13 hours ago | Linux kernel |
| semaphore access speed | 16 hours ago | Applications and Utilities |
| the kernel how to power off the machine | 17 hours ago | Linux kernel |
| Easter Eggs in windows XP | 20 hours ago | Windows |
| Shared swap partition | 21 hours ago | Linux general |
| Root password | 21 hours ago | Linux general |
| Where/when DNOTIFY is used? | 23 hours ago | Linux kernel |
| How to convert Linux Kernel built-in module into a loadable module | 1 day ago | Linux kernel |
| Linux 2.6.24 and I/O schedulers | 1 day ago | Linux kernel |
| USB Driver -- Interrupt Polling -- A Little Help Please | 1 day ago | Linux general |
