[PATCH 4/5] FUSE: add fuse_conn->release()

Previous thread: [PATCHSET] FUSE: extend FUSE to support more operations by Tejun Heo on Thursday, August 28, 2008 - 10:40 am. (81 messages)

Next thread: Re: Fwd: Re: [PATCH] USB: add USB test and measurement class driver by Marcel Janssen on Thursday, August 28, 2008 - 11:22 am. (3 messages)
From: Tejun Heo
Date: Thursday, August 28, 2008 - 11:18 am

This patchset implements CUSE - Character device in Userspace.  Except
for initialization sequence and creation of character device instead
of a mount, CUSE isn't very different from FUSE.

This patchset is consisted of the following five patches.

  0001-FUSE-add-fuse_-prefix-to-several-functions.patch
  0002-FUSE-export-symbols-to-be-used-by-CUSE.patch
  0003-FUSE-separate-out-fuse_conn_init-from-new_conn.patch
  0004-FUSE-add-fuse_conn-release.patch
  0005-CUSE-implement-CUSE-Character-device-in-Userspace.patch

0001-0004 prepares FUSE for CUSE addition and 0005 implements CUSE.
Corresponding libfuse changes will be posted separately.

This patchset is on top of...

  2.6.27-rc4 (b8e6c91c74e9f0279b7c51048779b3d62da60b88)
+ [1] 9p-use-single-poller patchset
+ [2] wait-kill-is_sync_wait
+ [3] poll-allow-f_op_poll-to-sleep
+ [4] uevent updates (2 patches)
+ [5] char_dev-add-release
+ [6] extend-FUSE patchset

The above three patches allow f_op->poll() to sleep and 0007 depends
on it.

This patchset is available in the following git tree.

 http://git.kernel.org/?p=linux/kernel/git/tj/misc.git;a=shortlog;h=cuse
 git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git cuse

and contains the following changes.

 fs/Kconfig           |   10 
 fs/fuse/Makefile     |    1 
 fs/fuse/cuse.c       |  634 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/dev.c        |   32 +-
 fs/fuse/dir.c        |   34 +-
 fs/fuse/file.c       |   60 ++--
 fs/fuse/fuse_i.h     |   46 +++
 fs/fuse/inode.c      |  143 ++++++-----
 include/linux/cuse.h |   40 +++
 include/linux/fuse.h |    2 
 10 files changed, 882 insertions(+), 120 deletions(-)

Thanks.

--
tejun

[1] http://thread.gmane.org/gmane.linux.kernel/726098
[2] http://article.gmane.org/gmane.linux.kernel/726176
[3] http://article.gmane.org/gmane.linux.kernel/726178
[4] http://thread.gmane.org/gmane.linux.kernel/727127
[5] http://article.gmane.org/gmane.linux.kernel/727133
[6] ...
From: Tejun Heo
Date: Thursday, August 28, 2008 - 11:19 am

Add fuse_ prefix to request_send*() and get_root_inode() as some of
those functions will be exported for CUSE.  With or without CUSE
export, having the function names scoped is a good idea for
debuggability.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/fuse/dev.c    |   23 ++++++++++++-----------
 fs/fuse/dir.c    |   34 +++++++++++++++++-----------------
 fs/fuse/file.c   |   30 +++++++++++++++---------------
 fs/fuse/fuse_i.h |    9 +++++----
 fs/fuse/inode.c  |   12 ++++++------
 5 files changed, 55 insertions(+), 53 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1c422f9..b448dfd 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -380,7 +380,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 	}
 }
 
-void request_send(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->isreply = 1;
 	spin_lock(&fc->lock);
@@ -399,8 +399,8 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
 	spin_unlock(&fc->lock);
 }
 
-static void request_send_nowait_locked(struct fuse_conn *fc,
-				       struct fuse_req *req)
+static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
+					    struct fuse_req *req)
 {
 	req->background = 1;
 	fc->num_background++;
@@ -414,11 +414,11 @@ static void request_send_nowait_locked(struct fuse_conn *fc,
 	flush_bg_queue(fc);
 }
 
-static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 {
 	spin_lock(&fc->lock);
 	if (fc->connected) {
-		request_send_nowait_locked(fc, req);
+		fuse_request_send_nowait_locked(fc, req);
 		spin_unlock(&fc->lock);
 	} else {
 		req->out.h.error = -ENOTCONN;
@@ -426,16 +426,16 @@ static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 	}
 }
 
-void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+void ...
From: Tejun Heo
Date: Thursday, August 28, 2008 - 11:19 am

Export the following symbols for CUSE.

fuse_conn_put()
fuse_conn_get()
fuse_get_root_inode()
fuse_super_operations
fuse_send_init()
fuse_flush()
fuse_fsync()
fuse_direct_io()
fuse_file_lock()
fuse_file_flock()
fuse_file_llseek()
fuse_file_ioctl()
fuse_file_compat_ioctl()
fuse_file_poll()

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/fuse/dev.c    |    9 ++++++++-
 fs/fuse/file.c   |   30 ++++++++++++++++++++----------
 fs/fuse/fuse_i.h |   29 +++++++++++++++++++++++++++++
 fs/fuse/inode.c  |   11 ++++++++---
 4 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b448dfd..75e2775 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void)
 		fuse_request_init(req);
 	return req;
 }
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
 struct fuse_req *fuse_request_alloc_nofs(void)
 {
@@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 	atomic_dec(&fc->num_waiting);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(fuse_get_req);
 
 /*
  * Return request in fuse_file->reserved_req.  However that may
@@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 			fuse_request_free(req);
 	}
 }
+EXPORT_SYMBOL_GPL(fuse_put_request);
 
 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 {
@@ -398,6 +401,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_request_send);
 
 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 					    struct fuse_req *req)
@@ -1092,8 +1096,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
-static int fuse_dev_release(struct inode *inode, struct file *file)
+int fuse_dev_release(struct inode *inode, struct file *file)
 {
 	struct fuse_conn *fc = fuse_get_conn(file);
 	if (fc) {
@@ -1108,6 +1113,7 @@ static int ...
From: Tejun Heo
Date: Thursday, August 28, 2008 - 11:19 am

Separate out fuse_conn_init() from new_conn() and while at it
initialize fuse_conn->entry during conn initialization.

This will be used by CUSE.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/fuse/fuse_i.h |    5 ++
 fs/fuse/inode.c  |  116 +++++++++++++++++++++++++++++------------------------
 2 files changed, 68 insertions(+), 53 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bc55f6d..4795264 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -679,6 +679,11 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 
 /**
+ * Initialize fuse_conn
+ */
+int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
+
+/**
  * Release reference to fuse_conn
  */
 void fuse_conn_put(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fae8732..8d092ea 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -462,66 +462,76 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
-static struct fuse_conn *new_conn(struct super_block *sb)
+int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
 {
-	struct fuse_conn *fc;
 	int err;
 
-	fc = kzalloc(sizeof(*fc), GFP_KERNEL);
-	if (fc) {
-		spin_lock_init(&fc->lock);
-		mutex_init(&fc->inst_mutex);
-		atomic_set(&fc->count, 1);
-		init_waitqueue_head(&fc->waitq);
-		init_waitqueue_head(&fc->blocked_waitq);
-		init_waitqueue_head(&fc->reserved_req_waitq);
-		INIT_LIST_HEAD(&fc->pending);
-		INIT_LIST_HEAD(&fc->processing);
-		INIT_LIST_HEAD(&fc->io);
-		INIT_LIST_HEAD(&fc->interrupts);
-		INIT_LIST_HEAD(&fc->bg_queue);
-		atomic_set(&fc->num_waiting, 0);
-		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-		fc->bdi.unplug_io_fn = default_unplug_io_fn;
-		/* fuse does it's own writeback accounting */
-		fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
-		fc->polled_files = RB_ROOT;
-		fc->dev = sb->s_dev;
-		err = bdi_init(&fc->bdi);
-		if ...
From: Tejun Heo
Date: Thursday, August 28, 2008 - 11:19 am

Add fuse_conn->release() so that fuse_conn can be embedded in other
structures.  If unspecified, the original action - kfree() - is done.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/fuse/fuse_i.h |    3 +++
 fs/fuse/inode.c  |    6 +++++-
 2 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4795264..67f33e8 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -465,6 +465,9 @@ struct fuse_conn {
 
 	/** Version counter for attribute changes */
 	u64 attr_version;
+
+	/** Called on final put.  If implemented, should free the connection */
+	void (*release)(struct fuse_conn *);
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8d092ea..b99bb95 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -543,7 +543,11 @@ void fuse_conn_put(struct fuse_conn *fc)
 			fuse_request_free(fc->destroy_req);
 		mutex_destroy(&fc->inst_mutex);
 		bdi_destroy(&fc->bdi);
-		kfree(fc);
+
+		if (fc->release)
+			fc->release(fc);
+		else
+			kfree(fc);
 	}
 }
 EXPORT_SYMBOL_GPL(fuse_conn_put);
-- 
1.5.4.5

--

From: Tejun Heo
Date: Thursday, August 28, 2008 - 11:19 am

CUSE enables implementing character devices in userspace.  With recent
additions of nonblock, lseek, ioctl and poll support, FUSE already has
most of what's necessary to implement character devices.  All CUSE has
to do is bonding all those components - FUSE, chardev and the driver
model - nicely.

Due to the number of different objects involved and many ways an
instance can fail, object lifetime rules are a tad bit complex.
Please take a look at the comment on top of fs/fuse/cuse.c for
details.

Other than that, it's mostly straight forward.  Client opens
/dev/cuse, kernel starts conversation with CUSE_INIT.  The client
tells CUSE which device it wants to create.  CUSE creates the device
for the client and the rest works the same way as in a direct IO FUSE
session.

Each CUSE device has a corresponding directory /sys/class/cuse/DEVNAME
(which is symlink to /sys/devices/virtual/class/DEVNAME if
SYSFS_DEPRECATED is turned off) which hosts "waiting" and "abort"
among other things.  Those two files have the same meaning as the FUSE
control files.

The only notable lacking feature compared to in-kernel implementation
is mmap support.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/Kconfig           |   10 +
 fs/fuse/Makefile     |    1 +
 fs/fuse/cuse.c       |  634 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/cuse.h |   40 ++++
 include/linux/fuse.h |    2 +
 5 files changed, 687 insertions(+), 0 deletions(-)
 create mode 100644 fs/fuse/cuse.c
 create mode 100644 include/linux/cuse.h

diff --git a/fs/Kconfig b/fs/Kconfig
index d387358..3da7551 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -648,6 +648,16 @@ config FUSE_FS
 	  If you want to develop a userspace FS, or if you want to use
 	  a filesystem based on FUSE, answer Y or M.
 
+config CUSE
+	tristate "Character device in Userpace support"
+	depends on FUSE_FS
+	help
+	  This FUSE extension allows character devices to be
+	  implemented in userspace.
+
+	  If you want to develop or use ...
From: Andrew Morton
Date: Thursday, August 28, 2008 - 1:07 pm

On Fri, 29 Aug 2008 03:19:04 +0900



I didn't know you could do that with bools.

These two fields will share a word, and modifications of one are racy
wrt modifications of the other.  So some form of locking is needed, and

I believe all the above could be implemented in C.

Making that change would fix the bug in cuse_conn_get(), which


OK, I have NFI whatsoever what this thing is doing and I am disinclined
to reverse-engineer it.

If this is parsing something whcih operators/users provided then it
should have been documented somewhere?

Whether it is or isn't doing that, this function really really really
needs a comment telling readers (ie: me) what it does.  Or what it

This functions seems to be woefully misnamed.  The name implies that it
initialises a worker.  But it's a kernel thread?

Could you please document your design somehow?  What does this kernel

hotplug?  What's all this?  Seems to have something to do with an

So...  basically this undocumented kernel thread will for undocumented
reasons create the device node?

An obvious question which the reader of the code will ask is "why

current->pid is non-unique in a containerised setup.  What are the
implications of this?  It needs a comment, because the containerisation
guys will end up coming here and scratching their heads over the same

This reader is wondering what a "channel" is.  Understanding high-level

<looks in fuse_i.h>

"The number of requests waiting for completion".



Some description of the kernel<->userspace versioning design would be

Nice-looking code, but I do not feel able to properly review it with
its current level of description.
--

From: Greg KH
Date: Thursday, August 28, 2008 - 3:15 pm

"traditionally" container_of() is used in #define, not a function call
as it is just pointer math that can be done at compile time.

thanks,

greg k-h
--

From: Andrew Morton
Date: Thursday, August 28, 2008 - 3:32 pm

On Thu, 28 Aug 2008 15:15:25 -0700

Well yeah.  But it isn't a very good tradition.

static inline struct cuse_conn *cdev_to_cc(struct cdev *cdev)
{
	return container_of(cdev, struct cuse_conn, cdev);
}

should generate the same code and is prettier.

Unfortunately it has no additional type-safety.  You can still pass it
the address of a tty_driver.cdev instead of a cuse_conn.cdev and the
compiler will happily swallow it.  Not a big problem in practice though.

--

From: Tejun Heo
Date: Thursday, August 28, 2008 - 7:09 pm

Hello, Andrew.


Yeah, as much as other character devices are.  As long as uevent works,





The thing is when error handling paths are lumped up using "if (xxx)
destroy(xxx);", it's a good idea to always initialize variables which
will carry allocated resource.  For this iteration, it doesn't make any
difference but later if the order of initialization changes and/or a new
sequence is added before cc initialization, it's all to easy to forget
whether cc was initialized to NULL or not.  gcc will catch it most of
the time but there's no guarantee, so I think it's better to keep it

Yeah, reading an undocumented parsing function is really painful.  Sorry
about lack of comments there.  With too many components to update at
hands, I kinda ran out of steam in the last stages where I do the final
review pass through patches and add comments.

The input is packed strings - "key0=val0\0key1=val1\0" - and parse one

CUSE initialization worker, it is, meaning that it's a kernel thread


These will be feeded to uevent verbatim.  The only reason why it's
called hotplug_info instead of uevent_envp is because FUSE works on

It's briefly mentioned in comment below.  It's because the
initialization needs to talk with userland and the talk should happen
over a file which the following cuse_channel_open() creates.  So,
cuse_channel_open() can't really talk with the client without completing

Not much.  It's just the name of the worker.  The name differentiation
is mainly to help debugging a bit when something went wrong and doesn't

Yes, and from the comment at the top of the file.

 * channel	: file handle connected to the userland CUSE client
...
 * Note that 'channel' is what 'dev' is in FUSE.  As CUSE deals with
 * devices, it's called 'channel' to reduce confusion.
 *
 * channel determines when the character device dies.  When channel is
 * closed, everything should begin to destruct.  As cuse_conn and mnt

"waiting" and "abort" are what fuse exports as controls in fuse ...
From: Andrew Morton
Date: Thursday, August 28, 2008 - 7:20 pm

oop, I failed to note the struct assignment there.  The usual rule of
thumb applies: I can be safely ignored.

--

From: Nick Bowler
Date: Friday, August 29, 2008 - 8:27 am

Boolean bit-fields are indeed valid.  Usual semantics for objects of type
_Bool apply.

C99 6.7.2.1 Structure and union specifiers

4   A bit-field shall have a type that is a qualified or unqualified
    version of _Bool, signed int, unsigned int, or some other
    implementation-defined type.

-- 
Nick Bowler, Elliptic Semiconductor (http://www.ellipticsemi.com/)

--

From: Mike Hommey
Date: Thursday, August 28, 2008 - 10:50 pm

It would be nice to have BUSE, Block device in Userspace, too.

Mike
--

From: Tejun Heo
Date: Thursday, August 28, 2008 - 10:52 pm

Thought about that but it's really no different from nbd or loop
depending on your application and block devices don't really implement
the file operations so it won't have too much in common with FUSE.
Also, there's the complication of going out to disk for more memory cases.

-- 
tejun
--

From: Archie Cobbs
Date: Friday, August 29, 2008 - 11:50 am

I think BUSE would be useful. For one, it allows you to avoid problems with
the extra caching you get with a loopback device. And NBD is too limiting
for some applications.

For my half-ignorant analysis of the caching issues, see:
  http://code.google.com/p/s3backer/wiki/PerformanceConsiderations#Caching


Not sure what you mean exactly (my fault), but it seems BUSE would have fewer
places for memory problems (including deadlocks) than loopback over FUSE,
which is the only way to do this kind of stuff now.

-Archie

--
Archie L. Cobbs
--

From: Tejun Heo
Date: Saturday, August 30, 2008 - 5:30 am

[Empty message]
From: Mike Hommey
Date: Saturday, August 30, 2008 - 11:56 am

My gutt feeling is that it would have less overhead when done via FUSE
than through nbd, but that could be wrong.

Mike
--

From: Goswin von Brederlow
Date: Monday, September 1, 2008 - 12:20 am

What I would hope is that BUSE would add some zero copy transport in
there. A way to just redirect the BIOs from the BUSE device to other
block devices without copying the data around.

But maybe BUSE is a bad name for that. That would be more a DUSE -
Device mapper in USEr space.

MfG
        Goswin.
--

From: Mike Hommey
Date: Monday, September 1, 2008 - 12:38 am

From: Archie Cobbs
Date: Saturday, August 30, 2008 - 3:39 pm

Well, NBD is the bird in hand, but that doesn't mean it's the best way
to do things generically for all block device emulation applications.

I'd even argue that NBD should be removed from the kernel and replaced
by BUSE plus a user-land daemon. A BUSE interface could be a lot more
general, and simpler.

Not to mention that converting all block reads and writes to TCP
operations that talk to another process on the same machine via the
loopback interface seems awfully inefficient.

-Archie

--
Archie L. Cobbs
--

From: hooanon05
Date: Saturday, August 30, 2008 - 9:52 pm

While it is not based upon FUSE, you may be interested in ULOOP driver
which is based upon the loopback block device.
Here is a README file from http://aufs.sourceforge.net/uloop.txt.
If you want to checkout the source files, please refer to
http://aufs.sourceforge.net/.


Junjiro R. Okajima

----------------------------------------------------------------------

ULOOP -- Loopback block device in userspace
(and a sample for HTTP and generic block device)
Junjiro Okajima

# $Id: 00readme.txt,v 1.6 2008/08/17 23:04:29 sfjro Exp $


0. Introduction
As you know, there is a Loopback block device in Linux, /dev/loop,
which enables you to mount a fs-image local file.
Also it can adopt a userspace program, such as cryptloop.
This sample ULOOP driver makes it generic, and enables to adopt any
userspace program.
You can give an empty or non-existing file to /dev/loop backend.
When a process reads from /dev/loop, this dirver wakes a user process
up and passes the I/O transaction to it. A user process makes the
required block ready and tells the driver. Then the driver completes
the I/O transaction.
Also there is sample scripts or usage for diskless nodes working with
aufs. This driver may work with it well.
The name is unrelated to YouTube. :-)


1. sample for HTTP
Simple 'make' will build ./drivers/block/uloop.ko and ./ulohttp.
Ulohttp application behaves like losetup(8). Additionally, ulohttp is
an actual daemon which handles I/O request.
Here is a syntax.

ulohttp [-b bitmap] [-c cache] device URL

The device is /dev/loopN and the URL is a URL for fs-image file via
HTTP. The http server must support byte range (Range: header).
The bitmap is a new filename or previously specified as the bitmap for
the same URL. Its filesize will be 'the size of the specified fs-image
/ pagesize (usually 4k) / bits in a byte (8)', and round-up to
pagesize.
The cache is a new filename or previously specified as the cache for
the same URL. Its filesize will be 'the size of the ...
From: Goswin von Brederlow
Date: Saturday, August 30, 2008 - 9:35 am

Would it be hard to extend this to block devices as well?

MfG
        Goswin
--

Previous thread: [PATCHSET] FUSE: extend FUSE to support more operations by Tejun Heo on Thursday, August 28, 2008 - 10:40 am. (81 messages)

Next thread: Re: Fwd: Re: [PATCH] USB: add USB test and measurement class driver by Marcel Janssen on Thursday, August 28, 2008 - 11:22 am. (3 messages)