Re: [PATCH 4/7] Allow sysfs memory directories to be split

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: KAMEZAWA Hiroyuki
Date: Wednesday, July 14, 2010 - 1:30 am

On Wed, 14 Jul 2010 12:25:03 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:


This is a sample configfs interface for handling memory hotplug.
I wrote this just for fun and study. Code duplication was not as
big as expected; most of the code is for configfs management.

You can ignore this, but please avoid changing the existing interface in a fancy way.

==
[root@bluextal kamezawa]# mount -t configfs none /configfs/
[root@bluextal kamezawa]# mkdir /configfs/memory/72
[root@bluextal kamezawa]# cat /configfs/memory/72/phys_index
00000048
[root@bluextal kamezawa]# cat /sys/devices/system/memory/memory72/phys_index
00000048
[root@bluextal kamezawa]# echo offline > /configfs/memory/72/state
[root@bluextal kamezawa]# cat /configfs/memory/72/state
offline
[root@bluextal kamezawa]# cat /sys/devices/system/memory/memory72/state
offline
[root@bluextal kamezawa]# echo online > /configfs/memory/72/state
[root@bluextal kamezawa]# cat /sys/devices/system/memory/memory72/state
online

No Signed-off-by.

---
 drivers/base/Makefile        |    2 
 drivers/base/memory.c        |   87 +++++++++++++++++--
 drivers/base/memory_config.c |  192 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/memory.h       |   10 ++
 mm/Kconfig                   |    1 
 5 files changed, 280 insertions(+), 12 deletions(-)

Index: mmotm-2.6.35-0701/drivers/base/memory.c
===================================================================
--- mmotm-2.6.35-0701.orig/drivers/base/memory.c
+++ mmotm-2.6.35-0701/drivers/base/memory.c
@@ -23,12 +23,15 @@
 #include <linux/mutex.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
+#include <linux/radix-tree.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
 #define MEMORY_CLASS_NAME	"memory"
 
+
+
 static struct sysdev_class memory_sysdev_class = {
 	.name = MEMORY_CLASS_NAME,
 };
@@ -104,17 +107,57 @@ unregister_memory(struct memory_block *m
 	sysdev_unregister(&memory->sysdev);
 }
 
+/* Routines for remembering memory's status when configfs is not used. */
+
+RADIX_TREE(hidden_mems, GFP_KERNEL);
+DEFINE_MUTEX(hidden_mems_mutex);
+int record_memory_state(unsigned long section_nr, int status)
+{
+	int ret = -ENOMEM;
+	long lstat = status << 8; /* for avoid radix'trees special handling */
+	mutex_lock(&hidden_mems_mutex);
+	radix_tree_delete(&hidden_mems, section_nr);
+	if (radix_tree_preload(GFP_KERNEL))
+		goto out;
+	ret = radix_tree_insert(&hidden_mems, section_nr, (void*)lstat);
+	radix_tree_preload_end();
+out:
+	mutex_unlock(&hidden_mems_mutex);
+	return ret;
+}
+
+int lookup_memory_state(unsigned long section_nr)
+{
+	void *ptr;
+	/* we already have big mutex */
+	ptr= radix_tree_lookup(&hidden_mems, section_nr);
+	/* treate not-recorded mems'state as ONLINE...? */
+	return ((long)ptr) >> 8;
+}
+
+void forget_memory_state(unsigned long section_nr)
+{
+	radix_tree_delete(&hidden_mems, section_nr);
+}
+
+
 /*
  * use this as the physical section index that this memsection
  * uses.
  */
 
+ssize_t show_memoryblock_phys_index(struct memory_block *mem,
+	char *buf)
+{
+	return sprintf(buf, "%08lx\n", mem->phys_index);
+}
+
 static ssize_t show_mem_phys_index(struct sys_device *dev,
 			struct sysdev_attribute *attr, char *buf)
 {
 	struct memory_block *mem =
 		container_of(dev, struct memory_block, sysdev);
-	return sprintf(buf, "%08lx\n", mem->phys_index);
+	return show_memoryblock_phys_index(mem, buf);
 }
 
 /*
@@ -136,11 +179,8 @@ static ssize_t show_mem_removable(struct
 /*
  * online, offline, going offline, etc.
  */
-static ssize_t show_mem_state(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+ssize_t show_memoryblock_state(struct memory_block *mem, char *buf)
 {
-	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
 	ssize_t len = 0;
 
 	/*
@@ -167,6 +207,15 @@ static ssize_t show_mem_state(struct sys
 	return len;
 }
 
+ssize_t show_mem_state(struct sys_device *dev,
+		struct sysdev_attribute *attr, char *buf)
+{
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+	mem->state = lookup_memory_state(mem->phys_index);
+	return show_memoryblock_state(mem, buf);
+}
+
 int memory_notify(unsigned long val, void *v)
 {
 	return blocking_notifier_call_chain(&memory_chain, val, v);
@@ -218,11 +267,14 @@ memory_block_action(struct memory_block 
 			break;
 		case MEM_OFFLINE:
 			mem->state = MEM_GOING_OFFLINE;
+			record_memory_state(mem->phys_index, mem->state);
 			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 			ret = remove_memory(start_paddr,
 					    PAGES_PER_SECTION << PAGE_SHIFT);
 			if (ret) {
 				mem->state = old_state;
+				record_memory_state(mem->phys_index,
+					mem->state);
 				break;
 			}
 			break;
@@ -241,6 +293,8 @@ static int memory_block_change_state(str
 	int ret = 0;
 	mutex_lock(&mem->state_mutex);
 
+	mem->state = lookup_memory_state(mem->phys_index);
+
 	if (mem->state != from_state_req) {
 		ret = -EINVAL;
 		goto out;
@@ -249,21 +303,18 @@ static int memory_block_change_state(str
 	ret = memory_block_action(mem, to_state);
 	if (!ret)
 		mem->state = to_state;
-
+	record_memory_state(mem->phys_index, mem->state);
 out:
 	mutex_unlock(&mem->state_mutex);
 	return ret;
 }
 
-static ssize_t
-store_mem_state(struct sys_device *dev,
-		struct sysdev_attribute *attr, const char *buf, size_t count)
+ssize_t store_memoryblock_state(struct memory_block *mem,
+				const char *buf, size_t count)
 {
-	struct memory_block *mem;
 	unsigned int phys_section_nr;
 	int ret = -EINVAL;
 
-	mem = container_of(dev, struct memory_block, sysdev);
 	phys_section_nr = mem->phys_index;
 
 	if (!present_section_nr(phys_section_nr))
@@ -279,6 +330,16 @@ out:
 	return count;
 }
 
+static ssize_t
+store_mem_state(struct sys_device *dev,
+		struct sysdev_attribute *attr, const char *buf, size_t count)
+{
+	struct memory_block *mem;
+
+	mem = container_of(dev, struct memory_block, sysdev);
+	return store_memoryblock_state(mem, buf, count);
+}
+
 /*
  * phys_device is a bad name for this.  What I really want
  * is a way to differentiate between memory ranges that
@@ -451,6 +512,8 @@ static int add_memory_block(int nid, str
 	start_pfn = section_nr_to_pfn(mem->phys_index);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
+	record_memory_state(mem->phys_index, state);
+
 	ret = register_memory(mem, section);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_index);
@@ -505,6 +568,7 @@ int remove_memory_block(unsigned long no
 	struct memory_block *mem;
 
 	mem = find_memory_block(section);
+	forget_memory_state(mem->phys_index);
 	unregister_mem_sect_under_nodes(mem);
 	mem_remove_simple_file(mem, phys_index);
 	mem_remove_simple_file(mem, state);
@@ -573,3 +637,4 @@ out:
 		printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
 	return ret;
 }
+
Index: mmotm-2.6.35-0701/drivers/base/memory_config.c
===================================================================
--- /dev/null
+++ mmotm-2.6.35-0701/drivers/base/memory_config.c
@@ -0,0 +1,192 @@
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/configfs.h>
+
+
+
+struct memory_hp_block {
+	struct config_group group;
+	struct memory_block mb;
+};
+struct memory_hp_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct memory_hp_block *, char *);
+	ssize_t (*store)(struct memory_hp_block *, const char *);
+};
+
+static struct memory_hp_block *to_mhp_block(struct config_item *item)
+{
+	if (!item)
+		return NULL;
+	return container_of(to_config_group(item),
+		struct memory_hp_block, group);
+}
+
+static ssize_t
+memory_hp_phys_index_read(struct memory_hp_block *mhb, char *page)
+{
+        return show_memoryblock_phys_index(&mhb->mb, page);
+}
+
+static struct memory_hp_attribute memory_hp_phys_index = {
+	.attr = { .ca_owner = THIS_MODULE,
+		  .ca_name = "phys_index",
+		  .ca_mode = S_IRUGO },
+	.show = memory_hp_phys_index_read,
+};
+
+static ssize_t
+memory_hp_state_read(struct memory_hp_block *mhb, char *page)
+{
+	/* synchronize */
+	printk("lookup section %ld\n", mhb->mb.phys_index);
+	mhb->mb.state = lookup_memory_state(mhb->mb.phys_index);
+	return show_memoryblock_state(&mhb->mb, page);
+}
+
+static ssize_t
+memory_hp_state_store(struct memory_hp_block *mhb, const char *page)
+{
+	int len = strnlen(page, 8);
+	printk("length %d str %s\n", len, page);
+	if (len > 8) /* online/offline */
+		return -EINVAL;
+	/* synchronize */
+	mhb->mb.state = lookup_memory_state(mhb->mb.phys_index);
+	return store_memoryblock_state(&mhb->mb, page, len);
+}
+
+static struct memory_hp_attribute memory_hp_state = {
+	.attr = { .ca_owner = THIS_MODULE,
+		  .ca_name = "state",
+		  .ca_mode = S_IRUGO|S_IWUSR },
+	.show = memory_hp_state_read,
+	.store = memory_hp_state_store,
+};
+
+static struct configfs_attribute *memory_hp_attrs[] = {
+	&memory_hp_phys_index.attr,
+	&memory_hp_state.attr,
+	NULL,
+};
+
+static ssize_t memory_hp_attr_show(struct config_item *item,
+		struct configfs_attribute *attr,
+		char *page)
+{
+	struct memory_hp_block *mhb = to_mhp_block(item);
+	struct memory_hp_attribute *memhp_attr =
+		container_of(attr, struct memory_hp_attribute, attr);
+	ssize_t ret = 0;
+
+	if (memhp_attr->show)
+		ret = memhp_attr->show(mhb, page);
+	return ret;
+}
+
+static ssize_t memory_hp_attr_store(struct config_item *item,
+		struct configfs_attribute *attr,
+		const char *page, size_t count)
+{
+	struct memory_hp_block *mhb = to_mhp_block(item);
+	struct memory_hp_attribute *memhp_attr =
+		container_of(attr, struct memory_hp_attribute, attr);
+	ssize_t ret = 0;
+
+	if (memhp_attr->store)
+		ret = memhp_attr->store(mhb, page);
+	else
+		ret = -EINVAL;
+	return ret;
+}
+
+static struct configfs_item_operations memory_hp_item_ops = {
+	.show_attribute = memory_hp_attr_show,
+	.store_attribute = memory_hp_attr_store,
+};
+
+static struct  config_item_type memory_hp_type = {
+	.ct_item_ops    = &memory_hp_item_ops,
+	.ct_attrs       = memory_hp_attrs,
+	.ct_owner       = THIS_MODULE,
+};
+
+static struct config_group *
+memory_hp_make_group(struct config_group *group, const char *name)
+{
+	struct memory_hp_block *mhb;
+	unsigned long long section_id;
+
+
+	if (strict_strtoull(name, 10, &section_id))
+		return ERR_PTR(-EINVAL);
+
+	if (!valid_section_nr(section_id))
+		return ERR_PTR(-EINVAL);
+
+	mhb = kzalloc(sizeof(*mhb), GFP_KERNEL);
+	if (!mhb)
+		return NULL;
+
+	config_group_init_type_name(&mhb->group, name, &memory_hp_type);
+
+	mhb->mb.phys_index = section_id;
+	mutex_init(&mhb->mb.state_mutex);
+	mhb->mb.state = lookup_memory_state(section_id);
+
+	return &mhb->group;
+}
+
+static void memory_hp_drop_item(struct config_group *group,
+		struct config_item *item)
+{
+	struct memory_hp_block *mhb;
+
+	mhb = container_of(group, struct memory_hp_block, group);
+	config_item_put(item);
+}
+
+static struct configfs_group_operations memory_hp_group_ops = {
+        .make_group     = memory_hp_make_group,
+	.drop_item	= memory_hp_drop_item,
+};
+
+static struct config_item_type memory_hp_subsys_type = {
+	.ct_group_ops = &memory_hp_group_ops,
+	.ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem memory_hp_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "memory",
+			.ci_type = &memory_hp_subsys_type,
+		},
+	},
+};
+
+static int __init memory_config_init(void)
+{
+	int ret;
+
+	config_group_init(&memory_hp_subsys.su_group);
+	mutex_init(&memory_hp_subsys.su_mutex);
+	ret =  configfs_register_subsystem(&memory_hp_subsys);
+	if (ret) {
+		printk(KERN_ERR "Error %d while registering memory configfs\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+#if 0
+static void __exit memory_config_exit(void)
+{
+	configfs_unregister_subsystem(&memory_hp_subsys);
+}
+#endif
+late_initcall(memory_config_init);
Index: mmotm-2.6.35-0701/drivers/base/Makefile
===================================================================
--- mmotm-2.6.35-0701.orig/drivers/base/Makefile
+++ mmotm-2.6.35-0701/drivers/base/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) 
 obj-$(CONFIG_ISA)	+= isa.o
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
-obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
+obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o memory_config.o
 obj-$(CONFIG_SMP)	+= topology.o
 obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
Index: mmotm-2.6.35-0701/mm/Kconfig
===================================================================
--- mmotm-2.6.35-0701.orig/mm/Kconfig
+++ mmotm-2.6.35-0701/mm/Kconfig
@@ -138,6 +138,7 @@ config MEMORY_HOTPLUG
 config MEMORY_HOTPLUG_SPARSE
 	def_bool y
 	depends on SPARSEMEM && MEMORY_HOTPLUG
+	select CONFIGFS_FS
 
 config MEMORY_HOTREMOVE
 	bool "Allow for memory hot remove"
Index: mmotm-2.6.35-0701/include/linux/memory.h
===================================================================
--- mmotm-2.6.35-0701.orig/include/linux/memory.h
+++ mmotm-2.6.35-0701/include/linux/memory.h
@@ -68,6 +68,16 @@ struct memory_isolate_notify {
 struct notifier_block;
 struct mem_section;
 
+
+ssize_t show_memoryblock_phys_index(struct memory_block *mb, char *buf);
+ssize_t show_memoryblock_state(struct memory_block *mb, char *buf);
+ssize_t store_memoryblock_state(struct memory_block *mb,
+			const char *buf, size_t count);
+
+int record_memory_state(unsigned long section, int state);
+int lookup_memory_state(unsigned long section);
+void forget_memory_state(unsigned long section);
+
 /*
  * Priorities for the hotplug memory callback routines (stored in decreasing
  * order in the callback chain)





--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[PATCH 1/7] Split the memory_block structure, Nathan Fontenot, (Mon Jul 12, 8:42 am)
[PATCH 2/7] Create the new 'end_phys_index' file, Nathan Fontenot, (Mon Jul 12, 8:43 am)
[PATCH 3/7] Update the [register,unregister]_memory routines, Nathan Fontenot, (Mon Jul 12, 8:44 am)
[PATCH 4/7] Allow sysfs memory directories to be split, Nathan Fontenot, (Mon Jul 12, 8:45 am)
[PATCH 5/7] update the mutex name in the memory_block struct, Nathan Fontenot, (Mon Jul 12, 8:46 am)
Re: [PATCH 1/7] Split the memory_block structure, KAMEZAWA Hiroyuki, (Mon Jul 12, 11:18 pm)
Re: [PATCH 3/7] Update the [register,unregister]_memory ro ..., KAMEZAWA Hiroyuki, (Mon Jul 12, 11:20 pm)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, KAMEZAWA Hiroyuki, (Mon Jul 12, 11:28 pm)
Re: [PATCH 1/7] Split the memory_block structure, Brian King, (Tue Jul 13, 7:00 am)
Re: [PATCH 1/7] Split the memory_block structure, Nathan Fontenot, (Tue Jul 13, 8:44 am)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, Nathan Fontenot, (Tue Jul 13, 8:51 am)
Re: [PATCH 1/7] Split the memory_block structure, Nathan Fontenot, (Tue Jul 13, 8:59 am)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, KAMEZAWA Hiroyuki, (Tue Jul 13, 5:35 pm)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, Nathan Fontenot, (Tue Jul 13, 8:18 pm)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, KAMEZAWA Hiroyuki, (Tue Jul 13, 8:25 pm)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, KAMEZAWA Hiroyuki, (Wed Jul 14, 1:30 am)
Re: [PATCH 4/7] Allow sysfs memory directories to be split, Nathan Fontenot, (Wed Jul 14, 10:16 am)