This patch is the combination of:
virt_after_migrate
! include/linux/init_task.h |    1 
! include/linux/sched.h     |    1 
! kernel/cpuset.c           |  173 ++++++++++++++++++++++++++++++++++++++++++----
! kernel/sched.c            |   20 ++++-

proc_cpuinfo
! arch/i386/kernel/cpu/proc.c |    9 +++++++++
! arch/ia64/kernel/setup.c    |   13 ++++++++++++-
! fs/proc/proc_misc.c         |   13 +++++++++++++
! include/linux/cpuset.h      |    3 +++
! kernel/cpuset.c             |   42 ++++++++++++++++++++++++++++++++++++++++++

kthread_bin_virt_allowed
! kernel/kthread.c |    9 +++++++++



unchanged:
--- mm1.orig/include/linux/init_task.h	2004-10-12 15:04:40.000000000 +0200
+++ mm1/include/linux/init_task.h	2004-10-12 15:12:34.000000000 +0200
@@ -76,6 +76,7 @@ extern struct group_info init_groups;
 	.static_prio	= MAX_PRIO-20,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
+	.cpus_virt_allowed	= CPU_MASK_ALL,					\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.run_list	= LIST_HEAD_INIT(tsk.run_list),			\
unchanged:
--- mm1.orig/include/linux/sched.h	2004-10-12 15:04:40.000000000 +0200
+++ mm1/include/linux/sched.h	2004-10-12 15:12:34.000000000 +0200
@@ -665,6 +665,7 @@ struct task_struct {
   	short il_next;		/* could be shared with used_math */
 #endif
 #ifdef CONFIG_CPUSETS
+	cpumask_t cpus_virt_allowed;
 	struct cpuset *cpuset;
 	nodemask_t mems_allowed;
 	int cpuset_mems_generation;
unchanged:
--- mm1/kernel/cpuset.c	2004-10-12 15:56:57.338453582 +0200
+++ mm1/kernel/cpuset.c	2004-10-12 16:00:36.773997769 +0200
@@ -82,6 +82,7 @@
 typedef enum {
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
+	CS_VIRTUALIZED,
 	CS_REMOVED,
 	CS_NOTIFY_ON_RELEASE
 } cpuset_flagbits_t;
@@ -97,6 +98,10 @@
 	return !!test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
+static inline int is_virtualized(const struct cpuset *cs)
+{
+	return test_bit(CS_VIRTUALIZED, &cs->flags) != 0;
+}
 static inline int is_removed(const struct cpuset *cs)
 {
 	return !!test_bit(CS_REMOVED, &cs->flags);
@@ -519,6 +524,145 @@
 		is_mem_exclusive(p) <= is_mem_exclusive(q);
 }
 
+#define cyclic_next_cpu(index, mask)	__cyclic_next_cpu((index), &(mask))
+/* Return the cpu set in *mask that follows index, wrapping to the first one. */
+static inline int __cyclic_next_cpu(int index, const cpumask_t *mask)
+{
+	int i = NR_CPUS;
+	/* next_cpu() searches from index + 1: only valid while inside the mask */
+	if (index < NR_CPUS - 1)
+		i = next_cpu(index, *mask);
+	if (i >= NR_CPUS)
+		i = first_cpu(*mask);	/* wrap around; NR_CPUS if *mask is empty */
+	return i;
+}
+
+/**
+ *	combine_mask - translate a user (virtual) cpu mask to a physical one.
+ *	@mask:		output; receives the combined physical mask
+ *	@virt_allowed:	the mask given by the user to sched_setaffinity()
+ *	@cs_allowed:	the mask of the current cpuset.
+ *
+ *	Virtual cpu i maps to the i-th cpu of @cs_allowed, wrapping around.
+ *	*mask is left empty if either input is empty.  Always returns 0.
+ */
+static int combine_mask(cpumask_t *mask, const cpumask_t virt_allowed, const cpumask_t cs_allowed)
+{
+	int i;
+	/* start before cpu 0 so the first cyclic_next_cpu() call can return
+	 * cpu zero; -1 (unlike NR_CPUS) keeps next_cpu()'s search in bounds
+	 */
+	int cpu = -1;
+
+	cpus_clear(*mask);
+	if (cpus_empty(virt_allowed) || cpus_empty(cs_allowed))
+		return 0;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		cpu = cyclic_next_cpu(cpu, cs_allowed);
+		if (cpu_isset(i, virt_allowed))
+			cpu_set(cpu, *mask);
+	}
+	return 0;
+}
+
+/**
+ * cpu_visible_in_cpuset - tell whether @cpu is to be listed for @cs
+ * (used by /proc/cpuinfo and /proc/stat).  A virtualized cpuset only
+ * shows the cpus set in its cpus_allowed mask.
+ */
+int cpu_visible_in_cpuset(int cpu, struct cpuset * cs)
+{
+	if (is_virtualized(cs))
+		return cpu_isset(cpu, cs->cpus_allowed);
+
+	/* non-virtualized cpusets hide nothing */
+	return 1;
+}
+
+/**
+ *	cpuid_in_cpuset - map a physical cpu number to its logical number
+ *	@cpu:	physical cpu number to be translated
+ *	@cs:	the cpuset providing the numbering
+ *
+ *	Used for /proc/cpuinfo and /proc/stat.
+ *	In a virtualized cpuset the result is the count of cpus of
+ *	@cs->cpus_allowed numbered below @cpu; otherwise it is @cpu itself.
+ */
+int cpuid_in_cpuset(int cpu, struct cpuset * cs)
+{
+	int below = 0;
+	int idx;
+
+	/* non-virtualized cpusets keep the physical numbering */
+	if (!is_virtualized(cs))
+		return cpu;
+
+	if (cpu < 0 || cpu >= NR_CPUS) {
+		BUG();	/* no such cpu to translate */
+		return 0;
+	}
+	for (idx = 0; idx < cpu; idx++)
+		if (cpu_isset(idx, cs->cpus_allowed))
+			below++;
+	return below;
+}
+
+/**
+ *	set_cpus_virt_allowed - update cpus_virt_allowed AND cpus_allowed masks
+ *	@p:		the task
+ *	@mask:		the mask given by the user to sched_setaffinity()
+ *
+ *	Relies on set_cpus_allowed() for the scheduler side (e.g. migrating
+ *	the task if necessary).  The virtual mask is only recorded when
+ *	set_cpus_allowed() returned 0; that return value is passed back.
+ */
+static int set_cpus_virt_allowed(task_t *p, cpumask_t mask)
+{
+	cpumask_t new_mask;
+	int retval;
+
+	combine_mask(&new_mask, mask, p->cpuset->cpus_allowed);
+	if ((retval = set_cpus_allowed(p, new_mask)) == 0)
+		p->cpus_virt_allowed = mask;
+	return retval;
+}
+
+/**
+ *	cpuset_set_cpus_affinity - apply a sched_setaffinity() request to @p.
+ *	Exported entry point; does its work with cpuset_sem held.
+ */
+int cpuset_set_cpus_affinity(task_t *p, cpumask_t mask)
+{
+	int retval;
+
+	down(&cpuset_sem);
+	if (!is_virtualized(p->cpuset)) {
+		/* physical request: clip it to the cpus of the task's cpuset */
+		cpus_and(mask, mask, p->cpuset->cpus_allowed);
+		retval = set_cpus_allowed(p, mask);
+	} else {
+		/* virtual request: translated by set_cpus_virt_allowed() */
+		retval = set_cpus_virt_allowed(p, mask);
+	}
+	up(&cpuset_sem);
+	return retval;
+}
+
+/**
+ *	Exported entry point called by sched_getaffinity(): copies @p's virtual
+ *	mask to *mask and returns 0, or returns -1 if @p's cpuset is not virtualized.
+ */
+int cpuset_get_cpus_virt_affinity(task_t *p, cpumask_t *mask)
+{
+	if (!is_virtualized(p->cpuset))
+		return -1;
+
+	*mask = p->cpus_virt_allowed;
+	return 0;
+}
+
+
 /*
  * validate_change() - Used to validate that any proposed cpuset change
  *		       follows the structural rules for cpusets.
@@ -553,6 +697,11 @@
 	if ((par = cur->parent) == NULL)
 		return 0;
 
+	/* virtualization can only be turned on/off on empty cpusets  */
+	if ((atomic_read(&cur->count) > 0) || (!list_empty(&cur->children)))
+		if (is_virtualized(cur) != is_virtualized(trial))
+			return -EBUSY;
+
 	/* We must be a subset of our parent cpuset */
 	if (!is_cpuset_subset(trial, par))
 		return -EACCES;
@@ -599,7 +748,7 @@
 	int nb = 0;
 	int sz;
 
-retry:	
+again:	
 	/* at most cs->count - 1 processes to migrate */
 	/* keep some room in case some processes fork() during kmalloc() */
 	sz = atomic_read(&cs->count) + 10; 
@@ -616,7 +765,7 @@
 				printk("migrate_cpuset_processes: array full !\n");
 				read_unlock(&tasklist_lock);
 				kfree(array);
-				goto retry; 
+				goto again; 
 			}
 			get_task_struct(p);
 			array[nb++] = p;
@@ -626,16 +775,20 @@
 
 	while(nb) {
 		struct task_struct * p = array[--nb];
-		cpumask_t cpus;
-		/*
-		 * If the tasks current CPU placement overlaps with its new cpuset,
-		 * then let it run in that overlap.  Otherwise fallback to simply
-		 * letting it have the run of the CPUs in the new cpuset.
-		 */
-		cpus_and(cpus, p->cpus_allowed, cs->cpus_allowed);
-		if (cpus_empty(cpus))
-			cpus = cs->cpus_allowed;
-		set_cpus_allowed(p, cpus);
+		if (is_virtualized(cs))
+			set_cpus_virt_allowed(p, p->cpus_virt_allowed);
+		else {
+			cpumask_t cpus;
+			/*
+			 * If the tasks current CPU placement overlaps with its new cpuset,
+			 * then let it run in that overlap.  Otherwise fallback to simply
+			 * letting it have the run of the CPUs in the new cpuset.
+			 */
+			cpus_and(cpus, p->cpus_allowed, cs->cpus_allowed);
+			if (cpus_empty(cpus))
+				cpus = cs->cpus_allowed;
+			set_cpus_allowed(p, cpus);
+		}
 		put_task_struct(p);
 	}
 	kfree(array);
@@ -646,7 +799,7 @@
 	 * by the first pass */
 	if (first) {
 		first = 0;
-		goto retry;
+		goto again;
 	}
 }
 
@@ -765,11 +918,29 @@
 		return -ESRCH;
 	}
 	atomic_inc(&cs->count);
+
+	/* depending on current and future cpuset for this task,
+	 * affinity masks may be meaningful or not
+	 */
+	cpumask_t virt_allowed, allowed;
+	if (is_virtualized(cs) == is_virtualized(tsk->cpuset)) {
+		virt_allowed = tsk->cpus_virt_allowed;
+		allowed = tsk->cpus_allowed;
+	} else {
+		virt_allowed = CPU_MASK_ALL;
+		allowed = CPU_MASK_ALL;
+	}
+		
 	tsk->cpuset = cs;
 	task_unlock(tsk);
 
-	guarantee_online_cpus(cs, &cpus);
-	set_cpus_allowed(tsk, cpus);
+
+	if (is_virtualized(cs))
+		set_cpus_virt_allowed(tsk, virt_allowed);
+	else {
+		guarantee_online_cpus(cs, &cpus);
+		set_cpus_allowed(tsk, cpus);
+	}
 
 	put_task_struct(tsk);
 	if (atomic_dec_and_test(&oldcs->count))
@@ -786,6 +957,7 @@
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
+	FILE_VIRTUALIZE,
 	FILE_NOTIFY_ON_RELEASE,
 	FILE_TASKLIST,
 } cpuset_filetype_t;
@@ -833,6 +1005,9 @@
 	case FILE_MEM_EXCLUSIVE:
 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
 		break;
+	case FILE_VIRTUALIZE:
+		retval = update_flag(CS_VIRTUALIZED, cs, buffer);
+		break;
 	case FILE_NOTIFY_ON_RELEASE:
 		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
 		break;
@@ -934,6 +1109,9 @@
 	case FILE_MEM_EXCLUSIVE:
 		*s++ = is_mem_exclusive(cs) ? '1' : '0';
 		break;
+	case FILE_VIRTUALIZE:
+		*s++ = is_virtualized(cs) ? '1' : '0';
+		break;
 	case FILE_NOTIFY_ON_RELEASE:
 		*s++ = notify_on_release(cs) ? '1' : '0';
 		break;
@@ -1272,6 +1450,11 @@
 	.private = FILE_MEM_EXCLUSIVE,
 };
 
+static struct cftype cft_virtualize = {
+	.name = "virtualize",		/* control file added to each cpuset directory */
+	.private = FILE_VIRTUALIZE,	/* file type whose handlers act on CS_VIRTUALIZED */
+};
+
 static struct cftype cft_notify_on_release = {
 	.name = "notify_on_release",
 	.private = FILE_NOTIFY_ON_RELEASE,
@@ -1289,6 +1472,8 @@
 		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
 		return err;
+	if ((err = cpuset_add_file(cs_dentry, &cft_virtualize)) < 0)
+		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
 		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
unchanged:
--- mm1.orig/kernel/sched.c	2004-10-12 15:04:40.000000000 +0200
+++ mm1/kernel/sched.c	2004-10-12 16:00:11.262279331 +0200
@@ -3512,9 +3512,11 @@ long sched_setaffinity(pid_t pid, cpumas
 			!capable(CAP_SYS_NICE))
 		goto out_unlock;
 
-	cpus_allowed = cpuset_cpus_allowed(p);
-	cpus_and(new_mask, new_mask, cpus_allowed);
-	retval = set_cpus_allowed(p, new_mask);
+#ifdef CONFIG_CPUSETS
+	retval = cpuset_set_cpus_affinity(p, new_mask);
+#else
+  	retval = set_cpus_allowed(p, new_mask);
+#endif
 
 out_unlock:
 	put_task_struct(p);
@@ -3533,6 +3535,11 @@ static int get_user_cpu_mask(unsigned lo
 	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
 }
 
+#ifdef CONFIG_CPUSETS
+int cpuset_set_cpus_affinity(task_t *p, cpumask_t mask);
+int cpuset_get_cpus_virt_affinity(task_t *p, cpumask_t *mask);
+#endif
+
 /**
  * sys_sched_setaffinity - set the cpu affinity of a process
  * @pid: pid of the process
@@ -3581,7 +3588,12 @@ long sched_getaffinity(pid_t pid, cpumas
 		goto out_unlock;
 
 	retval = 0;
-	cpus_and(*mask, p->cpus_allowed, cpu_possible_map);
+#ifdef CONFIG_CPUSETS	/* virtualized cpusets report the task's virtual mask */
+	if (cpuset_get_cpus_virt_affinity(p, mask) < 0)
+		cpus_and(*mask, p->cpus_allowed, cpu_possible_map);
+#else
+	cpus_and(*mask, p->cpus_allowed, cpu_possible_map);
+#endif
 
 out_unlock:
 	read_unlock(&tasklist_lock);
unchanged:
--- mm1.orig/arch/i386/kernel/cpu/proc.c	2004-10-12 14:54:06.000000000 +0200
+++ mm1/arch/i386/kernel/cpu/proc.c	2004-10-12 16:00:36.769114956 +0200
@@ -3,6 +3,7 @@
 #include <linux/string.h>
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
+#include <linux/cpuset.h>
 
 /*
  *	Get CPU information for use by the procfs.
@@ -63,12 +64,20 @@ static int show_cpuinfo(struct seq_file 
 	if (!cpu_online(n))
 		return 0;
 #endif
+#ifdef CONFIG_CPUSETS	/* hide cpus outside the reader's virtualized cpuset */
+	if (!cpu_visible_in_cpuset(n, current->cpuset))
+		return 0;
+#endif
 	seq_printf(m, "processor\t: %d\n"
 		"vendor_id\t: %s\n"
 		"cpu family\t: %d\n"
 		"model\t\t: %d\n"
 		"model name\t: %s\n",
+#ifdef CONFIG_CPUSETS
+		cpuid_in_cpuset(n, current->cpuset),
+#else
 		n,
+#endif
 		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		c->x86,
 		c->x86_model,
unchanged:
--- mm1.orig/arch/ia64/kernel/setup.c	2004-10-12 14:54:06.000000000 +0200
+++ mm1/arch/ia64/kernel/setup.c	2004-10-12 16:00:36.770091519 +0200
@@ -35,6 +35,7 @@
 #include <linux/serial_core.h>
 #include <linux/efi.h>
 #include <linux/initrd.h>
+#include <linux/cpuset.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -432,6 +433,11 @@ show_cpuinfo (struct seq_file *m, void *
 	unsigned long mask;
 	int i;
 
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(cpunum, current->cpuset))
+		return 0;
+#endif
+	
 	mask = c->features;
 
 	switch (c->family) {
@@ -476,7 +482,12 @@ show_cpuinfo (struct seq_file *m, void *
 		   "cpu MHz    : %lu.%06lu\n"
 		   "itc MHz    : %lu.%06lu\n"
 		   "BogoMIPS   : %lu.%02lu\n\n",
-		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+#ifdef CONFIG_CPUSETS
+		   cpuid_in_cpuset(cpunum, current->cpuset),
+#else
+		   cpunum,
+#endif
+		   c->vendor, family, c->model, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   c->proc_freq / 1000000, c->proc_freq % 1000000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
unchanged:
--- mm1.orig/fs/proc/proc_misc.c	2004-10-12 14:54:24.000000000 +0200
+++ mm1/fs/proc/proc_misc.c	2004-10-12 16:00:54.003489745 +0200
@@ -44,6 +44,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/cpuset.h>
 #include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -376,6 +377,10 @@ int show_stat(struct seq_file *p, void *
 	for_each_cpu(i) {
 		int j;
 
+#ifdef CONFIG_CPUSETS
+		if (!cpu_visible_in_cpuset(i, current->cpuset))
+			continue;
+#endif
 		user += kstat_cpu(i).cpustat.user;
 		nice += kstat_cpu(i).cpustat.nice;
 		system += kstat_cpu(i).cpustat.system;
@@ -397,6 +402,10 @@ int show_stat(struct seq_file *p, void *
 		(unsigned long long)jiffies_64_to_clock_t(softirq));
 	for_each_online_cpu(i) {
 
+#ifdef CONFIG_CPUSETS
+		if (!cpu_visible_in_cpuset(i, current->cpuset))
+			continue;
+#endif
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
 		user = kstat_cpu(i).cpustat.user;
 		nice = kstat_cpu(i).cpustat.nice;
@@ -406,7 +415,11 @@ int show_stat(struct seq_file *p, void *
 		irq = kstat_cpu(i).cpustat.irq;
 		softirq = kstat_cpu(i).cpustat.softirq;
 		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu\n",
+#ifdef CONFIG_CPUSETS
+			cpuid_in_cpuset(i, current->cpuset),
+#else
 			i,
+#endif
 			(unsigned long long)jiffies_64_to_clock_t(user),
 			(unsigned long long)jiffies_64_to_clock_t(nice),
 			(unsigned long long)jiffies_64_to_clock_t(system),
unchanged:
--- mm1.orig/include/linux/cpuset.h	2004-10-12 14:54:26.000000000 +0200
+++ mm1/include/linux/cpuset.h	2004-10-12 16:00:36.772044644 +0200
@@ -27,6 +27,9 @@ int cpuset_zone_allowed(struct zone *z);
 extern struct file_operations proc_cpuset_operations;
 extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
 
+int cpu_visible_in_cpuset(int cpu, struct cpuset * cs);
+int cpuid_in_cpuset(int cpu, struct cpuset * cs);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init(void) { return 0; }
only in patch2:
unchanged:
--- mm1.orig/kernel/kthread.c	2004-10-12 14:54:28.000000000 +0200
+++ mm1/kernel/kthread.c	2004-10-12 16:01:05.000000000 +0200
@@ -160,6 +160,15 @@ void kthread_bind(struct task_struct *k,
 	wait_task_inactive(k);
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
+#ifdef CONFIG_CPUSETS
+	/* kthreads don't use sched_setaffinity() to bind themselves to
+	 * CPUs, so we need to take care of the virtual mask here.
+	 * This should not be a problem since it is unlikely that kthreads
+	 * will run in a virtualized cpuset.
+	 * But better be ready, so:
+	 */
+	k->cpus_virt_allowed = cpumask_of_cpu(cpu);
+#endif
 }
 EXPORT_SYMBOL(kthread_bind);
 

