> +/*
> + * Example eclone() usage - Create a child process with pid CHILD_TID1 in
> + * the current pid namespace. The child gets the usual "random" pid in any
> + * ancestor pid namespaces.
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <signal.h>
> +#include <errno.h>
> +#include <unistd.h>
> +#include <wait.h>
> +#include <sys/syscall.h>
> +
> +#define __NR_eclone 337 /* syscall number eclone() loads into eax; also tested by the #if that guards eclone() */
> +#define CLONE_NEWPID 0x20000000 /* not referenced by the code shown here */
> +#define CLONE_CHILD_SETTID 0x01000000 /* OR-ed into the clone flags in main() */
> +#define CLONE_PARENT_SETTID 0x00100000 /* not referenced by the code shown here */
> +#define CLONE_UNUSED 0x00001000 /* not referenced by the code shown here */
> +
> +#define STACKSIZE 8192 /* size do_clone() hands to setup_stack() */
> +
> +typedef unsigned long long u64; /* type of the pointer/size fields of struct clone_args */
> +typedef unsigned int u32; /* type of nr_pids/reserved0 and of eclone()'s flags_low */
> +typedef int pid_t; /* element type of pids_list[] */
> +struct clone_args { /* argument block whose address and size do_clone() passes to eclone() */
> + u64 clone_flags_high; /* left zero by do_clone()'s memset() */
> + u64 child_stack; /* pointer returned by setup_stack(), stored as an integer */
> + u64 child_stack_size; /* do_clone() stores 0 here */
> +
> + u64 parent_tid_ptr; /* left zero by do_clone()'s memset() */
> + u64 child_tid_ptr; /* address of an int; do_clone() stores &child_tid, do_child() reads through it */
> +
> + u32 nr_pids; /* copied from do_clone()'s nr_pids; presumably how many pids[] entries the kernel uses - confirm against the eclone() ABI */
> +
> + u32 reserved0; /* left zero by do_clone()'s memset() */
> +};
> +
> +#define exit _exit /* every exit() call below expands to _exit(); NOTE(review): presumably so the clone child skips atexit/stdio cleanup - confirm */
> +
> +/*
> + * Following eclone() is based on code posted by Oren Laadan at:
> + *
> + * https://lists.linux-foundation.org/pipermail/containers/2009-June/018463.html
> + */
> +#if defined(__i386__) && defined(__NR_eclone)
> +
> +/*
> + * eclone() - issue the eclone system call on i386 through "int $0x80".
> + *
> + * @flags_low:	passed to the kernel in ebx
> + * @clone_args:	address of the argument block, passed in ecx
> + * @args_size:	size of *clone_args as given by the caller, passed in edx
> + * @pids:	pointer to the pid array, passed in edi
> + *
> + * When the system call returns non-zero (the parent, or a failure) the
> + * value is returned to the caller; a negative value is turned into -1
> + * with errno set to its magnitude.
> + *
> + * When the system call returns 0 (the child), the asm pops a function
> + * pointer from the stack, calls it, and then issues the __NR_exit system
> + * call, so the child never returns from this function. The stack layout
> + * is expected to be the one built by setup_stack().
> + */
> +int eclone(u32 flags_low, struct clone_args *clone_args, int args_size,
> +		int *pids)
> +{
> +	long retval;
> +
> +	__asm__ __volatile__(
> +		"movl %3, %%ebx\n\t"	/* flags_low -> 1st (ebx) */
> +		"movl %4, %%ecx\n\t"	/* clone_args -> 2nd (ecx)*/
> +		"movl %5, %%edx\n\t"	/* args_size -> 3rd (edx) */
> +		"movl %6, %%edi\n\t"	/* pids -> 4th (edi)*/
> +
> +		"pushl %%ebp\n\t"	/* save value of ebp */
> +		"int $0x80\n\t"		/* Linux/i386 system call */
> +		"testl %0,%0\n\t"	/* check return value */
> +		"jne 1f\n\t"		/* jump if parent */
> +
> +		"popl %%esi\n\t"	/* get subthread function */
> +		"call *%%esi\n\t"	/* start subthread function */
> +		"movl %2,%0\n\t"
> +		"int $0x80\n"		/* exit system call: exit subthread */
> +		"1:\n\t"
> +		"popl %%ebp\t"		/* restore parent's ebp */
> +
> +		:"=a" (retval)
> +
> +		:"0" (__NR_eclone),
> +		 "i" (__NR_exit),
> +		 "m" (flags_low),
> +		 "m" (clone_args),
> +		 "m" (args_size),
> +		 "m" (pids)
> +
> +		/*
> +		 * The argument registers are overwritten above, and the
> +		 * kernel is handed pointers to memory, so tell the compiler
> +		 * not to keep values cached in either across the asm.
> +		 */
> +		:"ebx", "ecx", "edx", "edi", "memory"
> +		);
> +
> +	if (retval < 0) {
> +		errno = -retval;
> +		retval = -1;
> +	}
> +	return retval;
> +}
> +
> +/*
> + * Build the initial stack for a clone child.
> + *
> + * A region of size + size bytes is allocated and the starting stack
> + * position is taken at offset size - 4 from its base. @child_arg and then
> + * @child_fn are stored in the two pointer slots just below that position,
> + * and the address of the @child_fn slot is returned, so the child finds
> + * @child_fn first and @child_arg right after it.
> + *
> + * If the allocation fails, the error is reported with perror() and the
> + * process exits with status 1.
> + */
> +void *setup_stack(int (*child_fn)(void *), void *child_arg, int size)
> +{
> +	char *region = malloc(size + size);
> +	void **slot;
> +
> +	if (region == NULL) {
> +		perror("malloc()");
> +		exit(1);
> +	}
> +
> +	slot = (void **)(region + (size - 4));
> +	slot[-1] = child_arg;	/* higher slot: argument for child_fn */
> +	slot[-2] = child_fn;	/* lower slot: popped first by the child */
> +
> +	return slot - 2;
> +}
> +#endif
> +
> +/*
> + * Return the caller's thread id using the raw gettid system call; with
> + * clone() in play the thread id is more telling than getpid().
> + * If the call fails, its result and errno are printed and the process
> + * exits with status 1.
> + */
> +int gettid()
> +{
> +	int tid = syscall(__NR_gettid, 0, 0, 0);
> +
> +	if (tid >= 0)
> +		return tid;
> +
> +	printf("rc %d, errno %d\n", tid, errno);
> +	exit(1);
> +}
> +
> +#define CHILD_TID1 377 /* first pids_list[] entry; the tid do_child() expects to read back */
> +#define CHILD_TID2 1177 /* appears only as a pids_list[] entry */
> +#define CHILD_TID3 2799 /* appears only as a pids_list[] entry */
> +
> +struct clone_args clone_args; /* the one argument block do_clone() fills in and passes to eclone() */
> +void *child_arg = &clone_args; /* reference value do_child() compares its argument with */
> +int child_tid; /* do_clone() stores this variable's address in clone_args.child_tid_ptr */
> +
> +/*
> + * do_child() - function run by the cloned child.
> + * @arg: argument taken from the child stack; must equal the global
> + *       child_arg
> + *
> + * Checks that @arg arrived intact and that the int referenced by
> + * child_tid_ptr in the argument block holds CHILD_TID1, sleeps for two
> + * seconds and exits: status 0 when both checks pass, status 1 otherwise.
> + * The function never returns.
> + *
> + * exit() is defined as _exit() in this file, and _exit() does not flush
> + * stdio buffers. stdout is therefore flushed by hand before every exit so
> + * the messages are not lost when stdout is a pipe or a file.
> + */
> +int do_child(void *arg)
> +{
> +	struct clone_args *cs = (struct clone_args *)arg;
> +	int ctid;
> +
> +	/* Verify we pushed the arguments correctly on the stack... */
> +	if (arg != child_arg) {
> +		printf("Child: Incorrect child arg pointer, expected %p, "
> +			"actual %p\n", child_arg, arg);
> +		fflush(stdout);
> +		exit(1);
> +	}
> +
> +	/* ... and that we got the thread-id we expected */
> +	ctid = *((int *)(unsigned long)cs->child_tid_ptr);
> +	if (ctid != CHILD_TID1) {
> +		printf("Child: Incorrect child tid, expected %d, actual %d\n",
> +			CHILD_TID1, ctid);
> +		fflush(stdout);
> +		exit(1);
> +	} else {
> +		printf("Child got the expected tid, %d\n", gettid());
> +	}
> +	sleep(2);
> +
> +	printf("[%d, %d]: Child exiting\n", getpid(), ctid);
> +	fflush(stdout);
> +	exit(0);
> +}
> +
> +/*
> + * do_clone() - prepare the global clone_args block and call eclone().
> + * @child_fn:	function the child is to run
> + * @child_arg:	argument placed on the child stack for @child_fn
> + * @flags_low:	flags passed straight through to eclone()
> + * @nr_pids:	stored in clone_args.nr_pids
> + * @pids_list:	pid array passed straight through to eclone()
> + *
> + * A child stack of STACKSIZE is obtained from setup_stack(); the argument
> + * block is zeroed and then given the stack pointer, a stack size of 0,
> + * the address of child_tid and @nr_pids. The eclone() result and the
> + * current errno are printed, and the result is returned unchanged.
> + */
> +static int do_clone(int (*child_fn)(void *), void *child_arg,
> +		unsigned int flags_low, int nr_pids, pid_t *pids_list)
> +{
> +	struct clone_args *args = &clone_args;
> +	void *child_sp = setup_stack(child_fn, child_arg, STACKSIZE);
> +	int ret;
> +
> +	memset(args, 0, sizeof(*args));
> +	args->nr_pids = nr_pids;
> +	args->child_tid_ptr = (u64)(unsigned long)&child_tid;
> +	args->child_stack_size = (u64)0;
> +	args->child_stack = (u64)(unsigned long)child_sp;
> +
> +	ret = eclone(flags_low, args, sizeof(struct clone_args), pids_list);
> +
> +	printf("[%d, %d]: eclone() returned %d, error %d\n", getpid(), gettid(),
> +		ret, errno);
> +	return ret;
> +}
> +
> +/*
> + * Multiple pid_t values in pids_list[] here are just for illustration.
> + * The test case creates a child in the current pid namespace and uses only
> + * the first value, CHILD_TID1.
> + */
> +pid_t pids_list[] = { CHILD_TID1, CHILD_TID2, CHILD_TID3 };
> +/*
> + * main() - clone one child with SIGCHLD|CLONE_CHILD_SETTID, passing
> + * pids_list[] with nr_pids = 1, then wait for it and print how it ended.
> + *
> + * Returns 0 once the child has been waited for, and 1 when the child
> + * could not be created or waitpid() failed.
> + */
> +int main()
> +{
> +	int rc, pid, status;
> +	unsigned long flags;
> +	int nr_pids = 1;
> +
> +	flags = SIGCHLD|CLONE_CHILD_SETTID;
> +
> +	pid = do_clone(do_child, &clone_args, flags, nr_pids, pids_list);
> +	if (pid < 0) {
> +		/*
> +		 * do_clone() has already printed the failure. Do not go on
> +		 * to waitpid(): a pid of -1 there means "any child".
> +		 */
> +		return 1;
> +	}
> +
> +	printf("[%d, %d]: Parent waiting for %d\n", getpid(), gettid(), pid);
> +
> +	rc = waitpid(pid, &status, __WALL);
> +	if (rc < 0) {
> +		printf("waitpid(): rc %d, error %d\n", rc, errno);
> +		return 1;
> +	}
> +
> +	printf("[%d, %d]: child %d:\n\t wait-status 0x%x\n", getpid(),
> +		gettid(), rc, status);
> +
> +	if (WIFEXITED(status)) {
> +		printf("\t EXITED, %d\n", WEXITSTATUS(status));
> +	} else if (WIFSIGNALED(status)) {
> +		printf("\t SIGNALED, %d\n", WTERMSIG(status));
> +	}
> +	return 0;
> +}
> --