little cp diff

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Ted Unangst
Date: Friday, February 5, 2010 - 7:24 pm

Though for a program like cp, this may qualify as a big diff.  :)

Continuing in my "make IO suck less" phase, cp would be a lot more 
efficient if it didn't bounce the disk heads around so much.  Instead of 
using a tiny 64k buffer, use an amount based on a small fraction of RAM.  

Index: utils.c
===================================================================
RCS file: /home/tedu/cvs/src/bin/cp/utils.c,v
retrieving revision 1.30
diff -u -r1.30 utils.c
--- utils.c	27 Oct 2009 23:59:21 -0000	1.30
+++ utils.c	6 Feb 2010 01:46:42 -0000
@@ -34,6 +34,7 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <sys/time.h>
+#include <sys/sysctl.h>
 
 #include <err.h>
 #include <errno.h>
@@ -46,29 +47,75 @@
 
 #include "extern.h"
 
-int
-copy_file(FTSENT *entp, int dne)
+static void *
+allocbuf(size_t *sizep)
 {
-	static char *buf;
-	static char *zeroes;
-	struct stat to_stat, *fs;
-	int ch, checkch, from_fd, rcount, rval, to_fd, wcount;
-#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
-	char *p;
-#endif
-
-	if (!buf) {
-		buf = malloc(MAXBSIZE);
-		if (!buf)
-			err(1, "malloc");
+	int mib[2];
+	uint64_t physmem;
+	void *buf;
+	size_t size = 0;
+	size_t oldlen;
+
+	mib[0] = CTL_HW;
+	mib[1] = HW_PHYSMEM64;
+	oldlen = sizeof(physmem);
+	if (sysctl(mib, 2, &physmem, &oldlen, NULL, 0) == 0) {
+		size = physmem / 512;		/* 1/512th of physmem */
+		size -= size % MAXBSIZE;	/* round down to MAXBSIZE */
 	}
+	if (size < MAXBSIZE)	/* sysctl failed or result too small */
+		size = MAXBSIZE;
+	buf = malloc(size);
+	if (!buf)
+		err(1, "allocbuf(%zu)", size);	/* size is a size_t */
+
+	*sizep = size;
+	return buf;
+}
+
+static ssize_t
+writebuf(int to_fd, void *buf, ssize_t rcount, int skipholes)
+{
+	static void *zeroes;
+	ssize_t wcount = 0;
+	int amt = MAXBSIZE;	/* chunk size; shrinks for the final piece */
+	const char *bp = buf;
+	ssize_t left = rcount;	/* bytes of buf not yet written */
+
 	if (!zeroes) {
-		zeroes = malloc(MAXBSIZE);
+		zeroes = calloc(1, amt);
 		if (!zeroes)
-			err(1, "malloc");
-		memset(zeroes, 0, MAXBSIZE);
+			err(1, "calloc");
+	}
+	while (left > 0) {
+		amt = MIN(amt, left);	/* never go past the data in buf */
+		if (skipholes && memcmp(bp, zeroes, amt) == 0)
+			wcount = lseek(to_fd, amt, SEEK_CUR) == -1 ? -1 : amt;
+		else
+			wcount = write(to_fd, bp, amt);
+		if (wcount != amt)	/* error or short write */
+			return -1;
+		bp += wcount;
+		left -= wcount;
 	}
 
+	return rcount;
+}
+
+int
+copy_file(FTSENT *entp, int dne)
+{
+	static void *buf;
+	static size_t bufsize;
+	struct stat to_stat, *fs;
+	int ch, checkch, from_fd, rval, to_fd;
+	size_t rcount, wcount;
+	int skipholes = 0;
+	struct stat tosb;
+
+	if (!buf)
+		buf = allocbuf(&bufsize);
+
 	if ((from_fd = open(entp->fts_path, O_RDONLY, 0)) == -1) {
 		warn("%s", entp->fts_path);
 		return (1);
@@ -114,54 +161,21 @@
 	}
 
 	rval = 0;
-
-	/*
-	 * Mmap and write if less than 8M (the limit is so we don't totally
-	 * trash memory on big files.  This is really a minor hack, but it
-	 * wins some CPU back.
-	 */
-#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
-	if (fs->st_size <= 8 * 1048576) {
-		if ((p = mmap(NULL, (size_t)fs->st_size, PROT_READ,
-		    MAP_FILE|MAP_SHARED, from_fd, (off_t)0)) == MAP_FAILED) {
-			warn("mmap: %s", entp->fts_path);
-			rval = 1;
-		} else {
-			madvise(p, fs->st_size, MADV_SEQUENTIAL);
-			if (write(to_fd, p, fs->st_size) != fs->st_size) {
-				warn("%s", to.p_path);
-				rval = 1;
-			}
-			/* Some systems don't unmap on close(2). */
-			if (munmap(p, fs->st_size) < 0) {
-				warn("%s", entp->fts_path);
-				rval = 1;
-			}
-		}
-	} else
-#endif
-	{
-		int skipholes = 0;
-		struct stat tosb;
-		if (!fstat(to_fd, &tosb) && S_ISREG(tosb.st_mode))
-			skipholes = 1;
-		while ((rcount = read(from_fd, buf, MAXBSIZE)) > 0) {
-			if (skipholes && memcmp(buf, zeroes, rcount) == 0)
-				wcount = lseek(to_fd, rcount, SEEK_CUR) == -1 ? -1 : rcount;
-			else
-				wcount = write(to_fd, buf, rcount);
-			if (rcount != wcount || wcount == -1) {
-				warn("%s", to.p_path);
-				rval = 1;
-				break;
-			}
-		}
-		if (skipholes && rcount >= 0)
-			rcount = ftruncate(to_fd, fs->st_size);
-		if (rcount < 0) {
-			warn("%s", entp->fts_path);
+	if (!fstat(to_fd, &tosb) && S_ISREG(tosb.st_mode))
+		skipholes = 1;
+	while ((rcount = read(from_fd, buf, bufsize)) > 0) {
+		wcount = writebuf(to_fd, buf, rcount, skipholes);
+		if (rcount != wcount) {
+			warn("%s", to.p_path);
 			rval = 1;
+			break;
 		}
+	}
+	if (skipholes && rcount >= 0)
+		rcount = ftruncate(to_fd, fs->st_size);
+	if (rcount < 0) {
+		warn("%s", entp->fts_path);
+		rval = 1;
 	}
 
 	if (rval == 1) {
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
little cp diff, Ted Unangst, (Fri Feb 5, 7:24 pm)
Re: little cp diff, Otto Moerbeek, (Sat Feb 6, 5:32 am)
Re: little cp diff, Mark Kettenis, (Sat Feb 6, 6:44 am)
Re: little cp diff, Ted Unangst, (Sat Feb 6, 7:14 am)
Re: little cp diff, Otto Moerbeek, (Sat Feb 6, 7:53 am)
Re: little cp diff, Vadim Zhukov, (Sat Feb 6, 8:27 am)
Re: little cp diff, Bob Beck, (Sat Feb 6, 9:45 am)
Re: little cp diff, Ted Unangst, (Sat Feb 6, 10:58 am)
Re: little cp diff, Bob Beck, (Sat Feb 6, 11:32 am)
Re: little cp diff, Otto Moerbeek, (Sat Feb 6, 11:35 am)
Re: little cp diff, Claus Assmann, (Sat Feb 6, 11:37 am)
Re: little cp diff, Claudio Jeker, (Sat Feb 6, 11:42 am)
Re: little cp diff, Bob Beck, (Sat Feb 6, 11:45 am)
Re: little cp diff, Bob Beck, (Sat Feb 6, 11:49 am)
Re: little cp diff, Ted Unangst, (Sat Feb 6, 8:42 pm)