login
Header Space

 
 

[PATCH] pack-objects --repack-unpacked

Score:
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
To: Nicolas Pitre <nico@...>
Cc: Linus Torvalds <torvalds@...>, Johannes Schindelin <Johannes.Schindelin@...>, Nix <nix@...>, Steven Grimm <koreth@...>, Git Mailing List <git@...>
Date: Saturday, September 8, 2007 - 6:01 am

The usual command line that uses the "--unpacked=<existing>" option
looks like this:

	git pack-objects --non-empty --all --reflog \
        	--unpacked --unpacked=<existing> \
                packname-prefix

This packs loose objects and objects in the named existing
packs that are reachable from any and all refs and reflog
entries.  It is typically used by "git repack -a -d", which
then removes the named existing packs from the repository, and
has the effect of getting rid of the unreachable objects these
packs hold.

This adds a "--repack-unpacked" option to pack-objects to help
combine small packs into one without losing the unreferenced
objects that are in the packs.  When this option is given in
addition to the above command line, we also make sure all the
objects in the named existing packs are included in the result.

This allows us to safely remove the packs that were named on the
command line after installing the resulting pack in the
repository.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 I am too tired to keep staring at this code now.  Fixes,
 improvements, replacements and enhancements, in the code,
 documentation and tests, are very much welcome.

 builtin-pack-objects.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 12509fa..9bc2faa 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -21,7 +21,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
 	[--window=N] [--window-memory=N] [--depth=N] \n\
 	[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
 	[--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
-	[--stdout | base-name] [<ref-list | <object-list]";
+	[--stdout | base-name] [--repack-unpacked] [<ref-list | <object-list]";
 
 struct object_entry {
 	struct pack_idx_entry idx;
@@ -57,7 +57,7 @@ static struct object_entry **written_list;
 static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
 
 static int non_empty;
-static int no_reuse_delta, no_reuse_object;
+static int no_reuse_delta, no_reuse_object, repack_unpacked;
 static int local;
 static int incremental;
 static int allow_ofs_delta;
@@ -1625,15 +1625,21 @@ static void read_object_list_from_stdin(void)
 	}
 }
 
+#define OBJECT_ADDED (1u<<20)
+
 static void show_commit(struct commit *commit)
 {
 	add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
+	commit->object.flags |= OBJECT_ADDED;
 }
 
 static void show_object(struct object_array_entry *p)
 {
+	struct object *o = lookup_unknown_object(p->item->sha1);
+
 	add_preferred_base_object(p->name);
 	add_object_entry(p->item->sha1, p->item->type, p->name, 0);
+	o->flags |= OBJECT_ADDED;
 }
 
 static void show_edge(struct commit *commit)
@@ -1641,6 +1647,84 @@ static void show_edge(struct commit *commit)
 	add_preferred_base(commit->object.sha1);
 }
 
+struct in_pack_object {
+	off_t offset;
+	const unsigned char *sha1;
+};
+
+struct in_pack {
+	int alloc;
+	int nr;
+	struct in_pack_object *array;
+};
+
+static void mark_in_pack_object(const unsigned char *sha1, struct packed_git *p, struct in_pack *in_pack)
+{
+	in_pack->array[in_pack->nr].offset = find_pack_entry_one(sha1, p);
+	in_pack->array[in_pack->nr].sha1 = sha1;
+	in_pack->nr++;
+}
+
+/*
+ * Compare the objects in the offset order, in order to emulate the
+ * "git-rev-list --objects" output that produced the pack originally.
+ */
+static int ofscmp(const void *a_, const void *b_)
+{
+	struct in_pack_object *a = (struct in_pack_object *)a_;
+	struct in_pack_object *b = (struct in_pack_object *)b_;
+
+	if (a->offset < b->offset)
+		return -1;
+	else if (a->offset > b->offset)
+		return 1;
+	else
+		return hashcmp(a->sha1, b->sha1);
+}
+
+static void add_objects_in_unpacked_packs(struct rev_info *revs)
+{
+	struct packed_git *p;
+
+	for (p = packed_git; p; p = p->next) {
+		struct in_pack in_pack;
+		const unsigned char *sha1;
+		struct object *o;
+		uint32_t i;
+
+		for (i = 0; i < revs->num_ignore_packed; i++) {
+			if (matches_pack_name(p, revs->ignore_packed[i]))
+				break;
+		}
+		if (revs->num_ignore_packed <= i)
+			continue;
+		if (open_pack_index(p))
+			die("cannot open pack index");
+
+		in_pack.alloc = p->num_objects;
+		in_pack.nr = 0;
+		in_pack.array = xmalloc(sizeof(in_pack.array[0]) *
+					p->num_objects);
+		for (i = 0; i < p->num_objects; i++) {
+			sha1 = nth_packed_object_sha1(p, i);
+			o = lookup_unknown_object(sha1);
+			if (!(o->flags & OBJECT_ADDED))
+				mark_in_pack_object(sha1, p, &in_pack);
+			o->flags |= OBJECT_ADDED;
+		}
+		if (!in_pack.nr)
+			continue;
+		qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
+		      ofscmp);
+		for (i = 0; i < in_pack.nr; i++) {
+			sha1 = in_pack.array[i].sha1;
+			o = lookup_unknown_object(sha1);
+			add_object_entry(sha1, o->type, "", 0);
+		}
+		free(in_pack.array);
+	}
+}
+
 static void get_object_list(int ac, const char **av)
 {
 	struct rev_info revs;
@@ -1672,6 +1756,9 @@ static void get_object_list(int ac, const char **av)
 	prepare_revision_walk(&revs);
 	mark_edges_uninteresting(revs.commits, &revs, show_edge);
 	traverse_commit_list(&revs, show_commit, show_object);
+
+	if (repack_unpacked)
+		add_objects_in_unpacked_packs(&revs);
 }
 
 static int adjust_perm(const char *path, mode_t mode)
@@ -1789,6 +1876,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			use_internal_rev_list = 1;
 			continue;
 		}
+		if (!strcmp("--repack-unpacked", arg)) {
+			repack_unpacked = 1;
+			continue;
+		}
 		if (!strcmp("--unpacked", arg) ||
 		    !prefixcmp(arg, "--unpacked=") ||
 		    !strcmp("--reflog", arg) ||
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
People unaware of the importance of "git gc"?, Linus Torvalds, (Wed Sep 5, 3:09 am)
Re: People unaware of the importance of "git gc"?, Alex Riesen, (Wed Sep 5, 5:07 pm)
Re: People unaware of the importance of "git gc"?, J. Bruce Fields, (Wed Sep 5, 1:44 pm)
Re: People unaware of the importance of "git gc"?, Brandon Casey, (Wed Sep 5, 2:46 pm)
Re: People unaware of the importance of "git gc"?, David Kastrup, (Wed Sep 5, 3:09 pm)
Re: People unaware of the importance of "git gc"?, Mike Hommey, (Wed Sep 5, 3:20 pm)
Re: People unaware of the importance of "git gc"?, J. Bruce Fields, (Wed Sep 5, 3:13 pm)
Re: People unaware of the importance of "git gc"?, David Kastrup, (Wed Sep 5, 3:43 pm)
Re: People unaware of the importance of "git gc"?, Govind Salinas, (Wed Sep 5, 12:47 pm)
Re: People unaware of the importance of "git gc"?, Steven Grimm, (Wed Sep 5, 1:35 pm)
Re: People unaware of the importance of "git gc"?, Carl Worth, (Wed Sep 5, 1:19 pm)
Re: People unaware of the importance of "git gc"?, David Kastrup, (Wed Sep 5, 4:16 am)
Re: People unaware of the importance of "git gc"?, Pierre Habouzit, (Wed Sep 5, 3:42 am)
Re: People unaware of the importance of "git gc"?, Steven Grimm, (Wed Sep 5, 2:14 pm)
Re: People unaware of the importance of "git gc"?, Nicolas Pitre, (Wed Sep 5, 2:54 pm)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 4:01 pm)
Re: People unaware of the importance of "git gc"?, Johannes Schindelin, (Thu Sep 6, 11:54 am)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Thu Sep 6, 1:49 pm)
Re: People unaware of the importance of "git gc"?, Johannes Schindelin, (Fri Sep 7, 6:12 am)
Re: People unaware of the importance of "git gc"?, Shawn O. Pearce, (Fri Sep 7, 12:48 am)
Re: People unaware of the importance of "git gc"?, Linus Torvalds, (Thu Sep 6, 2:15 pm)
Subject: [PATCH] git-merge-pack, Junio C Hamano, (Thu Sep 6, 7:12 pm)
Re: Subject: [PATCH] git-merge-pack, Andy Parkins, (Fri Sep 7, 3:24 am)
Re: Subject: [PATCH] git-merge-pack, Johannes Sixt, (Fri Sep 7, 3:11 am)
Re: Subject: [PATCH] git-merge-pack, Junio C Hamano, (Fri Sep 7, 3:34 am)
Re: Subject: [PATCH] git-merge-pack, Nicolas Pitre, (Thu Sep 6, 8:51 pm)
Re: Subject: [PATCH] git-merge-pack, Junio C Hamano, (Fri Sep 7, 12:43 am)
[PATCH] pack-objects --repack-unpacked, Junio C Hamano, (Sat Sep 8, 6:01 am)
Re: [PATCH] pack-objects --repack-unpacked, Shawn O. Pearce, (Sat Sep 8, 10:57 pm)
Re: [PATCH] pack-objects --repack-unpacked, Junio C Hamano, (Sun Sep 9, 1:04 am)
Re: [PATCH] pack-objects --repack-unpacked, Nicolas Pitre, (Sun Sep 9, 8:29 am)
Re: [PATCH] pack-objects --repack-unpacked, Shawn O. Pearce, (Sun Sep 9, 1:49 pm)
Re: Subject: [PATCH] git-merge-pack, Shawn O. Pearce, (Fri Sep 7, 12:07 am)
Re: Subject: [PATCH] git-merge-pack, Junio C Hamano, (Thu Sep 6, 9:58 pm)
Re: Subject: [PATCH] git-merge-pack, Nicolas Pitre, (Thu Sep 6, 10:32 pm)
Re: Subject: [PATCH] git-merge-pack, Linus Torvalds, (Thu Sep 6, 7:35 pm)
Re: People unaware of the importance of "git gc"?, Steven Grimm, (Thu Sep 6, 2:29 pm)
Re: People unaware of the importance of "git gc"?, Shawn O. Pearce, (Wed Sep 5, 10:45 pm)
Re: People unaware of the importance of "git gc"?, Steven Grimm, (Wed Sep 5, 10:49 pm)
Re: People unaware of the importance of "git gc"?, Shawn O. Pearce, (Wed Sep 5, 10:56 pm)
Re: People unaware of the importance of "git gc"?, Alex Riesen, (Wed Sep 5, 5:18 pm)
Re: [PATCH] Invoke "git gc --auto" from "git add" and "git f..., Johannes Schindelin, (Thu Sep 6, 8:02 am)
Re: People unaware of the importance of "git gc"?, Nicolas Pitre, (Wed Sep 5, 4:35 pm)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 5:49 pm)
Invoke "git gc --auto" from commit, merge, am and rebase., Junio C Hamano, (Wed Sep 5, 5:59 pm)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 5:46 pm)
Re: People unaware of the importance of "git gc"?, David Kastrup, (Thu Sep 6, 1:55 am)
Re: People unaware of the importance of "git gc"?, Nicolas Pitre, (Wed Sep 5, 7:04 pm)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 7:42 pm)
Re: People unaware of the importance of "git gc"?, Carlos Rica, (Wed Sep 5, 8:27 pm)
Re: People unaware of the importance of "git gc"?, Steven Grimm, (Wed Sep 5, 4:50 am)
Re: People unaware of the importance of "git gc"?, David Kastrup, (Wed Sep 5, 5:13 am)
Re: People unaware of the importance of "git gc"?, Pierre Habouzit, (Wed Sep 5, 5:14 am)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 5:07 am)
Re: People unaware of the importance of "git gc"?, Martin Langhoff, (Wed Sep 5, 5:27 am)
Re: People unaware of the importance of "git gc"?, Matthieu Moy, (Wed Sep 5, 5:33 am)
Re: People unaware of the importance of "git gc"?, Johan De Messemaeker, (Wed Sep 5, 10:17 am)
Re: People unaware of the importance of "git gc"?, Matthieu Moy, (Wed Sep 5, 1:31 pm)
Re: People unaware of the importance of "git gc"?, Jeff King, (Wed Sep 5, 7:56 pm)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 4:16 am)
Re: People unaware of the importance of "git gc"?, Junio C Hamano, (Wed Sep 5, 3:30 am)
Re: People unaware of the importance of "git gc"?, Wincent Colaiuta, (Wed Sep 5, 4:51 am)
Re: People unaware of the importance of "git gc"?, Johan Herland, (Wed Sep 5, 4:13 am)
Re: People unaware of the importance of "git gc"?, Matthieu Moy, (Wed Sep 5, 4:39 am)
Re: People unaware of the importance of "git gc"?, Pierre Habouzit, (Wed Sep 5, 4:51 am)
Re: People unaware of the importance of "git gc"?, Matthieu Moy, (Wed Sep 5, 5:04 am)
Re: People unaware of the importance of "git gc"?, Johan Herland, (Wed Sep 5, 4:41 am)
Re: People unaware of the importance of "git gc"?, Tomash Brechko, (Wed Sep 5, 3:26 am)
Re: People unaware of the importance of "git gc"?, Martin Langhoff, (Wed Sep 5, 3:21 am)
speck-geostationary