I am starting to hate some parts of the strbuf API.
Here is an example. Can you spot what goes wrong?
static int handle_file(const char *path,
unsigned char *sha1, const char *output)
{
SHA_CTX ctx;
char buf[1024];
int hunk = 0, hunk_no = 0;
struct strbuf one, two;
...
if (sha1)
SHA1_Init(&ctx);strbuf_init(&one, 0);
strbuf_init(&two, 0);
while (fgets(buf, sizeof(buf), f)) {
if (!prefixcmp(buf, "<<<<<<< "))
hunk = 1;
else if (!prefixcmp(buf, "======="))
hunk = 2;
else if (!prefixcmp(buf, ">>>>>>> ")) {
int cmp = strbuf_cmp(&one, &two);hunk_no++;
hunk = 0;
if (cmp > 0) {
strbuf_swap(&one, &two);
}
if (out) {
fputs("<<<<<<<\n", out);
fwrite(one.buf, one.len, 1, out);
fputs("=======\n", out);
fwrite(two.buf, two.len, 1, out);
fputs(">>>>>>>\n", out);
}
if (sha1) {
SHA1_Update(&ctx, one.buf, one.len + 1);
SHA1_Update(&ctx, two.buf, two.len + 1);
}
strbuf_reset(&one);
strbuf_reset(&two);
} else if (hunk == 1)
strbuf_addstr(&one, buf);
else if (hunk == 2)
strbuf_addstr(&two, buf);
else if (out)
fputs(buf, out);
}
strbuf_release(&one);
strbuf_release(&two);
...
}When one or two are empty and the caller asked for checksumming,
the code still relies on one.buf being an allocated memory with
an extra NUL termination and tries to feed the NULL pointer to
SHA1_Update() with length of 1!An obvious workaround is to say "strbuf_init(&one, !!sha1)" to
force the allocation. However, because the second parameter to
strbuf_init() is defined to be merely a hint, we should not rely
on strbuf_init() with non-zero hint to have allocation from the
beginning. It is assuming too much.A more defensive way would be for the user to do something like:
SHA1_Update(&ctx, one.buf ? one.buf : "", one.len...
Like said in the 2/2 patch, I think it's better if people could be
able to always assume that and be done with it, else you have to know
this internal duality of the empty strbuf and it sucks.Instead, what is important, is that people that initialized a strbuf,
then want to go back in the char* world gets a NULL if nothing was
allocated. It is a semantics that is used in a few places (it's arguable
that it's a right thing to assume though). For those, making
strbuf_detach use mandatory, and dealing with the special ->alloc =3D=3D 0
case is the easiest way.And as I don't trust my eyes to be fresh, I've used the aid of the
compiler to bust any place where we were using .buf members directly,
possibly doing something stupid.--=20
=C2=B7O=C2=B7 Pierre Habouzit
=C2=B7=C2=B7O madcoder@debia=
n.org
OOO http://www.madism.org
So that we don't need to use strbuf_detach.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---Sorry, in the hurry I sent a wrong patch before, this one works :)
builtin-apply.c | 27 +++++++++++----------------
1 files changed, 11 insertions(+), 16 deletions(-)diff --git a/builtin-apply.c b/builtin-apply.c
index 833b142..1f0a672 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -178,12 +178,9 @@ static void say_patch_name(FILE *output, const char *pre, struct patch *patch, c
#define CHUNKSIZE (8192)
#define SLOP (16)-static void *read_patch_file(int fd, size_t *sizep)
+static void read_patch_file(struct strbuf *sb, int fd)
{
- struct strbuf buf;
-
- strbuf_init(&buf, 0);
- if (strbuf_read(&buf, fd, 0) < 0)
+ if (strbuf_read(sb, fd, 0) < 0)
die("git-apply: read returned %s", strerror(errno));/*
@@ -191,9 +188,8 @@ static void *read_patch_file(int fd, size_t *sizep)
* so that we can do speculative "memcmp" etc, and
* see to it that it is NUL-filled.
*/
- strbuf_grow(&buf, SLOP);
- memset(buf.buf + buf.len, 0, SLOP);
- return strbuf_detach(&buf, sizep);
+ strbuf_grow(sb, SLOP);
+ memset(sb->buf + sb->len, 0, SLOP);
}static unsigned long linelen(const char *buffer, unsigned long size)
@@ -2648,22 +2644,22 @@ static void prefix_patches(struct patch *p)static int apply_patch(int fd, const char *filename, int inaccurate_eof)
{
- unsigned long offset, size;
- char *buffer = read_patch_file(fd, &size);
+ size_t offset;
+ struct strbuf buf;
struct patch *list = NULL, **listp = &list;
int skipped_patch = 0;+ strbuf_init(&buf, 0);
patch_input_file = filename;
- if (!buffer)
- return -1;
+ read_patch_file(&buf, fd);
offset = 0;
- while (size > 0) {
+ while (offset < buf.len) {
struct patch *patch;
int nr;patch = xcalloc(1, sizeof(*patch));
patch->inaccurate_eof = inaccurate_eof;
- nr = parse_chunk(buffer + offset, size, patch...
I find this ugly, so I've even checked if we did assume that, which is
easy now that such places all use strbuf_detach. One place seemed to,
but wasn't, so I patched it so that it's not the case anymore.All other places that use strbuf_detach don't use the fact that it can
return NULL to detect error cases, so we are safe.--=20
=C2=B7O=C2=B7 Pierre Habouzit
=C2=B7=C2=B7O madcoder@debia=
n.org
OOO http://www.madism.org
So that we don't need to use strbuf_detach.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
builtin-apply.c | 18 ++++++++----------
1 files changed, 8 insertions(+), 10 deletions(-)diff --git a/builtin-apply.c b/builtin-apply.c
index 833b142..b0c6e60 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -178,7 +178,7 @@ static void say_patch_name(FILE *output, const char *pre, struct patch *patch, c
#define CHUNKSIZE (8192)
#define SLOP (16)-static void *read_patch_file(int fd, size_t *sizep)
+static void read_patch_file(struct strbuf *sb, int fd)
{
struct strbuf buf;@@ -193,7 +193,6 @@ static void *read_patch_file(int fd, size_t *sizep)
*/
strbuf_grow(&buf, SLOP);
memset(buf.buf + buf.len, 0, SLOP);
- return strbuf_detach(&buf, sizep);
}static unsigned long linelen(const char *buffer, unsigned long size)
@@ -2648,22 +2647,22 @@ static void prefix_patches(struct patch *p)static int apply_patch(int fd, const char *filename, int inaccurate_eof)
{
- unsigned long offset, size;
- char *buffer = read_patch_file(fd, &size);
+ size_t offset;
+ struct strbuf buf;
struct patch *list = NULL, **listp = &list;
int skipped_patch = 0;+ strbuf_init(&buf, 0);
patch_input_file = filename;
- if (!buffer)
- return -1;
+ read_patch_file(&buf, fd);
offset = 0;
- while (size > 0) {
+ while (offset < buf.len) {
struct patch *patch;
int nr;patch = xcalloc(1, sizeof(*patch));
patch->inaccurate_eof = inaccurate_eof;
- nr = parse_chunk(buffer + offset, size, patch);
+ nr = parse_chunk(buf.buf + offset, buf.len, patch);
if (nr < 0)
break;
if (apply_in_reverse)
@@ -2681,7 +2680,6 @@ static int apply_patch(int fd, const char *filename, int inaccurate_eof)
skipped_patch++;
}
offset += nr;
- size -= nr;
}if (whitespace_error && (new_whitespace == error_on_whitespace))
@@ -2716,7 +2714,7 @@ static int apply_patch(int fd, const char *...
I can see a way, that would need special proof-reading of the strbuf
module, but should not harm its users, that would be to change
STRBUF_INIT to work this way:{ .buf =3D "", .len =3D 0, .alloc =3D 0 }
It needs to make strbuf_grow and strbuf_release check for ->alloc
before doing anything stupid.Though we may have some bits of code that rely on .buf being NULL if
nothing happened. I tried to track them down, but some may remain.If you agree with this change, that would solve most of the issues
with almost no cost, then I'll propose a new patch with this change.--=20
=C2=B7O=C2=B7 Pierre Habouzit
=C2=B7=C2=B7O madcoder@debia=
n.org
OOO http://www.madism.org
I'd like to pipe up a bit here..
I think the above is a good fix for the current problem of wanting to
always be able to use "sb->buf", but I thinkit actually has the potential
to fix another issue entirely.Namely strbuf's that are initialized from various static strings and/or
strings not directly allocated with malloc().That's not necessarily something really unusual. Wanting to initialize a
string with a fixed constant value is a common problem.And wouldn't it be nice if you could actually do that, with
{ .buf = "static initializer", .len = 18, .alloc = 0 }
and have all the strbuf routines that modify the initializer (including
making it shorter!) notice that the allocation is too short, and create a
new allocation?Hmm?
Linus
-
We could probably do that. The places to change to make this work are
seldom:
* strbuf_grow to emulate a realloc (and copy the buffer into the new
malloc()ed one),
* strbuf_release assumes that ->alloc =3D=3D 0 means _init isn't
necessary, it would be now,
* strbuf_setlen should not have the assert anymore (though I'm not
sure this assert still makes sense with the new initializer).
and that's it.But we cannot initialize a strbuf with an immediate string because all
the strbuf APIs suppose that the strbuf buffer are writeable (and IMHO
it's pointless to use a strbuf for reading purposes). Other point, I've
made many readings of the code searching for specific patterns of code,
to replace with strbuf's, and I've never seen places (I do not say thoseSo I'd say I'll keep the idea in mind, because it's tasteful and could
help, though I'd prefer Junio to review that patch, and then later add
this new semantics if the need arises.The sole thing that may be worth investigating would look like:
char internal_buf[PATH_MAX]; /* should be damn long enough */
struct strbuf buf;strbuf_init_static(&buf, internal_buf, sizeof(internal_buf);
/* hack with the strbuf API */
strbuf_release(&buf); /* do release memory, in case we went over
PATH_MAX octets */because it could save some allocations _and_ be safe at the same time.
But I don't really like it, when allocation is critical in git, it's
rarely in functions where there is an obvious size limit for the
problem, and avoiding allocations can be done using static strbufs
(fast-import.c does that in many places).--=20
=C2=B7O=C2=B7 Pierre Habouzit
=C2=B7=C2=B7O madcoder@debia=
n.org
OOO http://www.madism.org
For that purpose, the ->buf is always initialized with a char * buf living
in the strbuf module. It is made a char * so that we can sloppily accept
things that perform: sb->buf[0] = '\0', and because you can't pass "" as an
initializer for ->buf without making gcc unhappy for very good reasons.strbuf_init/_detach/_grow have been fixed to trust ->alloc and not ->buf
anymore.as a consequence strbuf_detach is _mandatory_ to detach a buffer, copying
->buf isn't an option anymore, if ->buf is going to escape from the scope,
and eventually be free'd.API changes:
* strbuf_setlen now always works, so just make strbuf_reset a convenience
macro.
* strbuf_detatch takes a size_t* optional argument (meaning it can be
NULL) to copy the buffer's len, as it was needed for this refactor to
make the code more readable, and working like the callers.Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---I've written this patch using the following method:
(1) I've renamed the ->buf member into sb_buf (I grepped before it wasn't
a string used in the project) and made the changes I describe in
strbuf.[hc](2) I've built the project, and renamed every ->buf into sb_buf, and
applied the needed semantics changes. It's doing that that I found the
issue I fix in the patch 1/2.(3) I've then run the testsuite, it passes.
(4) I've sed -i -e 's/\<sb_buf\>/buf/g' *.h *.c
Nobody was directly using the fact that a strbuf that wasn't touched had
its pointer NULL, though people detach'ing them do, and strbuf_detach
complies with that. That's why I think, despite the somehow tasteless
"strbuf_slopbuf" it's the best way to go.builtin-apply.c | 16 +++++++---------
builtin-archive.c | 5 ++---
builtin-fetch--tool.c | 2 +-
commit.c | 2 +-
convert.c | 4 ++--
diff.c | 14 ++++++--------
entry.c | 3 +--
fast-import.c | 2 ...
path_name is either ptr that should not be freed, or a pointer to a strbuf
buffer that is deallocated when exiting the loop. Don't do that !Signed-off-by: Pierre Habouzit <madcoder@debian.org>
---
builtin-update-index.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)diff --git a/builtin-update-index.c b/builtin-update-index.c
index c76879e..e1a938d 100644
--- a/builtin-update-index.c
+++ b/builtin-update-index.c
@@ -377,8 +377,6 @@ static void read_index_info(int line_termination)
die("git-update-index: unable to update %s",
path_name);
}
- if (path_name != ptr)
- free(path_name);
continue;bad_line:
--
1.5.3.2.1100.g015a-dirty-
| Tarkan Erimer | Re: Dual-Licensing Linux Kernel with GPL V2 and GPL V3 |
| Nigel Cunningham | Re: [PATCH] Remove process freezer from suspend to RAM pathway |
| Paul Mundt | Re: 2.6.22-rc4-mm2 |
| Greg Kroah-Hartman | [PATCH 001/196] Chinese: Add the known_regression URI to the HOWTO |
git: | |
| Arjan van de Ven | Re: [GIT]: Networking |
| Gerrit Renker | [PATCH 27/37] dccp: Integration of dynamic feature activation - part 2 (server side) |
| Jarek Poplawski | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| Natalie Protasevich | [BUG] New Kernel Bugs |
