Here are three patches that come from msysgit. They do not bring
any functional changes, but only clean up code, or fix warnings.
Steffen
builtin-init-db.c | 4 +---
git.c | 6 +++---
path.c | 2 +-
setup.c | 2 +-
sha1_file.c | 10 ++++++----
5 files changed, 12 insertions(+), 12 deletions(-)
[PATCH 1/3] sha1_file.c: Fix size_t related printf format warnings
[PATCH 2/3] builtin-init-db: use get_git_dir() instead of getenv()
[PATCH 3/3] Replace setenv(GIT_DIR_ENVIRONMENT, ...) with set_git_dir()
-
The old way of fixing warnings did not succeed on MinGW. MinGW does not support C99 printf format strings for size_t [1]. But gcc on MinGW issues warnings if C99 printf format is not used. Hence, the old stragegy to avoid warnings fails. [1] http://www.mingw.org/MinGWiki/index.php/C99 This commits passes arguments of type size_t through a tiny helper functions that casts to the type expected by the format string. Signed-off-by: Steffen Prohaska <prohaska@zib.de> --- sha1_file.c | 10 ++++++---- 1 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index f007874..4f68e53 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -25,8 +25,10 @@ #ifdef NO_C99_FORMAT #define SZ_FMT "lu" +static unsigned long sz_fmt(size_t s) { return (unsigned long)s; } #else #define SZ_FMT "zu" +static size_t sz_fmt(size_t s) { return s; } #endif const unsigned char null_sha1[20]; @@ -423,9 +425,9 @@ void pack_report(void) "pack_report: getpagesize() = %10" SZ_FMT "\n" "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n" "pack_report: core.packedGitLimit = %10" SZ_FMT "\n", - (size_t) getpagesize(), - packed_git_window_size, - packed_git_limit); + sz_fmt(getpagesize()), + sz_fmt(packed_git_window_size), + sz_fmt(packed_git_limit)); fprintf(stderr, "pack_report: pack_used_ctr = %10u\n" "pack_report: pack_mmap_calls = %10u\n" @@ -435,7 +437,7 @@ void pack_report(void) pack_used_ctr, pack_mmap_calls, pack_open_windows, peak_pack_open_windows, - pack_mapped, peak_pack_mapped); + sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped)); } static int check_packed_git_idx(const char *path, struct packed_git *p) -- 1.5.3.5.750.g8692 -
We have a function get_git_dir(). So let's use it, instead of querying the environment. Signed-off-by: Steffen Prohaska <prohaska@zib.de> --- builtin-init-db.c | 4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/builtin-init-db.c b/builtin-init-db.c index 763fa55..80d2f27 100644 --- a/builtin-init-db.c +++ b/builtin-init-db.c @@ -378,9 +378,7 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) /* * Set up the default .git directory contents */ - git_dir = getenv(GIT_DIR_ENVIRONMENT); - if (!git_dir) - git_dir = DEFAULT_GIT_DIR_ENVIRONMENT; + git_dir = get_git_dir(); safe_create_dir(git_dir, 0); /* Check to see if the repository version is right. -- 1.5.3.5.750.g8692 -
From: Dmitry Kakurin <Dmitry.Kakurin@gmail.com>
We have a function set_git_dir(). So let's use it, instead
of setting the evironment directly.
This also fixes a problem on Windows: environment.c caches
results of many getenv calls. A setenv(X) call to the Windows
C runtime (CRT) invalidates all previous values returned by
getenv(X). So cached values become dangling pointers.
Before this change, git clone was failing with 'invalid object
name HEAD' if ran from Windows' cmd.exe.
[sp: rebased; split off get_git_dir() in builtin-init-db.c
to separate commit; adjusted commit message. ]
Signed-off-by: Dmitry Kakurin <Dmitry.Kakurin@gmail.com>
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
---
git.c | 6 +++---
path.c | 2 +-
setup.c | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/git.c b/git.c
index 7604319..8bc25b7 100644
--- a/git.c
+++ b/git.c
@@ -45,14 +45,14 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
fprintf(stderr, "No directory given for --git-dir.\n" );
usage(git_usage_string);
}
- setenv(GIT_DIR_ENVIRONMENT, (*argv)[1], 1);
+ set_git_dir( (*argv)[1] );
if (envchanged)
*envchanged = 1;
(*argv)++;
(*argc)--;
handled++;
} else if (!prefixcmp(cmd, "--git-dir=")) {
- setenv(GIT_DIR_ENVIRONMENT, cmd + 10, 1);
+ set_git_dir(cmd + 10);
if (envchanged)
*envchanged = 1;
} else if (!strcmp(cmd, "--work-tree")) {
@@ -72,7 +72,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
} else if (!strcmp(cmd, "--bare")) {
static char git_dir[PATH_MAX+1];
is_bare_repository_cfg = 1;
- setenv(GIT_DIR_ENVIRONMENT, getcwd(git_dir, sizeof(git_dir)), 0);
+ set_git_dir(getcwd(git_dir, sizeof(git_dir)));
if (envchanged)
*envchanged = 1;
} else {
diff --git a/path.c b/path.c
index 4260952..f26a4a1 100644
--- a/path.c
+++ b/path.c
@@ -248,7 +248,7 @@ char *enter_repo(char *path, int ...Hi, Does this not have a fundamental issue? When you call other git programs with run_command(), you _need_ GIT_DIR to be set, no? Ciao, Dscho -
It is much worse. set_git_dir() does not just setenv() but does setup_git_env() as well. -
What do your comments mean? My understanding is that set_git_dir() sets the environment and then calls setup_git_env() to cache all pointers. This call updates dangling pointer if they have been cached earlier. But maybe there is a hidden secret that I don't see. Steffen -
Well, I was agreeing with you. "Worse" was about what the current code does _not_ do. If there are earlier calls that obtain locations relative to the earlier definition of GIT_DIR, the locations they obtained are not just stored in memory that is "dangling" (which was what your proposed log message described) but they are also inconsistent with the updated definition of GIT_DIR. I suspect Johannes mistook set_git_dir() was only local (i.e. per calling process) matter without noticing that it has its own setenv() when he made that comment, hence my response to point out that the current code only calls setenv(), but set_git_dir() does setup_git_env() too, which should hide the inconsistency problem. HOWEVER. I suspect that if there are even earlier callers than these early parts in the codepaths (handle_options, enter_repo, and setup_git_directory_gently), maybe these earlier callers are doing something wrong. Logically, if you are somewhere very early in the codepath that you can still change the value of GIT_DIR, you shouldn't have assumed the unknown value of GIT_DIR and cached locations relative to that directory, no? What are the problematic callers? What values do they access and why? -
I thought about these questions, too. But only very briefly. I did not analyze the code path that lead to calls of getenv(). I'm not sure if it's really necessary. Calling set_git_dir() looks more sensible too me than the old code. I believe using set_git_dir() is the safer choice, and should not do any harm. So I stopped analyzing too much, and instead proposed to use set_git_dir(). Interesting, though, is to find out if we have other potentially dangerous calls to getenv() that are not removed by this patch. Steffen -
Junio's point is this: If we stumble over a dangling pointer that getenv() produced, then this has obviously happened before setenv(GIT_DIR), and caching that pointer is probably the wrong thing to do anyway (because it refers to the wrong GIT_DIR) and needs to be fixed. So the task is to find those traps. Dmitry obviously stumbled over one case, but I haven't ever encountered any problems with the current code. But then this might be sheer luck. And I'm not a heavy user of export GIT_DIR=foo, Side note for other readers: This is a Windows specific problem for the moment because its getenv() does not behave well. -- Hannes -
I see your point. It is probably more important to investigate No. I only stumbled over the code, when I reviewed differences between msysgit and mingw. I rarely use GIT_DIR=foo. Actually, Yes, and apparently even nobody knows how to trigger the problem on Windows. At this point, we only know that caching getenv() calls is unsafe, while on Unix it is safe (at least for BSD it's documented to be safe). Steffen -
Hi,
A quick and easy way would be to instrument getenv(), unsetenv() and
setenv(), which would trigger an error. Something like this (but you
will have to put in a few "extern called_getenv; called_getenv = 0;",
since already a simple git-init fails because of setup_path()):
-- snipsnap --
[PATCH] Instrument getenv(), setenv(), unsetenv() and putenv()
... for finding places where a pointer obtained by getenv() could
be invalidated later.
---
environment.c | 1 +
git-compat-util.h | 31 +++++++++++++++++++++++++++++++
2 files changed, 32 insertions(+), 0 deletions(-)
diff --git a/environment.c b/environment.c
index ce75e98..027340e 100644
--- a/environment.c
+++ b/environment.c
@@ -9,6 +9,7 @@
*/
#include "cache.h"
+int called_setenv, called_getenv;
char git_default_email[MAX_GITNAME];
char git_default_name[MAX_GITNAME];
int trust_executable_bit = 1;
diff --git a/git-compat-util.h b/git-compat-util.h
index 79eb10e..a41469b 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -427,4 +427,35 @@ static inline int strtol_i(char const *s, int base, int *result)
return 0;
}
+extern int called_setenv, called_getenv;
+static inline char *test_getenv(const char *name)
+{
+ if (!called_setenv)
+ warning ("called test_getenv %s", name);
+ called_getenv = 1;
+ return getenv(name);
+}
+static inline int test_setenv(const char *name, const char *value, int overwrite)
+{
+ if (!called_setenv && called_getenv)
+ die ("getenv was called before setenv(%s, %s, %d)",
+ name, value, overwrite);
+ return setenv(name, value, overwrite);
+}
+static inline int test_unsetenv(const char *name)
+{
+ if (!called_setenv && called_getenv)
+ die ("getenv was called before unsetenv(%s)", name);
+ return unsetenv(name);
+}
+static inline int test_putenv(char *string)
+{
+ if (!called_setenv && called_getenv)
+ die ("getenv was called before putenv(%s)", string);
+ return putenv(string);
+}
+#define getenv test_getenv
+#define ...I don't see what's wrong with that. Could you please explain? -- Hannes -
