Perhaps isspecial() is misnamed if we were to enhance git-ctype in this
way. It is about a byte being shell glob pattern or a NUL (!!!), and it
should be renamed to isglobspecial() or something.
dir.c uses isspecial() in two places, and both callers rely on NUL being a
part of special to terminate the loops they are in, like this:
for (;;) {
unsigned char c = *match++;
len++;
if (isspecial(c))
return len;
}
It may be a cunning and cute logic, but I do not particularly like it. It
might be cleaner to rename it to isglobspecial(), drop NUL from it, and
have these two call existing call sites to explicitly check for (c == NUL)
for loop termination.
--
Manipulating the character class table in ctype.c by hand is error prone. To ensure that typos are found quickly, add a test program and script. test-ctype checks the output of the character class macros isspace() et. al. by applying them on all possible char values and consulting a list of all characters in the particular class. It doesn't check tolower() and toupper(); this could be added later. The test script t0070-fundamental.sh is created because there is no good place for the ctype test, yet -- except for t0000-basic.sh perhaps, but it doesn't run well on Windows, yet. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- Makefile | 3 ++ t/t0070-fundamental.sh | 15 +++++++++++ test-ctype.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 0 deletions(-) create mode 100755 t/t0070-fundamental.sh create mode 100644 test-ctype.c diff --git a/Makefile b/Makefile index 4b1d488..dca61f5 100644 --- a/Makefile +++ b/Makefile @@ -1360,6 +1360,7 @@ endif ### Testing rules TEST_PROGRAMS += test-chmtime$X +TEST_PROGRAMS += test-ctype$X TEST_PROGRAMS += test-date$X TEST_PROGRAMS += test-delta$X TEST_PROGRAMS += test-genrandom$X @@ -1379,6 +1380,8 @@ export NO_SVN_TESTS test: all $(MAKE) -C t/ all +test-ctype$X: ctype.o + test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh new file mode 100755 index 0000000..680d7d6 --- /dev/null +++ b/t/t0070-fundamental.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +test_description='check that the most basic functions work + + +Verify wrappers and compatibility functions. +' + +. ./test-lib.sh + +test_expect_success 'character classes (isspace, isalpha etc.)' ' + test-ctype +' + +test_done diff --git a/test-ctype.c b/test-ctype.c new file mode 100644 index 0000000..723eff4 --- /dev/null +++ b/test-ctype.c @@ -0,0 +1,66 @@ +#include ...
Enhance the readability of ctype.c by using an enum instead of macros
to initialize the character class table. This allows the use of a single
letter to mark a char, making the table fit within 80 columns.
Also list the index of the last entry in each row in the following comment.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
ctype.c | 32 ++++++++++++++------------------
1 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/ctype.c b/ctype.c
index 9208d67..6528687 100644
--- a/ctype.c
+++ b/ctype.c
@@ -5,25 +5,21 @@
*/
#include "cache.h"
-/* Just so that no insane platform contaminate namespace with these symbols */
-#undef SS
-#undef AA
-#undef DD
-#undef GS
-
-#define SS GIT_SPACE
-#define AA GIT_ALPHA
-#define DD GIT_DIGIT
-#define GS GIT_SPECIAL /* \0, *, ?, [, \\ */
+enum {
+ S = GIT_SPACE,
+ A = GIT_ALPHA,
+ D = GIT_DIGIT,
+ G = GIT_SPECIAL, /* \0, *, ?, [, \\ */
+};
unsigned char sane_ctype[256] = {
- GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-15 */
- SS, 0, 0, 0, 0, 0, 0, 0, 0, 0, GS, 0, 0, 0, 0, 0, /* 32-15 */
- DD, DD, DD, DD, DD, DD, DD, DD, DD, DD, 0, 0, 0, 0, 0, GS, /* 48-15 */
- 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 64-15 */
- AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, 0, 0, /* 80-15 */
- 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */
- AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, 0, 0, 0, 0, 0, /* 112-15 */
+ G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
+ S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */
+ D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
+ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 ...Add is_regex_special(), a character class macro for chars that have a
special meaning in regular expressions.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
This patch applies to next (plus the previous ones in this series).
ctype.c | 7 ++++---
git-compat-util.h | 2 ++
grep.c | 9 +--------
test-ctype.c | 6 ++++++
4 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/ctype.c b/ctype.c
index 9de187c..b90ec00 100644
--- a/ctype.c
+++ b/ctype.c
@@ -10,16 +10,17 @@ enum {
A = GIT_ALPHA,
D = GIT_DIGIT,
G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
+ R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */
};
unsigned char sane_ctype[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
- S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */
+ S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */
D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
- A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */
0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
- A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */
+ A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */
/* Nothing in the 128.. range */
};
diff --git a/git-compat-util.h b/git-compat-util.h
index 7c92588..079cbe9 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -328,12 +328,14 @@ extern unsigned char sane_ctype[256];
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) ...Replace isspecial() by the new macro is_glob_special(), which is more,
well, specialized. The former included the NUL char in its character
class, while the letter only included characters that are special to
file name globbing.
The new name contains underscores because they enhance readability
considerably now that it's made up of three words. Renaming the
function is necessary to document its changed scope.
The call sites of isspecial() are updated to check explicitly for NUL.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
This patch applies to next (plus the previous ones in this series).
ctype.c | 4 ++--
dir.c | 4 ++--
git-compat-util.h | 4 ++--
grep.c | 5 +++--
test-ctype.c | 6 ++++++
5 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/ctype.c b/ctype.c
index 6528687..9de187c 100644
--- a/ctype.c
+++ b/ctype.c
@@ -9,11 +9,11 @@ enum {
S = GIT_SPACE,
A = GIT_ALPHA,
D = GIT_DIGIT,
- G = GIT_SPECIAL, /* \0, *, ?, [, \\ */
+ G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
};
unsigned char sane_ctype[256] = {
- G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */
D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
diff --git a/dir.c b/dir.c
index 7c59829..d55a41a 100644
--- a/dir.c
+++ b/dir.c
@@ -75,7 +75,7 @@ static int match_one(const char *match, const char *name, int namelen)
for (;;) {
unsigned char c1 = *match;
unsigned char c2 = *name;
- if (isspecial(c1))
+ if (c1 == '\0' || is_glob_special(c1))
break;
if (c1 != c2)
return 0;
@@ -678,7 +678,7 @@ static int simple_length(const char *match)
for (;;) {
unsigned char c = *match++;
len++;
- if (isspecial(c))
+ if (c == '\0' || is_glob_special(c))
...