Skip to content

Commit 8c7359b

Browse files
committed
diff-process-normalize: add built-in whitespace normalizer
Add git diff-process-normalize, a built-in diff process that detects whitespace-only changes. It compares files line by line using xdiff_compare_lines() with XDF_IGNORE_WHITESPACE (same logic as "git diff -w"). If all lines match, it returns zero hunks; otherwise it returns an error so git falls back to the builtin diff algorithm. Example configuration: [diff "cdiff"] process = git diff-process-normalize. Update documentation to describe zero-hunk behavior for diff and blame, and document the built-in normalize tool. Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
1 parent 4e6ea6d commit 8c7359b

7 files changed

Lines changed: 223 additions & 0 deletions

File tree

Documentation/config/diff.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ endif::git-diff[]
224224
hunks that are fed into Git's diff and blame pipelines.
225225
If the tool returns zero hunks, the file is treated as
226226
unchanged for both diff output and blame attribution.
227+
Git provides `git diff-process-normalize` as a built-in
228+
tool that detects whitespace-only changes.
227229
See linkgit:gitattributes[5] for details.
228230

229231
`diff.indentHeuristic`::

Documentation/gitattributes.adoc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,21 @@ the file as having no changes and produces no diff output.
861861
where it reports zero hunks, attributing lines to earlier commits
862862
instead.
863863

864+
Git ships with a built-in diff process, `git diff-process-normalize`,
865+
that detects whitespace-only changes. Files whose only differences
866+
are whitespace produce zero hunks; files with non-whitespace changes
867+
fall back to the builtin diff algorithm. To use it:
868+
869+
----------------------------------------------------------------
870+
[diff "cdiff"]
871+
process = git diff-process-normalize
872+
----------------------------------------------------------------
873+
874+
This is useful after running a code formatter: `git diff` shows
875+
no output for files that only had whitespace changes,
876+
and `git blame` skips whitespace-only commits automatically without
877+
requiring a `.git-blame-ignore-revs` file.
878+
864879
Tools should ignore unknown keys in the per-file request to
865880
remain forward-compatible.
866881

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,7 @@ BUILTIN_OBJS += builtin/diagnose.o
14091409
BUILTIN_OBJS += builtin/diff-files.o
14101410
BUILTIN_OBJS += builtin/diff-index.o
14111411
BUILTIN_OBJS += builtin/diff-pairs.o
1412+
BUILTIN_OBJS += builtin/diff-process-normalize.o
14121413
BUILTIN_OBJS += builtin/diff-tree.o
14131414
BUILTIN_OBJS += builtin/diff.o
14141415
BUILTIN_OBJS += builtin/difftool.o

builtin.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ int cmd_diff_files(int argc, const char **argv, const char *prefix, struct repos
178178
int cmd_diff_index(int argc, const char **argv, const char *prefix, struct repository *repo);
179179
int cmd_diff(int argc, const char **argv, const char *prefix, struct repository *repo);
180180
int cmd_diff_pairs(int argc, const char **argv, const char *prefix, struct repository *repo);
181+
int cmd_diff_process_normalize(int argc, const char **argv, const char *prefix, struct repository *repo);
181182
int cmd_diff_tree(int argc, const char **argv, const char *prefix, struct repository *repo);
182183
int cmd_difftool(int argc, const char **argv, const char *prefix, struct repository *repo);
183184
int cmd_env__helper(int argc, const char **argv, const char *prefix, struct repository *repo);

builtin/diff-process-normalize.c

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
* Built-in diff process that returns zero hunks for files whose
3+
* only differences are whitespace, and status=error otherwise.
4+
* See diff-process.c for the protocol and gitattributes(5) for usage.
5+
*
6+
* Uses xdiff_compare_lines() with XDF_IGNORE_WHITESPACE to compare
7+
* lines, giving the same whitespace handling as "git diff -w".
8+
*/
9+
10+
#include "builtin.h"
11+
#include "pkt-line.h"
12+
#include "strbuf.h"
13+
#include "xdiff-interface.h"
14+
15+
/*
 * Fetch one pkt-line from fd.
 *
 * Returns 1 after storing a data packet in "line" (previous contents
 * replaced, trailing whitespace stripped), 0 for a flush or otherwise
 * empty packet, and -1 when packet_read_line_gently() reports failure
 * (EOF).  "line" is left untouched unless 1 is returned.
 */
static int read_pkt(int fd, struct strbuf *line)
{
	char *payload;
	int nbytes;
	int rc = packet_read_line_gently(fd, &nbytes, &payload);

	if (rc < 0)
		return -1;
	if (payload && nbytes) {
		strbuf_reset(line);
		strbuf_add(line, payload, nbytes);
		strbuf_rtrim(line);
		return 1;
	}
	return 0; /* flush */
}
32+
33+
/*
34+
* Read packetized content until a flush packet.
35+
*/
36+
static int read_content(int fd, struct strbuf *out)
37+
{
38+
strbuf_reset(out);
39+
if (read_packetized_to_strbuf(fd, out, PACKET_READ_GENTLE_ON_EOF) < 0)
40+
return -1;
41+
return 0;
42+
}
43+
44+
/*
45+
* Compare two buffers line by line using xdiff_compare_lines() with
46+
* XDF_IGNORE_WHITESPACE (same logic as "git diff -w").
47+
* Returns 1 if all lines match, 0 otherwise.
48+
*/
49+
static int whitespace_equivalent(const char *a, long size_a,
50+
const char *b, long size_b)
51+
{
52+
const char *ea = a + size_a;
53+
const char *eb = b + size_b;
54+
55+
while (a < ea && b < eb) {
56+
const char *eol_a = memchr(a, '\n', ea - a);
57+
const char *eol_b = memchr(b, '\n', eb - b);
58+
long len_a = (eol_a ? eol_a : ea) - a;
59+
long len_b = (eol_b ? eol_b : eb) - b;
60+
61+
if (!xdiff_compare_lines(a, len_a, b, len_b,
62+
XDF_IGNORE_WHITESPACE))
63+
return 0;
64+
65+
a += len_a + (eol_a ? 1 : 0);
66+
b += len_b + (eol_b ? 1 : 0);
67+
}
68+
69+
/* Both sides must be exhausted */
70+
return a >= ea && b >= eb;
71+
}
72+
73+
int cmd_diff_process_normalize(int argc UNUSED, const char **argv UNUSED,
74+
const char *prefix UNUSED,
75+
struct repository *repo UNUSED)
76+
{
77+
struct strbuf line = STRBUF_INIT;
78+
struct strbuf old_content = STRBUF_INIT;
79+
struct strbuf new_content = STRBUF_INIT;
80+
int ret;
81+
82+
/* Handshake: read client greeting */
83+
ret = read_pkt(0, &line);
84+
if (ret <= 0 || strcmp(line.buf, "git-diff-client"))
85+
return 1;
86+
ret = read_pkt(0, &line);
87+
if (ret <= 0 || strcmp(line.buf, "version=1"))
88+
return 1;
89+
read_pkt(0, &line); /* flush */
90+
91+
/* Send server greeting */
92+
packet_write_fmt(1, "git-diff-server\n");
93+
packet_write_fmt(1, "version=1\n");
94+
packet_flush(1);
95+
96+
/* Read client capabilities until flush */
97+
while ((ret = read_pkt(0, &line)) > 0)
98+
; /* consume */
99+
100+
/* Send our capabilities */
101+
packet_write_fmt(1, "capability=hunks\n");
102+
packet_flush(1);
103+
104+
/* Main loop: process file pairs */
105+
for (;;) {
106+
int have_command = 0;
107+
108+
/* Read request headers until flush */
109+
while ((ret = read_pkt(0, &line)) > 0) {
110+
if (starts_with(line.buf, "command="))
111+
have_command = 1;
112+
}
113+
if (ret < 0)
114+
break; /* EOF: client closed connection */
115+
if (!have_command)
116+
break;
117+
118+
/* Read old file content */
119+
if (read_content(0, &old_content) < 0)
120+
break;
121+
/* Read new file content */
122+
if (read_content(0, &new_content) < 0)
123+
break;
124+
125+
if (whitespace_equivalent(old_content.buf, old_content.len,
126+
new_content.buf, new_content.len)) {
127+
/* Whitespace-only differences */
128+
packet_flush(1); /* zero hunks */
129+
packet_write_fmt(1, "status=success\n");
130+
packet_flush(1);
131+
} else {
132+
/* Non-whitespace differences: fall back */
133+
packet_flush(1);
134+
packet_write_fmt(1, "status=error\n");
135+
packet_flush(1);
136+
}
137+
}
138+
139+
strbuf_release(&line);
140+
strbuf_release(&old_content);
141+
strbuf_release(&new_content);
142+
return 0;
143+
}

git.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,7 @@ static struct cmd_struct commands[] = {
568568
{ "diff-files", cmd_diff_files, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
569569
{ "diff-index", cmd_diff_index, RUN_SETUP | NO_PARSEOPT },
570570
{ "diff-pairs", cmd_diff_pairs, RUN_SETUP | NO_PARSEOPT },
571+
{ "diff-process-normalize", cmd_diff_process_normalize, NO_PARSEOPT },
571572
{ "diff-tree", cmd_diff_tree, RUN_SETUP | NO_PARSEOPT },
572573
{ "difftool", cmd_difftool, RUN_SETUP_GENTLY },
573574
{ "fast-export", cmd_fast_export, RUN_SETUP },

t/t4080-diff-process.sh

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,5 +366,65 @@ test_expect_success PYTHON 'blame skips commits with zero hunks from diff proces
366366
! grep "$BLAME_COMMIT" with
367367
'
368368

369+
# Command line of the built-in normalizer; the tests below pass it as
# the value of diff.cdiff.process.
NORMALIZE="git diff-process-normalize"
370+
371+
# Assign the "cdiff" diff driver to *.c files via .gitattributes and
# record a base commit.
test_expect_success 'diff-process-normalize setup' '
372+
echo "*.c diff=cdiff" >.gitattributes &&
373+
git add .gitattributes &&
374+
test_commit normalize-base
375+
'
376+
377+
# Commit ws.c, rewrite it in the working tree, and expect "git diff"
# run with the normalizer configured to print nothing.
# NOTE(review): the two here-documents look identical in this view;
# presumably they differ only in whitespace -- confirm against the
# real file.
test_expect_success 'diff-process-normalize suppresses whitespace-only changes' '
378+
cat >ws.c <<-\EOF &&
379+
int main(void)
380+
{
381+
return 0;
382+
}
383+
EOF
384+
git add ws.c &&
385+
git commit -m "add ws.c" &&
386+
387+
cat >ws.c <<-\EOF &&
388+
int main(void)
389+
{
390+
return 0;
391+
}
392+
EOF
393+
394+
git -c diff.cdiff.process="$NORMALIZE" \
395+
diff ws.c >actual &&
396+
test_must_be_empty actual
397+
'
398+
399+
# Rewrite ws.c with an extra function appended and expect the diff
# output to mention "added_function", i.e. a regular diff is shown.
test_expect_success 'diff-process-normalize falls back on non-whitespace changes' '
400+
cat >ws.c <<-\EOF &&
401+
int main(void)
402+
{
403+
return 0;
404+
}
405+
406+
int added_function(void)
407+
{
408+
return 99;
409+
}
410+
EOF
411+
412+
git -c diff.cdiff.process="$NORMALIZE" \
413+
diff ws.c >actual &&
414+
grep "added_function" actual
415+
'
416+
417+
# Rewrite ws.c so that main returns 42 and expect the diff output to
# contain "return 42".
# NOTE(review): the title implies the here-document also changes
# whitespace; that is not visible in this view -- confirm against the
# real file.
test_expect_success 'diff-process-normalize falls back on mixed whitespace and real changes' '
418+
cat >ws.c <<-\EOF &&
419+
int main(void)
420+
{
421+
return 42;
422+
}
423+
EOF
424+
425+
git -c diff.cdiff.process="$NORMALIZE" \
426+
diff ws.c >actual &&
427+
grep "return 42" actual
428+
'
369429

370430
test_done

0 commit comments

Comments
 (0)