Skip to content

Commit

Permalink
[PATCH] Add -B flag to diff-* brothers.
Browse files Browse the repository at this point in the history
A new diffcore transformation, diffcore-break.c, is introduced.

When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation.  This
makes it easier to review such a complete rewrite patch.

The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.

As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used.  For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:

    The original change looks like this:

	file0     --> file0' (quite different from file0)
	file1     --> /dev/null

    After diffcore-break runs, it would become this:

	file0     --> /dev/null
	/dev/null --> file0'
	file1     --> /dev/null

    Then diffcore-rename matches them up:

	file1     --> file0'

The internal score values are finer grained now.  The earlier
maximum of 10000 has been raised to 60000; there are no
user-visible changes, but there is no reason to waste available bits.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Junio C Hamano authored and Linus Torvalds committed May 30, 2005
1 parent 2cd6888 commit f345b0a
Show file tree
Hide file tree
Showing 13 changed files with 433 additions and 29 deletions.
5 changes: 4 additions & 1 deletion Documentation/git-diff-cache.txt
Expand Up @@ -9,7 +9,7 @@ git-diff-cache - Compares content and mode of blobs between the cache and reposi

SYNOPSIS
--------
'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
'git-diff-cache' [-p] [-r] [-z] [-m] [-B] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]

DESCRIPTION
-----------
Expand All @@ -35,6 +35,9 @@ OPTIONS
-z::
\0 line termination on output

-B::
Break complete rewrite changes into pairs of delete and create.

-M::
Detect renames.

Expand Down
5 changes: 4 additions & 1 deletion Documentation/git-diff-files.txt
Expand Up @@ -9,7 +9,7 @@ git-diff-files - Compares files in the working tree and the cache

SYNOPSIS
--------
'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S<string>] [--pickaxe-all] [<pattern>...]
'git-diff-files' [-p] [-q] [-r] [-z] [-B] [-M] [-C] [-R] [-S<string>] [--pickaxe-all] [<pattern>...]

DESCRIPTION
-----------
Expand All @@ -29,6 +29,9 @@ OPTIONS
-R::
Output diff in reverse.

-B::
Break complete rewrite changes into pairs of delete and create.

-M::
Detect renames.

Expand Down
5 changes: 4 additions & 1 deletion Documentation/git-diff-tree.txt
Expand Up @@ -9,7 +9,7 @@ git-diff-tree - Compares the content and mode of blobs found via two tree object

SYNOPSIS
--------
'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
'git-diff-tree' [-p] [-r] [-z] [--stdin] [-B] [-M] [-R] [-C] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*

DESCRIPTION
-----------
Expand All @@ -33,6 +33,9 @@ OPTIONS
generate patch (see section on generating patches). For
git-diff-tree, this flag implies '-r' as well.

-B::
Break complete rewrite changes into pairs of delete and create.

-M::
Detect renames.

Expand Down
3 changes: 2 additions & 1 deletion Makefile
Expand Up @@ -48,7 +48,7 @@ LIB_OBJS += strbuf.o

LIB_H += diff.h count-delta.h
LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
count-delta.o
count-delta.o diffcore-break.o

LIB_OBJS += gitenv.o

Expand Down Expand Up @@ -130,6 +130,7 @@ diff.o: $(LIB_H) diffcore.h
diffcore-rename.o : $(LIB_H) diffcore.h
diffcore-pathspec.o : $(LIB_H) diffcore.h
diffcore-pickaxe.o : $(LIB_H) diffcore.h
diffcore-break.o : $(LIB_H) diffcore.h

test: all
$(MAKE) -C t/ all
Expand Down
11 changes: 9 additions & 2 deletions diff-cache.c
Expand Up @@ -9,6 +9,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
static int diff_break_opt = -1;

/* A file entry went away or appeared */
static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode)
Expand Down Expand Up @@ -188,6 +189,10 @@ int main(int argc, const char **argv)
diff_output_format = DIFF_FORMAT_PATCH;
continue;
}
if (!strncmp(arg, "-B", 2)) {
diff_break_opt = diff_scoreopt_parse(arg);
continue;
}
if (!strncmp(arg, "-M", 2)) {
detect_rename = DIFF_DETECT_RENAME;
diff_score_opt = diff_scoreopt_parse(arg);
Expand Down Expand Up @@ -240,9 +245,11 @@ int main(int argc, const char **argv)
die("unable to read tree object %s", tree_name);

ret = diff_cache(active_cache, active_nr);
diffcore_std(pathspec,

diffcore_std(pathspec ? : NULL,
detect_rename, diff_score_opt,
pickaxe, pickaxe_opts);
pickaxe, pickaxe_opts,
diff_break_opt);
diff_flush(diff_output_format, 1);
return ret;
}
8 changes: 6 additions & 2 deletions diff-files.c
Expand Up @@ -15,6 +15,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
static int diff_break_opt = -1;
static int silent = 0;

static void show_unmerge(const char *path)
Expand Down Expand Up @@ -57,6 +58,8 @@ int main(int argc, const char **argv)
pickaxe = argv[1] + 2;
else if (!strcmp(argv[1], "--pickaxe-all"))
pickaxe_opts = DIFF_PICKAXE_ALL;
else if (!strncmp(argv[1], "-B", 2))
diff_break_opt = diff_scoreopt_parse(argv[1]);
else if (!strncmp(argv[1], "-M", 2)) {
diff_score_opt = diff_scoreopt_parse(argv[1]);
detect_rename = DIFF_DETECT_RENAME;
Expand Down Expand Up @@ -116,9 +119,10 @@ int main(int argc, const char **argv)
show_modified(oldmode, mode, ce->sha1, null_sha1,
ce->name);
}
diffcore_std(argv + 1,
diffcore_std((1 < argc) ? argv + 1 : NULL,
detect_rename, diff_score_opt,
pickaxe, pickaxe_opts);
pickaxe, pickaxe_opts,
diff_break_opt);
diff_flush(diff_output_format, 1);
return 0;
}
8 changes: 7 additions & 1 deletion diff-tree.c
Expand Up @@ -14,6 +14,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
static int diff_break_opt = -1;
static const char *header = NULL;
static const char *header_prefix = "";

Expand Down Expand Up @@ -263,7 +264,8 @@ static int call_diff_flush(void)
{
diffcore_std(0,
detect_rename, diff_score_opt,
pickaxe, pickaxe_opts);
pickaxe, pickaxe_opts,
diff_break_opt);
if (diff_queue_is_empty()) {
diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
return 0;
Expand Down Expand Up @@ -523,6 +525,10 @@ int main(int argc, const char **argv)
diff_score_opt = diff_scoreopt_parse(arg);
continue;
}
if (!strncmp(arg, "-B", 2)) {
diff_break_opt = diff_scoreopt_parse(arg);
continue;
}
if (!strcmp(arg, "-z")) {
diff_output_format = DIFF_FORMAT_MACHINE;
continue;
Expand Down
21 changes: 18 additions & 3 deletions diff.c
Expand Up @@ -603,6 +603,7 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
dp->two = two;
dp->score = 0;
dp->source_stays = 0;
dp->broken_pair = 0;
diff_q(queue, dp);
return dp;
}
Expand Down Expand Up @@ -637,6 +638,16 @@ static void diff_flush_raw(struct diff_filepair *p,
sprintf(status, "%c%03d", p->status,
(int)(0.5 + p->score * 100.0/MAX_SCORE));
break;
case 'N': case 'D':
two_paths = 0;
if (p->score)
sprintf(status, "%c%03d", p->status,
(int)(0.5 + p->score * 100.0/MAX_SCORE));
else {
status[0] = p->status;
status[1] = 0;
}
break;
default:
two_paths = 0;
status[0] = p->status;
Expand Down Expand Up @@ -760,8 +771,9 @@ void diff_debug_filepair(const struct diff_filepair *p, int i)
{
diff_debug_filespec(p->one, i, "one");
diff_debug_filespec(p->two, i, "two");
fprintf(stderr, "score %d, status %c source_stays %d\n",
p->score, p->status ? : '?', p->source_stays);
fprintf(stderr, "score %d, status %c stays %d broken %d\n",
p->score, p->status ? : '?',
p->source_stays, p->broken_pair);
}

void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
Expand Down Expand Up @@ -875,10 +887,13 @@ void diff_flush(int diff_output_style, int resolve_rename_copy)

void diffcore_std(const char **paths,
int detect_rename, int rename_score,
const char *pickaxe, int pickaxe_opts)
const char *pickaxe, int pickaxe_opts,
int break_opt)
{
if (paths && paths[0])
diffcore_pathspec(paths);
if (0 <= break_opt)
diffcore_break(break_opt);
if (detect_rename)
diffcore_rename(detect_rename, rename_score);
if (pickaxe)
Expand Down
5 changes: 4 additions & 1 deletion diff.h
Expand Up @@ -43,9 +43,12 @@ extern void diffcore_pickaxe(const char *needle, int opts);

extern void diffcore_pathspec(const char **pathspec);

extern void diffcore_break(int);

extern void diffcore_std(const char **paths,
int detect_rename, int rename_score,
const char *pickaxe, int pickaxe_opts);
const char *pickaxe, int pickaxe_opts,
int break_opt);

extern int diff_queue_is_empty(void);

Expand Down
127 changes: 127 additions & 0 deletions diffcore-break.c
@@ -0,0 +1,127 @@
/*
* Copyright (C) 2005 Junio C Hamano
*/
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
#include "delta.h"
#include "count-delta.h"

/*
 * Estimate how much of dst is new material relative to src.
 *
 * dst is recorded as a modification of src.  The question answered
 * here is whether the two are so different that the change is better
 * recorded as a pair of delete and create.
 *
 * min_score is the minimum amount of new material (on the 0..MAX_SCORE
 * scale) that must exist in dst and not in src for the pair to count
 * as a complete rewrite; a very high value, 99% or so, is recommended.
 *
 * Returns the amount of new material in dst on the 0..MAX_SCORE scale.
 * Returns 0 when the filepair should not be broken: either side is not
 * a regular file, a filespec could not be populated, both files are
 * empty, or the delta could not be computed or interpreted.
 *
 * NOTE(review): base_size * min_score and delta_size * MAX_SCORE are
 * computed in unsigned long; where that type is 32 bits wide these
 * products may wrap for files of modest size -- confirm and widen if
 * such platforms matter.
 */
static int very_different(struct diff_filespec *src,
			  struct diff_filespec *dst,
			  int min_score)
{
	void *delta;
	unsigned long delta_size, base_size;

	if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
		return 0; /* leave symlink rename alone */

	/* presumably a size-only populate (second argument 1) -- verify */
	if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
		return 0; /* error but caught downstream */

	/* First approximation of the edit size: the size difference. */
	delta_size = ((src->size < dst->size) ?
		      (dst->size - src->size) : (src->size - dst->size));

	/* Notice that we use max of src and dst as the base size,
	 * unlike rename similarity detection.  This is so that we do
	 * not mistake a large addition as a complete rewrite.
	 */
	base_size = ((src->size < dst->size) ? dst->size : src->size);

	/*
	 * Both files are empty: there is no content that could have
	 * been rewritten, and the final score computation below would
	 * divide by zero.
	 */
	if (!base_size)
		return 0;

	/*
	 * If file size difference is too big compared to the
	 * base_size, we declare this a complete rewrite.
	 */
	if (base_size * min_score < delta_size * MAX_SCORE)
		return MAX_SCORE;

	if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
		return 0; /* error but caught downstream */

	delta = diff_delta(src->data, src->size,
			   dst->data, dst->size,
			   &delta_size);
	/* NOTE(review): assumes diff_delta() reports failure by
	 * returning NULL -- confirm against delta.h.
	 */
	if (!delta)
		return 0;

	/* A delta that has a lot of literal additions would have
	 * big delta_size no matter what else it does.
	 */
	if (base_size * min_score < delta_size * MAX_SCORE) {
		free(delta); /* do not leak the delta on this early exit */
		return MAX_SCORE;
	}

	/* Estimate the edit size by interpreting delta. */
	delta_size = count_delta(delta, delta_size);
	free(delta);
	if (delta_size == UINT_MAX)
		return 0; /* error in delta computation */

	if (base_size < delta_size)
		return MAX_SCORE;

	return delta_size * MAX_SCORE / base_size;
}

/*
 * Walk the queued diff and replace every in-place modification that
 * very_different() scores at or above min_score with two filepairs:
 * a deletion of the old side and a creation of the new side.  Both
 * halves carry the score and have broken_pair set.  All other
 * filepairs are moved to the new queue untouched.
 *
 * A min_score of 0 selects DEFAULT_BREAK_SCORE.
 */
void diffcore_break(int min_score)
{
	struct diff_queue_struct *inq = &diff_queued_diff;
	struct diff_queue_struct outq;
	int idx;

	if (!min_score)
		min_score = DEFAULT_BREAK_SCORE;

	outq.queue = NULL;
	outq.nr = outq.alloc = 0;

	for (idx = 0; idx < inq->nr; idx++) {
		struct diff_filepair *pair = inq->queue[idx];
		struct diff_filespec *absent;
		struct diff_filepair *half;
		int score;

		/* Only an in-place edit of a non-directory is a
		 * candidate; everything else passes through as is.
		 */
		if (!DIFF_FILE_VALID(pair->one) || !DIFF_FILE_VALID(pair->two) ||
		    S_ISDIR(pair->one->mode) || S_ISDIR(pair->two->mode) ||
		    strcmp(pair->one->path, pair->two->path)) {
			diff_q(&outq, pair);
			continue;
		}

		score = very_different(pair->one, pair->two, min_score);
		if (score < min_score) {
			/* Not different enough; keep it as a modification. */
			diff_q(&outq, pair);
			continue;
		}

		/* First half: the old side is deleted. */
		absent = alloc_filespec(pair->one->path);
		half = diff_queue(&outq, pair->one, absent);
		half->score = score;
		half->broken_pair = 1;

		/* Second half: the new side is created. */
		absent = alloc_filespec(pair->two->path);
		half = diff_queue(&outq, absent, pair->two);
		half->score = score;
		half->broken_pair = 1;

		/* Plain free(), not diff_free_filepair(): the one and
		 * two filespecs now belong to the two new filepairs.
		 */
		free(pair);
	}

	/* Discard the old array of pointers and install the new queue. */
	free(inq->queue);
	*inq = outq;
}

0 comments on commit f345b0a

Please sign in to comment.