X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=diffcore-rename.c;h=39d9126cb9d397df06d41495a3402123fa4ba46b;hb=a348ab702a84983c258e4961a58b1b9502f428c8;hp=81e4d9df3dcb0ba8646fa8f902f6ac918d04bdb7;hpb=a00d7d106aa333c4b4d0095f58e05c0c4621bbc2;p=git.git diff --git a/diffcore-rename.c b/diffcore-rename.c index 81e4d9df..39d9126c 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -47,19 +47,21 @@ static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two, if (first < rename_dst_nr) memmove(rename_dst + first + 1, rename_dst + first, (rename_dst_nr - first - 1) * sizeof(*rename_dst)); - rename_dst[first].two = two; + rename_dst[first].two = alloc_filespec(two->path); + fill_filespec(rename_dst[first].two, two->sha1, two->mode); rename_dst[first].pair = NULL; return &(rename_dst[first]); } +/* Table of rename/copy src files */ static struct diff_rename_src { struct diff_filespec *one; - unsigned src_used : 1; + unsigned src_path_left : 1; } *rename_src; static int rename_src_nr, rename_src_alloc; -static struct diff_rename_src *locate_rename_src(struct diff_filespec *one, - int insert_ok) +static struct diff_rename_src *register_rename_src(struct diff_filespec *one, + int src_path_left) { int first, last; @@ -77,9 +79,7 @@ static struct diff_rename_src *locate_rename_src(struct diff_filespec *one, } first = next+1; } - /* not found */ - if (!insert_ok) - return NULL; + /* insert to make it at "first" */ if (rename_src_alloc <= rename_src_nr) { rename_src_alloc = alloc_nr(rename_src_alloc); @@ -91,7 +91,7 @@ static struct diff_rename_src *locate_rename_src(struct diff_filespec *one, memmove(rename_src + first + 1, rename_src + first, (rename_src_nr - first - 1) * sizeof(*rename_src)); rename_src[first].one = one; - rename_src[first].src_used = 0; + rename_src[first].src_path_left = src_path_left; return &(rename_src[first]); } @@ -100,8 +100,11 @@ static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst) if (src->sha1_valid && dst->sha1_valid && !memcmp(src->sha1, dst->sha1, 20)) return 1; - if (diff_populate_filespec(src) || diff_populate_filespec(dst)) - /* this is an error but will be caught downstream */ + if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1)) + return 0; + if (src->size != dst->size) + return 0; + if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0)) return 0; if (src->size == dst->size && !memcmp(src->data, dst->data, src->size)) @@ -113,7 +116,6 @@ struct diff_score { int src; /* index in rename_src */ int dst; /* index in rename_dst */ int score; - int rank; }; static int estimate_similarity(struct diff_filespec *src, @@ -127,12 +129,15 @@ static int estimate_similarity(struct diff_filespec *src, * dst, and then some edit has been applied to dst. * * Compare them and return how similar they are, representing - * the score as an integer between 0 and 10000, except - * where they match exactly it is considered better than anything - * else. + * the score as an integer between 0 and MAX_SCORE. + * + * When there is an exact match, it is considered a better + * match than anything else; the destination does not even + * call into this function in that case. */ void *delta; - unsigned long delta_size, base_size; + unsigned long delta_size, base_size, src_copied, literal_added; + unsigned long delta_limit; int score; /* We deal only with regular files. Symlink renames are handled @@ -149,6 +154,7 @@ static int estimate_similarity(struct diff_filespec *src, /* We would not consider edits that change the file size so * drastically. delta_size must be smaller than * (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size). + * * Note that base_size == 0 case is handled here already * and the final score computation below would not have a * divide-by-zero issue. @@ -156,9 +162,16 @@ static int estimate_similarity(struct diff_filespec *src, if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) return 0; + if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0)) + return 0; /* error but caught downstream */ + + delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE; delta = diff_delta(src->data, src->size, dst->data, dst->size, - &delta_size); + &delta_size, delta_limit); + if (!delta) + /* If delta_limit is exceeded, we have too much differences */ + return 0; /* A delta that has a lot of literal additions would have * big delta_size no matter what else it does. @@ -167,10 +180,17 @@ static int estimate_similarity(struct diff_filespec *src, return 0; /* Estimate the edit size by interpreting delta. */ - delta_size = count_delta(delta, delta_size); - free(delta); - if (delta_size == UINT_MAX) + if (count_delta(delta, delta_size, &src_copied, &literal_added)) { + free(delta); return 0; + } + free(delta); + + /* Extent of damage */ + if (src->size + literal_added < src_copied) + delta_size = 0; + else + delta_size = (src->size - src_copied) + literal_added; /* * Now we will give some score to it. 100% edit gets 0 points @@ -182,8 +202,7 @@ static int estimate_similarity(struct diff_filespec *src, return score; } -static void record_rename_pair(struct diff_queue_struct *renq, - int dst_index, int src_index, int score) +static void record_rename_pair(int dst_index, int src_index, int score) { struct diff_filespec *one, *two, *src, *dst; struct diff_filepair *dp; @@ -199,16 +218,15 @@ static void record_rename_pair(struct diff_queue_struct *renq, two = alloc_filespec(dst->path); fill_filespec(two, dst->sha1, dst->mode); - dp = diff_queue(renq, one, two); + dp = diff_queue(NULL, one, two); dp->score = score; - - rename_src[src_index].src_used = 1; + dp->source_stays = rename_src[src_index].src_path_left; rename_dst[dst_index].pair = dp; } /* * We sort the rename similarity matrix with the score, in descending - * order (more similar first). + * order (the most similar first). */ static int score_compare(const void *a_, const void *b_) { @@ -216,36 +234,35 @@ static int score_compare(const void *a_, const void *b_) return b->score - a->score; } -int diff_scoreopt_parse(const char *opt) +static int compute_stays(struct diff_queue_struct *q, + struct diff_filespec *one) { - int diglen, num, scale, i; - if (opt[0] != '-' || (opt[1] != 'M' && opt[1] != 'C')) - return -1; /* that is not a -M nor -C option */ - diglen = strspn(opt+2, "0123456789"); - if (diglen == 0 || strlen(opt+2) != diglen) - return 0; /* use default */ - sscanf(opt+2, "%d", &num); - for (i = 0, scale = 1; i < diglen; i++) - scale *= 10; - - /* user says num divided by scale and we say internally that - * is MAX_SCORE * num / scale. - */ - return MAX_SCORE * num / scale; + int i; + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + if (strcmp(one->path, p->two->path)) + continue; + if (DIFF_PAIR_RENAME(p)) { + return 0; /* something else is renamed into this */ + } + } + return 1; } -void diffcore_rename(int detect_rename, int minimum_score) +void diffcore_rename(struct diff_options *options) { + int detect_rename = options->detect_rename; + int minimum_score = options->rename_score; + int rename_limit = options->rename_limit; struct diff_queue_struct *q = &diff_queued_diff; - struct diff_queue_struct renq, outq; + struct diff_queue_struct outq; struct diff_score *mx; - int i, j; + int i, j, rename_count; int num_create, num_src, dst_cnt; if (!minimum_score) - minimum_score = DEFAULT_MINIMUM_SCORE; - renq.queue = NULL; - renq.nr = renq.alloc = 0; + minimum_score = DEFAULT_RENAME_SCORE; + rename_count = 0; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; @@ -254,12 +271,19 @@ void diffcore_rename(int detect_rename, int minimum_score) continue; /* unmerged */ else locate_rename_dst(p->two, 1); - else if (!DIFF_FILE_VALID(p->two)) - locate_rename_src(p->one, 1); - else if (1 < detect_rename) /* find copy, too */ - locate_rename_src(p->one, 1); + else if (!DIFF_FILE_VALID(p->two)) { + /* If the source is a broken "delete", and + * they did not really want to get broken, + * that means the source actually stays. + */ + int stays = (p->broken_pair && !p->score); + register_rename_src(p->one, stays); + } + else if (detect_rename == DIFF_DETECT_COPY) + register_rename_src(p->one, 1); } - if (rename_dst_nr == 0) + if (rename_dst_nr == 0 || rename_src_nr == 0 || + (0 < rename_limit && rename_limit < rename_dst_nr)) goto cleanup; /* nothing to do */ /* We really want to cull the candidates list early @@ -271,19 +295,22 @@ void diffcore_rename(int detect_rename, int minimum_score) struct diff_filespec *one = rename_src[j].one; if (!is_exact_match(one, two)) continue; - record_rename_pair(&renq, i, j, MAX_SCORE); + record_rename_pair(i, j, MAX_SCORE); + rename_count++; break; /* we are done with this entry */ } } - diff_debug_queue("done detecting exact", &renq); /* Have we run out the created file pool? If so we can avoid * doing the delta matrix altogether. */ - if (renq.nr == rename_dst_nr) - goto flush_rest; + if (rename_count == rename_dst_nr) + goto cleanup; - num_create = (rename_dst_nr - renq.nr); + if (minimum_score == MAX_SCORE) + goto cleanup; + + num_create = (rename_dst_nr - rename_count); num_src = rename_src_nr; mx = xmalloc(sizeof(*mx) * num_create * num_src); for (dst_cnt = i = 0; i < rename_dst_nr; i++) { @@ -308,73 +335,124 @@ void diffcore_rename(int detect_rename, int minimum_score) if (dst->pair) continue; /* already done, either exact or fuzzy. */ if (mx[i].score < minimum_score) - break; /* there is not any more diffs applicable. */ - record_rename_pair(&renq, mx[i].dst, mx[i].src, mx[i].score); + break; /* there is no more usable pair. */ + record_rename_pair(mx[i].dst, mx[i].src, mx[i].score); + rename_count++; } free(mx); - diff_debug_queue("done detecting fuzzy", &renq); - flush_rest: + cleanup: /* At this point, we have found some renames and copies and they - * are kept in renq. The original list is still in *q. - * - * Scan the original list and move them into the outq; we will sort - * outq and swap it into the queue supplied to pass that to - * downstream, so we assign the sort keys in this loop. - * - * See comments at the top of record_rename_pair for numbers used - * to assign rename_rank. + * are recorded in rename_dst. The original list is still in *q. */ outq.queue = NULL; outq.nr = outq.alloc = 0; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; - struct diff_rename_src *src = locate_rename_src(p->one, 0); - struct diff_rename_dst *dst = locate_rename_dst(p->two, 0); struct diff_filepair *pair_to_free = NULL; - if (dst) { - /* creation */ - if (dst->pair) { - /* renq has rename/copy already to produce - * this file, so we do not emit the creation - * record in the output. - */ + if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) { + /* + * Creation + * + * We would output this create record if it has + * not been turned into a rename/copy already. + */ + struct diff_rename_dst *dst = + locate_rename_dst(p->two, 0); + if (dst && dst->pair) { diff_q(&outq, dst->pair); pair_to_free = p; } else - /* no matching rename/copy source, so record - * this as a creation. + /* no matching rename/copy source, so + * record this as a creation. */ diff_q(&outq, p); } - else if (!diff_unmodified_pair(p)) - /* all the other cases need to be recorded as is */ - diff_q(&outq, p); - else { - /* unmodified pair needs to be recorded only if - * it is used as the source of rename/copy + else if (DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)) { + /* + * Deletion + * + * We would output this delete record if: + * + * (1) this is a broken delete and the counterpart + * broken create remains in the output; or + * (2) this is not a broken delete, and rename_dst + * does not have a rename/copy to move p->one->path + * out of existence. + * + * Otherwise, the counterpart broken create + * has been turned into a rename-edit; or + * delete did not have a matching create to + * begin with. */ - if (src && src->src_used) - diff_q(&outq, p); + if (DIFF_PAIR_BROKEN(p)) { + /* broken delete */ + struct diff_rename_dst *dst = + locate_rename_dst(p->one, 0); + if (dst && dst->pair) + /* counterpart is now rename/copy */ + pair_to_free = p; + } + else { + for (j = 0; j < rename_dst_nr; j++) { + if (!rename_dst[j].pair) + continue; + if (strcmp(rename_dst[j].pair-> + one->path, + p->one->path)) + continue; + break; + } + if (j < rename_dst_nr) + /* this path remains */ + pair_to_free = p; + } + + if (pair_to_free) + ; else - pair_to_free = p; - } - if (pair_to_free) { - diff_free_filespec_data(pair_to_free->one); - diff_free_filespec_data(pair_to_free->two); - free(pair_to_free); + diff_q(&outq, p); } + else if (!diff_unmodified_pair(p)) + /* all the usual ones need to be kept */ + diff_q(&outq, p); + else + /* no need to keep unmodified pairs */ + pair_to_free = p; + + if (pair_to_free) + diff_free_filepair(pair_to_free); } diff_debug_queue("done copying original", &outq); - free(renq.queue); free(q->queue); *q = outq; diff_debug_queue("done collapsing", q); - cleanup: + /* We need to see which rename source really stays here; + * earlier we only checked if the path is left in the result, + * but even if a path remains in the result, if that is coming + * from copying something else on top of it, then the original + * source is lost and does not stay. + */ + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + if (DIFF_PAIR_RENAME(p) && p->source_stays) { + /* If one appears as the target of a rename-copy, + * then mark p->source_stays = 0; otherwise + * leave it as is. + */ + p->source_stays = compute_stays(q, p->one); + } + } + + for (i = 0; i < rename_dst_nr; i++) { + diff_free_filespec_data(rename_dst[i].two); + free(rename_dst[i].two); + } + free(rename_dst); rename_dst = NULL; rename_dst_nr = rename_dst_alloc = 0;