From: Junio C Hamano <junkio@cox.net>
Date: Sat, 4 Mar 2006 21:53:27 +0000 (-0800)
Subject: Merge branch 'master' into next
X-Git-Tag: v1.3.0-rc1~51
X-Git-Url: https://git.octo.it/?a=commitdiff_plain;h=b463ceb885290a6656239ff3f1e73c5774dc8ee9;hp=2eb063c9331d73548e0fe68802e73ccd68a41046;p=git.git

Merge branch 'master' into next

* master:
  AsciiDoc fix for tutorial
  git.el: Added customize support for all parameters.
  git.el: Added support for Signed-off-by.
  git.el: Automatically update .gitignore status.
  git.el: Set default directory before running the status mode setup hooks.
  git.el: Portability fixes for XEmacs and Emacs CVS.
  contrib/emacs: Add an Emacs VC backend.
---

diff --git a/Documentation/git-checkout-index.txt b/Documentation/git-checkout-index.txt
index 2a1e526c..b0b65889 100644
--- a/Documentation/git-checkout-index.txt
+++ b/Documentation/git-checkout-index.txt
@@ -10,7 +10,9 @@ SYNOPSIS
 --------
 [verse]
 'git-checkout-index' [-u] [-q] [-a] [-f] [-n] [--prefix=<string>]
-		   [--stage=<number>] [--] <file>...
+		   [--stage=<number>]
+		   [-z] [--stdin]
+		   [--] [<file>]\*
 
 DESCRIPTION
 -----------
@@ -45,6 +47,15 @@ OPTIONS
 	Instead of checking out unmerged entries, copy out the
 	files from named stage.  <number> must be between 1 and 3.
 
+--stdin::
+	Instead of taking list of paths from the command line,
+	read list of paths from the standard input.  Paths are
+	separated by LF (i.e. one path per line) by default.
+
+-z::
+	Only meaningful with `--stdin`; paths are separated with
+	NUL character instead of LF.
+
 --::
 	Do not interpret any more arguments as options.
 
@@ -64,7 +75,12 @@ $ find . -name '*.h' -print0 | xargs -0 git-checkout-index -f --
 
 which will force all existing `*.h` files to be replaced with their
 cached copies. If an empty command line implied "all", then this would
-force-refresh everything in the index, which was not the point.
+force-refresh everything in the index, which was not the point.  But
+since git-checkout-index accepts --stdin it would be faster to use:
+
+----------------
+$ find . -name '*.h' -print0 | git-checkout-index -f -z --stdin
+----------------
 
 The `--` is just a good idea when you know the rest will be filenames;
 it will prevent problems with a filename of, for example,  `-a`.
diff --git a/Makefile b/Makefile
index b6d8804d..ab2890d6 100644
--- a/Makefile
+++ b/Makefile
@@ -190,7 +190,7 @@ PYMODULES = \
 LIB_FILE=libgit.a
 
 LIB_H = \
-	blob.h cache.h commit.h count-delta.h csum-file.h delta.h \
+	blob.h cache.h commit.h csum-file.h delta.h \
 	diff.h object.h pack.h pkt-line.h quote.h refs.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h
 
@@ -200,7 +200,7 @@ DIFF_OBJS = \
 	diffcore-delta.o
 
 LIB_OBJS = \
-	blob.o commit.o connect.o count-delta.o csum-file.o \
+	blob.o commit.o connect.o csum-file.o \
 	date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \
 	object.o pack-check.o patch-delta.o path.o pkt-line.o \
 	quote.o read-cache.o refs.o run-command.o \
diff --git a/checkout-index.c b/checkout-index.c
index 957b4a86..f54c6064 100644
--- a/checkout-index.c
+++ b/checkout-index.c
@@ -22,6 +22,10 @@
  *
  *	find . -name '*.h' -print0 | xargs -0 git-checkout-index -f --
  *
+ * or:
+ *
+ *	find . -name '*.h' -print0 | git-checkout-index -f -z --stdin
+ *
  * which will force all existing *.h files to be replaced with
  * their cached copies. If an empty command line implied "all",
  * then this would force-refresh everything in the cache, which
@@ -33,6 +37,8 @@
  * but get used to it in scripting!).
  */
 #include "cache.h"
+#include "strbuf.h"
+#include "quote.h"
 
 static const char *prefix;
 static int prefix_length;
@@ -114,6 +120,8 @@ int main(int argc, char **argv)
 	int i;
 	int newfd = -1;
 	int all = 0;
+	int read_from_stdin = 0;
+	int line_termination = '\n';
 
 	prefix = setup_git_directory();
 	git_config(git_default_config);
@@ -156,6 +164,17 @@ int main(int argc, char **argv)
 				die("cannot open index.lock file.");
 			continue;
 		}
+		if (!strcmp(arg, "-z")) {
+			line_termination = 0;
+			continue;
+		}
+		if (!strcmp(arg, "--stdin")) {
+			if (i != argc - 1)
+				die("--stdin must be at the end");
+			read_from_stdin = 1;
+			i++; /* do not consider arg as a file name */
+			break;
+		}
 		if (!strncmp(arg, "--prefix=", 9)) {
 			state.base_dir = arg+9;
 			state.base_dir_len = strlen(state.base_dir);
@@ -191,9 +210,31 @@ int main(int argc, char **argv)
 
 		if (all)
 			die("git-checkout-index: don't mix '--all' and explicit filenames");
+		if (read_from_stdin)
+			die("git-checkout-index: don't mix '--stdin' and explicit filenames");
 		checkout_file(prefix_path(prefix, prefix_length, arg));
 	}
 
+	if (read_from_stdin) {
+		struct strbuf buf;
+		if (all)
+			die("git-checkout-index: don't mix '--all' and '--stdin'");
+		strbuf_init(&buf);
+		while (1) {
+			char *path_name;
+			read_line(&buf, stdin, line_termination);
+			if (buf.eof)
+				break;
+			if (line_termination && buf.buf[0] == '"')
+				path_name = unquote_c_style(buf.buf, NULL);
+			else
+				path_name = buf.buf;
+			checkout_file(prefix_path(prefix, prefix_length, path_name));
+			if (path_name != buf.buf)
+				free(path_name);
+		}
+	}
+
 	if (all)
 		checkout_all();
 
diff --git a/count-delta.c b/count-delta.c
deleted file mode 100644
index 058a2aad..00000000
--- a/count-delta.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2005 Junio C Hamano
- * The delta-parsing part is almost straight copy of patch-delta.c
- * which is (C) 2005 Nicolas Pitre <nico@cam.org>.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include "delta.h"
-#include "count-delta.h"
-
-/*
- * NOTE.  We do not _interpret_ delta fully.  As an approximation, we
- * just count the number of bytes that are copied from the source, and
- * the number of literal data bytes that are inserted.
- *
- * Number of bytes that are _not_ copied from the source is deletion,
- * and number of inserted literal bytes are addition, so sum of them
- * is the extent of damage.
- */
-int count_delta(void *delta_buf, unsigned long delta_size,
-		unsigned long *src_copied, unsigned long *literal_added)
-{
-	unsigned long copied_from_source, added_literal;
-	const unsigned char *data, *top;
-	unsigned char cmd;
-	unsigned long src_size, dst_size, out;
-
-	if (delta_size < DELTA_SIZE_MIN)
-		return -1;
-
-	data = delta_buf;
-	top = delta_buf + delta_size;
-
-	src_size = get_delta_hdr_size(&data);
-	dst_size = get_delta_hdr_size(&data);
-
-	added_literal = copied_from_source = out = 0;
-	while (data < top) {
-		cmd = *data++;
-		if (cmd & 0x80) {
-			unsigned long cp_off = 0, cp_size = 0;
-			if (cmd & 0x01) cp_off = *data++;
-			if (cmd & 0x02) cp_off |= (*data++ << 8);
-			if (cmd & 0x04) cp_off |= (*data++ << 16);
-			if (cmd & 0x08) cp_off |= (*data++ << 24);
-			if (cmd & 0x10) cp_size = *data++;
-			if (cmd & 0x20) cp_size |= (*data++ << 8);
-			if (cmd & 0x40) cp_size |= (*data++ << 16);
-			if (cp_size == 0) cp_size = 0x10000;
-
-			copied_from_source += cp_size;
-			out += cp_size;
-		} else {
-			/* write literal into dst */
-			added_literal += cmd;
-			out += cmd;
-			data += cmd;
-		}
-	}
-
-	/* sanity check */
-	if (data != top || out != dst_size)
-		return -1;
-
-	/* delete size is what was _not_ copied from source.
-	 * edit size is that and literal additions.
-	 */
-	*src_copied = copied_from_source;
-	*literal_added = added_literal;
-	return 0;
-}
diff --git a/count-delta.h b/count-delta.h
deleted file mode 100644
index 73596298..00000000
--- a/count-delta.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Copyright (C) 2005 Junio C Hamano
- */
-#ifndef COUNT_DELTA_H
-#define COUNT_DELTA_H
-
-int count_delta(void *, unsigned long,
-		unsigned long *src_copied, unsigned long *literal_added);
-
-#endif
diff --git a/diffcore-break.c b/diffcore-break.c
index 0fc2b860..71ad58a2 100644
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src,
 	 * The value we return is 1 if we want the pair to be broken,
 	 * or 0 if we do not.
 	 */
-	unsigned long delta_size, base_size, src_copied, literal_added;
-	int to_break = 0;
+	unsigned long delta_size, base_size, src_copied, literal_added,
+		src_removed;
 
 	*merge_score_p = 0; /* assume no deletion --- "do not break"
 			     * is the default.
@@ -72,33 +72,40 @@ static int should_break(struct diff_filespec *src,
 				   &src_copied, &literal_added))
 		return 0;
 
+	/* sanity */
+	if (src->size < src_copied)
+		src_copied = src->size;
+	if (dst->size < literal_added + src_copied) {
+		if (src_copied < dst->size)
+			literal_added = dst->size - src_copied;
+		else
+			literal_added = 0;
+	}
+	src_removed = src->size - src_copied;
+
 	/* Compute merge-score, which is "how much is removed
 	 * from the source material".  The clean-up stage will
 	 * merge the surviving pair together if the score is
 	 * less than the minimum, after rename/copy runs.
 	 */
-	if (src->size <= src_copied)
-		; /* all copied, nothing removed */
-	else {
-		delta_size = src->size - src_copied;
-		*merge_score_p = delta_size * MAX_SCORE / src->size;
-	}
-	
+	*merge_score_p = src_removed * MAX_SCORE / src->size;
+
 	/* Extent of damage, which counts both inserts and
 	 * deletes.
 	 */
-	if (src->size + literal_added <= src_copied)
-		delta_size = 0; /* avoid wrapping around */
-	else
-		delta_size = (src->size - src_copied) + literal_added;
-	
-	/* We break if the edit exceeds the minimum.
-	 * i.e. (break_score / MAX_SCORE < delta_size / base_size)
+	delta_size = src_removed + literal_added;
+	if (delta_size * MAX_SCORE / base_size < break_score)
+		return 0;
+
+	/* If you removed a lot without adding new material, that is
+	 * not really a rewrite.
 	 */
-	if (break_score * base_size < delta_size * MAX_SCORE)
-		to_break = 1;
+	if ((src->size * break_score < src_removed * MAX_SCORE) &&
+	    (literal_added * 20 < src_removed) &&
+	    (literal_added * 20 < src_copied))
+		return 0;
 
-	return to_break;
+	return 1;
 }
 
 void diffcore_break(int break_score)
diff --git a/diffcore-delta.c b/diffcore-delta.c
index 1e6a6911..70bacff8 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -1,32 +1,53 @@
 #include "cache.h"
 #include "diff.h"
 #include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
-
-static int diffcore_count_changes_1(void *src, unsigned long src_size,
-				    void *dst, unsigned long dst_size,
-				    unsigned long delta_limit,
-				    unsigned long *src_copied,
-				    unsigned long *literal_added)
+
+/*
+ * Idea here is very simple.
+ *
+ * We have total of (sz-N+1) N-byte overlapping sequences in buf whose
+ * size is sz.  If the same N-byte sequence appears in both source and
+ * destination, we say the byte that starts that sequence is shared
+ * between them (i.e. copied from source to destination).
+ *
+ * For each possible N-byte sequence, if the source buffer has more
+ * instances of it than the destination buffer, that means the
+ * difference are the number of bytes not copied from source to
+ * destination.  If the counts are the same, everything was copied
+ * from source to destination.  If the destination has more,
+ * everything was copied, and destination added more.
+ *
+ * We are doing an approximation so we do not really have to waste
+ * memory by actually storing the sequence.  We just hash them into
+ * somewhere around 2^16 hashbuckets and count the occurrences.
+ *
+ * The length of the sequence is arbitrarily set to 8 for now.
+ */
+
+#define HASHBASE 65537 /* next_prime(2^16) */
+
+static void hash_chars(unsigned char *buf, unsigned long sz, int *count)
 {
-	void *delta;
-	unsigned long delta_size;
-
-	delta = diff_delta(src, src_size,
-			   dst, dst_size,
-			   &delta_size, delta_limit);
-	if (!delta)
-		/* If delta_limit is exceeded, we have too much differences */
-		return -1;
+	unsigned int accum1, accum2, i;
 
-	/* Estimate the edit size by interpreting delta. */
-	if (count_delta(delta, delta_size, src_copied, literal_added)) {
-		free(delta);
-		return -1;
+	/* an 8-byte shift register made of accum1 and accum2.  New
+	 * bytes come at LSB of accum2, and shifted up to accum1
+	 */
+	for (i = accum1 = accum2 = 0; i < 7; i++, sz--) {
+		accum1 = (accum1 << 8) | (accum2 >> 24);
+		accum2 = (accum2 << 8) | *buf++;
+	}
+	while (sz) {
+		accum1 = (accum1 << 8) | (accum2 >> 24);
+		accum2 = (accum2 << 8) | *buf++;
+		/* We want something that hashes permuted byte
+		 * sequences nicely; simpler hash like (accum1 ^
+		 * accum2) does not perform as well.
+		 */
+		i = (accum1 + accum2 * 0x61) % HASHBASE;
+		count[i]++;
+		sz--;
 	}
-	free(delta);
-	return 0;
 }
 
 int diffcore_count_changes(void *src, unsigned long src_size,
@@ -35,9 +56,28 @@ int diffcore_count_changes(void *src, unsigned long src_size,
 			   unsigned long *src_copied,
 			   unsigned long *literal_added)
 {
-	return diffcore_count_changes_1(src, src_size,
-					dst, dst_size,
-					delta_limit,
-					src_copied,
-					literal_added);
+	int *src_count, *dst_count, i;
+	unsigned long sc, la;
+
+	if (src_size < 8 || dst_size < 8)
+		return -1;
+
+	src_count = xcalloc(HASHBASE * 2, sizeof(int));
+	dst_count = src_count + HASHBASE;
+	hash_chars(src, src_size, src_count);
+	hash_chars(dst, dst_size, dst_count);
+
+	sc = la = 0;
+	for (i = 0; i < HASHBASE; i++) {
+		if (src_count[i] < dst_count[i]) {
+			la += dst_count[i] - src_count[i];
+			sc += src_count[i];
+		}
+		else /* i.e. if (dst_count[i] <= src_count[i]) */
+			sc += dst_count[i];
+	}
+	*src_copied = sc;
+	*literal_added = la;
+	free(src_count);
+	return 0;
 }
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 55cf1c37..625b589f 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -170,19 +170,15 @@ static int estimate_similarity(struct diff_filespec *src,
 				   &src_copied, &literal_added))
 		return 0;
 
-	/* Extent of damage */
-	if (src->size + literal_added < src_copied)
-		delta_size = 0;
-	else
-		delta_size = (src->size - src_copied) + literal_added;
-
-	/*
-	 * Now we will give some score to it.  100% edit gets 0 points
-	 * and 0% edit gets MAX_SCORE points.
+	/* How similar are they?
+	 * what percentage of material in dst are from source?
 	 */
-	score = MAX_SCORE - (MAX_SCORE * delta_size / base_size); 
-	if (score < 0) return 0;
-	if (MAX_SCORE < score) return MAX_SCORE;
+	if (dst->size < src_copied)
+		score = MAX_SCORE;
+	else if (!dst->size)
+		score = 0; /* should not happen */
+	else
+		score = src_copied * MAX_SCORE / dst->size;
 	return score;
 }
 
diff --git a/diffcore.h b/diffcore.h
index dba4f176..d31b3b47 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -17,8 +17,8 @@
  */
 #define MAX_SCORE 60000.0
 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
-#define DEFAULT_BREAK_SCORE  30000 /* minimum for break to happen (50%)*/
-#define DEFAULT_MERGE_SCORE  48000 /* maximum for break-merge to happen (80%)*/
+#define DEFAULT_BREAK_SCORE  30000 /* minimum for break to happen (50%) */
+#define DEFAULT_MERGE_SCORE  36000 /* maximum for break-merge to happen 60%) */
 
 #define MINIMUM_BREAK_SIZE     400 /* do not break a file smaller than this */