X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=pack-objects.c;h=179560f2bd67e08d25bef0199c41ee0ee70793f9;hb=ae448e3854d8b6e7e37aa88fa3917f5dd97f3210;hp=9346392150633a8f240e4f51086a069c091631fe;hpb=ce18135d862b5dbc731d203b27c279529e58b54b;p=git.git diff --git a/pack-objects.c b/pack-objects.c index 93463921..179560f2 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -156,7 +156,7 @@ static void prepare_pack_revindex(struct pack_revindex *rix) rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); for (i = 0; i < num_ent; i++) { - long hl = *((long *)(index + 24 * i)); + unsigned int hl = *((unsigned int *)(index + 24 * i)); rix->revindex[i] = ntohl(hl); } /* This knows the pack format -- the 20-byte trailer @@ -453,7 +453,7 @@ static void rehash_objects(void) if (object_ix_hashsz < 1024) object_ix_hashsz = 1024; object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz); - object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz); + memset(object_ix, 0, sizeof(int) * object_ix_hashsz); for (i = 0, oe = objects; i < nr_objects; i++, oe++) { int ix = locate_object_entry_hash(oe->sha1); if (0 <= ix) @@ -463,62 +463,20 @@ static void rehash_objects(void) } } -struct name_path { - struct name_path *up; - const char *elem; - int len; -}; - -#define DIRBITS 12 - -static unsigned name_hash(struct name_path *path, const char *name) +static unsigned name_hash(const char *name) { - struct name_path *p = path; - const char *n = name + strlen(name); - unsigned hash = 0, name_hash = 0, name_done = 0; - - if (n != name && n[-1] == '\n') - n--; - while (name <= --n) { - unsigned char c = *n; - if (c == '/' && !name_done) { - name_hash = hash; - name_done = 1; - hash = 0; - } - hash = hash * 11 + c; - } - if (!name_done) { - name_hash = hash; - hash = 0; - } - for (p = path; p; p = p->up) { - hash = hash * 11 + '/'; - n = p->elem + p->len; - while (p->elem <= --n) { - unsigned char c = *n; - hash = hash * 11 + c; - } - } + unsigned char c; + unsigned hash = 0; + /* - * Make sure "Makefile" and "t/Makefile" are hashed separately - * but close enough. + * This effectively just creates a sortable number from the + * last sixteen non-whitespace characters. Last characters + * count "most", so things that end in ".c" sort together. */ - hash = (name_hash<up) { - fputc('/', stderr); - n = p->elem + p->len; - while (p->elem <= --n) - fputc(*n, stderr); - } - fprintf(stderr, "\t%08x\n", hash); + while ((c = *name++) != 0) { + if (isspace(c)) + continue; + hash = (hash >> 2) + (c << 24); } return hash; } @@ -587,56 +545,245 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud return status; } -static void add_pbase_tree(struct tree_desc *tree, struct name_path *up) +struct pbase_tree_cache { + unsigned char sha1[20]; + int ref; + int temporary; + void *tree_data; + unsigned long tree_size; +}; + +static struct pbase_tree_cache *(pbase_tree_cache[256]); +static int pbase_tree_cache_ix(const unsigned char *sha1) { - while (tree->size) { - const unsigned char *sha1; - const char *name; - unsigned mode, hash; + return sha1[0] % ARRAY_SIZE(pbase_tree_cache); +} +static int pbase_tree_cache_ix_incr(int ix) +{ + return (ix+1) % ARRAY_SIZE(pbase_tree_cache); +} + +static struct pbase_tree { + struct pbase_tree *next; + /* This is a phony "cache" entry; we are not + * going to evict it nor find it through _get() + * mechanism -- this is for the toplevel node that + * would almost always change with any commit. + */ + struct pbase_tree_cache pcache; +} *pbase_tree; + +static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1) +{ + struct pbase_tree_cache *ent, *nent; + void *data; + unsigned long size; + char type[20]; + int neigh; + int my_ix = pbase_tree_cache_ix(sha1); + int available_ix = -1; + + /* pbase-tree-cache acts as a limited hashtable. + * your object will be found at your index or within a few + * slots after that slot if it is cached. + */ + for (neigh = 0; neigh < 8; neigh++) { + ent = pbase_tree_cache[my_ix]; + if (ent && !memcmp(ent->sha1, sha1, 20)) { + ent->ref++; + return ent; + } + else if (((available_ix < 0) && (!ent || !ent->ref)) || + ((0 <= available_ix) && + (!ent && pbase_tree_cache[available_ix]))) + available_ix = my_ix; + if (!ent) + break; + my_ix = pbase_tree_cache_ix_incr(my_ix); + } + + /* Did not find one. Either we got a bogus request or + * we need to read and perhaps cache. + */ + data = read_sha1_file(sha1, type, &size); + if (!data) + return NULL; + if (strcmp(type, tree_type)) { + free(data); + return NULL; + } + + /* We need to either cache or return a throwaway copy */ + + if (available_ix < 0) + ent = NULL; + else { + ent = pbase_tree_cache[available_ix]; + my_ix = available_ix; + } + + if (!ent) { + nent = xmalloc(sizeof(*nent)); + nent->temporary = (available_ix < 0); + } + else { + /* evict and reuse */ + free(ent->tree_data); + nent = ent; + } + memcpy(nent->sha1, sha1, 20); + nent->tree_data = data; + nent->tree_size = size; + nent->ref = 1; + if (!nent->temporary) + pbase_tree_cache[my_ix] = nent; + return nent; +} + +static void pbase_tree_put(struct pbase_tree_cache *cache) +{ + if (!cache->temporary) { + cache->ref--; + return; + } + free(cache->tree_data); + free(cache); +} + +static int name_cmp_len(const char *name) +{ + int i; + for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++) + ; + return i; +} + +static void add_pbase_object(struct tree_desc *tree, + const char *name, + int cmplen, + const char *fullname) +{ + struct name_entry entry; + + while (tree_entry(tree,&entry)) { unsigned long size; char type[20]; - sha1 = tree_entry_extract(tree, &name, &mode); - update_tree_entry(tree); - if (!has_sha1_file(sha1)) - continue; - if (sha1_object_info(sha1, type, &size)) + if (entry.pathlen != cmplen || + memcmp(entry.path, name, cmplen) || + !has_sha1_file(entry.sha1) || + sha1_object_info(entry.sha1, type, &size)) continue; - - hash = name_hash(up, name); - if (!add_object_entry(sha1, hash, 1)) - continue; - + if (name[cmplen] != '/') { + unsigned hash = name_hash(fullname); + add_object_entry(entry.sha1, hash, 1); + return; + } if (!strcmp(type, tree_type)) { struct tree_desc sub; - void *elem; - struct name_path me; - - elem = read_sha1_file(sha1, type, &sub.size); - sub.buf = elem; - if (sub.buf) { - me.up = up; - me.elem = name; - me.len = strlen(name); - add_pbase_tree(&sub, &me); - free(elem); - } + struct pbase_tree_cache *tree; + const char *down = name+cmplen+1; + int downlen = name_cmp_len(down); + + tree = pbase_tree_get(entry.sha1); + if (!tree) + return; + sub.buf = tree->tree_data; + sub.size = tree->tree_size; + + add_pbase_object(&sub, down, downlen, fullname); + pbase_tree_put(tree); + } + } +} + +static unsigned *done_pbase_paths; +static int done_pbase_paths_num; +static int done_pbase_paths_alloc; +static int done_pbase_path_pos(unsigned hash) +{ + int lo = 0; + int hi = done_pbase_paths_num; + while (lo < hi) { + int mi = (hi + lo) / 2; + if (done_pbase_paths[mi] == hash) + return mi; + if (done_pbase_paths[mi] < hash) + hi = mi; + else + lo = mi + 1; + } + return -lo-1; +} + +static int check_pbase_path(unsigned hash) +{ + int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash); + if (0 <= pos) + return 1; + pos = -pos - 1; + if (done_pbase_paths_alloc <= done_pbase_paths_num) { + done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc); + done_pbase_paths = xrealloc(done_pbase_paths, + done_pbase_paths_alloc * + sizeof(unsigned)); + } + done_pbase_paths_num++; + if (pos < done_pbase_paths_num) + memmove(done_pbase_paths + pos + 1, + done_pbase_paths + pos, + (done_pbase_paths_num - pos - 1) * sizeof(unsigned)); + done_pbase_paths[pos] = hash; + return 0; +} + +static void add_preferred_base_object(char *name, unsigned hash) +{ + struct pbase_tree *it; + int cmplen = name_cmp_len(name); + + if (check_pbase_path(hash)) + return; + + for (it = pbase_tree; it; it = it->next) { + if (cmplen == 0) { + hash = name_hash(""); + add_object_entry(it->pcache.sha1, hash, 1); + } + else { + struct tree_desc tree; + tree.buf = it->pcache.tree_data; + tree.size = it->pcache.tree_size; + add_pbase_object(&tree, name, cmplen, name); } } } static void add_preferred_base(unsigned char *sha1) { - struct tree_desc tree; - void *elem; + struct pbase_tree *it; + void *data; + unsigned long size; + unsigned char tree_sha1[20]; - elem = read_object_with_reference(sha1, tree_type, &tree.size, NULL); - tree.buf = elem; - if (!tree.buf) + data = read_object_with_reference(sha1, tree_type, &size, tree_sha1); + if (!data) return; - if (add_object_entry(sha1, name_hash(NULL, ""), 1)) - add_pbase_tree(&tree, NULL); - free(elem); + + for (it = pbase_tree; it; it = it->next) { + if (!memcmp(it->pcache.sha1, tree_sha1, 20)) { + free(data); + return; + } + } + + it = xcalloc(1, sizeof(*it)); + it->next = pbase_tree; + pbase_tree = it; + + memcpy(it->pcache.sha1, tree_sha1, 20); + it->pcache.tree_data = data; + it->pcache.tree_size = size; } static void check_object(struct object_entry *entry) @@ -811,6 +958,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr struct unpacked { struct object_entry *entry; void *data; + struct delta_index *index; }; /* @@ -821,64 +969,59 @@ struct unpacked { * more importantly, the bigger file is likely the more recent * one. */ -static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) +static int try_delta(struct unpacked *trg, struct unpacked *src, + struct delta_index *src_index, unsigned max_depth) { - struct object_entry *cur_entry = cur->entry; - struct object_entry *old_entry = old->entry; - unsigned long size, oldsize, delta_size, sizediff; - long max_size; + struct object_entry *trg_entry = trg->entry; + struct object_entry *src_entry = src->entry; + unsigned long size, src_size, delta_size, sizediff, max_size; void *delta_buf; /* Don't bother doing diffs between different types */ - if (cur_entry->type != old_entry->type) + if (trg_entry->type != src_entry->type) return -1; /* We do not compute delta to *create* objects we are not * going to pack. */ - if (cur_entry->preferred_base) + if (trg_entry->preferred_base) return -1; - /* If the current object is at pack edge, take the depth the + /* + * If the current object is at pack edge, take the depth the * objects that depend on the current object into account -- * otherwise they would become too deep. */ - if (cur_entry->delta_child) { - if (max_depth <= cur_entry->delta_limit) + if (trg_entry->delta_child) { + if (max_depth <= trg_entry->delta_limit) return 0; - max_depth -= cur_entry->delta_limit; + max_depth -= trg_entry->delta_limit; } - - size = cur_entry->size; - oldsize = old_entry->size; - sizediff = oldsize > size ? oldsize - size : size - oldsize; - - if (size < 50) - return -1; - if (old_entry->depth >= max_depth) + if (src_entry->depth >= max_depth) return 0; - /* - * NOTE! - * - * We always delta from the bigger to the smaller, since that's - * more space-efficient (deletes don't have to say _what_ they - * delete). - */ - max_size = size / 2 - 20; - if (cur_entry->delta) - max_size = cur_entry->delta_size-1; + /* Now some size filtering heuristics. */ + size = trg_entry->size; + max_size = size/2 - 20; + max_size = max_size * (max_depth - src_entry->depth) / max_depth; + if (max_size == 0) + return 0; + if (trg_entry->delta && trg_entry->delta_size <= max_size) + max_size = trg_entry->delta_size-1; + src_size = src_entry->size; + sizediff = src_size < size ? size - src_size : 0; if (sizediff >= max_size) - return -1; - delta_buf = diff_delta(old->data, oldsize, - cur->data, size, &delta_size, max_size); + return 0; + + delta_buf = create_delta(src_index, trg->data, size, &delta_size, max_size); if (!delta_buf) return 0; - cur_entry->delta = old_entry; - cur_entry->delta_size = delta_size; - cur_entry->depth = old_entry->depth + 1; + + trg_entry->delta = src_entry; + trg_entry->delta_size = delta_size; + trg_entry->depth = src_entry->depth + 1; free(delta_buf); - return 0; + return 1; } static void progress_interval(int signum) @@ -926,11 +1069,16 @@ static void find_deltas(struct object_entry **list, int window, int depth) */ continue; + if (entry->size < 50) + continue; + free_delta_index(n->index); + n->index = NULL; free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); if (size != entry->size) - die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); + die("object %s inconsistent object length (%lu vs %lu)", + sha1_to_hex(entry->sha1), size, entry->size); j = window; while (--j > 0) { @@ -941,18 +1089,20 @@ static void find_deltas(struct object_entry **list, int window, int depth) m = array + other_idx; if (!m->entry) break; - if (try_delta(n, m, depth) < 0) + if (try_delta(n, m, m->index, depth) < 0) break; } -#if 0 /* if we made n a delta, and if n is already at max * depth, leaving it in the window is pointless. we * should evict it first. - * ... in theory only; somehow this makes things worse. */ if (entry->delta && depth <= entry->depth) continue; -#endif + + n->index = create_delta_index(n->data, size); + if (!n->index) + die("out of memory"); + idx++; if (idx >= window) idx = 0; @@ -961,8 +1111,10 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (progress) fputc('\n', stderr); - for (i = 0; i < window; ++i) + for (i = 0; i < window; ++i) { + free_delta_index(array[i].index); free(array[i].data); + } free(array); } @@ -1048,13 +1200,15 @@ static void setup_progress_signal(void) int main(int argc, char **argv) { SHA_CTX ctx; - char line[PATH_MAX + 20]; + char line[40 + 1 + PATH_MAX + 2]; int window = 10, depth = 10, pack_to_stdout = 0; struct object_entry **list; + int num_preferred_base = 0; int i; setup_git_directory(); + progress = isatty(2); for (i = 1; i < argc; i++) { const char *arg = argv[i]; @@ -1085,6 +1239,10 @@ int main(int argc, char **argv) usage(pack_usage); continue; } + if (!strcmp("--progress", arg)) { + progress = 1; + continue; + } if (!strcmp("-q", arg)) { progress = 0; continue; @@ -1116,6 +1274,7 @@ int main(int argc, char **argv) for (;;) { unsigned char sha1[20]; + unsigned hash; if (!fgets(line, sizeof(line), stdin)) { if (feof(stdin)) @@ -1132,12 +1291,15 @@ int main(int argc, char **argv) if (get_sha1_hex(line+1, sha1)) die("expected edge sha1, got garbage:\n %s", line+1); - add_preferred_base(sha1); + if (num_preferred_base++ < window) + add_preferred_base(sha1); continue; } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - add_object_entry(sha1, name_hash(NULL, line+41), 0); + hash = name_hash(line+41); + add_preferred_base_object(line+41, hash); + add_object_entry(sha1, hash, 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects);