X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=pack-objects.c;h=53caed42dd2778e96cb39d8ba8b9cb175bdf1303;hb=e686eb9870d6b382f0760e3e859e93c8c2dfb31b;hp=136a7f5aad61e8a90866ee7d0b4bc22e387f0396;hpb=70e1cca3886b2efac5bcd42065bde45395dd92c7;p=git.git diff --git a/pack-objects.c b/pack-objects.c index 136a7f5a..53caed42 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1,9 +1,13 @@ #include "cache.h" #include "object.h" +#include "blob.h" +#include "commit.h" +#include "tag.h" +#include "tree.h" #include "delta.h" #include "pack.h" #include "csum-file.h" -#include "diff.h" +#include "tree-walk.h" #include #include @@ -32,9 +36,6 @@ struct object_entry { * be used as the base objectto delta huge * objects against. */ - int based_on_preferred; /* current delta candidate is a preferred - * one, or delta against a preferred one. - */ }; /* @@ -61,7 +62,7 @@ static int nr_objects = 0, nr_alloc = 0, nr_result = 0; static const char *base_name; static unsigned char pack_file_sha1[20]; static int progress = 1; -static volatile int progress_update = 0; +static volatile sig_atomic_t progress_update = 0; /* * The object names in objects array are hashed with this hashtable, @@ -452,7 +453,7 @@ static void rehash_objects(void) if (object_ix_hashsz < 1024) object_ix_hashsz = 1024; object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz); - object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz); + memset(object_ix, 0, sizeof(int) * object_ix_hashsz); for (i = 0, oe = objects; i < nr_objects; i++, oe++) { int ix = locate_object_entry_hash(oe->sha1); if (0 <= ix) @@ -504,21 +505,6 @@ static unsigned name_hash(struct name_path *path, const char *name) * but close enough. */ hash = (name_hash<up) { - fputc('/', stderr); - n = p->elem + p->len; - while (p->elem <= --n) - fputc(*n, stderr); - } - fprintf(stderr, "\t%08x\n", hash); - } return hash; } @@ -586,56 +572,254 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud return status; } -static void add_pbase_tree(struct tree_desc *tree, struct name_path *up) +struct pbase_tree_cache { + unsigned char sha1[20]; + int ref; + int temporary; + void *tree_data; + unsigned long tree_size; +}; + +static struct pbase_tree_cache *(pbase_tree_cache[256]); +static int pbase_tree_cache_ix(const unsigned char *sha1) +{ + return sha1[0] % ARRAY_SIZE(pbase_tree_cache); +} +static int pbase_tree_cache_ix_incr(int ix) +{ + return (ix+1) % ARRAY_SIZE(pbase_tree_cache); +} + +static struct pbase_tree { + struct pbase_tree *next; + /* This is a phony "cache" entry; we are not + * going to evict it nor find it through _get() + * mechanism -- this is for the toplevel node that + * would almost always change with any commit. + */ + struct pbase_tree_cache pcache; +} *pbase_tree; + +static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1) +{ + struct pbase_tree_cache *ent, *nent; + void *data; + unsigned long size; + char type[20]; + int neigh; + int my_ix = pbase_tree_cache_ix(sha1); + int available_ix = -1; + + /* pbase-tree-cache acts as a limited hashtable. + * your object will be found at your index or within a few + * slots after that slot if it is cached. + */ + for (neigh = 0; neigh < 8; neigh++) { + ent = pbase_tree_cache[my_ix]; + if (ent && !memcmp(ent->sha1, sha1, 20)) { + ent->ref++; + return ent; + } + else if (((available_ix < 0) && (!ent || !ent->ref)) || + ((0 <= available_ix) && + (!ent && pbase_tree_cache[available_ix]))) + available_ix = my_ix; + if (!ent) + break; + my_ix = pbase_tree_cache_ix_incr(my_ix); + } + + /* Did not find one. Either we got a bogus request or + * we need to read and perhaps cache. + */ + data = read_sha1_file(sha1, type, &size); + if (!data) + return NULL; + if (strcmp(type, tree_type)) { + free(data); + return NULL; + } + + /* We need to either cache or return a throwaway copy */ + + if (available_ix < 0) + ent = NULL; + else { + ent = pbase_tree_cache[available_ix]; + my_ix = available_ix; + } + + if (!ent) { + nent = xmalloc(sizeof(*nent)); + nent->temporary = (available_ix < 0); + } + else { + /* evict and reuse */ + free(ent->tree_data); + nent = ent; + } + memcpy(nent->sha1, sha1, 20); + nent->tree_data = data; + nent->tree_size = size; + nent->ref = 1; + if (!nent->temporary) + pbase_tree_cache[my_ix] = nent; + return nent; +} + +static void pbase_tree_put(struct pbase_tree_cache *cache) +{ + if (!cache->temporary) { + cache->ref--; + return; + } + free(cache->tree_data); + free(cache); +} + +static int name_cmp_len(const char *name) +{ + int i; + for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++) + ; + return i; +} + +static void add_pbase_object(struct tree_desc *tree, + struct name_path *up, + const char *name, + int cmplen) { while (tree->size) { const unsigned char *sha1; - const char *name; - unsigned mode, hash; + const char *entry_name; + int entry_len; + unsigned mode; unsigned long size; char type[20]; - sha1 = tree_entry_extract(tree, &name, &mode); + sha1 = tree_entry_extract(tree, &entry_name, &mode); update_tree_entry(tree); - if (!has_sha1_file(sha1)) - continue; - if (sha1_object_info(sha1, type, &size)) + entry_len = strlen(entry_name); + if (entry_len != cmplen || + memcmp(entry_name, name, cmplen) || + !has_sha1_file(sha1) || + sha1_object_info(sha1, type, &size)) continue; - - hash = name_hash(up, name); - if (!add_object_entry(sha1, hash, 1)) - continue; - - if (!strcmp(type, "tree")) { + if (name[cmplen] != '/') { + unsigned hash = name_hash(up, name); + add_object_entry(sha1, hash, 1); + return; + } + if (!strcmp(type, tree_type)) { struct tree_desc sub; - void *elem; struct name_path me; + struct pbase_tree_cache *tree; + const char *down = name+cmplen+1; + int downlen = name_cmp_len(down); + + tree = pbase_tree_get(sha1); + if (!tree) + return; + sub.buf = tree->tree_data; + sub.size = tree->tree_size; + + me.up = up; + me.elem = entry_name; + me.len = entry_len; + add_pbase_object(&sub, &me, down, downlen); + pbase_tree_put(tree); + } + } +} - elem = read_sha1_file(sha1, type, &sub.size); - sub.buf = elem; - if (sub.buf) { - me.up = up; - me.elem = name; - me.len = strlen(name); - add_pbase_tree(&sub, &me); - free(elem); - } +static unsigned *done_pbase_paths; +static int done_pbase_paths_num; +static int done_pbase_paths_alloc; +static int done_pbase_path_pos(unsigned hash) +{ + int lo = 0; + int hi = done_pbase_paths_num; + while (lo < hi) { + int mi = (hi + lo) / 2; + if (done_pbase_paths[mi] == hash) + return mi; + if (done_pbase_paths[mi] < hash) + hi = mi; + else + lo = mi + 1; + } + return -lo-1; +} + +static int check_pbase_path(unsigned hash) +{ + int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash); + if (0 <= pos) + return 1; + pos = -pos - 1; + if (done_pbase_paths_alloc <= done_pbase_paths_num) { + done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc); + done_pbase_paths = xrealloc(done_pbase_paths, + done_pbase_paths_alloc * + sizeof(unsigned)); + } + done_pbase_paths_num++; + if (pos < done_pbase_paths_num) + memmove(done_pbase_paths + pos + 1, + done_pbase_paths + pos, + (done_pbase_paths_num - pos - 1) * sizeof(unsigned)); + done_pbase_paths[pos] = hash; + return 0; +} + +static void add_preferred_base_object(char *name, unsigned hash) +{ + struct pbase_tree *it; + int cmplen = name_cmp_len(name); + + if (check_pbase_path(hash)) + return; + + for (it = pbase_tree; it; it = it->next) { + if (cmplen == 0) { + hash = name_hash(NULL, ""); + add_object_entry(it->pcache.sha1, hash, 1); + } + else { + struct tree_desc tree; + tree.buf = it->pcache.tree_data; + tree.size = it->pcache.tree_size; + add_pbase_object(&tree, NULL, name, cmplen); } } } static void add_preferred_base(unsigned char *sha1) { - struct tree_desc tree; - void *elem; + struct pbase_tree *it; + void *data; + unsigned long size; + unsigned char tree_sha1[20]; - elem = read_object_with_reference(sha1, "tree", &tree.size, NULL); - tree.buf = elem; - if (!tree.buf) + data = read_object_with_reference(sha1, tree_type, &size, tree_sha1); + if (!data) return; - if (add_object_entry(sha1, name_hash(NULL, ""), 1)) - add_pbase_tree(&tree, NULL); - free(elem); + + for (it = pbase_tree; it; it = it->next) { + if (!memcmp(it->pcache.sha1, tree_sha1, 20)) { + free(data); + return; + } + } + + it = xcalloc(1, sizeof(*it)); + it->next = pbase_tree; + pbase_tree = it; + + memcpy(it->pcache.sha1, tree_sha1, 20); + it->pcache.tree_data = data; + it->pcache.tree_size = size; } static void check_object(struct object_entry *entry) @@ -687,13 +871,13 @@ static void check_object(struct object_entry *entry) die("unable to get type of object %s", sha1_to_hex(entry->sha1)); - if (!strcmp(type, "commit")) { + if (!strcmp(type, commit_type)) { entry->type = OBJ_COMMIT; - } else if (!strcmp(type, "tree")) { + } else if (!strcmp(type, tree_type)) { entry->type = OBJ_TREE; - } else if (!strcmp(type, "blob")) { + } else if (!strcmp(type, blob_type)) { entry->type = OBJ_BLOB; - } else if (!strcmp(type, "tag")) { + } else if (!strcmp(type, tag_type)) { entry->type = OBJ_TAG; } else die("unable to pack object %s of type %s", @@ -824,8 +1008,6 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de { struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; - int old_preferred = (old_entry->preferred_base || - old_entry->based_on_preferred); unsigned long size, oldsize, delta_size, sizediff; long max_size; void *delta_buf; @@ -850,12 +1032,6 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de max_depth -= cur_entry->delta_limit; } - size = cur_entry->size; - oldsize = old_entry->size; - sizediff = oldsize > size ? oldsize - size : size - oldsize; - - if (size < 50) - return -1; if (old_entry->depth >= max_depth) return 0; @@ -866,30 +1042,14 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de * more space-efficient (deletes don't have to say _what_ they * delete). */ + size = cur_entry->size; max_size = size / 2 - 20; - if (cur_entry->delta) { - if (cur_entry->based_on_preferred) { - if (old_preferred) - max_size = cur_entry->delta_size-1; - else - /* trying with non-preferred one when we - * already have a delta based on preferred - * one is pointless. - */ - return -1; - } - else if (!old_preferred) - max_size = cur_entry->delta_size-1; - else - /* otherwise... even if delta with a - * preferred one produces a bigger result than - * what we currently have, which is based on a - * non-preferred one, it is OK. - */ - ; - } + if (cur_entry->delta) + max_size = cur_entry->delta_size-1; + oldsize = old_entry->size; + sizediff = oldsize < size ? size - oldsize : 0; if (sizediff >= max_size) - return -1; + return 0; delta_buf = diff_delta(old->data, oldsize, cur->data, size, &delta_size, max_size); if (!delta_buf) @@ -897,14 +1057,12 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de cur_entry->delta = old_entry; cur_entry->delta_size = delta_size; cur_entry->depth = old_entry->depth + 1; - cur_entry->based_on_preferred = old_preferred; free(delta_buf); return 0; } static void progress_interval(int signum) { - signal(SIGALRM, progress_interval); progress_update = 1; } @@ -948,6 +1106,9 @@ static void find_deltas(struct object_entry **list, int window, int depth) */ continue; + if (entry->size < 50) + continue; + free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); @@ -966,6 +1127,15 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (try_delta(n, m, depth) < 0) break; } +#if 0 + /* if we made n a delta, and if n is already at max + * depth, leaving it in the window is pointless. we + * should evict it first. + * ... in theory only; somehow this makes things worse. + */ + if (entry->delta && depth <= entry->depth) + continue; +#endif idx++; if (idx >= window) idx = 0; @@ -1041,16 +1211,35 @@ static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) return 1; } +static void setup_progress_signal(void) +{ + struct sigaction sa; + struct itimerval v; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = progress_interval; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sigaction(SIGALRM, &sa, NULL); + + v.it_interval.tv_sec = 1; + v.it_interval.tv_usec = 0; + v.it_value = v.it_interval; + setitimer(ITIMER_REAL, &v, NULL); +} + int main(int argc, char **argv) { SHA_CTX ctx; - char line[PATH_MAX + 20]; + char line[40 + 1 + PATH_MAX + 2]; int window = 10, depth = 10, pack_to_stdout = 0; struct object_entry **list; + int num_preferred_base = 0; int i; setup_git_directory(); + progress = isatty(2); for (i = 1; i < argc; i++) { const char *arg = argv[i]; @@ -1081,6 +1270,10 @@ int main(int argc, char **argv) usage(pack_usage); continue; } + if (!strcmp("--progress", arg)) { + progress = 1; + continue; + } if (!strcmp("-q", arg)) { progress = 0; continue; @@ -1106,28 +1299,38 @@ int main(int argc, char **argv) prepare_packed_git(); if (progress) { - struct itimerval v; - v.it_interval.tv_sec = 1; - v.it_interval.tv_usec = 0; - v.it_value = v.it_interval; - signal(SIGALRM, progress_interval); - setitimer(ITIMER_REAL, &v, NULL); fprintf(stderr, "Generating pack...\n"); + setup_progress_signal(); } - while (fgets(line, sizeof(line), stdin) != NULL) { + for (;;) { unsigned char sha1[20]; + unsigned hash; + + if (!fgets(line, sizeof(line), stdin)) { + if (feof(stdin)) + break; + if (!ferror(stdin)) + die("fgets returned NULL, not EOF, not error!"); + if (errno != EINTR) + die("fgets: %s", strerror(errno)); + clearerr(stdin); + continue; + } if (line[0] == '-') { if (get_sha1_hex(line+1, sha1)) die("expected edge sha1, got garbage:\n %s", line+1); - add_preferred_base(sha1); + if (num_preferred_base++ < window) + add_preferred_base(sha1); continue; } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - add_object_entry(sha1, name_hash(NULL, line+41), 0); + hash = name_hash(NULL, line+41); + add_preferred_base_object(line+41, hash); + add_object_entry(sha1, hash, 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects);