X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=pack-objects.c;h=8f352aa6c1c99e7bebc7680909c9139c7a6cf623;hb=7bd1527d2d8c80a6e9a0f8583082a5aee5428c68;hp=d9328a98d8f349b75fc5ab8302b66116317781ec;hpb=75c42d8cc3b42e4b82946848b8ba902b4bbcc38d;p=git.git diff --git a/pack-objects.c b/pack-objects.c index d9328a98..8f352aa6 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1,77 +1,189 @@ #include "cache.h" #include "object.h" #include "delta.h" +#include "pack.h" +#include "csum-file.h" +#include +#include -static const char pack_usage[] = "git-pack-objects [--window=N] base-name < object-list"; - -enum object_type { - OBJ_NONE, - OBJ_COMMIT, - OBJ_TREE, - OBJ_BLOB, - OBJ_DELTA // NOTE! This is _not_ the same as a "delta" object in the filesystem -}; +static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; struct object_entry { unsigned char sha1[20]; - unsigned long size; - unsigned long offset; - unsigned int depth; - unsigned int flags; + unsigned long size; /* uncompressed size */ + unsigned long offset; /* offset into the final pack file; + * nonzero if already written. + */ + unsigned int depth; /* delta depth */ + unsigned int delta_limit; /* base adjustment for in-pack delta */ + unsigned int hash; /* name hint hash */ enum object_type type; - unsigned long delta_size; - struct object_entry *delta; + enum object_type in_pack_type; /* could be delta */ + unsigned long delta_size; /* delta data size (uncompressed) */ + struct object_entry *delta; /* delta base object */ + struct packed_git *in_pack; /* already in pack */ + unsigned int in_pack_offset; + struct object_entry *delta_child; /* delitified objects who bases me */ + struct object_entry *delta_sibling; /* other deltified objects who + * uses the same base as me + */ }; +/* + * Objects we are going to pack are colected in objects array (dynamically + * expanded). nr_objects & nr_alloc controls this array. They are stored + * in the order we see -- typically rev-list --objects order that gives us + * nice "minimum seek" order. + * + * sorted-by-sha ans sorted-by-type are arrays of pointers that point at + * elements in the objects array. The former is used to build the pack + * index (lists object names in the ascending order to help offset lookup), + * and the latter is used to group similar things together by try_delta() + * heuristics. + */ + +static unsigned char object_list_sha1[20]; +static int non_empty = 0; +static int no_reuse_delta = 0; +static int local = 0; +static int incremental = 0; static struct object_entry **sorted_by_sha, **sorted_by_type; static struct object_entry *objects = NULL; static int nr_objects = 0, nr_alloc = 0; -static int delta_window = 10; static const char *base_name; +static unsigned char pack_file_sha1[20]; +static int progress = 1; +static volatile int progress_update = 0; -struct myfile { - int fd; - unsigned long chars; - unsigned char buffer[8192]; -}; +/* + * The object names in objects array are hashed with this hashtable, + * to help looking up the entry by object name. Binary search from + * sorted_by_sha is also possible but this was easier to code and faster. + * This hashtable is built after all the objects are seen. 
+ */ +static int *object_ix = NULL; +static int object_ix_hashsz = 0; + +/* + * Pack index for existing packs give us easy access to the offsets into + * corresponding pack file where each object's data starts, but the entries + * do not store the size of the compressed representation (uncompressed + * size is easily available by examining the pack entry header). We build + * a hashtable of existing packs (pack_revindex), and keep reverse index + * here -- pack index file is sorted by object name mapping to offset; this + * pack_revindex[].revindex array is an ordered list of offsets, so if you + * know the offset of an object, next offset is where its packed + * representation ends. + */ +struct pack_revindex { + struct packed_git *p; + unsigned long *revindex; +} *pack_revindex = NULL; +static int pack_revindex_hashsz = 0; + +/* + * stats + */ +static int written = 0; +static int written_delta = 0; +static int reused = 0; +static int reused_delta = 0; + +static int pack_revindex_ix(struct packed_git *p) +{ + unsigned int ui = (unsigned int) p; + int i; + + ui = ui ^ (ui >> 16); /* defeat structure alignment */ + i = (int)(ui % pack_revindex_hashsz); + while (pack_revindex[i].p) { + if (pack_revindex[i].p == p) + return i; + if (++i == pack_revindex_hashsz) + i = 0; + } + return -1 - i; +} -static FILE *create_file(const char *suffix) +static void prepare_pack_ix(void) { - static char filename[PATH_MAX]; - unsigned len; + int num; + struct packed_git *p; + for (num = 0, p = packed_git; p; p = p->next) + num++; + if (!num) + return; + pack_revindex_hashsz = num * 11; + pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz); + for (p = packed_git; p; p = p->next) { + num = pack_revindex_ix(p); + num = - 1 - num; + pack_revindex[num].p = p; + } + /* revindex elements are lazily initialized */ +} - len = snprintf(filename, PATH_MAX, "%s.%s", base_name, suffix); - if (len >= PATH_MAX) - die("you wascally wabbit, you"); - return fopen(filename, "w"); +static int cmp_offset(const void *a_, const void *b_) +{ + unsigned long a = *(unsigned long *) a_; + unsigned long b = *(unsigned long *) b_; + if (a < b) + return -1; + else if (a == b) + return 0; + else + return 1; } -static unsigned long fwrite_compressed(void *in, unsigned long size, FILE *f) +/* + * Ordered list of offsets of objects in the pack. + */ +static void prepare_pack_revindex(struct pack_revindex *rix) { - z_stream stream; - unsigned long maxsize; - void *out; - - memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, Z_DEFAULT_COMPRESSION); - maxsize = deflateBound(&stream, size); - out = xmalloc(maxsize); - - /* Compress it */ - stream.next_in = in; - stream.avail_in = size; - - stream.next_out = out; - stream.avail_out = maxsize; - - while (deflate(&stream, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&stream); - - size = stream.total_out; - fwrite(out, size, 1, f); - free(out); - return size; + struct packed_git *p = rix->p; + int num_ent = num_packed_objects(p); + int i; + void *index = p->index_base + 256; + + rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); + for (i = 0; i < num_ent; i++) { + long hl = *((long *)(index + 24 * i)); + rix->revindex[i] = ntohl(hl); + } + /* This knows the pack format -- the 20-byte trailer + * follows immediately after the last object data. 
+ */ + rix->revindex[num_ent] = p->pack_size - 20; + qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset); +} + +static unsigned long find_packed_object_size(struct packed_git *p, + unsigned long ofs) +{ + int num; + int lo, hi; + struct pack_revindex *rix; + unsigned long *revindex; + num = pack_revindex_ix(p); + if (num < 0) + die("internal error: pack revindex uninitialized"); + rix = &pack_revindex[num]; + if (!rix->revindex) + prepare_pack_revindex(rix); + revindex = rix->revindex; + lo = 0; + hi = num_packed_objects(p) + 1; + do { + int mi = (lo + hi) / 2; + if (revindex[mi] == ofs) { + return revindex[mi+1] - ofs; + } + else if (ofs < revindex[mi]) + hi = mi; + else + lo = mi + 1; + } while (lo < hi); + die("internal error: pack revindex corrupt"); } static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) @@ -83,7 +195,8 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e if (!otherbuf) die("unable to read %s", sha1_to_hex(entry->delta->sha1)); - delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size, ~0UL); + delta_buf = diff_delta(otherbuf, othersize, + buf, size, &delta_size, 0); if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); free(buf); @@ -91,62 +204,166 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e return delta_buf; } -static unsigned long write_object(FILE *f, struct object_entry *entry) +/* + * The per-object header is a pretty dense thing, which is + * - first byte: low four bits are "size", then three bits of "type", + * and the high bit is "size continues". + * - each byte afterwards: low seven bits are size continuation, + * with the high bit being "size continues" + */ +static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) +{ + int n = 1; + unsigned char c; + + if (type < OBJ_COMMIT || type > OBJ_DELTA) + die("bad type %d", type); + + c = (type << 4) | (size & 15); + size >>= 4; + while (size) { + *hdr++ = c | 0x80; + c = size & 0x7f; + size >>= 7; + n++; + } + *hdr = c; + return n; +} + +static unsigned long write_object(struct sha1file *f, struct object_entry *entry) { unsigned long size; char type[10]; - void *buf = read_sha1_file(entry->sha1, type, &size); - char header[21]; + void *buf; + unsigned char header[10]; unsigned hdrlen, datalen; - - if (!buf) - die("unable to read %s", sha1_to_hex(entry->sha1)); - if (size != entry->size) - die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); - - /* - * The object header is a byte of 'type' followed by four bytes of - * length, except for deltas that has the 20 bytes of delta sha - * instead. - */ - header[0] = ".CTB"[entry->type]; - datalen = htonl(size); - memcpy(header+1, &datalen, 4); - hdrlen = 5; - if (entry->delta) { - header[0] = 'D'; - memcpy(header+1, entry->delta, 20); - buf = delta_against(buf, size, entry); - size = entry->delta_size; - hdrlen = 21; + enum object_type obj_type; + int to_reuse = 0; + + obj_type = entry->type; + if (! entry->in_pack) + to_reuse = 0; /* can't reuse what we don't have */ + else if (obj_type == OBJ_DELTA) + to_reuse = 1; /* check_object() decided it for us */ + else if (obj_type != entry->in_pack_type) + to_reuse = 0; /* pack has delta which is unusable */ + else if (entry->delta) + to_reuse = 0; /* we want to pack afresh */ + else + to_reuse = 1; /* we have it in-pack undeltified, + * and we do not need to deltify it. + */ + + if (! 
to_reuse) { + buf = read_sha1_file(entry->sha1, type, &size); + if (!buf) + die("unable to read %s", sha1_to_hex(entry->sha1)); + if (size != entry->size) + die("object %s size inconsistency (%lu vs %lu)", + sha1_to_hex(entry->sha1), size, entry->size); + if (entry->delta) { + buf = delta_against(buf, size, entry); + size = entry->delta_size; + obj_type = OBJ_DELTA; + } + /* + * The object header is a byte of 'type' followed by zero or + * more bytes of length. For deltas, the 20 bytes of delta + * sha1 follows that. + */ + hdrlen = encode_header(obj_type, size, header); + sha1write(f, header, hdrlen); + + if (entry->delta) { + sha1write(f, entry->delta, 20); + hdrlen += 20; + } + datalen = sha1write_compressed(f, buf, size); + free(buf); } - fwrite(header, hdrlen, 1, f); - datalen = fwrite_compressed(buf, size, f); - free(buf); + else { + struct packed_git *p = entry->in_pack; + use_packed_git(p); + + datalen = find_packed_object_size(p, entry->in_pack_offset); + buf = p->pack_base + entry->in_pack_offset; + sha1write(f, buf, datalen); + unuse_packed_git(p); + hdrlen = 0; /* not really */ + if (obj_type == OBJ_DELTA) + reused_delta++; + reused++; + } + if (obj_type == OBJ_DELTA) + written_delta++; + written++; return hdrlen + datalen; } +static unsigned long write_one(struct sha1file *f, + struct object_entry *e, + unsigned long offset) +{ + if (e->offset) + /* offset starts from header size and cannot be zero + * if it is written already. + */ + return offset; + e->offset = offset; + offset += write_object(f, e); + /* if we are deltified, write out its base object. */ + if (e->delta) + offset = write_one(f, e->delta, offset); + return offset; +} + static void write_pack_file(void) { int i; - FILE *f = create_file("pack"); - unsigned long offset = 0; - unsigned long mb; + struct sha1file *f; + unsigned long offset; + struct pack_header hdr; + unsigned last_percent = 999; + int do_progress = 0; + if (!base_name) + f = sha1fd(1, ""); + else { + f = sha1create("%s-%s.%s", base_name, + sha1_to_hex(object_list_sha1), "pack"); + do_progress = progress; + } + if (do_progress) + fprintf(stderr, "Writing %d objects.\n", nr_objects); + + hdr.hdr_signature = htonl(PACK_SIGNATURE); + hdr.hdr_version = htonl(PACK_VERSION); + hdr.hdr_entries = htonl(nr_objects); + sha1write(f, &hdr, sizeof(hdr)); + offset = sizeof(hdr); for (i = 0; i < nr_objects; i++) { - struct object_entry *entry = objects + i; - entry->offset = offset; - offset += write_object(f, entry); + offset = write_one(f, objects + i, offset); + if (do_progress) { + unsigned percent = written * 100 / nr_objects; + if (progress_update || percent != last_percent) { + fprintf(stderr, "%4u%% (%u/%u) done\r", + percent, written, nr_objects); + progress_update = 0; + last_percent = percent; + } + } } - fclose(f); - mb = offset >> 20; - offset &= 0xfffff; + if (do_progress) + fputc('\n', stderr); + + sha1close(f, pack_file_sha1, 1); } static void write_index_file(void) { int i; - FILE *f = create_file("idx"); + struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); struct object_entry **list = sorted_by_sha; struct object_entry **last = list + nr_objects; unsigned int array[256]; @@ -154,7 +371,7 @@ static void write_index_file(void) /* * Write the first-level table (the list is sorted, * but we use a 256-entry lookup to be able to avoid - * having to do eight extra binary search iterations) + * having to do eight extra binary search iterations). 
*/ for (i = 0; i < 256; i++) { struct object_entry **next = list; @@ -167,7 +384,7 @@ static void write_index_file(void) array[i] = htonl(next - sorted_by_sha); list = next; } - fwrite(array, 256, sizeof(int), f); + sha1write(f, array, 256 * sizeof(int)); /* * Write the actual SHA1 entries.. @@ -176,16 +393,34 @@ static void write_index_file(void) for (i = 0; i < nr_objects; i++) { struct object_entry *entry = *list++; unsigned int offset = htonl(entry->offset); - fwrite(&offset, 4, 1, f); - fwrite(entry->sha1, 20, 1, f); + sha1write(f, &offset, 4); + sha1write(f, entry->sha1, 20); } - fclose(f); + sha1write(f, pack_file_sha1, 20); + sha1close(f, NULL, 1); } -static void add_object_entry(unsigned char *sha1) +static int add_object_entry(unsigned char *sha1, unsigned int hash) { unsigned int idx = nr_objects; struct object_entry *entry; + struct packed_git *p; + unsigned int found_offset = 0; + struct packed_git *found_pack = NULL; + + for (p = packed_git; p; p = p->next) { + struct pack_entry e; + if (find_pack_entry_one(sha1, &e, p)) { + if (incremental) + return 0; + if (local && !p->pack_local) + return 0; + if (!found_pack) { + found_offset = e.offset; + found_pack = e.p; + } + } + } if (idx >= nr_alloc) { unsigned int needed = (idx + 1024) * 3 / 2; @@ -195,42 +430,144 @@ static void add_object_entry(unsigned char *sha1) entry = objects + idx; memset(entry, 0, sizeof(*entry)); memcpy(entry->sha1, sha1, 20); + entry->hash = hash; + if (found_pack) { + entry->in_pack = found_pack; + entry->in_pack_offset = found_offset; + } nr_objects = idx+1; + return 1; +} + +static int locate_object_entry_hash(unsigned char *sha1) +{ + int i; + unsigned int ui; + memcpy(&ui, sha1, sizeof(unsigned int)); + i = ui % object_ix_hashsz; + while (0 < object_ix[i]) { + if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) + return i; + if (++i == object_ix_hashsz) + i = 0; + } + return -1 - i; +} + +static struct object_entry *locate_object_entry(unsigned char *sha1) +{ + int i = locate_object_entry_hash(sha1); + if (0 <= i) + return &objects[object_ix[i]-1]; + return NULL; } static void check_object(struct object_entry *entry) { - char buffer[128]; - char type[10]; - unsigned long mapsize; - z_stream stream; - void *map; - - map = map_sha1_file(entry->sha1, &mapsize); - if (!map) - die("unable to map %s", sha1_to_hex(entry->sha1)); - if (unpack_sha1_header(&stream, map, mapsize, buffer, sizeof(buffer)) < 0) - die("unable to unpack %s header", sha1_to_hex(entry->sha1)); - munmap(map, mapsize); - if (parse_sha1_header(buffer, type, &entry->size) < 0) - die("unable to parse %s header", sha1_to_hex(entry->sha1)); + char type[20]; + + if (entry->in_pack) { + unsigned char base[20]; + unsigned long size; + struct object_entry *base_entry; + + /* We want in_pack_type even if we do not reuse delta. + * There is no point not reusing non-delta representations. + */ + check_reuse_pack_delta(entry->in_pack, + entry->in_pack_offset, + base, &size, + &entry->in_pack_type); + + /* Check if it is delta, and the base is also an object + * we are going to pack. If so we will reuse the existing + * delta. + */ + if (!no_reuse_delta && + entry->in_pack_type == OBJ_DELTA && + (base_entry = locate_object_entry(base))) { + + /* Depth value does not matter - find_deltas() + * will never consider reused delta as the + * base object to deltify other objects + * against, in order to avoid circular deltas. 
+ */ + + /* uncompressed size of the delta data */ + entry->size = entry->delta_size = size; + entry->delta = base_entry; + entry->type = OBJ_DELTA; + + entry->delta_sibling = base_entry->delta_child; + base_entry->delta_child = entry; + + return; + } + /* Otherwise we would do the usual */ + } + + if (sha1_object_info(entry->sha1, type, &entry->size)) + die("unable to get type of object %s", + sha1_to_hex(entry->sha1)); + if (!strcmp(type, "commit")) { entry->type = OBJ_COMMIT; } else if (!strcmp(type, "tree")) { entry->type = OBJ_TREE; } else if (!strcmp(type, "blob")) { entry->type = OBJ_BLOB; + } else if (!strcmp(type, "tag")) { + entry->type = OBJ_TAG; } else - die("unable to pack object %s of type %s", sha1_to_hex(entry->sha1), type); + die("unable to pack object %s of type %s", + sha1_to_hex(entry->sha1), type); +} + +static void hash_objects(void) +{ + int i; + struct object_entry *oe; + + object_ix_hashsz = nr_objects * 2; + object_ix = xcalloc(sizeof(int), object_ix_hashsz); + for (i = 0, oe = objects; i < nr_objects; i++, oe++) { + int ix = locate_object_entry_hash(oe->sha1); + if (0 <= ix) { + error("the same object '%s' added twice", + sha1_to_hex(oe->sha1)); + continue; + } + ix = -1 - ix; + object_ix[ix] = i + 1; + } +} + +static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) +{ + struct object_entry *child = me->delta_child; + unsigned int m = n; + while (child) { + unsigned int c = check_delta_limit(child, n + 1); + if (m < c) + m = c; + child = child->delta_sibling; + } + return m; } static void get_object_details(void) { int i; - struct object_entry *entry = objects; + struct object_entry *entry; - for (i = 0; i < nr_objects; i++) - check_object(entry++); + hash_objects(); + prepare_pack_ix(); + for (i = 0, entry = objects; i < nr_objects; i++, entry++) + check_object(entry); + for (i = 0, entry = objects; i < nr_objects; i++, entry++) + if (!entry->delta && entry->delta_child) + entry->delta_limit = + check_delta_limit(entry, 1); } typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); @@ -267,6 +604,10 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr return -1; if (a->type > b->type) return 1; + if (a->hash < b->hash) + return -1; + if (a->hash > b->hash) + return 1; if (a->size < b->size) return -1; if (a->size > b->size) @@ -280,18 +621,18 @@ struct unpacked { }; /* - * We search for deltas in a list sorted by type and by size, and - * walk the "old" chain backwards in the list, so if the type - * has changed or the size difference is too big, there's no point - * in even continuing the walk, since the other old objects are - * going to be even smaller or of a different type. So return -1 - * once we determine that there's no point even trying. + * We search for deltas _backwards_ in a list sorted by type and + * by size, so that we see progressively smaller and smaller files. + * That's because we prefer deltas to be from the bigger file + * to the smaller - deletes are potentially cheaper, but perhaps + * more importantly, the bigger file is likely the more recent + * one. 
*/ -static int try_delta(struct unpacked *cur, struct unpacked *old) +static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) { struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; - unsigned long size, oldsize, delta_size; + unsigned long size, oldsize, delta_size, sizediff; long max_size; void *delta_buf; @@ -299,13 +640,25 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) if (cur_entry->type != old_entry->type) return -1; - /* Size is guaranteed to be larger than or equal to oldsize */ + /* If the current object is at edge, take the depth the objects + * that depend on the current object into account -- otherwise + * they would become too deep. + */ + if (cur_entry->delta_child) { + if (max_depth <= cur_entry->delta_limit) + return 0; + max_depth -= cur_entry->delta_limit; + } + size = cur_entry->size; if (size < 50) return -1; oldsize = old_entry->size; - if (size - oldsize > oldsize / 4) + sizediff = oldsize > size ? oldsize - size : size - oldsize; + if (sizediff > size / 8) return -1; + if (old_entry->depth >= max_depth) + return 0; /* * NOTE! @@ -317,35 +670,69 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) max_size = size / 2 - 20; if (cur_entry->delta) max_size = cur_entry->delta_size-1; - delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size, max_size); + if (sizediff >= max_size) + return -1; + delta_buf = diff_delta(old->data, oldsize, + cur->data, size, &delta_size, max_size); if (!delta_buf) return 0; cur_entry->delta = old_entry; cur_entry->delta_size = delta_size; + cur_entry->depth = old_entry->depth + 1; free(delta_buf); return 0; } -static void find_deltas(struct object_entry **list, int window) +static void progress_interval(int signum) { - unsigned int i; + signal(SIGALRM, progress_interval); + progress_update = 1; +} + +static void find_deltas(struct object_entry **list, int window, int depth) +{ + int i, idx; unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array = xmalloc(array_size); + unsigned processed = 0; + unsigned last_percent = 999; memset(array, 0, array_size); - for (i = 0; i < nr_objects; i++) { - unsigned int idx = i % window; + i = nr_objects; + idx = 0; + if (progress) + fprintf(stderr, "Deltifying %d objects.\n", nr_objects); + + while (--i >= 0) { struct object_entry *entry = list[i]; struct unpacked *n = array + idx; unsigned long size; char type[10]; int j; + processed++; + if (progress) { + unsigned percent = processed * 100 / nr_objects; + if (percent != last_percent || progress_update) { + fprintf(stderr, "%4u%% (%u/%u) done\r", + percent, processed, nr_objects); + progress_update = 0; + last_percent = percent; + } + } + + if (entry->delta) + /* This happens if we decided to reuse existing + * delta from a pack. "!no_reuse_delta &&" is implied. 
+ */ + continue; + free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); if (size != entry->size) die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); + j = window; while (--j > 0) { unsigned int other_idx = idx + j; @@ -355,28 +742,136 @@ static void find_deltas(struct object_entry **list, int window) m = array + other_idx; if (!m->entry) break; - if (try_delta(n, m) < 0) + if (try_delta(n, m, depth) < 0) break; } + idx++; + if (idx >= window) + idx = 0; } + + if (progress) + fputc('\n', stderr); + + for (i = 0; i < window; ++i) + free(array[i].data); + free(array); +} + +static void prepare_pack(int window, int depth) +{ + get_object_details(); + sorted_by_type = create_sorted_list(type_size_sort); + if (window && depth) + find_deltas(sorted_by_type, window+1, depth); +} + +static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) +{ + static const char cache[] = "pack-cache/pack-%s.%s"; + char *cached_pack, *cached_idx; + int ifd, ofd, ifd_ix = -1; + + cached_pack = git_path(cache, sha1_to_hex(sha1), "pack"); + ifd = open(cached_pack, O_RDONLY); + if (ifd < 0) + return 0; + + if (!pack_to_stdout) { + cached_idx = git_path(cache, sha1_to_hex(sha1), "idx"); + ifd_ix = open(cached_idx, O_RDONLY); + if (ifd_ix < 0) { + close(ifd); + return 0; + } + } + + if (progress) + fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, + sha1_to_hex(sha1)); + + if (pack_to_stdout) { + if (copy_fd(ifd, 1)) + exit(1); + close(ifd); + } + else { + char name[PATH_MAX]; + snprintf(name, sizeof(name), + "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack"); + ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); + if (ofd < 0) + die("unable to open %s (%s)", name, strerror(errno)); + if (copy_fd(ifd, ofd)) + exit(1); + close(ifd); + + snprintf(name, sizeof(name), + "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx"); + ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); + if (ofd < 0) + die("unable to open %s (%s)", name, strerror(errno)); + if (copy_fd(ifd_ix, ofd)) + exit(1); + close(ifd_ix); + puts(sha1_to_hex(sha1)); + } + + return 1; } int main(int argc, char **argv) { - char line[128]; + SHA_CTX ctx; + char line[PATH_MAX + 20]; + int window = 10, depth = 10, pack_to_stdout = 0; + struct object_entry **list; int i; + setup_git_directory(); + for (i = 1; i < argc; i++) { const char *arg = argv[i]; if (*arg == '-') { + if (!strcmp("--non-empty", arg)) { + non_empty = 1; + continue; + } + if (!strcmp("--local", arg)) { + local = 1; + continue; + } + if (!strcmp("--incremental", arg)) { + incremental = 1; + continue; + } if (!strncmp("--window=", arg, 9)) { char *end; - delta_window = strtoul(arg+9, &end, 0); + window = strtoul(arg+9, &end, 0); if (!arg[9] || *end) usage(pack_usage); continue; } + if (!strncmp("--depth=", arg, 8)) { + char *end; + depth = strtoul(arg+8, &end, 0); + if (!arg[8] || *end) + usage(pack_usage); + continue; + } + if (!strcmp("-q", arg)) { + progress = 0; + continue; + } + if (!strcmp("--no-reuse-delta", arg)) { + no_reuse_delta = 1; + continue; + } + if (!strcmp("--stdout", arg)) { + pack_to_stdout = 1; + continue; + } usage(pack_usage); } if (base_name) @@ -384,25 +879,75 @@ int main(int argc, char **argv) base_name = arg; } - if (!base_name) + if (pack_to_stdout != !base_name) usage(pack_usage); + prepare_packed_git(); + + if (progress) { + struct itimerval v; + v.it_interval.tv_sec = 1; + v.it_interval.tv_usec = 0; + v.it_value = v.it_interval; + signal(SIGALRM, progress_interval); + 
setitimer(ITIMER_REAL, &v, NULL); + fprintf(stderr, "Generating pack...\n"); + } + while (fgets(line, sizeof(line), stdin) != NULL) { + unsigned int hash; + char *p; unsigned char sha1[20]; + + if (progress_update) { + fprintf(stderr, "Counting objects...%d\r", nr_objects); + progress_update = 0; + } if (get_sha1_hex(line, sha1)) - die("expected sha1, got garbage"); - add_object_entry(sha1); + die("expected sha1, got garbage:\n %s", line); + hash = 0; + p = line+40; + while (*p) { + unsigned char c = *p++; + if (isspace(c)) + continue; + hash = hash * 11 + c; + } + add_object_entry(sha1, hash); } - get_object_details(); - - printf("Packing %d objects\n", nr_objects); + if (progress) + fprintf(stderr, "Done counting %d objects.\n", nr_objects); + if (non_empty && !nr_objects) + return 0; sorted_by_sha = create_sorted_list(sha1_sort); - sorted_by_type = create_sorted_list(type_size_sort); - if (delta_window) - find_deltas(sorted_by_type, delta_window); - - write_pack_file(); - write_index_file(); + SHA1_Init(&ctx); + list = sorted_by_sha; + for (i = 0; i < nr_objects; i++) { + struct object_entry *entry = *list++; + SHA1_Update(&ctx, entry->sha1, 20); + } + SHA1_Final(object_list_sha1, &ctx); + + if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) + ; + else { + prepare_pack(window, depth); + if (progress && pack_to_stdout) { + /* the other end usually displays progress itself */ + struct itimerval v = {{0,},}; + setitimer(ITIMER_REAL, &v, NULL); + signal(SIGALRM, SIG_IGN ); + progress_update = 0; + } + write_pack_file(); + if (!pack_to_stdout) { + write_index_file(); + puts(sha1_to_hex(object_list_sha1)); + } + } + if (progress) + fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", + nr_objects, written, written_delta, reused, reused_delta); return 0; }
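
A minimal, illustrative decoder for the per-object header format that
encode_header() above produces (not part of the patch; written here only to
make the bit layout concrete).  It assumes the same OBJ_* enum values and a
well-formed header buffer:

	static int decode_header(const unsigned char *hdr,
	                         enum object_type *type, unsigned long *size)
	{
		const unsigned char *p = hdr;
		unsigned char c = *p++;
		unsigned long sz = c & 15;	/* low four bits of the size */
		int shift = 4;

		*type = (c >> 4) & 7;		/* three type bits */
		while (c & 0x80) {		/* high bit set: size continues */
			c = *p++;
			sz |= (unsigned long)(c & 0x7f) << shift;
			shift += 7;
		}
		*size = sz;
		return p - hdr;			/* header bytes consumed */
	}

Feeding the output of encode_header() back through this should reproduce the
original type and size, which is a quick way to sanity-check the format.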
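
Similarly illustrative, a sketch of how a reader of the .idx file written by
write_index_file() could use the 256-entry first-level table to bound the
binary search over the sorted SHA-1 entries.  The names here are hypothetical;
it assumes "fanout" has already been converted to host byte order and that
each index entry is 24 bytes (a 4-byte offset followed by the 20-byte object
name), as written above:

	#include <string.h>

	static int find_in_idx(const unsigned char *sha1,
	                       const unsigned int *fanout,
	                       const unsigned char *entries)
	{
		unsigned int lo = sha1[0] ? fanout[sha1[0] - 1] : 0;
		unsigned int hi = fanout[sha1[0]];

		while (lo < hi) {
			unsigned int mi = (lo + hi) / 2;
			int cmp = memcmp(entries + 24 * mi + 4, sha1, 20);
			if (!cmp)
				return mi;	/* index of the matching entry */
			if (cmp < 0)
				lo = mi + 1;
			else
				hi = mi;
		}
		return -1;			/* not in this pack */
	}

The fanout restricts the search to objects whose first name byte matches,
which is what avoids the "eight extra binary search iterations" mentioned in
the comment above.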