X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=pack-objects.c;h=b3e61520339231f6d9a43c545100f4d5f87e0aac;hb=0a8944dd48b97d258240af967dfdfcea4f203b85;hp=d9328a98d8f349b75fc5ab8302b66116317781ec;hpb=75c42d8cc3b42e4b82946848b8ba902b4bbcc38d;p=git.git diff --git a/pack-objects.c b/pack-objects.c index d9328a98..b3e61520 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1,78 +1,31 @@ #include "cache.h" #include "object.h" #include "delta.h" +#include "pack.h" +#include "csum-file.h" -static const char pack_usage[] = "git-pack-objects [--window=N] base-name < object-list"; - -enum object_type { - OBJ_NONE, - OBJ_COMMIT, - OBJ_TREE, - OBJ_BLOB, - OBJ_DELTA // NOTE! This is _not_ the same as a "delta" object in the filesystem -}; +static const char pack_usage[] = "git-pack-objects [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; struct object_entry { unsigned char sha1[20]; unsigned long size; unsigned long offset; unsigned int depth; - unsigned int flags; + unsigned int hash; enum object_type type; unsigned long delta_size; struct object_entry *delta; }; +static unsigned char object_list_sha1[20]; +static int non_empty = 0; +static int local = 0; +static int incremental = 0; static struct object_entry **sorted_by_sha, **sorted_by_type; static struct object_entry *objects = NULL; static int nr_objects = 0, nr_alloc = 0; -static int delta_window = 10; static const char *base_name; - -struct myfile { - int fd; - unsigned long chars; - unsigned char buffer[8192]; -}; - -static FILE *create_file(const char *suffix) -{ - static char filename[PATH_MAX]; - unsigned len; - - len = snprintf(filename, PATH_MAX, "%s.%s", base_name, suffix); - if (len >= PATH_MAX) - die("you wascally wabbit, you"); - return fopen(filename, "w"); -} - -static unsigned long fwrite_compressed(void *in, unsigned long size, FILE *f) -{ - z_stream stream; - unsigned long maxsize; - void *out; - - memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, Z_DEFAULT_COMPRESSION); - maxsize = deflateBound(&stream, size); - out = xmalloc(maxsize); - - /* Compress it */ - stream.next_in = in; - stream.avail_in = size; - - stream.next_out = out; - stream.avail_out = maxsize; - - while (deflate(&stream, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&stream); - - size = stream.total_out; - fwrite(out, size, 1, f); - free(out); - return size; -} +static unsigned char pack_file_sha1[20]; static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) { @@ -83,7 +36,8 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e if (!otherbuf) die("unable to read %s", sha1_to_hex(entry->delta->sha1)); - delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size, ~0UL); + delta_buf = diff_delta(otherbuf, othersize, + buf, size, &delta_size, 0); if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); free(buf); @@ -91,13 +45,41 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e return delta_buf; } -static unsigned long write_object(FILE *f, struct object_entry *entry) +/* + * The per-object header is a pretty dense thing, which is + * - first byte: low four bits are "size", then three bits of "type", + * and the high bit is "size continues". + * - each byte afterwards: low seven bits are size continuation, + * with the high bit being "size continues" + */ +static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) +{ + int n = 1; + unsigned char c; + + if (type < OBJ_COMMIT || type > OBJ_DELTA) + die("bad type %d", type); + + c = (type << 4) | (size & 15); + size >>= 4; + while (size) { + *hdr++ = c | 0x80; + c = size & 0x7f; + size >>= 7; + n++; + } + *hdr = c; + return n; +} + +static unsigned long write_object(struct sha1file *f, struct object_entry *entry) { unsigned long size; char type[10]; void *buf = read_sha1_file(entry->sha1, type, &size); - char header[21]; + unsigned char header[10]; unsigned hdrlen, datalen; + enum object_type obj_type; if (!buf) die("unable to read %s", sha1_to_hex(entry->sha1)); @@ -105,40 +87,65 @@ static unsigned long write_object(FILE *f, struct object_entry *entry) die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); /* - * The object header is a byte of 'type' followed by four bytes of - * length, except for deltas that has the 20 bytes of delta sha - * instead. + * The object header is a byte of 'type' followed by zero or + * more bytes of length. For deltas, the 20 bytes of delta sha1 + * follows that. */ - header[0] = ".CTB"[entry->type]; - datalen = htonl(size); - memcpy(header+1, &datalen, 4); - hdrlen = 5; + obj_type = entry->type; if (entry->delta) { - header[0] = 'D'; - memcpy(header+1, entry->delta, 20); buf = delta_against(buf, size, entry); size = entry->delta_size; - hdrlen = 21; + obj_type = OBJ_DELTA; } - fwrite(header, hdrlen, 1, f); - datalen = fwrite_compressed(buf, size, f); + hdrlen = encode_header(obj_type, size, header); + sha1write(f, header, hdrlen); + if (entry->delta) { + sha1write(f, entry->delta, 20); + hdrlen += 20; + } + datalen = sha1write_compressed(f, buf, size); free(buf); return hdrlen + datalen; } +static unsigned long write_one(struct sha1file *f, + struct object_entry *e, + unsigned long offset) +{ + if (e->offset) + /* offset starts from header size and cannot be zero + * if it is written already. + */ + return offset; + e->offset = offset; + offset += write_object(f, e); + /* if we are delitified, write out its base object. */ + if (e->delta) + offset = write_one(f, e->delta, offset); + return offset; +} + static void write_pack_file(void) { int i; - FILE *f = create_file("pack"); - unsigned long offset = 0; + struct sha1file *f; + unsigned long offset; unsigned long mb; + struct pack_header hdr; - for (i = 0; i < nr_objects; i++) { - struct object_entry *entry = objects + i; - entry->offset = offset; - offset += write_object(f, entry); - } - fclose(f); + if (!base_name) + f = sha1fd(1, ""); + else + f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); + hdr.hdr_signature = htonl(PACK_SIGNATURE); + hdr.hdr_version = htonl(PACK_VERSION); + hdr.hdr_entries = htonl(nr_objects); + sha1write(f, &hdr, sizeof(hdr)); + offset = sizeof(hdr); + for (i = 0; i < nr_objects; i++) + offset = write_one(f, objects + i, offset); + + sha1close(f, pack_file_sha1, 1); mb = offset >> 20; offset &= 0xfffff; } @@ -146,7 +153,7 @@ static void write_pack_file(void) static void write_index_file(void) { int i; - FILE *f = create_file("idx"); + struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); struct object_entry **list = sorted_by_sha; struct object_entry **last = list + nr_objects; unsigned int array[256]; @@ -154,7 +161,7 @@ static void write_index_file(void) /* * Write the first-level table (the list is sorted, * but we use a 256-entry lookup to be able to avoid - * having to do eight extra binary search iterations) + * having to do eight extra binary search iterations). */ for (i = 0; i < 256; i++) { struct object_entry **next = list; @@ -167,7 +174,7 @@ static void write_index_file(void) array[i] = htonl(next - sorted_by_sha); list = next; } - fwrite(array, 256, sizeof(int), f); + sha1write(f, array, 256 * sizeof(int)); /* * Write the actual SHA1 entries.. @@ -176,17 +183,33 @@ static void write_index_file(void) for (i = 0; i < nr_objects; i++) { struct object_entry *entry = *list++; unsigned int offset = htonl(entry->offset); - fwrite(&offset, 4, 1, f); - fwrite(entry->sha1, 20, 1, f); + sha1write(f, &offset, 4); + sha1write(f, entry->sha1, 20); } - fclose(f); + sha1write(f, pack_file_sha1, 20); + sha1close(f, NULL, 1); } -static void add_object_entry(unsigned char *sha1) +static int add_object_entry(unsigned char *sha1, unsigned int hash) { unsigned int idx = nr_objects; struct object_entry *entry; + if (incremental || local) { + struct packed_git *p; + + for (p = packed_git; p; p = p->next) { + struct pack_entry e; + + if (find_pack_entry_one(sha1, &e, p)) { + if (incremental) + return 0; + if (local && !p->pack_local) + return 0; + } + } + } + if (idx >= nr_alloc) { unsigned int needed = (idx + 1024) * 3 / 2; objects = xrealloc(objects, needed * sizeof(*entry)); @@ -195,33 +218,31 @@ static void add_object_entry(unsigned char *sha1) entry = objects + idx; memset(entry, 0, sizeof(*entry)); memcpy(entry->sha1, sha1, 20); + entry->hash = hash; nr_objects = idx+1; + return 1; } static void check_object(struct object_entry *entry) { - char buffer[128]; - char type[10]; - unsigned long mapsize; - z_stream stream; - void *map; - - map = map_sha1_file(entry->sha1, &mapsize); - if (!map) - die("unable to map %s", sha1_to_hex(entry->sha1)); - if (unpack_sha1_header(&stream, map, mapsize, buffer, sizeof(buffer)) < 0) - die("unable to unpack %s header", sha1_to_hex(entry->sha1)); - munmap(map, mapsize); - if (parse_sha1_header(buffer, type, &entry->size) < 0) - die("unable to parse %s header", sha1_to_hex(entry->sha1)); - if (!strcmp(type, "commit")) { - entry->type = OBJ_COMMIT; - } else if (!strcmp(type, "tree")) { - entry->type = OBJ_TREE; - } else if (!strcmp(type, "blob")) { - entry->type = OBJ_BLOB; - } else - die("unable to pack object %s of type %s", sha1_to_hex(entry->sha1), type); + char type[20]; + + if (!sha1_object_info(entry->sha1, type, &entry->size)) { + if (!strcmp(type, "commit")) { + entry->type = OBJ_COMMIT; + } else if (!strcmp(type, "tree")) { + entry->type = OBJ_TREE; + } else if (!strcmp(type, "blob")) { + entry->type = OBJ_BLOB; + } else if (!strcmp(type, "tag")) { + entry->type = OBJ_TAG; + } else + die("unable to pack object %s of type %s", + sha1_to_hex(entry->sha1), type); + } + else + die("unable to get type of object %s", + sha1_to_hex(entry->sha1)); } static void get_object_details(void) @@ -267,6 +288,10 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr return -1; if (a->type > b->type) return 1; + if (a->hash < b->hash) + return -1; + if (a->hash > b->hash) + return 1; if (a->size < b->size) return -1; if (a->size > b->size) @@ -280,18 +305,18 @@ struct unpacked { }; /* - * We search for deltas in a list sorted by type and by size, and - * walk the "old" chain backwards in the list, so if the type - * has changed or the size difference is too big, there's no point - * in even continuing the walk, since the other old objects are - * going to be even smaller or of a different type. So return -1 - * once we determine that there's no point even trying. + * We search for deltas _backwards_ in a list sorted by type and + * by size, so that we see progressively smaller and smaller files. + * That's because we prefer deltas to be from the bigger file + * to the smaller - deletes are potentially cheaper, but perhaps + * more importantly, the bigger file is likely the more recent + * one. */ -static int try_delta(struct unpacked *cur, struct unpacked *old) +static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) { struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; - unsigned long size, oldsize, delta_size; + unsigned long size, oldsize, delta_size, sizediff; long max_size; void *delta_buf; @@ -299,13 +324,15 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) if (cur_entry->type != old_entry->type) return -1; - /* Size is guaranteed to be larger than or equal to oldsize */ size = cur_entry->size; if (size < 50) return -1; oldsize = old_entry->size; - if (size - oldsize > oldsize / 4) + sizediff = oldsize > size ? oldsize - size : size - oldsize; + if (sizediff > size / 8) return -1; + if (old_entry->depth >= max_depth) + return 0; /* * NOTE! @@ -317,24 +344,29 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) max_size = size / 2 - 20; if (cur_entry->delta) max_size = cur_entry->delta_size-1; - delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size, max_size); + if (sizediff >= max_size) + return -1; + delta_buf = diff_delta(old->data, oldsize, + cur->data, size, &delta_size, max_size); if (!delta_buf) return 0; cur_entry->delta = old_entry; cur_entry->delta_size = delta_size; + cur_entry->depth = old_entry->depth + 1; free(delta_buf); return 0; } -static void find_deltas(struct object_entry **list, int window) +static void find_deltas(struct object_entry **list, int window, int depth) { - unsigned int i; + int i, idx; unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array = xmalloc(array_size); memset(array, 0, array_size); - for (i = 0; i < nr_objects; i++) { - unsigned int idx = i % window; + i = nr_objects; + idx = 0; + while (--i >= 0) { struct object_entry *entry = list[i]; struct unpacked *n = array + idx; unsigned long size; @@ -355,28 +387,61 @@ static void find_deltas(struct object_entry **list, int window) m = array + other_idx; if (!m->entry) break; - if (try_delta(n, m) < 0) + if (try_delta(n, m, depth) < 0) break; } + idx++; + if (idx >= window) + idx = 0; } + + for (i = 0; i < window; ++i) + free(array[i].data); + free(array); } int main(int argc, char **argv) { - char line[128]; + SHA_CTX ctx; + char line[PATH_MAX + 20]; + int window = 10, depth = 10, pack_to_stdout = 0; + struct object_entry **list; int i; for (i = 1; i < argc; i++) { const char *arg = argv[i]; if (*arg == '-') { + if (!strcmp("--non-empty", arg)) { + non_empty = 1; + continue; + } + if (!strcmp("--local", arg)) { + local = 1; + continue; + } + if (!strcmp("--incremental", arg)) { + incremental = 1; + continue; + } if (!strncmp("--window=", arg, 9)) { char *end; - delta_window = strtoul(arg+9, &end, 0); + window = strtoul(arg+9, &end, 0); if (!arg[9] || *end) usage(pack_usage); continue; } + if (!strncmp("--depth=", arg, 8)) { + char *end; + depth = strtoul(arg+8, &end, 0); + if (!arg[8] || *end) + usage(pack_usage); + continue; + } + if (!strcmp("--stdout", arg)) { + pack_to_stdout = 1; + continue; + } usage(pack_usage); } if (base_name) @@ -384,25 +449,50 @@ int main(int argc, char **argv) base_name = arg; } - if (!base_name) + if (pack_to_stdout != !base_name) usage(pack_usage); + prepare_packed_git(); while (fgets(line, sizeof(line), stdin) != NULL) { + unsigned int hash; + char *p; unsigned char sha1[20]; + if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage"); - add_object_entry(sha1); + hash = 0; + p = line+40; + while (*p) { + unsigned char c = *p++; + if (isspace(c)) + continue; + hash = hash * 11 + c; + } + add_object_entry(sha1, hash); } + if (non_empty && !nr_objects) + return 0; get_object_details(); - printf("Packing %d objects\n", nr_objects); + fprintf(stderr, "Packing %d objects\n", nr_objects); sorted_by_sha = create_sorted_list(sha1_sort); + SHA1_Init(&ctx); + list = sorted_by_sha; + for (i = 0; i < nr_objects; i++) { + struct object_entry *entry = *list++; + SHA1_Update(&ctx, entry->sha1, 20); + } + SHA1_Final(object_list_sha1, &ctx); + sorted_by_type = create_sorted_list(type_size_sort); - if (delta_window) - find_deltas(sorted_by_type, delta_window); + if (window && depth) + find_deltas(sorted_by_type, window+1, depth); write_pack_file(); - write_index_file(); + if (!pack_to_stdout) { + write_index_file(); + puts(sha1_to_hex(object_list_sha1)); + } return 0; }