X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=pack-objects.c;h=7d6247791d9374986c6fb85159433ecd91b9ccec;hb=687dd75c95f9212244b6cf4fe60b40db44de01ba;hp=70fb2afeb8cfdef06b20dc7f96cd427f7c50ab60;hpb=3f9ac8d259fb919e001671c5e403e5fceaabf0d8;p=git.git diff --git a/pack-objects.c b/pack-objects.c index 70fb2afe..7d624779 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -4,21 +4,29 @@ #include "pack.h" #include "csum-file.h" #include +#include -static const char pack_usage[] = "git-pack-objects [-q] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; +static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; struct object_entry { unsigned char sha1[20]; unsigned long size; /* uncompressed size */ - unsigned long offset; /* offset into the final pack file (nonzero if already written) */ + unsigned long offset; /* offset into the final pack file; + * nonzero if already written. + */ unsigned int depth; /* delta depth */ + unsigned int delta_limit; /* base adjustment for in-pack delta */ unsigned int hash; /* name hint hash */ enum object_type type; + enum object_type in_pack_type; /* could be delta */ unsigned long delta_size; /* delta data size (uncompressed) */ struct object_entry *delta; /* delta base object */ struct packed_git *in_pack; /* already in pack */ - enum object_type in_pack_type; /* could be delta */ unsigned int in_pack_offset; + struct object_entry *delta_child; /* delitified objects who bases me */ + struct object_entry *delta_sibling; /* other deltified objects who + * uses the same base as me + */ }; /* @@ -36,6 +44,7 @@ struct object_entry { static unsigned char object_list_sha1[20]; static int non_empty = 0; +static int no_reuse_delta = 0; static int local = 0; static int incremental = 0; static struct object_entry **sorted_by_sha, **sorted_by_type; @@ -44,6 +53,7 @@ static int nr_objects = 0, nr_alloc = 0; static const char *base_name; static unsigned char pack_file_sha1[20]; static int progress = 1; +static volatile sig_atomic_t progress_update = 0; /* * The object names in objects array are hashed with this hashtable, @@ -75,7 +85,9 @@ static int pack_revindex_hashsz = 0; * stats */ static int written = 0; +static int written_delta = 0; static int reused = 0; +static int reused_delta = 0; static int pack_revindex_ix(struct packed_git *p) { @@ -227,10 +239,23 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry unsigned char header[10]; unsigned hdrlen, datalen; enum object_type obj_type; + int to_reuse = 0; obj_type = entry->type; - if (!entry->in_pack || - (obj_type != entry->in_pack_type)) { + if (! entry->in_pack) + to_reuse = 0; /* can't reuse what we don't have */ + else if (obj_type == OBJ_DELTA) + to_reuse = 1; /* check_object() decided it for us */ + else if (obj_type != entry->in_pack_type) + to_reuse = 0; /* pack has delta which is unusable */ + else if (entry->delta) + to_reuse = 0; /* we want to pack afresh */ + else + to_reuse = 1; /* we have it in-pack undeltified, + * and we do not need to deltify it. + */ + + if (! to_reuse) { buf = read_sha1_file(entry->sha1, type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->sha1)); @@ -266,8 +291,12 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry sha1write(f, buf, datalen); unuse_packed_git(p); hdrlen = 0; /* not really */ + if (obj_type == OBJ_DELTA) + reused_delta++; reused++; } + if (obj_type == OBJ_DELTA) + written_delta++; written++; return hdrlen + datalen; } @@ -294,20 +323,39 @@ static void write_pack_file(void) int i; struct sha1file *f; unsigned long offset; - unsigned long mb; struct pack_header hdr; + unsigned last_percent = 999; + int do_progress = 0; if (!base_name) f = sha1fd(1, ""); - else - f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); + else { + f = sha1create("%s-%s.%s", base_name, + sha1_to_hex(object_list_sha1), "pack"); + do_progress = progress; + } + if (do_progress) + fprintf(stderr, "Writing %d objects.\n", nr_objects); + hdr.hdr_signature = htonl(PACK_SIGNATURE); hdr.hdr_version = htonl(PACK_VERSION); hdr.hdr_entries = htonl(nr_objects); sha1write(f, &hdr, sizeof(hdr)); offset = sizeof(hdr); - for (i = 0; i < nr_objects; i++) + for (i = 0; i < nr_objects; i++) { offset = write_one(f, objects + i, offset); + if (do_progress) { + unsigned percent = written * 100 / nr_objects; + if (progress_update || percent != last_percent) { + fprintf(stderr, "%4u%% (%u/%u) done\r", + percent, written, nr_objects); + progress_update = 0; + last_percent = percent; + } + } + } + if (do_progress) + fputc('\n', stderr); sha1close(f, pack_file_sha1, 1); } @@ -357,10 +405,9 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash) unsigned int idx = nr_objects; struct object_entry *entry; struct packed_git *p; - unsigned int found_offset; - struct packed_git *found_pack; + unsigned int found_offset = 0; + struct packed_git *found_pack = NULL; - found_pack = NULL; for (p = packed_git; p; p = p->next) { struct pack_entry e; if (find_pack_entry_one(sha1, &e, p)) { @@ -420,32 +467,40 @@ static void check_object(struct object_entry *entry) char type[20]; if (entry->in_pack) { + unsigned char base[20]; + unsigned long size; + struct object_entry *base_entry; + + /* We want in_pack_type even if we do not reuse delta. + * There is no point not reusing non-delta representations. + */ + check_reuse_pack_delta(entry->in_pack, + entry->in_pack_offset, + base, &size, + &entry->in_pack_type); + /* Check if it is delta, and the base is also an object * we are going to pack. If so we will reuse the existing * delta. */ - unsigned char base[20]; - unsigned long size; - struct object_entry *base_entry; - if (!check_reuse_pack_delta(entry->in_pack, - entry->in_pack_offset, - base, &size, - &entry->in_pack_type) && + if (!no_reuse_delta && + entry->in_pack_type == OBJ_DELTA && (base_entry = locate_object_entry(base))) { - /* We do not know depth at this point, but it - * does not matter. Getting delta_chain_length - * with packed_object_info_detail() is not so - * expensive, so we could do that later if we - * wanted to. Calling sha1_object_info to get - * the true size (and later an uncompressed - * representation) of deeply deltified object - * is quite expensive. + + /* Depth value does not matter - find_deltas() + * will never consider reused delta as the + * base object to deltify other objects + * against, in order to avoid circular deltas. */ - entry->depth = 1; - /* uncompressed size */ + + /* uncompressed size of the delta data */ entry->size = entry->delta_size = size; entry->delta = base_entry; entry->type = OBJ_DELTA; + + entry->delta_sibling = base_entry->delta_child; + base_entry->delta_child = entry; + return; } /* Otherwise we would do the usual */ @@ -487,15 +542,32 @@ static void hash_objects(void) } } +static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) +{ + struct object_entry *child = me->delta_child; + unsigned int m = n; + while (child) { + unsigned int c = check_delta_limit(child, n + 1); + if (m < c) + m = c; + child = child->delta_sibling; + } + return m; +} + static void get_object_details(void) { int i; - struct object_entry *entry = objects; + struct object_entry *entry; hash_objects(); prepare_pack_ix(); - for (i = 0; i < nr_objects; i++) - check_object(entry++); + for (i = 0, entry = objects; i < nr_objects; i++, entry++) + check_object(entry); + for (i = 0, entry = objects; i < nr_objects; i++, entry++) + if (!entry->delta && entry->delta_child) + entry->delta_limit = + check_delta_limit(entry, 1); } typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); @@ -568,6 +640,16 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de if (cur_entry->type != old_entry->type) return -1; + /* If the current object is at edge, take the depth the objects + * that depend on the current object into account -- otherwise + * they would become too deep. + */ + if (cur_entry->delta_child) { + if (max_depth <= cur_entry->delta_limit) + return 0; + max_depth -= cur_entry->delta_limit; + } + size = cur_entry->size; if (size < 50) return -1; @@ -601,17 +683,24 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de return 0; } +static void progress_interval(int signum) +{ + progress_update = 1; +} + static void find_deltas(struct object_entry **list, int window, int depth) { int i, idx; unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array = xmalloc(array_size); - int eye_candy; + unsigned processed = 0; + unsigned last_percent = 999; memset(array, 0, array_size); i = nr_objects; idx = 0; - eye_candy = i - (nr_objects / 20); + if (progress) + fprintf(stderr, "Deltifying %d objects.\n", nr_objects); while (--i >= 0) { struct object_entry *entry = list[i]; @@ -620,14 +709,20 @@ static void find_deltas(struct object_entry **list, int window, int depth) char type[10]; int j; - if (progress && i <= eye_candy) { - eye_candy -= nr_objects / 20; - fputc('.', stderr); + processed++; + if (progress) { + unsigned percent = processed * 100 / nr_objects; + if (percent != last_percent || progress_update) { + fprintf(stderr, "%4u%% (%u/%u) done\r", + percent, processed, nr_objects); + progress_update = 0; + last_percent = percent; + } } if (entry->delta) /* This happens if we decided to reuse existing - * delta from a pack. + * delta from a pack. "!no_reuse_delta &&" is implied. */ continue; @@ -636,6 +731,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) n->data = read_sha1_file(entry->sha1, type, &size); if (size != entry->size) die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); + j = window; while (--j > 0) { unsigned int other_idx = idx + j; @@ -653,6 +749,9 @@ static void find_deltas(struct object_entry **list, int window, int depth) idx = 0; } + if (progress) + fputc('\n', stderr); + for (i = 0; i < window; ++i) free(array[i].data); free(array); @@ -660,18 +759,10 @@ static void find_deltas(struct object_entry **list, int window, int depth) static void prepare_pack(int window, int depth) { - if (progress) - fprintf(stderr, "Packing %d objects", nr_objects); get_object_details(); - if (progress) - fprintf(stderr, "."); - sorted_by_type = create_sorted_list(type_size_sort); if (window && depth) find_deltas(sorted_by_type, window+1, depth); - if (progress) - fputc('\n', stderr); - write_pack_file(); } static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) @@ -694,8 +785,9 @@ static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) } } - fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, - sha1_to_hex(sha1)); + if (progress) + fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, + sha1_to_hex(sha1)); if (pack_to_stdout) { if (copy_fd(ifd, 1)) @@ -727,6 +819,23 @@ static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) return 1; } +static void setup_progress_signal(void) +{ + struct sigaction sa; + struct itimerval v; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = progress_interval; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sigaction(SIGALRM, &sa, NULL); + + v.it_interval.tv_sec = 1; + v.it_interval.tv_usec = 0; + v.it_value = v.it_interval; + setitimer(ITIMER_REAL, &v, NULL); +} + int main(int argc, char **argv) { SHA_CTX ctx; @@ -734,10 +843,6 @@ int main(int argc, char **argv) int window = 10, depth = 10, pack_to_stdout = 0; struct object_entry **list; int i; - struct timeval prev_tv; - int eye_candy = 0; - int eye_candy_incr = 500; - setup_git_directory(); @@ -775,6 +880,10 @@ int main(int argc, char **argv) progress = 0; continue; } + if (!strcmp("--no-reuse-delta", arg)) { + no_reuse_delta = 1; + continue; + } if (!strcmp("--stdout", arg)) { pack_to_stdout = 1; continue; @@ -790,30 +899,31 @@ int main(int argc, char **argv) usage(pack_usage); prepare_packed_git(); + if (progress) { fprintf(stderr, "Generating pack...\n"); - gettimeofday(&prev_tv, NULL); + setup_progress_signal(); } - while (fgets(line, sizeof(line), stdin) != NULL) { + + for (;;) { unsigned int hash; char *p; unsigned char sha1[20]; - if (progress && (eye_candy <= nr_objects)) { + if (!fgets(line, sizeof(line), stdin)) { + if (feof(stdin)) + break; + if (!ferror(stdin)) + die("fgets returned NULL, not EOF, not error!"); + if (errno != EINTR) + die("fgets: %s", strerror(errno)); + clearerr(stdin); + continue; + } + + if (progress_update) { fprintf(stderr, "Counting objects...%d\r", nr_objects); - if (eye_candy && (50 <= eye_candy_incr)) { - struct timeval tv; - int time_diff; - gettimeofday(&tv, NULL); - time_diff = (tv.tv_sec - prev_tv.tv_sec); - time_diff <<= 10; - time_diff += (tv.tv_usec - prev_tv.tv_usec); - if ((1 << 9) < time_diff) - eye_candy_incr += 50; - else if (50 < eye_candy_incr) - eye_candy_incr -= 50; - } - eye_candy += eye_candy_incr; + progress_update = 0; } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); @@ -845,12 +955,21 @@ int main(int argc, char **argv) ; else { prepare_pack(window, depth); + if (progress && pack_to_stdout) { + /* the other end usually displays progress itself */ + struct itimerval v = {{0,},}; + setitimer(ITIMER_REAL, &v, NULL); + signal(SIGALRM, SIG_IGN ); + progress_update = 0; + } + write_pack_file(); if (!pack_to_stdout) { write_index_file(); puts(sha1_to_hex(object_list_sha1)); } } - fprintf(stderr, "Total %d, written %d, reused %d\n", - nr_objects, written, reused); + if (progress) + fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", + nr_objects, written, written_delta, reused, reused_delta); return 0; }