Reduce memory usage in git-update-server-info.
[git.git] / server-info.c
1 #include "cache.h"
2 #include "refs.h"
3 #include "object.h"
4 #include "commit.h"
5 #include "tag.h"
6
7 /* refs */
8 static FILE *info_ref_fp;
9
10 static int add_info_ref(const char *path, const unsigned char *sha1)
11 {
12         fprintf(info_ref_fp, "%s        %s\n", sha1_to_hex(sha1), path);
13         return 0;
14 }
15
16 static int update_info_refs(int force)
17 {
18         char *path0 = strdup(git_path("info/refs"));
19         int len = strlen(path0);
20         char *path1 = xmalloc(len + 2);
21
22         strcpy(path1, path0);
23         strcpy(path1 + len, "+");
24
25         safe_create_leading_directories(path0);
26         info_ref_fp = fopen(path1, "w");
27         if (!info_ref_fp)
28                 return error("unable to update %s", path0);
29         for_each_ref(add_info_ref);
30         fclose(info_ref_fp);
31         rename(path1, path0);
32         free(path0);
33         free(path1);
34         return 0;
35 }
36
37 /* packs */
38 static struct pack_info {
39         unsigned long latest;
40         struct packed_git *p;
41         int old_num;
42         int new_num;
43         int nr_alloc;
44         int nr_heads;
45         unsigned char (*head)[20];
46         char dep[0]; /* more */
47 } **info;
48 static int num_pack;
49 static const char *objdir;
50 static int objdirlen;
51
52 static struct object *parse_object_cheap(const unsigned char *sha1)
53 {
54         struct object *o;
55
56         if ((o = parse_object(sha1)) == NULL)
57                 return NULL;
58         if (o->type == commit_type) {
59                 struct commit *commit = (struct commit *)o;
60                 free(commit->buffer);
61                 commit->buffer = NULL;
62         } else if (o->type == tree_type) {
63                 struct tree *tree = (struct tree *)o;
64                 struct tree_entry_list *e, *n;
65                 for (e = tree->entries; e; e = n) {
66                         free(e->name);
67                         e->name = NULL;
68                         n = e->next;
69                         free(e);
70                 }
71                 tree->entries = NULL;
72         }
73         return o;
74 }
75
76 static struct pack_info *find_pack_by_name(const char *name)
77 {
78         int i;
79         for (i = 0; i < num_pack; i++) {
80                 struct packed_git *p = info[i]->p;
81                 /* skip "/pack/" after ".git/objects" */
82                 if (!strcmp(p->pack_name + objdirlen + 6, name))
83                         return info[i];
84         }
85         return NULL;
86 }
87
88 static struct pack_info *find_pack_by_old_num(int old_num)
89 {
90         int i;
91         for (i = 0; i < num_pack; i++)
92                 if (info[i]->old_num == old_num)
93                         return info[i];
94         return NULL;
95 }
96
97 static int add_head_def(struct pack_info *this, unsigned char *sha1)
98 {
99         if (this->nr_alloc <= this->nr_heads) {
100                 this->nr_alloc = alloc_nr(this->nr_alloc);
101                 this->head = xrealloc(this->head, this->nr_alloc * 20);
102         }
103         memcpy(this->head[this->nr_heads++], sha1, 20);
104         return 0;
105 }
106
107 /* Returns non-zero when we detect that the info in the
108  * old file is useless.
109  */
110 static int parse_pack_def(const char *line, int old_cnt)
111 {
112         struct pack_info *i = find_pack_by_name(line + 2);
113         if (i) {
114                 i->old_num = old_cnt;
115                 return 0;
116         }
117         else {
118                 /* The file describes a pack that is no longer here;
119                  * dependencies between packs needs to be recalculated.
120                  */
121                 return 1;
122         }
123 }
124
125 /* Returns non-zero when we detect that the info in the
126  * old file is useless.
127  */
128 static int parse_depend_def(char *line)
129 {
130         unsigned long num;
131         char *cp, *ep;
132         struct pack_info *this, *that;
133
134         cp = line + 2;
135         num = strtoul(cp, &ep, 10);
136         if (ep == cp)
137                 return error("invalid input %s", line);
138         this = find_pack_by_old_num(num);
139         if (!this)
140                 return 0;
141         while (ep && *(cp = ep)) {
142                 num = strtoul(cp, &ep, 10);
143                 if (ep == cp)
144                         break;
145                 that = find_pack_by_old_num(num);
146                 if (!that)
147                         /* The pack this one depends on does not
148                          * exist; this should not happen because
149                          * we write out the list of packs first and
150                          * then dependency information, but it means
151                          * the file is useless anyway.
152                          */
153                         return 1;
154                 this->dep[that->new_num] = 1;
155         }
156         return 0;
157 }
158
159 /* Returns non-zero when we detect that the info in the
160  * old file is useless.
161  */
162 static int parse_head_def(char *line)
163 {
164         unsigned char sha1[20];
165         unsigned long num;
166         char *cp, *ep;
167         struct pack_info *this;
168         struct object *o;
169
170         cp = line + 2;
171         num = strtoul(cp, &ep, 10);
172         if (ep == cp || *ep++ != ' ')
173                 return error("invalid input ix %s", line);
174         this = find_pack_by_old_num(num);
175         if (!this)
176                 return 1; /* You know the drill. */
177         if (get_sha1_hex(ep, sha1) || ep[40] != ' ')
178                 return error("invalid input sha1 %s (%s)", line, ep);
179         if ((o = parse_object_cheap(sha1)) == NULL)
180                 return error("no such object: %s", line);
181         return add_head_def(this, sha1);
182 }
183
184 /* Returns non-zero when we detect that the info in the
185  * old file is useless.
186  */
187 static int read_pack_info_file(const char *infofile)
188 {
189         FILE *fp;
190         char line[1000];
191         int old_cnt = 0;
192
193         fp = fopen(infofile, "r");
194         if (!fp)
195                 return 1; /* nonexisting is not an error. */
196
197         while (fgets(line, sizeof(line), fp)) {
198                 int len = strlen(line);
199                 if (line[len-1] == '\n')
200                         line[len-1] = 0;
201
202                 switch (line[0]) {
203                 case 'P': /* P name */
204                         if (parse_pack_def(line, old_cnt++))
205                                 goto out_stale;
206                         break;
207                 case 'D': /* D ix dep-ix1 dep-ix2... */
208                         if (parse_depend_def(line))
209                                 goto out_stale;
210                         break;
211                 case 'T': /* T ix sha1 type */
212                         if (parse_head_def(line))
213                                 goto out_stale;
214                         break;
215                 default:
216                         error("unrecognized: %s", line);
217                         break;
218                 }
219         }
220         fclose(fp);
221         return 0;
222  out_stale:
223         fclose(fp);
224         return 1;
225 }
226
227 /* We sort the packs according to the date of the latest commit.  That
228  * in turn indicates how young the pack is, and in general we would
229  * want to depend on younger packs.
230  */
231 static unsigned long get_latest_commit_date(struct packed_git *p)
232 {
233         unsigned char sha1[20];
234         struct object *o;
235         int num = num_packed_objects(p);
236         int i;
237         unsigned long latest = 0;
238
239         for (i = 0; i < num; i++) {
240                 if (nth_packed_object_sha1(p, i, sha1))
241                         die("corrupt pack file %s?", p->pack_name);
242                 if ((o = parse_object_cheap(sha1)) == NULL)
243                         die("cannot parse %s", sha1_to_hex(sha1));
244                 if (o->type == commit_type) {
245                         struct commit *commit = (struct commit *)o;
246                         if (latest < commit->date)
247                                 latest = commit->date;
248                 }
249         }
250         return latest;
251 }
252
253 static int compare_info(const void *a_, const void *b_)
254 {
255         struct pack_info * const* a = a_;
256         struct pack_info * const* b = b_;
257
258         if (0 <= (*a)->old_num && 0 <= (*b)->old_num)
259                 /* Keep the order in the original */
260                 return (*a)->old_num - (*b)->old_num;
261         else if (0 <= (*a)->old_num)
262                 /* Only A existed in the original so B is obviously newer */
263                 return -1;
264         else if (0 <= (*b)->old_num)
265                 /* The other way around. */
266                 return 1;
267
268         if ((*a)->latest < (*b)->latest)
269                 return -1;
270         else if ((*a)->latest == (*b)->latest)
271                 return 0;
272         else
273                 return 1;
274 }
275
276 static void init_pack_info(const char *infofile, int force)
277 {
278         struct packed_git *p;
279         int stale;
280         int i = 0;
281         char *dep_temp;
282
283         objdir = get_object_directory();
284         objdirlen = strlen(objdir);
285
286         prepare_packed_git();
287         for (p = packed_git; p; p = p->next) {
288                 /* we ignore things on alternate path since they are
289                  * not available to the pullers in general.
290                  */
291                 if (strncmp(p->pack_name, objdir, objdirlen) ||
292                     strncmp(p->pack_name + objdirlen, "/pack/", 6))
293                         continue;
294                 i++;
295         }
296         num_pack = i;
297         info = xcalloc(num_pack, sizeof(struct pack_info *));
298         for (i = 0, p = packed_git; p; p = p->next) {
299                 if (strncmp(p->pack_name, objdir, objdirlen) ||
300                     p->pack_name[objdirlen] != '/')
301                         continue;
302                 info[i] = xcalloc(1, sizeof(struct pack_info) + num_pack);
303                 info[i]->p = p;
304                 info[i]->old_num = -1;
305                 i++;
306         }
307
308         if (infofile && !force)
309                 stale = read_pack_info_file(infofile);
310         else
311                 stale = 1;
312
313         for (i = 0; i < num_pack; i++) {
314                 if (stale) {
315                         info[i]->old_num = -1;
316                         memset(info[i]->dep, 0, num_pack);
317                         info[i]->nr_heads = 0;
318                 }
319                 if (info[i]->old_num < 0)
320                         info[i]->latest = get_latest_commit_date(info[i]->p);
321         }
322
323         qsort(info, num_pack, sizeof(info[0]), compare_info);
324         for (i = 0; i < num_pack; i++)
325                 info[i]->new_num = i;
326
327         /* we need to fix up the dependency information
328          * for the old ones.
329          */
330         dep_temp = NULL;
331         for (i = 0; i < num_pack; i++) {
332                 int old;
333
334                 if (info[i]->old_num < 0)
335                         continue;
336                 if (! dep_temp)
337                         dep_temp = xmalloc(num_pack);
338                 memset(dep_temp, 0, num_pack);
339                 for (old = 0; old < num_pack; old++) {
340                         struct pack_info *base;
341                         if (!info[i]->dep[old])
342                                 continue;
343                         base = find_pack_by_old_num(old);
344                         if (!base)
345                                 die("internal error renumbering");
346                         dep_temp[base->new_num] = 1;
347                 }
348                 memcpy(info[i]->dep, dep_temp, num_pack);
349         }
350         free(dep_temp);
351 }
352
353 static void write_pack_info_file(FILE *fp)
354 {
355         int i, j;
356         for (i = 0; i < num_pack; i++)
357                 fprintf(fp, "P %s\n", info[i]->p->pack_name + objdirlen + 6);
358
359         for (i = 0; i < num_pack; i++) {
360                 fprintf(fp, "D %1d", i);
361                 for (j = 0; j < num_pack; j++) {
362                         if ((i == j) || !(info[i]->dep[j]))
363                                 continue;
364                         fprintf(fp, " %1d", j);
365                 }
366                 fputc('\n', fp);
367         }
368
369         for (i = 0; i < num_pack; i++) {
370                 struct pack_info *this = info[i];
371                 for (j = 0; j < this->nr_heads; j++) {
372                         struct object *o = lookup_object(this->head[j]);
373                         fprintf(fp, "T %1d %s %s\n",
374                                 i, sha1_to_hex(this->head[j]), o->type);
375                 }
376         }
377
378 }
379
380 #define REFERENCED 01
381 #define INTERNAL  02
382 #define EMITTED   04
383
384 static void show(struct object *o, int pack_ix)
385 {
386         /*
387          * We are interested in objects that are not referenced,
388          * and objects that are referenced but not internal.
389          */
390         if (o->flags & EMITTED)
391                 return;
392
393         if (!(o->flags & REFERENCED))
394                 add_head_def(info[pack_ix], o->sha1);
395         else if ((o->flags & REFERENCED) && !(o->flags & INTERNAL)) {
396                 int i;
397
398                 /* Which pack contains this object?  That is what
399                  * pack_ix can depend on.  We earlier sorted info
400                  * array from youngest to oldest, so try newer packs
401                  * first to favor them here.
402                  */
403                 for (i = num_pack - 1; 0 <= i; i--) {
404                         struct packed_git *p = info[i]->p;
405                         struct pack_entry ent;
406                         if (find_pack_entry_one(o->sha1, &ent, p)) {
407                                 info[pack_ix]->dep[i] = 1;
408                                 break;
409                         }
410                 }
411         }
412         o->flags |= EMITTED;
413 }
414
415 static void find_pack_info_one(int pack_ix)
416 {
417         unsigned char sha1[20];
418         struct object *o;
419         struct object_list *ref;
420         int i;
421         struct packed_git *p = info[pack_ix]->p;
422         int num = num_packed_objects(p);
423
424         /* Scan objects, clear flags from all the edge ones and
425          * internal ones, possibly marked in the previous round.
426          */
427         for (i = 0; i < num; i++) {
428                 if (nth_packed_object_sha1(p, i, sha1))
429                         die("corrupt pack file %s?", p->pack_name);
430                 if ((o = lookup_object(sha1)) == NULL)
431                         die("cannot parse %s", sha1_to_hex(sha1));
432                 for (ref = o->refs; ref; ref = ref->next)
433                         ref->item->flags = 0;
434                 o->flags = 0;
435         }
436
437         /* Mark all the internal ones */
438         for (i = 0; i < num; i++) {
439                 if (nth_packed_object_sha1(p, i, sha1))
440                         die("corrupt pack file %s?", p->pack_name);
441                 if ((o = lookup_object(sha1)) == NULL)
442                         die("cannot find %s", sha1_to_hex(sha1));
443                 for (ref = o->refs; ref; ref = ref->next)
444                         ref->item->flags |= REFERENCED;
445                 o->flags |= INTERNAL;
446         }
447
448         for (i = 0; i < num; i++) {
449                 if (nth_packed_object_sha1(p, i, sha1))
450                         die("corrupt pack file %s?", p->pack_name);
451                 if ((o = lookup_object(sha1)) == NULL)
452                         die("cannot find %s", sha1_to_hex(sha1));
453
454                 show(o, pack_ix);
455                 for (ref = o->refs; ref; ref = ref->next)
456                         show(ref->item, pack_ix);
457         }
458
459 }
460
461 static void find_pack_info(void)
462 {
463         int i;
464         for (i = 0; i < num_pack; i++) {
465                 /* The packed objects are cast in stone, and a head
466                  * in a pack will stay as head, so is the set of missing
467                  * objects.  If the repo has been reorganized and we
468                  * are missing some packs available back then, we have
469                  * already discarded the info read from the file, so
470                  * we will find (old_num < 0) in that case.
471                  */
472                 if (0 <= info[i]->old_num)
473                         continue;
474                 find_pack_info_one(i);
475         }
476 }
477
478 static int update_info_packs(int force)
479 {
480         char infofile[PATH_MAX];
481         char name[PATH_MAX];
482         int namelen;
483         FILE *fp;
484
485         namelen = sprintf(infofile, "%s/info/packs", get_object_directory());
486         strcpy(name, infofile);
487         strcpy(name + namelen, "+");
488
489         init_pack_info(infofile, force);
490         find_pack_info();
491
492         safe_create_leading_directories(name);
493         fp = fopen(name, "w");
494         if (!fp)
495                 return error("cannot open %s", name);
496         write_pack_info_file(fp);
497         fclose(fp);
498         rename(name, infofile);
499         return 0;
500 }
501
502 /* public */
503 int update_server_info(int force)
504 {
505         /* We would add more dumb-server support files later,
506          * including index of available pack files and their
507          * intended audiences.
508          */
509         int errs = 0;
510
511         errs = errs | update_info_refs(force);
512         errs = errs | update_info_packs(force);
513
514         return errs;
515 }