builtin-grep: support '-l' option.
[git.git] / builtin-grep.c
1 /*
2  * Builtin "git grep"
3  *
4  * Copyright (c) 2006 Junio C Hamano
5  */
6 #include "cache.h"
7 #include "blob.h"
8 #include "tree.h"
9 #include "commit.h"
10 #include "tag.h"
11 #include "diff.h"
12 #include "revision.h"
13 #include "builtin.h"
14 #include <regex.h>
15 #include <fnmatch.h>
16
17 /*
18  * git grep pathspecs are somewhat different from diff-tree pathspecs;
19  * pathname wildcards are allowed.
20  */
21 static int pathspec_matches(struct diff_options *opt, const char *name)
22 {
23         int namelen, i;
24         if (!opt->nr_paths)
25                 return 1;
26         namelen = strlen(name);
27         for (i = 0; i < opt->nr_paths; i++) {
28                 const char *match = opt->paths[i];
29                 int matchlen = opt->pathlens[i];
30                 const char *slash, *cp;
31
32                 if ((matchlen <= namelen) &&
33                     !strncmp(name, match, matchlen) &&
34                     (match[matchlen-1] == '/' ||
35                      name[matchlen] == '\0' || name[matchlen] == '/'))
36                         return 1;
37                 if (!fnmatch(match, name, 0))
38                         return 1;
39                 if (name[namelen-1] != '/')
40                         continue;
41
42                 /* We are being asked if the name directory is worth
43                  * descending into.
44                  *
45                  * Find the longest leading directory name that does
46                  * not have metacharacter in the pathspec; the name
47                  * we are looking at must overlap with that directory.
48                  */
49                 for (cp = match, slash = NULL; cp - match < matchlen; cp++) {
50                         char ch = *cp;
51                         if (ch == '/')
52                                 slash = cp;
53                         if (ch == '*' || ch == '[')
54                                 break;
55                 }
56                 if (!slash)
57                         slash = match; /* toplevel */
58                 else
59                         slash++;
60                 if (namelen <= slash - match) {
61                         /* Looking at "Documentation/" and
62                          * the pattern says "Documentation/howto/", or
63                          * "Documentation/diff*.txt".
64                          */
65                         if (!memcmp(match, name, namelen))
66                                 return 1;
67                 }
68                 else {
69                         /* Looking at "Documentation/howto/" and
70                          * the pattern says "Documentation/h*".
71                          */
72                         if (!memcmp(match, name, slash - match))
73                                 return 1;
74                 }
75         }
76         return 0;
77 }
78
/*
 * Settings for one "git grep" run, filled in by cmd_grep() from the
 * command line and consulted while scanning each buffer.
 */
struct grep_opt {
        /* pattern text as given on the command line */
        const char *pattern;
        /* the same pattern after regcomp() */
        regex_t regexp;
        /* -n: prefix each shown line with its line number */
        unsigned int linenum : 1;
        /* -v: treat lines that do NOT match as hits */
        unsigned int invert : 1;
        /* -l: show only the name of a file that has a hit */
        unsigned int name_only : 1;
        /* flags handed to regcomp() */
        int regflags;
        /* -B / -C: number of lines to show before a hit */
        unsigned int pre_context;
        /* -A / -C: number of lines to show after a hit */
        unsigned int post_context;
};
89
/*
 * Advance cp to the next '\n', looking at no more than *left bytes.
 *
 * Returns a pointer to the newline, or to the byte just past the
 * scanned region when no newline was found.  *left is decreased by
 * the number of bytes skipped; the newline itself is not consumed.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
        unsigned long remaining;

        for (remaining = *left; remaining != 0; remaining--, cp++) {
                if (*cp == '\n')
                        break;
        }
        *left = remaining;
        return cp;
}
100
101 static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
102                       const char *name, unsigned lno, char sign)
103 {
104         printf("%s%c", name, sign);
105         if (opt->linenum)
106                 printf("%d%c", lno, sign);
107         printf("%.*s\n", eol-bol, bol);
108 }
109
110 static int grep_buffer(struct grep_opt *opt, const char *name,
111                        char *buf, unsigned long size)
112 {
113         char *bol = buf;
114         unsigned long left = size;
115         unsigned lno = 1;
116         struct pre_context_line {
117                 char *bol;
118                 char *eol;
119         } *prev = NULL, *pcl;
120         unsigned last_hit = 0;
121         unsigned last_shown = 0;
122         const char *hunk_mark = "";
123
124         if (opt->pre_context)
125                 prev = xcalloc(opt->pre_context, sizeof(*prev));
126         if (opt->pre_context || opt->post_context)
127                 hunk_mark = "--\n";
128
129         while (left) {
130                 regmatch_t pmatch[10];
131                 char *eol, ch;
132                 int hit;
133
134                 eol = end_of_line(bol, &left);
135                 ch = *eol;
136                 *eol = 0;
137
138                 hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch),
139                                pmatch, 0);
140                 if (opt->invert)
141                         hit = !hit;
142                 if (hit) {
143                         if (opt->name_only) {
144                                 printf("%s\n", name);
145                                 return 1;
146                         }
147                         /* Hit at this line.  If we haven't shown the
148                          * pre-context lines, we would need to show them.
149                          */
150                         if (opt->pre_context) {
151                                 unsigned from;
152                                 if (opt->pre_context < lno)
153                                         from = lno - opt->pre_context;
154                                 else
155                                         from = 1;
156                                 if (from <= last_shown)
157                                         from = last_shown + 1;
158                                 if (last_shown && from != last_shown + 1)
159                                         printf(hunk_mark);
160                                 while (from < lno) {
161                                         pcl = &prev[lno-from-1];
162                                         show_line(opt, pcl->bol, pcl->eol,
163                                                   name, from, '-');
164                                         from++;
165                                 }
166                                 last_shown = lno-1;
167                         }
168                         if (last_shown && lno != last_shown + 1)
169                                 printf(hunk_mark);
170                         show_line(opt, bol, eol, name, lno, ':');
171                         last_shown = last_hit = lno;
172                 }
173                 else if (last_hit &&
174                          lno <= last_hit + opt->post_context) {
175                         /* If the last hit is within the post context,
176                          * we need to show this line.
177                          */
178                         if (last_shown && lno != last_shown + 1)
179                                 printf(hunk_mark);
180                         show_line(opt, bol, eol, name, lno, '-');
181                         last_shown = lno;
182                 }
183                 if (opt->pre_context) {
184                         memmove(prev+1, prev,
185                                 (opt->pre_context-1) * sizeof(*prev));
186                         prev->bol = bol;
187                         prev->eol = eol;
188                 }
189                 *eol = ch;
190                 bol = eol + 1;
191                 left--;
192                 lno++;
193         }
194         return !!last_hit;
195 }
196
/*
 * Read the object named by sha1 and grep its contents, labelling any
 * output with name.
 *
 * Returns the result of grep_buffer(), or 0 after reporting an error
 * when the object cannot be read.
 */
static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
{
        char type[20];
        unsigned long size;
        int found = 0;
        char *contents = read_sha1_file(sha1, type, &size);

        if (contents) {
                found = grep_buffer(opt, name, contents, size);
                free(contents);
        } else {
                error("'%s': unable to read %s", name, sha1_to_hex(sha1));
        }
        return found;
}
212
/*
 * Grep the working tree file at filename.
 *
 * Empty files and anything that is not a regular file (judged by
 * lstat(), so symbolic links are not followed) count as "no hit".
 * A file that does not exist is skipped silently; any other failure
 * of lstat() or open(), and a short read, are reported via error().
 *
 * Returns the result of grep_buffer(), or 0 in all the cases above.
 */
static int grep_file(struct grep_opt *opt, const char *filename)
{
        struct stat st;
        int fd, hit;
        char *data;

        if (lstat(filename, &st) < 0)
                goto err_ret;
        /* empty file -- no grep hit; non-regular files are not searched */
        if (!st.st_size || !S_ISREG(st.st_mode))
                return 0;

        fd = open(filename, O_RDONLY);
        if (fd < 0)
                goto err_ret;

        /* one spare byte, since grep_buffer() may write at data[size] */
        data = xmalloc(st.st_size + 1);
        if (st.st_size != xread(fd, data, st.st_size)) {
                error("'%s': short read %s", filename, strerror(errno));
                close(fd);
                free(data);
                return 0;
        }
        close(fd);

        hit = grep_buffer(opt, filename, data, st.st_size);
        free(data);
        return hit;

err_ret:
        if (errno != ENOENT)
                error("'%s': %s", filename, strerror(errno));
        return 0;
}
243
244 static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached)
245 {
246         int hit = 0;
247         int nr;
248         read_cache();
249
250         for (nr = 0; nr < active_nr; nr++) {
251                 struct cache_entry *ce = active_cache[nr];
252                 if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
253                         continue;
254                 if (!pathspec_matches(&revs->diffopt, ce->name))
255                         continue;
256                 if (cached)
257                         hit |= grep_sha1(opt, ce->sha1, ce->name);
258                 else
259                         hit |= grep_file(opt, ce->name);
260         }
261         return hit;
262 }
263
264 static int grep_tree(struct grep_opt *opt, struct rev_info *revs,
265                      struct tree_desc *tree,
266                      const char *tree_name, const char *base)
267 {
268         unsigned mode;
269         int len;
270         int hit = 0;
271         const char *path;
272         const unsigned char *sha1;
273         char *down;
274         char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
275
276         if (tree_name[0]) {
277                 int offset = sprintf(path_buf, "%s:", tree_name);
278                 down = path_buf + offset;
279                 strcat(down, base);
280         }
281         else {
282                 down = path_buf;
283                 strcpy(down, base);
284         }
285         len = strlen(path_buf);
286
287         while (tree->size) {
288                 int pathlen;
289                 sha1 = tree_entry_extract(tree, &path, &mode);
290                 pathlen = strlen(path);
291                 strcpy(path_buf + len, path);
292
293                 if (S_ISDIR(mode))
294                         /* Match "abc/" against pathspec to
295                          * decide if we want to descend into "abc"
296                          * directory.
297                          */
298                         strcpy(path_buf + len + pathlen, "/");
299
300                 if (!pathspec_matches(&revs->diffopt, down))
301                         ;
302                 else if (S_ISREG(mode))
303                         hit |= grep_sha1(opt, sha1, path_buf);
304                 else if (S_ISDIR(mode)) {
305                         char type[20];
306                         struct tree_desc sub;
307                         void *data;
308                         data = read_sha1_file(sha1, type, &sub.size);
309                         if (!data)
310                                 die("unable to read tree (%s)",
311                                     sha1_to_hex(sha1));
312                         sub.buf = data;
313                         hit |= grep_tree(opt, revs, &sub, tree_name, down);
314                         free(data);
315                 }
316                 update_tree_entry(tree);
317         }
318         return hit;
319 }
320
321 static int grep_object(struct grep_opt *opt, struct rev_info *revs,
322                        struct object *obj, const char *name)
323 {
324         if (!strcmp(obj->type, blob_type))
325                 return grep_sha1(opt, obj->sha1, name);
326         if (!strcmp(obj->type, commit_type) ||
327             !strcmp(obj->type, tree_type)) {
328                 struct tree_desc tree;
329                 void *data;
330                 int hit;
331                 data = read_object_with_reference(obj->sha1, tree_type,
332                                                   &tree.size, NULL);
333                 if (!data)
334                         die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
335                 tree.buf = data;
336                 hit = grep_tree(opt, revs, &tree, name, "");
337                 free(data);
338                 return hit;
339         }
340         die("unable to grep from object of type %s", obj->type);
341 }
342
/* Synopsis handed to usage() when the command line cannot be parsed. */
static const char builtin_grep_usage[] =
        "git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
345
346 int cmd_grep(int argc, const char **argv, char **envp)
347 {
348         struct rev_info rev;
349         const char **dst, **src;
350         int err;
351         int hit = 0;
352         int no_more_arg = 0;
353         int seen_range = 0;
354         int seen_noncommit = 0;
355         int cached = 0;
356         struct grep_opt opt;
357         struct object_list *list;
358
359         memset(&opt, 0, sizeof(opt));
360         opt.regflags = REG_NEWLINE;
361
362         /*
363          * Interpret and remove the grep options upfront.  Sigh...
364          */
365         for (dst = src = &argv[1]; src < argc + argv; ) {
366                 const char *arg = *src++;
367                 if (!no_more_arg) {
368                         if (!strcmp("--", arg)) {
369                                 no_more_arg = 1;
370                                 *dst++ = arg;
371                                 continue;
372                         }
373                         if (!strcmp("--cached", arg)) {
374                                 cached = 1;
375                                 continue;
376                         }
377                         if (!strcmp("-i", arg) ||
378                             !strcmp("--ignore-case", arg)) {
379                                 opt.regflags |= REG_ICASE;
380                                 continue;
381                         }
382                         if (!strcmp("-v", arg) ||
383                             !strcmp("--invert-match", arg)) {
384                                 opt.invert = 1;
385                                 continue;
386                         }
387                         if (!strcmp("-E", arg) ||
388                             !strcmp("--extended-regexp", arg)) {
389                                 opt.regflags |= REG_EXTENDED;
390                                 continue;
391                         }
392                         if (!strcmp("-G", arg) ||
393                             !strcmp("--basic-regexp", arg)) {
394                                 opt.regflags &= ~REG_EXTENDED;
395                                 continue;
396                         }
397                         if (!strcmp("-e", arg)) {
398                                 if (src < argc + argv) {
399                                         opt.pattern = *src++;
400                                         continue;
401                                 }
402                                 usage(builtin_grep_usage);
403                         }
404                         if (!strcmp("-n", arg)) {
405                                 opt.linenum = 1;
406                                 continue;
407                         }
408                         if (!strcmp("-H", arg)) {
409                                 /* We always show the pathname, so this
410                                  * is a noop.
411                                  */
412                                 continue;
413                         }
414                         if (!strcmp("-l", arg) ||
415                             !strcmp("--files-with-matches", arg)) {
416                                 opt.name_only = 1;
417                                 continue;
418                         }
419                         if (!strcmp("-A", arg) ||
420                             !strcmp("-B", arg) ||
421                             !strcmp("-C", arg)) {
422                                 unsigned num;
423                                 if ((argc + argv <= src) ||
424                                     sscanf(*src++, "%u", &num) != 1)
425                                         usage(builtin_grep_usage);
426                                 switch (arg[1]) {
427                                 case 'A':
428                                         opt.post_context = num;
429                                         break;
430                                 case 'C':
431                                         opt.post_context = num;
432                                 case 'B':
433                                         opt.pre_context = num;
434                                         break;
435                                 }
436                                 continue;
437                         }
438                 }
439                 *dst++ = arg;
440         }
441         if (!opt.pattern)
442                 die("no pattern given.");
443
444         err = regcomp(&opt.regexp, opt.pattern, opt.regflags);
445         if (err) {
446                 char errbuf[1024];
447                 regerror(err, &opt.regexp, errbuf, 1024);
448                 regfree(&opt.regexp);
449                 die("'%s': %s", opt.pattern, errbuf);
450         }
451
452         init_revisions(&rev);
453         *dst = NULL;
454         argc = setup_revisions(dst - argv, argv, &rev, NULL);
455
456         /*
457          * Do not walk "grep -e foo master next pu -- Documentation/"
458          * but do walk "grep -e foo master..next -- Documentation/".
459          * Ranged request mixed with a blob or tree object, like
460          * "grep -e foo v1.0.0:Documentation/ master..next"
461          * so detect that and complain.
462          */
463         for (list = rev.pending_objects; list; list = list->next) {
464                 struct object *real_obj;
465                 if (list->item->flags & UNINTERESTING)
466                         seen_range = 1;
467                 real_obj = deref_tag(list->item, NULL, 0);
468                 if (strcmp(real_obj->type, commit_type))
469                         seen_noncommit = 1;
470         }
471         if (!rev.pending_objects)
472                 return !grep_cache(&opt, &rev, cached);
473         if (cached)
474                 die("both --cached and revisions given.");
475
476         if (seen_range && seen_noncommit)
477                 die("both A..B and non commit are given.");
478         if (seen_range) {
479                 struct commit *commit;
480                 prepare_revision_walk(&rev);
481                 while ((commit = get_revision(&rev)) != NULL) {
482                         unsigned char *sha1 = commit->object.sha1;
483                         const char *n = find_unique_abbrev(sha1, rev.abbrev);
484                         char rev_name[41];
485                         strcpy(rev_name, n);
486                         if (grep_object(&opt, &rev, &commit->object, rev_name))
487                                 hit = 1;
488                         commit->buffer = NULL;
489                 }
490                 return !hit;
491         }
492
493         /* all of them are non-commit; do not walk, and
494          * do not lose their names.
495          */
496         for (list = rev.pending_objects; list; list = list->next) {
497                 struct object *real_obj;
498                 real_obj = deref_tag(list->item, NULL, 0);
499                 if (grep_object(&opt, &rev, real_obj, list->name))
500                         hit = 1;
501         }
502         return !hit;
503 }