builtin-grep: wildcard pathspec fixes
[git.git] / builtin-grep.c
1 /*
2  * Builtin "git grep"
3  *
4  * Copyright (c) 2006 Junio C Hamano
5  */
6 #include "cache.h"
7 #include "blob.h"
8 #include "tree.h"
9 #include "commit.h"
10 #include "tag.h"
11 #include "diff.h"
12 #include "revision.h"
13 #include "builtin.h"
14 #include <regex.h>
15 #include <fnmatch.h>
16
17 /*
18  * git grep pathspecs are somewhat different from diff-tree pathspecs;
19  * pathname wildcards are allowed.
20  */
21 static int pathspec_matches(struct diff_options *opt, const char *name)
22 {
23         int namelen, i;
24         if (!opt->nr_paths)
25                 return 1;
26         namelen = strlen(name);
27         for (i = 0; i < opt->nr_paths; i++) {
28                 const char *match = opt->paths[i];
29                 int matchlen = opt->pathlens[i];
30                 const char *slash, *cp;
31
32                 if ((matchlen <= namelen) &&
33                     !strncmp(name, match, matchlen) &&
34                     (match[matchlen-1] == '/' ||
35                      name[matchlen] == '\0' || name[matchlen] == '/'))
36                         return 1;
37                 if (!fnmatch(match, name, 0))
38                         return 1;
39                 if (name[namelen-1] != '/')
40                         continue;
41
42                 /* We are being asked if the name directory is worth
43                  * descending into.
44                  *
45                  * Find the longest leading directory name that does
46                  * not have metacharacter in the pathspec; the name
47                  * we are looking at must overlap with that directory.
48                  */
49                 for (cp = match, slash = NULL; cp - match < matchlen; cp++) {
50                         char ch = *cp;
51                         if (ch == '/')
52                                 slash = cp;
53                         if (ch == '*' || ch == '[')
54                                 break;
55                 }
56                 if (!slash)
57                         slash = match; /* toplevel */
58                 else
59                         slash++;
60                 if (namelen <= slash - match) {
61                         /* Looking at "Documentation/" and
62                          * the pattern says "Documentation/howto/", or
63                          * "Documentation/diff*.txt".
64                          */
65                         if (!memcmp(match, name, namelen))
66                                 return 1;
67                 }
68                 else {
69                         /* Looking at "Documentation/howto/" and
70                          * the pattern says "Documentation/h*".
71                          */
72                         if (!memcmp(match, name, slash - match))
73                                 return 1;
74                 }
75         }
76         return 0;
77 }
78
/*
 * Settings for one "git grep" run; cmd_grep() fills these in from the
 * command line and the grep_* helpers consult them.
 */
struct grep_opt {
	const char *pattern;	/* pattern text given with -e */
	regex_t regexp;		/* "pattern" after regcomp() */
	unsigned linenum:1;	/* -n: prefix each shown line with its number */
	unsigned invert:1;	/* -v: select the lines that do not match */
	int regflags;		/* cflags handed to regcomp() */
	unsigned pre_context;	/* -B / -C: lines shown before a hit */
	unsigned post_context;	/* -A / -C: lines shown after a hit */
};
88
/*
 * Advance "cp" to the next '\n', looking at no more than *left bytes.
 *
 * On return *left holds the number of bytes not yet consumed, counted
 * from the returned position (the newline itself is not consumed).
 * When no newline is found, *left is 0 and the returned pointer is one
 * past the last byte examined.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining > 0; remaining--, cp++) {
		if (*cp == '\n')
			break;
	}
	*left = remaining;
	return cp;
}
99
100 static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
101                       const char *name, unsigned lno, char sign)
102 {
103         printf("%s%c", name, sign);
104         if (opt->linenum)
105                 printf("%d%c", lno, sign);
106         printf("%.*s\n", eol-bol, bol);
107 }
108
109 static int grep_buffer(struct grep_opt *opt, const char *name,
110                        char *buf, unsigned long size)
111 {
112         char *bol = buf;
113         unsigned long left = size;
114         unsigned lno = 1;
115         struct pre_context_line {
116                 char *bol;
117                 char *eol;
118         } *prev = NULL, *pcl;
119         unsigned last_hit = 0;
120         unsigned last_shown = 0;
121         const char *hunk_mark = "";
122
123         if (opt->pre_context)
124                 prev = xcalloc(opt->pre_context, sizeof(*prev));
125         if (opt->pre_context || opt->post_context)
126                 hunk_mark = "--\n";
127
128         while (left) {
129                 regmatch_t pmatch[10];
130                 char *eol, ch;
131                 int hit;
132
133                 eol = end_of_line(bol, &left);
134                 ch = *eol;
135                 *eol = 0;
136
137                 hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch),
138                                pmatch, 0);
139                 if (opt->invert)
140                         hit = !hit;
141                 if (hit) {
142                         /* Hit at this line.  If we haven't shown the
143                          * pre-context lines, we would need to show them.
144                          */
145                         if (opt->pre_context) {
146                                 unsigned from;
147                                 if (opt->pre_context < lno)
148                                         from = lno - opt->pre_context;
149                                 else
150                                         from = 1;
151                                 if (from <= last_shown)
152                                         from = last_shown + 1;
153                                 if (last_shown && from != last_shown + 1)
154                                         printf(hunk_mark);
155                                 while (from < lno) {
156                                         pcl = &prev[lno-from-1];
157                                         show_line(opt, pcl->bol, pcl->eol,
158                                                   name, from, '-');
159                                         from++;
160                                 }
161                                 last_shown = lno-1;
162                         }
163                         if (last_shown && lno != last_shown + 1)
164                                 printf(hunk_mark);
165                         show_line(opt, bol, eol, name, lno, ':');
166                         last_shown = last_hit = lno;
167                 }
168                 else if (last_hit &&
169                          lno <= last_hit + opt->post_context) {
170                         /* If the last hit is within the post context,
171                          * we need to show this line.
172                          */
173                         if (last_shown && lno != last_shown + 1)
174                                 printf(hunk_mark);
175                         show_line(opt, bol, eol, name, lno, '-');
176                         last_shown = lno;
177                 }
178                 if (opt->pre_context) {
179                         memmove(prev+1, prev,
180                                 (opt->pre_context-1) * sizeof(*prev));
181                         prev->bol = bol;
182                         prev->eol = eol;
183                 }
184                 *eol = ch;
185                 bol = eol + 1;
186                 left--;
187                 lno++;
188         }
189         return !!last_hit;
190 }
191
/*
 * Load the object named by "sha1" with read_sha1_file() and grep its
 * contents, labelling the output lines with "name".
 *
 * Returns what grep_buffer() returns, or 0 after reporting an error
 * when the object cannot be read.
 */
static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
{
	char type[20];
	unsigned long size;
	int found = 0;
	char *contents = read_sha1_file(sha1, type, &size);

	if (contents) {
		found = grep_buffer(opt, name, contents, size);
		free(contents);
	} else {
		error("'%s': unable to read %s", name, sha1_to_hex(sha1));
	}
	return found;
}
207
/*
 * Grep the working-tree file "filename".
 *
 * Empty files and anything lstat() does not report as a regular file
 * are skipped silently.  A failing lstat() or open() is reported unless
 * errno is ENOENT.
 *
 * Returns what grep_buffer() returns, or 0 when the file was skipped or
 * could not be read.
 */
static int grep_file(struct grep_opt *opt, const char *filename)
{
	struct stat st;
	int fd, hit;
	char *data;

	if (lstat(filename, &st) < 0)
		goto err_ret;
	if (!st.st_size)
		return 0; /* empty file -- no grep hit */
	if (!S_ISREG(st.st_mode))
		return 0;
	fd = open(filename, O_RDONLY);
	if (fd < 0)
		goto err_ret;
	data = xmalloc(st.st_size + 1);
	if (st.st_size != xread(fd, data, st.st_size)) {
		error("'%s': short read %s", filename, strerror(errno));
		close(fd);
		free(data);
		return 0;
	}
	close(fd);
	/* grep_buffer() looks at the byte just past the contents when
	 * the last line is unterminated; that is what the extra byte
	 * was allocated for, so give it a defined value.
	 */
	data[st.st_size] = 0;
	hit = grep_buffer(opt, filename, data, st.st_size);
	free(data);
	return hit;

err_ret:
	if (errno != ENOENT)
		error("'%s': %s", filename, strerror(errno));
	return 0;
}
238
239 static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached)
240 {
241         int hit = 0;
242         int nr;
243         read_cache();
244
245         for (nr = 0; nr < active_nr; nr++) {
246                 struct cache_entry *ce = active_cache[nr];
247                 if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
248                         continue;
249                 if (!pathspec_matches(&revs->diffopt, ce->name))
250                         continue;
251                 if (cached)
252                         hit |= grep_sha1(opt, ce->sha1, ce->name);
253                 else
254                         hit |= grep_file(opt, ce->name);
255         }
256         return hit;
257 }
258
259 static int grep_tree(struct grep_opt *opt, struct rev_info *revs,
260                      struct tree_desc *tree,
261                      const char *tree_name, const char *base)
262 {
263         unsigned mode;
264         int len;
265         int hit = 0;
266         const char *path;
267         const unsigned char *sha1;
268         char *down;
269         char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
270
271         if (tree_name[0]) {
272                 int offset = sprintf(path_buf, "%s:", tree_name);
273                 down = path_buf + offset;
274                 strcat(down, base);
275         }
276         else {
277                 down = path_buf;
278                 strcpy(down, base);
279         }
280         len = strlen(path_buf);
281
282         while (tree->size) {
283                 int pathlen;
284                 sha1 = tree_entry_extract(tree, &path, &mode);
285                 pathlen = strlen(path);
286                 strcpy(path_buf + len, path);
287
288                 if (S_ISDIR(mode))
289                         /* Match "abc/" against pathspec to
290                          * decide if we want to descend into "abc"
291                          * directory.
292                          */
293                         strcpy(path_buf + len + pathlen, "/");
294
295                 if (!pathspec_matches(&revs->diffopt, down))
296                         ;
297                 else if (S_ISREG(mode))
298                         hit |= grep_sha1(opt, sha1, path_buf);
299                 else if (S_ISDIR(mode)) {
300                         char type[20];
301                         struct tree_desc sub;
302                         void *data;
303                         data = read_sha1_file(sha1, type, &sub.size);
304                         if (!data)
305                                 die("unable to read tree (%s)",
306                                     sha1_to_hex(sha1));
307                         sub.buf = data;
308                         hit |= grep_tree(opt, revs, &sub, tree_name, down);
309                         free(data);
310                 }
311                 update_tree_entry(tree);
312         }
313         return hit;
314 }
315
316 static int grep_object(struct grep_opt *opt, struct rev_info *revs,
317                        struct object *obj, const char *name)
318 {
319         if (!strcmp(obj->type, blob_type))
320                 return grep_sha1(opt, obj->sha1, name);
321         if (!strcmp(obj->type, commit_type) ||
322             !strcmp(obj->type, tree_type)) {
323                 struct tree_desc tree;
324                 void *data;
325                 int hit;
326                 data = read_object_with_reference(obj->sha1, tree_type,
327                                                   &tree.size, NULL);
328                 if (!data)
329                         die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
330                 tree.buf = data;
331                 hit = grep_tree(opt, revs, &tree, name, "");
332                 free(data);
333                 return hit;
334         }
335         die("unable to grep from object of type %s", obj->type);
336 }
337
/* Synopsis passed to usage() when the command line cannot be parsed. */
static const char builtin_grep_usage[] =
	"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
340
341 int cmd_grep(int argc, const char **argv, char **envp)
342 {
343         struct rev_info rev;
344         const char **dst, **src;
345         int err;
346         int hit = 0;
347         int no_more_arg = 0;
348         int seen_range = 0;
349         int seen_noncommit = 0;
350         int cached = 0;
351         struct grep_opt opt;
352         struct object_list *list;
353
354         memset(&opt, 0, sizeof(opt));
355         opt.regflags = REG_NEWLINE;
356
357         /*
358          * Interpret and remove the grep options upfront.  Sigh...
359          */
360         for (dst = src = &argv[1]; src < argc + argv; ) {
361                 const char *arg = *src++;
362                 if (!no_more_arg) {
363                         if (!strcmp("--", arg)) {
364                                 no_more_arg = 1;
365                                 *dst++ = arg;
366                                 continue;
367                         }
368                         if (!strcmp("--cached", arg)) {
369                                 cached = 1;
370                                 continue;
371                         }
372                         if (!strcmp("-i", arg) ||
373                             !strcmp("--ignore-case", arg)) {
374                                 opt.regflags |= REG_ICASE;
375                                 continue;
376                         }
377                         if (!strcmp("-v", arg) ||
378                             !strcmp("--invert-match", arg)) {
379                                 opt.invert = 1;
380                                 continue;
381                         }
382                         if (!strcmp("-E", arg) ||
383                             !strcmp("--extended-regexp", arg)) {
384                                 opt.regflags |= REG_EXTENDED;
385                                 continue;
386                         }
387                         if (!strcmp("-G", arg) ||
388                             !strcmp("--basic-regexp", arg)) {
389                                 opt.regflags &= ~REG_EXTENDED;
390                                 continue;
391                         }
392                         if (!strcmp("-e", arg)) {
393                                 if (src < argc + argv) {
394                                         opt.pattern = *src++;
395                                         continue;
396                                 }
397                                 usage(builtin_grep_usage);
398                         }
399                         if (!strcmp("-n", arg)) {
400                                 opt.linenum = 1;
401                                 continue;
402                         }
403                         if (!strcmp("-H", arg)) {
404                                 /* We always show the pathname, so this
405                                  * is a noop.
406                                  */
407                                 continue;
408                         }
409                         if (!strcmp("-A", arg) ||
410                             !strcmp("-B", arg) ||
411                             !strcmp("-C", arg)) {
412                                 unsigned num;
413                                 if ((argc + argv <= src) ||
414                                     sscanf(*src++, "%u", &num) != 1)
415                                         usage(builtin_grep_usage);
416                                 switch (arg[1]) {
417                                 case 'A':
418                                         opt.post_context = num;
419                                         break;
420                                 case 'C':
421                                         opt.post_context = num;
422                                 case 'B':
423                                         opt.pre_context = num;
424                                         break;
425                                 }
426                                 continue;
427                         }
428                 }
429                 *dst++ = arg;
430         }
431         if (!opt.pattern)
432                 die("no pattern given.");
433
434         err = regcomp(&opt.regexp, opt.pattern, opt.regflags);
435         if (err) {
436                 char errbuf[1024];
437                 regerror(err, &opt.regexp, errbuf, 1024);
438                 regfree(&opt.regexp);
439                 die("'%s': %s", opt.pattern, errbuf);
440         }
441
442         init_revisions(&rev);
443         *dst = NULL;
444         argc = setup_revisions(dst - argv, argv, &rev, NULL);
445
446         /*
447          * Do not walk "grep -e foo master next pu -- Documentation/"
448          * but do walk "grep -e foo master..next -- Documentation/".
449          * Ranged request mixed with a blob or tree object, like
450          * "grep -e foo v1.0.0:Documentation/ master..next"
451          * so detect that and complain.
452          */
453         for (list = rev.pending_objects; list; list = list->next) {
454                 struct object *real_obj;
455                 if (list->item->flags & UNINTERESTING)
456                         seen_range = 1;
457                 real_obj = deref_tag(list->item, NULL, 0);
458                 if (strcmp(real_obj->type, commit_type))
459                         seen_noncommit = 1;
460         }
461         if (!rev.pending_objects)
462                 return !grep_cache(&opt, &rev, cached);
463         if (cached)
464                 die("both --cached and revisions given.");
465
466         if (seen_range && seen_noncommit)
467                 die("both A..B and non commit are given.");
468         if (seen_range) {
469                 struct commit *commit;
470                 prepare_revision_walk(&rev);
471                 while ((commit = get_revision(&rev)) != NULL) {
472                         unsigned char *sha1 = commit->object.sha1;
473                         const char *n = find_unique_abbrev(sha1, rev.abbrev);
474                         char rev_name[41];
475                         strcpy(rev_name, n);
476                         if (grep_object(&opt, &rev, &commit->object, rev_name))
477                                 hit = 1;
478                         commit->buffer = NULL;
479                 }
480                 return !hit;
481         }
482
483         /* all of them are non-commit; do not walk, and
484          * do not lose their names.
485          */
486         for (list = rev.pending_objects; list; list = list->next) {
487                 struct object *real_obj;
488                 real_obj = deref_tag(list->item, NULL, 0);
489                 if (grep_object(&opt, &rev, real_obj, list->name))
490                         hit = 1;
491         }
492         return !hit;
493 }