1 /*************************************************
3 *************************************************/
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into directories.
9 Copyright (c) 1997-2004 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
/* Version string of this program, printed on stderr together with the PCRE
library version by the 'V' option handler. */
53 #define VERSION "3.0 14-Jan-2003"
/* Number of slots allocated for the compiled-pattern and study-data arrays,
and therefore the maximum number of patterns accepted from a pattern file. */
54 #define MAX_PATTERN_COUNT 100
57 /*************************************************
59 *************************************************/
/* Name of the file to read patterns from, taken from --file=<name> or
-f<name> during option processing. While it is NULL, a single pattern is
compiled from the command line instead. */
61 static char *pattern_filename = NULL;
/* Number of patterns in use; it bounds the loops that study and match
against pattern_list[]. */
62 static int pattern_count = 0;
/* Parallel arrays, allocated in main() with MAX_PATTERN_COUNT slots each:
compiled patterns from pcre_compile() and the matching pcre_study() data. */
63 static pcre **pattern_list;
64 static pcre_extra **hints_list;
/* Option flags set by handle_option(). */
/* -c: count matching lines per file instead of printing them. */
66 static BOOL count_only = FALSE;
/* Cleared by -h; forced back to TRUE in main() when filenames_only is set.
Passed to grep_or_recurse() as its show_filenames argument. */
67 static BOOL filenames = TRUE;
/* -l: print the file name ("<stdin>" when there is none) rather than the
matching lines. */
68 static BOOL filenames_only = FALSE;
/* -v: per the help text, select non-matching lines. */
69 static BOOL invert = FALSE;
/* -n: prefix each printed line with its line number. */
70 static BOOL number = FALSE;
/* -r: passed to grep_or_recurse() as dir_recurse so directories are scanned. */
71 static BOOL recurse = FALSE;
/* -s: described in the help text as suppressing error messages; pcregrep()
also consults it when handling a match. */
72 static BOOL silent = FALSE;
/* -x: a match only counts if it ends at the end of the line (the option
handler also adds PCRE_ANCHORED so it must start at the beginning). */
73 static BOOL whole_lines = FALSE;
75 /* Structure for options and list of them */
77 typedef struct option_item {
79 const char *long_name;
80 const char *help_text;
83 static option_item optionlist[] = {
84 { -1, "help", "display this help and exit" },
85 { 'c', "count", "print only a count of matching lines per FILE" },
86 { 'h', "no-filename", "suppress the prefixing filename on output" },
87 { 'i', "ignore-case", "ignore case distinctions" },
88 { 'l', "files-with-matches", "print only FILE names containing matches" },
89 { 'n', "line-number", "print line number with output lines" },
90 { 'r', "recursive", "recursively scan sub-directories" },
91 { 's', "no-messages", "suppress error messages" },
92 { 'u', "utf-8", "use UTF-8 mode" },
93 { 'V', "version", "print version information and exit" },
94 { 'v', "invert-match", "select non-matching lines" },
95 { 'x', "line-regex", "force PATTERN to match only whole lines" },
96 { 'x', "line-regexp", "force PATTERN to match only whole lines" },
101 /*************************************************
102 * Functions for directory scanning *
103 *************************************************/
105 /* These functions are defined so that they can be made system specific,
106 although at present the only ones are for Unix, Win32, and for "no directory
107 recursion support". */
110 /************* Directory scanning in Unix ***********/
113 #include <sys/types.h>
114 #include <sys/stat.h>
117 typedef DIR directory_type;
120 isdirectory(char *filename)
123 if (stat(filename, &statbuf) < 0)
124 return 0; /* In the expectation that opening as a file will fail */
125 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
128 static directory_type *
129 opendirectory(char *filename)
131 return opendir(filename);
135 readdirectory(directory_type *dir)
139 struct dirent *dent = readdir(dir);
140 if (dent == NULL) return NULL;
141 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
144 return NULL; /* Keep compiler happy; never executed */
148 closedirectory(directory_type *dir)
154 /************* Directory scanning in Win32 ***********/
156 /* I (Philip Hazel) have no means of testing this code. It was contributed by
165 #ifndef WIN32_LEAN_AND_MEAN
166 # define WIN32_LEAN_AND_MEAN
170 typedef struct directory_type
174 WIN32_FIND_DATA data;
178 isdirectory(char *filename)
180 DWORD attr = GetFileAttributes(filename);
181 if (attr == INVALID_FILE_ATTRIBUTES)
183 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
187 opendirectory(char *filename)
193 len = strlen(filename);
194 pattern = (char *) malloc(len + 3);
195 dir = (directory_type *) malloc(sizeof(*dir));
196 if ((pattern == NULL) || (dir == NULL))
198 fprintf(stderr, "pcregrep: malloc failed\n");
201 memcpy(pattern, filename, len);
202 memcpy(&(pattern[len]), "\\*", 3);
203 dir->handle = FindFirstFile(pattern, &(dir->data));
204 if (dir->handle != INVALID_HANDLE_VALUE)
210 err = GetLastError();
213 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
218 readdirectory(directory_type *dir)
224 if (!FindNextFile(dir->handle, &(dir->data)))
231 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
232 return dir->data.cFileName;
235 return NULL; /* Keep compiler happy; never executed */
240 closedirectory(directory_type *dir)
242 FindClose(dir->handle);
247 /************* Directory scanning when we can't do it ***********/
249 /* The type is void, and apart from isdirectory(), the functions do nothing. */
/* Fallback used when no directory-scanning support is compiled in. The handle
type is void and nothing is ever reported as a directory, so the open, read and
close functions are placeholders that exist only to satisfy the callers. */

typedef void directory_type;

/* Always answers "not a directory" with 0, the same value the other variants
return for a non-directory, so the recursion test in the caller never fires. */
int isdirectory(char *filename) { (void)filename; return 0; }

/* There is nothing to open. Return NULL explicitly: falling off the end of a
non-void function leaves the result undefined if a caller ever uses it. */
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }

/* There are no entries to read; NULL is the end-of-directory value used by the
other variants. */
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }

/* Nothing was opened, so there is nothing to release. */
void closedirectory(directory_type *dir) { (void)dir; }
265 /*************************************************
266 * Provide strerror() for non-ANSI libraries *
267 *************************************************/
269 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
270 in their libraries, but can provide the same facility by this simple
271 alternative function. */
274 extern char *sys_errlist[];
279 if (n < 0 || n >= sys_nerr) return "unknown error number";
280 return sys_errlist[n];
282 #endif /* HAVE_STRERROR */
286 /*************************************************
287 * Grep an individual file *
288 *************************************************/
291 pcregrep(FILE *in, char *name)
299 while (fgets(buffer, sizeof(buffer), in) != NULL)
303 int length = (int)strlen(buffer);
304 if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
307 for (i = 0; !match && i < pattern_count; i++)
309 match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
311 if (match && whole_lines && offsets[1] != length) match = FALSE;
316 if (count_only) count++;
318 else if (filenames_only)
320 fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);
324 else if (silent) return 0;
328 if (name != NULL) fprintf(stdout, "%s:", name);
329 if (number) fprintf(stdout, "%d:", linenumber);
330 fprintf(stdout, "%s\n", buffer);
339 if (name != NULL) fprintf(stdout, "%s:", name);
340 fprintf(stdout, "%d\n", count);
349 /*************************************************
350 * Grep a file or recurse into a directory *
351 *************************************************/
354 grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,
355 BOOL only_one_at_top)
361 /* If the file is a directory and we are recursing, scan each file within it.
362 The scanning code is localized so it can be made system-specific. */
364 if ((sep = isdirectory(filename)) != 0 && dir_recurse)
368 directory_type *dir = opendirectory(filename);
372 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
377 while ((nextfile = readdirectory(dir)) != NULL)
380 sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
381 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
382 if (frc == 0 && rc == 1) rc = 0;
389 /* If the file is not a directory, or we are not recursing, scan it. If this is
390 the first and only argument at top level, we don't show the file name (unless
391 we are only showing the file name). Otherwise, control is via the
392 show_filenames variable. */
394 in = fopen(filename, "r");
397 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
401 rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
410 /*************************************************
412 *************************************************/
417 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");
418 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
425 /*************************************************
427 *************************************************/
434 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
435 printf("Search for PATTERN in each FILE or standard input.\n");
436 printf("PATTERN must be present if -f is not used.\n");
437 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
439 printf("Options:\n");
441 for (op = optionlist; op->one_char != 0; op++)
445 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
446 printf(" %s --%s%n", s, op->long_name, &n);
449 printf("%.*s%s\n", n, " ", op->help_text);
452 printf("\n -f<filename> or --file=<filename>\n");
453 printf(" Read patterns from <filename> instead of using a command line option.\n");
454 printf(" Trailing white space is removed; blanks lines are ignored.\n");
455 printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
457 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
458 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
464 /*************************************************
466 *************************************************/
469 handle_option(int letter, int options)
473 case -1: help(); exit(0);
474 case 'c': count_only = TRUE; break;
475 case 'h': filenames = FALSE; break;
476 case 'i': options |= PCRE_CASELESS; break;
477 case 'l': filenames_only = TRUE;
478 case 'n': number = TRUE; break;
479 case 'r': recurse = TRUE; break;
480 case 's': silent = TRUE; break;
481 case 'u': options |= PCRE_UTF8; break;
482 case 'v': invert = TRUE; break;
483 case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
486 fprintf(stderr, "pcregrep version %s using ", VERSION);
487 fprintf(stderr, "PCRE version %s\n", pcre_version());
492 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
502 /*************************************************
504 *************************************************/
507 main(int argc, char **argv)
514 BOOL only_one_at_top;
516 /* Process the options */
518 for (i = 1; i < argc; i++)
520 if (argv[i][0] != '-') break;
522 /* Missing options */
524 if (argv[i][1] == 0) exit(usage(2));
526 /* Long name options */
528 if (argv[i][1] == '-')
532 if (strncmp(argv[i]+2, "file=", 5) == 0)
534 pattern_filename = argv[i] + 7;
538 for (op = optionlist; op->one_char != 0; op++)
540 if (strcmp(argv[i]+2, op->long_name) == 0)
542 options = handle_option(op->one_char, options);
546 if (op->one_char == 0)
548 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
553 /* One-char options */
557 char *s = argv[i] + 1;
562 pattern_filename = s + 1;
563 if (pattern_filename[0] == 0)
567 fprintf(stderr, "pcregrep: File name missing after -f\n");
570 pattern_filename = argv[++i];
574 else options = handle_option(*s++, options);
579 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
580 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
582 if (pattern_list == NULL || hints_list == NULL)
584 fprintf(stderr, "pcregrep: malloc failed\n");
588 /* Compile the regular expression(s). */
590 if (pattern_filename != NULL)
592 FILE *f = fopen(pattern_filename, "r");
596 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
600 while (fgets(buffer, sizeof(buffer), f) != NULL)
602 char *s = buffer + (int)strlen(buffer);
603 if (pattern_count >= MAX_PATTERN_COUNT)
605 fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
609 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
610 if (s == buffer) continue;
612 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
614 if (pattern_list[pattern_count++] == NULL)
616 fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
617 pattern_count, errptr, error);
624 /* If no file name, a single regex must be given inline */
628 if (i >= argc) return usage(2);
629 pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
630 if (pattern_list[0] == NULL)
632 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
639 /* Study the regular expressions, as we will be running them many times */
641 for (j = 0; j < pattern_count; j++)
643 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
647 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
648 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
653 /* If there are no further arguments, do the business on stdin and exit */
655 if (i >= argc) return pcregrep(stdin, NULL);
657 /* Otherwise, work through the remaining arguments as files or directories.
658 Pass in the fact that there is only one argument at top level - this suppresses
659 the file name if the argument is not a directory. */
661 only_one_at_top = (i == argc - 1);
662 if (filenames_only) filenames = TRUE;
664 for (; i < argc; i++)
666 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
667 if (frc == 0 && rc == 1) rc = 0;