994
|
1 /* amatch.c */
|
|
2 #include <stdio.h>
|
|
3 #include "tools.h"
|
|
4
|
|
5 /* Scans through the pattern template looking for a match
|
|
6 * with lin. Each element of lin is compared with the template
|
|
7 * until either a mis-match is found or the end of the template
|
|
8 * is reached. In the former case a 0 is returned; in the latter,
|
|
9 * a pointer into lin (pointing to the character following the
|
|
10 * matched pattern) is returned.
|
|
11 *
|
|
12 * "lin" is a pointer to the line being searched.
|
|
13 * "pat" is a pointer to a template made by makepat().
|
|
14 * "boln" is a pointer into "lin" which points at the
|
|
15 * character at the beginning of the line.
|
|
16 */
|
|
static char *match();		/* forward declaration; defined later in this file */

/* Bookkeeping for parenthesised sub-matches, filled in by match() while it
 * walks the template and finalised by amatch().
 */
char *paropen[9];	/* paropen[i]: position in the line where group i opened */
char *parclose[9];	/* parclose[i]: position in the line where group i closed */
int between;		/* nonzero after an OPEN token until its CLOSE is seen */
int parnum;		/* index of the group being (or next to be) recorded */
|
|
21
|
|
22 char *amatch(lin, pat, boln)
|
|
23 char *lin;
|
|
24 TOKEN *pat;
|
|
25 char *boln;
|
|
26 {
|
|
27 between = 0;
|
|
28 parnum = 0;
|
|
29
|
|
30 lin = match(lin, pat, boln);
|
|
31
|
|
32 if (between) return 0;
|
|
33
|
|
34 while (parnum < 9) {
|
|
35 paropen[parnum] = parclose[parnum] = "";
|
|
36 parnum++;
|
|
37 }
|
|
38 return lin;
|
|
39 }
|
|
40
|
|
41 static char *match(lin, pat, boln)
|
|
42 char *lin;
|
|
43 TOKEN *pat;
|
|
44 char *boln;
|
|
45 {
|
|
46 register char *bocl, *rval, *strstart;
|
|
47
|
|
48 if (pat == 0) return 0;
|
|
49
|
|
50 strstart = lin;
|
|
51
|
|
52 while (pat) {
|
|
53 if (pat->tok == CLOSURE && pat->next) {
|
|
54 /* Process a closure: first skip over the closure
|
|
55 * token to the object to be repeated. This object
|
|
56 * can be a character class. */
|
|
57
|
|
58 pat = pat->next;
|
|
59
|
|
60 /* Now match as many occurrences of the closure
|
|
61 * pattern as possible. */
|
|
62 bocl = lin;
|
|
63
|
|
64 while (*lin && omatch(&lin, pat, boln));
|
|
65
|
|
66 /* 'Lin' now points to the character that made made
|
|
67 * us fail. Now go on to process the rest of the
|
|
68 * string. A problem here is a character following
|
|
69 * the closure which could have been in the closure.
|
|
70 * For example, in the pattern "[a-z]*t" (which
|
|
71 * matches any lower-case word ending in a t), the
|
|
72 * final 't' will be sucked up in the while loop.
|
|
73 * So, if the match fails, we back up a notch and try
|
|
74 * to match the rest of the string again, repeating
|
|
75 * this process recursively until we get back to the
|
|
76 * beginning of the closure. The recursion goes, at
|
|
77 * most two levels deep. */
|
|
78
|
|
79 if (pat = pat->next) {
|
|
80 int savbtwn = between;
|
|
81 int savprnm = parnum;
|
|
82
|
|
83 while (bocl <= lin) {
|
|
84 if (rval = match(lin, pat, boln)) {
|
|
85 /* Success */
|
|
86 return(rval);
|
|
87 } else {
|
|
88 --lin;
|
|
89 between = savbtwn;
|
|
90 parnum = savprnm;
|
|
91 }
|
|
92 }
|
|
93 return(0); /* match failed */
|
|
94 }
|
|
95 } else if (pat->tok == OPEN) {
|
|
96 if (between || parnum >= 9) return 0;
|
|
97 paropen[parnum] = lin;
|
|
98 between = 1;
|
|
99 pat = pat->next;
|
|
100 } else if (pat->tok == CLOSE) {
|
|
101 if (!between) return 0;
|
|
102 parclose[parnum++] = lin;
|
|
103 between = 0;
|
|
104 pat = pat->next;
|
|
105 } else if (omatch(&lin, pat, boln)) {
|
|
106 pat = pat->next;
|
|
107 } else {
|
|
108 return(0);
|
|
109 }
|
|
110 }
|
|
111
|
|
112 /* Note that omatch() advances lin to point at the next character to
|
|
113 * be matched. Consequently, when we reach the end of the template,
|
|
114 * lin will be pointing at the character following the last character
|
|
115 * matched. The exceptions are templates containing only a BOLN or
|
|
116 * EOLN token. In these cases omatch doesn't advance.
|
|
117 *
|
|
118 * A philosophical point should be mentioned here. Is $ a position or a
|
|
119 * character? (i.e. does $ mean the EOL character itself or does it
|
|
120 * mean the character at the end of the line.) I decided here to
|
|
121 * make it mean the former, in order to make the behavior of match()
|
|
122 * consistent. If you give match the pattern ^$ (match all lines
|
|
123 * consisting only of an end of line) then, since something has to be
|
|
124 * returned, a pointer to the end of line character itself is
|
|
125 * returned. */
|
|
126
|
|
127 return((char *) max(strstart, lin));
|
|
128 }
|
|
129
|