150
|
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 //
|
|
9 // This file implements a glob pattern matcher.
|
|
10 //
|
|
11 //===----------------------------------------------------------------------===//
|
|
12
|
|
13 #include "llvm/Support/GlobPattern.h"
|
|
14 #include "llvm/ADT/ArrayRef.h"
|
|
15 #include "llvm/ADT/Optional.h"
|
|
16 #include "llvm/ADT/StringRef.h"
|
|
17 #include "llvm/Support/Errc.h"
|
|
18
|
|
19 using namespace llvm;
|
|
20
|
|
21 static bool hasWildcard(StringRef S) {
|
|
22 return S.find_first_of("?*[\\") != StringRef::npos;
|
|
23 }
|
|
24
|
|
25 // Expands character ranges and returns a bitmap.
|
|
26 // For example, "a-cf-hz" is expanded to "abcfghz".
|
|
27 static Expected<BitVector> expand(StringRef S, StringRef Original) {
|
|
28 BitVector BV(256, false);
|
|
29
|
|
30 // Expand X-Y.
|
|
31 for (;;) {
|
|
32 if (S.size() < 3)
|
|
33 break;
|
|
34
|
|
35 uint8_t Start = S[0];
|
|
36 uint8_t End = S[2];
|
|
37
|
|
38 // If it doesn't start with something like X-Y,
|
|
39 // consume the first character and proceed.
|
|
40 if (S[1] != '-') {
|
|
41 BV[Start] = true;
|
|
42 S = S.substr(1);
|
|
43 continue;
|
|
44 }
|
|
45
|
|
46 // It must be in the form of X-Y.
|
|
47 // Validate it and then interpret the range.
|
|
48 if (Start > End)
|
|
49 return make_error<StringError>("invalid glob pattern: " + Original,
|
|
50 errc::invalid_argument);
|
|
51
|
|
52 for (int C = Start; C <= End; ++C)
|
|
53 BV[(uint8_t)C] = true;
|
|
54 S = S.substr(3);
|
|
55 }
|
|
56
|
|
57 for (char C : S)
|
|
58 BV[(uint8_t)C] = true;
|
|
59 return BV;
|
|
60 }
|
|
61
|
|
62 // This is a scanner for the glob pattern.
|
|
63 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
|
|
64 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
|
|
65 // equivalent to "[^<chars>]"), or a non-meta character.
|
|
66 // This function returns the first token in S.
|
|
67 static Expected<BitVector> scan(StringRef &S, StringRef Original) {
|
|
68 switch (S[0]) {
|
|
69 case '*':
|
|
70 S = S.substr(1);
|
|
71 // '*' is represented by an empty bitvector.
|
|
72 // All other bitvectors are 256-bit long.
|
|
73 return BitVector();
|
|
74 case '?':
|
|
75 S = S.substr(1);
|
|
76 return BitVector(256, true);
|
|
77 case '[': {
|
|
78 // ']' is allowed as the first character of a character class. '[]' is
|
|
79 // invalid. So, just skip the first character.
|
|
80 size_t End = S.find(']', 2);
|
|
81 if (End == StringRef::npos)
|
|
82 return make_error<StringError>("invalid glob pattern: " + Original,
|
|
83 errc::invalid_argument);
|
|
84
|
|
85 StringRef Chars = S.substr(1, End - 1);
|
|
86 S = S.substr(End + 1);
|
|
87 if (Chars.startswith("^") || Chars.startswith("!")) {
|
|
88 Expected<BitVector> BV = expand(Chars.substr(1), Original);
|
|
89 if (!BV)
|
|
90 return BV.takeError();
|
|
91 return BV->flip();
|
|
92 }
|
|
93 return expand(Chars, Original);
|
|
94 }
|
|
95 case '\\':
|
|
96 // Eat this character and fall through below to treat it like a non-meta
|
|
97 // character.
|
|
98 S = S.substr(1);
|
|
99 LLVM_FALLTHROUGH;
|
|
100 default:
|
|
101 BitVector BV(256, false);
|
|
102 BV[(uint8_t)S[0]] = true;
|
|
103 S = S.substr(1);
|
|
104 return BV;
|
|
105 }
|
|
106 }
|
|
107
|
|
108 Expected<GlobPattern> GlobPattern::create(StringRef S) {
|
|
109 GlobPattern Pat;
|
|
110
|
|
111 // S doesn't contain any metacharacter,
|
|
112 // so the regular string comparison should work.
|
|
113 if (!hasWildcard(S)) {
|
|
114 Pat.Exact = S;
|
|
115 return Pat;
|
|
116 }
|
|
117
|
|
118 // S is something like "foo*", and the "* is not escaped. We can use
|
|
119 // startswith().
|
|
120 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
|
|
121 Pat.Prefix = S.drop_back();
|
|
122 return Pat;
|
|
123 }
|
|
124
|
|
125 // S is something like "*foo". We can use endswith().
|
|
126 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
|
|
127 Pat.Suffix = S.drop_front();
|
|
128 return Pat;
|
|
129 }
|
|
130
|
|
131 // Otherwise, we need to do real glob pattern matching.
|
|
132 // Parse the pattern now.
|
|
133 StringRef Original = S;
|
|
134 while (!S.empty()) {
|
|
135 Expected<BitVector> BV = scan(S, Original);
|
|
136 if (!BV)
|
|
137 return BV.takeError();
|
|
138 Pat.Tokens.push_back(*BV);
|
|
139 }
|
|
140 return Pat;
|
|
141 }
|
|
142
|
|
143 bool GlobPattern::match(StringRef S) const {
|
|
144 if (Exact)
|
|
145 return S == *Exact;
|
|
146 if (Prefix)
|
|
147 return S.startswith(*Prefix);
|
|
148 if (Suffix)
|
|
149 return S.endswith(*Suffix);
|
|
150 return matchOne(Tokens, S);
|
|
151 }
|
|
152
|
|
153 // Runs glob pattern Pats against string S.
|
|
154 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
|
|
155 for (;;) {
|
|
156 if (Pats.empty())
|
|
157 return S.empty();
|
|
158
|
|
159 // If Pats[0] is '*', try to match Pats[1..] against all possible
|
|
160 // tail strings of S to see at least one pattern succeeds.
|
|
161 if (Pats[0].size() == 0) {
|
|
162 Pats = Pats.slice(1);
|
|
163 if (Pats.empty())
|
|
164 // Fast path. If a pattern is '*', it matches anything.
|
|
165 return true;
|
|
166 for (size_t I = 0, E = S.size(); I < E; ++I)
|
|
167 if (matchOne(Pats, S.substr(I)))
|
|
168 return true;
|
|
169 return false;
|
|
170 }
|
|
171
|
|
172 // If Pats[0] is not '*', it must consume one character.
|
|
173 if (S.empty() || !Pats[0][(uint8_t)S[0]])
|
|
174 return false;
|
|
175 Pats = Pats.slice(1);
|
|
176 S = S.substr(1);
|
|
177 }
|
|
178 }
|