annotate llvm/lib/Support/GlobPattern.cpp @ 181:df311c476dd5

CreateIdentifierInfo in ParseCbC (not yet worked)
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 31 May 2020 12:30:11 +0900
parents 1d019706d866
children c4bab56944e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
anatofuz
parents:
diff changeset
2 //
anatofuz
parents:
diff changeset
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
anatofuz
parents:
diff changeset
4 // See https://llvm.org/LICENSE.txt for license information.
anatofuz
parents:
diff changeset
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
anatofuz
parents:
diff changeset
6 //
anatofuz
parents:
diff changeset
7 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
8 //
anatofuz
parents:
diff changeset
9 // This file implements a glob pattern matcher.
anatofuz
parents:
diff changeset
10 //
anatofuz
parents:
diff changeset
11 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
12
anatofuz
parents:
diff changeset
13 #include "llvm/Support/GlobPattern.h"
anatofuz
parents:
diff changeset
14 #include "llvm/ADT/ArrayRef.h"
anatofuz
parents:
diff changeset
15 #include "llvm/ADT/Optional.h"
anatofuz
parents:
diff changeset
16 #include "llvm/ADT/StringRef.h"
anatofuz
parents:
diff changeset
17 #include "llvm/Support/Errc.h"
anatofuz
parents:
diff changeset
18
anatofuz
parents:
diff changeset
19 using namespace llvm;
anatofuz
parents:
diff changeset
20
anatofuz
parents:
diff changeset
21 static bool hasWildcard(StringRef S) {
anatofuz
parents:
diff changeset
22 return S.find_first_of("?*[\\") != StringRef::npos;
anatofuz
parents:
diff changeset
23 }
anatofuz
parents:
diff changeset
24
anatofuz
parents:
diff changeset
25 // Expands character ranges and returns a bitmap.
anatofuz
parents:
diff changeset
26 // For example, "a-cf-hz" is expanded to "abcfghz".
anatofuz
parents:
diff changeset
27 static Expected<BitVector> expand(StringRef S, StringRef Original) {
anatofuz
parents:
diff changeset
28 BitVector BV(256, false);
anatofuz
parents:
diff changeset
29
anatofuz
parents:
diff changeset
30 // Expand X-Y.
anatofuz
parents:
diff changeset
31 for (;;) {
anatofuz
parents:
diff changeset
32 if (S.size() < 3)
anatofuz
parents:
diff changeset
33 break;
anatofuz
parents:
diff changeset
34
anatofuz
parents:
diff changeset
35 uint8_t Start = S[0];
anatofuz
parents:
diff changeset
36 uint8_t End = S[2];
anatofuz
parents:
diff changeset
37
anatofuz
parents:
diff changeset
38 // If it doesn't start with something like X-Y,
anatofuz
parents:
diff changeset
39 // consume the first character and proceed.
anatofuz
parents:
diff changeset
40 if (S[1] != '-') {
anatofuz
parents:
diff changeset
41 BV[Start] = true;
anatofuz
parents:
diff changeset
42 S = S.substr(1);
anatofuz
parents:
diff changeset
43 continue;
anatofuz
parents:
diff changeset
44 }
anatofuz
parents:
diff changeset
45
anatofuz
parents:
diff changeset
46 // It must be in the form of X-Y.
anatofuz
parents:
diff changeset
47 // Validate it and then interpret the range.
anatofuz
parents:
diff changeset
48 if (Start > End)
anatofuz
parents:
diff changeset
49 return make_error<StringError>("invalid glob pattern: " + Original,
anatofuz
parents:
diff changeset
50 errc::invalid_argument);
anatofuz
parents:
diff changeset
51
anatofuz
parents:
diff changeset
52 for (int C = Start; C <= End; ++C)
anatofuz
parents:
diff changeset
53 BV[(uint8_t)C] = true;
anatofuz
parents:
diff changeset
54 S = S.substr(3);
anatofuz
parents:
diff changeset
55 }
anatofuz
parents:
diff changeset
56
anatofuz
parents:
diff changeset
57 for (char C : S)
anatofuz
parents:
diff changeset
58 BV[(uint8_t)C] = true;
anatofuz
parents:
diff changeset
59 return BV;
anatofuz
parents:
diff changeset
60 }
anatofuz
parents:
diff changeset
61
anatofuz
parents:
diff changeset
62 // This is a scanner for the glob pattern.
anatofuz
parents:
diff changeset
63 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
anatofuz
parents:
diff changeset
64 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
anatofuz
parents:
diff changeset
65 // equivalent to "[^<chars>]"), or a non-meta character.
anatofuz
parents:
diff changeset
66 // This function returns the first token in S.
anatofuz
parents:
diff changeset
67 static Expected<BitVector> scan(StringRef &S, StringRef Original) {
anatofuz
parents:
diff changeset
68 switch (S[0]) {
anatofuz
parents:
diff changeset
69 case '*':
anatofuz
parents:
diff changeset
70 S = S.substr(1);
anatofuz
parents:
diff changeset
71 // '*' is represented by an empty bitvector.
anatofuz
parents:
diff changeset
72 // All other bitvectors are 256-bit long.
anatofuz
parents:
diff changeset
73 return BitVector();
anatofuz
parents:
diff changeset
74 case '?':
anatofuz
parents:
diff changeset
75 S = S.substr(1);
anatofuz
parents:
diff changeset
76 return BitVector(256, true);
anatofuz
parents:
diff changeset
77 case '[': {
anatofuz
parents:
diff changeset
78 // ']' is allowed as the first character of a character class. '[]' is
anatofuz
parents:
diff changeset
79 // invalid. So, just skip the first character.
anatofuz
parents:
diff changeset
80 size_t End = S.find(']', 2);
anatofuz
parents:
diff changeset
81 if (End == StringRef::npos)
anatofuz
parents:
diff changeset
82 return make_error<StringError>("invalid glob pattern: " + Original,
anatofuz
parents:
diff changeset
83 errc::invalid_argument);
anatofuz
parents:
diff changeset
84
anatofuz
parents:
diff changeset
85 StringRef Chars = S.substr(1, End - 1);
anatofuz
parents:
diff changeset
86 S = S.substr(End + 1);
anatofuz
parents:
diff changeset
87 if (Chars.startswith("^") || Chars.startswith("!")) {
anatofuz
parents:
diff changeset
88 Expected<BitVector> BV = expand(Chars.substr(1), Original);
anatofuz
parents:
diff changeset
89 if (!BV)
anatofuz
parents:
diff changeset
90 return BV.takeError();
anatofuz
parents:
diff changeset
91 return BV->flip();
anatofuz
parents:
diff changeset
92 }
anatofuz
parents:
diff changeset
93 return expand(Chars, Original);
anatofuz
parents:
diff changeset
94 }
anatofuz
parents:
diff changeset
95 case '\\':
anatofuz
parents:
diff changeset
96 // Eat this character and fall through below to treat it like a non-meta
anatofuz
parents:
diff changeset
97 // character.
anatofuz
parents:
diff changeset
98 S = S.substr(1);
anatofuz
parents:
diff changeset
99 LLVM_FALLTHROUGH;
anatofuz
parents:
diff changeset
100 default:
anatofuz
parents:
diff changeset
101 BitVector BV(256, false);
anatofuz
parents:
diff changeset
102 BV[(uint8_t)S[0]] = true;
anatofuz
parents:
diff changeset
103 S = S.substr(1);
anatofuz
parents:
diff changeset
104 return BV;
anatofuz
parents:
diff changeset
105 }
anatofuz
parents:
diff changeset
106 }
anatofuz
parents:
diff changeset
107
anatofuz
parents:
diff changeset
108 Expected<GlobPattern> GlobPattern::create(StringRef S) {
anatofuz
parents:
diff changeset
109 GlobPattern Pat;
anatofuz
parents:
diff changeset
110
anatofuz
parents:
diff changeset
111 // S doesn't contain any metacharacter,
anatofuz
parents:
diff changeset
112 // so the regular string comparison should work.
anatofuz
parents:
diff changeset
113 if (!hasWildcard(S)) {
anatofuz
parents:
diff changeset
114 Pat.Exact = S;
anatofuz
parents:
diff changeset
115 return Pat;
anatofuz
parents:
diff changeset
116 }
anatofuz
parents:
diff changeset
117
anatofuz
parents:
diff changeset
118 // S is something like "foo*", and the "* is not escaped. We can use
anatofuz
parents:
diff changeset
119 // startswith().
anatofuz
parents:
diff changeset
120 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
anatofuz
parents:
diff changeset
121 Pat.Prefix = S.drop_back();
anatofuz
parents:
diff changeset
122 return Pat;
anatofuz
parents:
diff changeset
123 }
anatofuz
parents:
diff changeset
124
anatofuz
parents:
diff changeset
125 // S is something like "*foo". We can use endswith().
anatofuz
parents:
diff changeset
126 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
anatofuz
parents:
diff changeset
127 Pat.Suffix = S.drop_front();
anatofuz
parents:
diff changeset
128 return Pat;
anatofuz
parents:
diff changeset
129 }
anatofuz
parents:
diff changeset
130
anatofuz
parents:
diff changeset
131 // Otherwise, we need to do real glob pattern matching.
anatofuz
parents:
diff changeset
132 // Parse the pattern now.
anatofuz
parents:
diff changeset
133 StringRef Original = S;
anatofuz
parents:
diff changeset
134 while (!S.empty()) {
anatofuz
parents:
diff changeset
135 Expected<BitVector> BV = scan(S, Original);
anatofuz
parents:
diff changeset
136 if (!BV)
anatofuz
parents:
diff changeset
137 return BV.takeError();
anatofuz
parents:
diff changeset
138 Pat.Tokens.push_back(*BV);
anatofuz
parents:
diff changeset
139 }
anatofuz
parents:
diff changeset
140 return Pat;
anatofuz
parents:
diff changeset
141 }
anatofuz
parents:
diff changeset
142
anatofuz
parents:
diff changeset
143 bool GlobPattern::match(StringRef S) const {
anatofuz
parents:
diff changeset
144 if (Exact)
anatofuz
parents:
diff changeset
145 return S == *Exact;
anatofuz
parents:
diff changeset
146 if (Prefix)
anatofuz
parents:
diff changeset
147 return S.startswith(*Prefix);
anatofuz
parents:
diff changeset
148 if (Suffix)
anatofuz
parents:
diff changeset
149 return S.endswith(*Suffix);
anatofuz
parents:
diff changeset
150 return matchOne(Tokens, S);
anatofuz
parents:
diff changeset
151 }
anatofuz
parents:
diff changeset
152
anatofuz
parents:
diff changeset
153 // Runs glob pattern Pats against string S.
anatofuz
parents:
diff changeset
154 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
anatofuz
parents:
diff changeset
155 for (;;) {
anatofuz
parents:
diff changeset
156 if (Pats.empty())
anatofuz
parents:
diff changeset
157 return S.empty();
anatofuz
parents:
diff changeset
158
anatofuz
parents:
diff changeset
159 // If Pats[0] is '*', try to match Pats[1..] against all possible
anatofuz
parents:
diff changeset
160 // tail strings of S to see at least one pattern succeeds.
anatofuz
parents:
diff changeset
161 if (Pats[0].size() == 0) {
anatofuz
parents:
diff changeset
162 Pats = Pats.slice(1);
anatofuz
parents:
diff changeset
163 if (Pats.empty())
anatofuz
parents:
diff changeset
164 // Fast path. If a pattern is '*', it matches anything.
anatofuz
parents:
diff changeset
165 return true;
anatofuz
parents:
diff changeset
166 for (size_t I = 0, E = S.size(); I < E; ++I)
anatofuz
parents:
diff changeset
167 if (matchOne(Pats, S.substr(I)))
anatofuz
parents:
diff changeset
168 return true;
anatofuz
parents:
diff changeset
169 return false;
anatofuz
parents:
diff changeset
170 }
anatofuz
parents:
diff changeset
171
anatofuz
parents:
diff changeset
172 // If Pats[0] is not '*', it must consume one character.
anatofuz
parents:
diff changeset
173 if (S.empty() || !Pats[0][(uint8_t)S[0]])
anatofuz
parents:
diff changeset
174 return false;
anatofuz
parents:
diff changeset
175 Pats = Pats.slice(1);
anatofuz
parents:
diff changeset
176 S = S.substr(1);
anatofuz
parents:
diff changeset
177 }
anatofuz
parents:
diff changeset
178 }