Mercurial > hg > CbC > CbC_llvm
comparison lib/Support/Regex.cpp @ 3:9ad51c7bc036
1st commit. remove git dir and add all files.
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 15 May 2013 06:43:32 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 3:9ad51c7bc036 |
---|---|
1 //===-- Regex.cpp - Regular Expression matcher implementation -------------===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This file implements a POSIX regular expression matcher. | |
11 // | |
12 //===----------------------------------------------------------------------===// | |
13 | |
14 #include "llvm/Support/Regex.h" | |
15 #include "regex_impl.h" | |
16 #include "llvm/ADT/SmallVector.h" | |
17 #include "llvm/Support/ErrorHandling.h" | |
18 #include "llvm/Support/raw_ostream.h" | |
19 #include <string> | |
20 using namespace llvm; | |
21 | |
22 Regex::Regex(StringRef regex, unsigned Flags) { | |
23 unsigned flags = 0; | |
24 preg = new llvm_regex(); | |
25 preg->re_endp = regex.end(); | |
26 if (Flags & IgnoreCase) | |
27 flags |= REG_ICASE; | |
28 if (Flags & Newline) | |
29 flags |= REG_NEWLINE; | |
30 if (!(Flags & BasicRegex)) | |
31 flags |= REG_EXTENDED; | |
32 error = llvm_regcomp(preg, regex.data(), flags|REG_PEND); | |
33 } | |
34 | |
35 Regex::~Regex() { | |
36 llvm_regfree(preg); | |
37 delete preg; | |
38 } | |
39 | |
40 bool Regex::isValid(std::string &Error) { | |
41 if (!error) | |
42 return true; | |
43 | |
44 size_t len = llvm_regerror(error, preg, NULL, 0); | |
45 | |
46 Error.resize(len); | |
47 llvm_regerror(error, preg, &Error[0], len); | |
48 return false; | |
49 } | |
50 | |
51 /// getNumMatches - In a valid regex, return the number of parenthesized | |
52 /// matches it contains. | |
53 unsigned Regex::getNumMatches() const { | |
54 return preg->re_nsub; | |
55 } | |
56 | |
57 bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){ | |
58 unsigned nmatch = Matches ? preg->re_nsub+1 : 0; | |
59 | |
60 // pmatch needs to have at least one element. | |
61 SmallVector<llvm_regmatch_t, 8> pm; | |
62 pm.resize(nmatch > 0 ? nmatch : 1); | |
63 pm[0].rm_so = 0; | |
64 pm[0].rm_eo = String.size(); | |
65 | |
66 int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); | |
67 | |
68 if (rc == REG_NOMATCH) | |
69 return false; | |
70 if (rc != 0) { | |
71 // regexec can fail due to invalid pattern or running out of memory. | |
72 error = rc; | |
73 return false; | |
74 } | |
75 | |
76 // There was a match. | |
77 | |
78 if (Matches) { // match position requested | |
79 Matches->clear(); | |
80 | |
81 for (unsigned i = 0; i != nmatch; ++i) { | |
82 if (pm[i].rm_so == -1) { | |
83 // this group didn't match | |
84 Matches->push_back(StringRef()); | |
85 continue; | |
86 } | |
87 assert(pm[i].rm_eo >= pm[i].rm_so); | |
88 Matches->push_back(StringRef(String.data()+pm[i].rm_so, | |
89 pm[i].rm_eo-pm[i].rm_so)); | |
90 } | |
91 } | |
92 | |
93 return true; | |
94 } | |
95 | |
96 std::string Regex::sub(StringRef Repl, StringRef String, | |
97 std::string *Error) { | |
98 SmallVector<StringRef, 8> Matches; | |
99 | |
100 // Reset error, if given. | |
101 if (Error && !Error->empty()) *Error = ""; | |
102 | |
103 // Return the input if there was no match. | |
104 if (!match(String, &Matches)) | |
105 return String; | |
106 | |
107 // Otherwise splice in the replacement string, starting with the prefix before | |
108 // the match. | |
109 std::string Res(String.begin(), Matches[0].begin()); | |
110 | |
111 // Then the replacement string, honoring possible substitutions. | |
112 while (!Repl.empty()) { | |
113 // Skip to the next escape. | |
114 std::pair<StringRef, StringRef> Split = Repl.split('\\'); | |
115 | |
116 // Add the skipped substring. | |
117 Res += Split.first; | |
118 | |
119 // Check for terminimation and trailing backslash. | |
120 if (Split.second.empty()) { | |
121 if (Repl.size() != Split.first.size() && | |
122 Error && Error->empty()) | |
123 *Error = "replacement string contained trailing backslash"; | |
124 break; | |
125 } | |
126 | |
127 // Otherwise update the replacement string and interpret escapes. | |
128 Repl = Split.second; | |
129 | |
130 // FIXME: We should have a StringExtras function for mapping C99 escapes. | |
131 switch (Repl[0]) { | |
132 // Treat all unrecognized characters as self-quoting. | |
133 default: | |
134 Res += Repl[0]; | |
135 Repl = Repl.substr(1); | |
136 break; | |
137 | |
138 // Single character escapes. | |
139 case 't': | |
140 Res += '\t'; | |
141 Repl = Repl.substr(1); | |
142 break; | |
143 case 'n': | |
144 Res += '\n'; | |
145 Repl = Repl.substr(1); | |
146 break; | |
147 | |
148 // Decimal escapes are backreferences. | |
149 case '0': case '1': case '2': case '3': case '4': | |
150 case '5': case '6': case '7': case '8': case '9': { | |
151 // Extract the backreference number. | |
152 StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); | |
153 Repl = Repl.substr(Ref.size()); | |
154 | |
155 unsigned RefValue; | |
156 if (!Ref.getAsInteger(10, RefValue) && | |
157 RefValue < Matches.size()) | |
158 Res += Matches[RefValue]; | |
159 else if (Error && Error->empty()) | |
160 *Error = "invalid backreference string '" + Ref.str() + "'"; | |
161 break; | |
162 } | |
163 } | |
164 } | |
165 | |
166 // And finally the suffix. | |
167 Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); | |
168 | |
169 return Res; | |
170 } |