Mercurial > hg > CbC > CbC_llvm
comparison lib/TableGen/StringMatcher.cpp @ 147:c2174574ed3a
LLVM 10
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 16:55:33 +0900 |
parents | 3a76565eade5 |
children |
comparison
equal
deleted
inserted
replaced
134:3a76565eade5 | 147:c2174574ed3a |
---|---|
1 //===- StringMatcher.cpp - Generate a matcher for input strings -----------===// | 1 //===- StringMatcher.cpp - Generate a matcher for input strings -----------===// |
2 // | 2 // |
3 // The LLVM Compiler Infrastructure | 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 // | 4 // See https://llvm.org/LICENSE.txt for license information. |
5 // This file is distributed under the University of Illinois Open Source | 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 // License. See LICENSE.TXT for details. | |
7 // | 6 // |
8 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
9 // | 8 // |
10 // This file implements the StringMatcher class. | 9 // This file implements the StringMatcher class. |
11 // | 10 // |
23 using namespace llvm; | 22 using namespace llvm; |
24 | 23 |
25 /// FindFirstNonCommonLetter - Find the first character in the keys of the | 24 /// FindFirstNonCommonLetter - Find the first character in the keys of the |
26 /// string pairs that is not shared across the whole set of strings. All | 25 /// string pairs that is not shared across the whole set of strings. All |
27 /// strings are assumed to have the same length. | 26 /// strings are assumed to have the same length. |
28 static unsigned | 27 static unsigned |
29 FindFirstNonCommonLetter(const std::vector<const | 28 FindFirstNonCommonLetter(const std::vector<const |
30 StringMatcher::StringPair*> &Matches) { | 29 StringMatcher::StringPair*> &Matches) { |
31 assert(!Matches.empty()); | 30 assert(!Matches.empty()); |
32 for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { | 31 for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { |
33 // Check to see if letter i is the same across the set. | 32 // Check to see if letter i is the same across the set. |
34 char Letter = Matches[0]->first[i]; | 33 char Letter = Matches[0]->first[i]; |
35 | 34 |
36 for (unsigned str = 0, e = Matches.size(); str != e; ++str) | 35 for (unsigned str = 0, e = Matches.size(); str != e; ++str) |
37 if (Matches[str]->first[i] != Letter) | 36 if (Matches[str]->first[i] != Letter) |
38 return i; | 37 return i; |
39 } | 38 } |
40 | 39 |
41 return Matches[0]->first.size(); | 40 return Matches[0]->first.size(); |
42 } | 41 } |
43 | 42 |
44 /// EmitStringMatcherForChar - Given a set of strings that are known to be the | 43 /// EmitStringMatcherForChar - Given a set of strings that are known to be the |
45 /// same length and whose characters leading up to CharNo are the same, emit | 44 /// same length and whose characters leading up to CharNo are the same, emit |
49 bool StringMatcher::EmitStringMatcherForChar( | 48 bool StringMatcher::EmitStringMatcherForChar( |
50 const std::vector<const StringPair *> &Matches, unsigned CharNo, | 49 const std::vector<const StringPair *> &Matches, unsigned CharNo, |
51 unsigned IndentCount, bool IgnoreDuplicates) const { | 50 unsigned IndentCount, bool IgnoreDuplicates) const { |
52 assert(!Matches.empty() && "Must have at least one string to match!"); | 51 assert(!Matches.empty() && "Must have at least one string to match!"); |
53 std::string Indent(IndentCount * 2 + 4, ' '); | 52 std::string Indent(IndentCount * 2 + 4, ' '); |
54 | 53 |
55 // If we have verified that the entire string matches, we're done: output the | 54 // If we have verified that the entire string matches, we're done: output the |
56 // matching code. | 55 // matching code. |
57 if (CharNo == Matches[0]->first.size()) { | 56 if (CharNo == Matches[0]->first.size()) { |
58 if (Matches.size() > 1 && !IgnoreDuplicates) | 57 if (Matches.size() > 1 && !IgnoreDuplicates) |
59 report_fatal_error("Had duplicate keys to match on"); | 58 report_fatal_error("Had duplicate keys to match on"); |
60 | 59 |
61 // If the to-execute code has \n's in it, indent each subsequent line. | 60 // If the to-execute code has \n's in it, indent each subsequent line. |
62 StringRef Code = Matches[0]->second; | 61 StringRef Code = Matches[0]->second; |
63 | 62 |
64 std::pair<StringRef, StringRef> Split = Code.split('\n'); | 63 std::pair<StringRef, StringRef> Split = Code.split('\n'); |
65 OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; | 64 OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; |
66 | 65 |
67 Code = Split.second; | 66 Code = Split.second; |
68 while (!Code.empty()) { | 67 while (!Code.empty()) { |
70 OS << Indent << Split.first << "\n"; | 69 OS << Indent << Split.first << "\n"; |
71 Code = Split.second; | 70 Code = Split.second; |
72 } | 71 } |
73 return false; | 72 return false; |
74 } | 73 } |
75 | 74 |
76 // Bucket the matches by the character we are comparing. | 75 // Bucket the matches by the character we are comparing. |
77 std::map<char, std::vector<const StringPair*>> MatchesByLetter; | 76 std::map<char, std::vector<const StringPair*>> MatchesByLetter; |
78 | 77 |
79 for (unsigned i = 0, e = Matches.size(); i != e; ++i) | 78 for (unsigned i = 0, e = Matches.size(); i != e; ++i) |
80 MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); | 79 MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); |
81 | 80 |
82 | 81 |
83 // If we have exactly one bucket to match, see how many characters are common | 82 // If we have exactly one bucket to match, see how many characters are common |
84 // across the whole set and match all of them at once. | 83 // across the whole set and match all of them at once. |
85 if (MatchesByLetter.size() == 1) { | 84 if (MatchesByLetter.size() == 1) { |
86 unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); | 85 unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); |
87 unsigned NumChars = FirstNonCommonLetter-CharNo; | 86 unsigned NumChars = FirstNonCommonLetter-CharNo; |
88 | 87 |
89 // Emit code to break out if the prefix doesn't match. | 88 // Emit code to break out if the prefix doesn't match. |
90 if (NumChars == 1) { | 89 if (NumChars == 1) { |
91 // Do the comparison with if (Str[1] != 'f') | 90 // Do the comparison with if (Str[1] != 'f') |
92 // FIXME: Need to escape general characters. | 91 // FIXME: Need to escape general characters. |
93 OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" | 92 OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" |
103 } | 102 } |
104 | 103 |
105 return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount, | 104 return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount, |
106 IgnoreDuplicates); | 105 IgnoreDuplicates); |
107 } | 106 } |
108 | 107 |
109 // Otherwise, we have multiple possible things, emit a switch on the | 108 // Otherwise, we have multiple possible things, emit a switch on the |
110 // character. | 109 // character. |
111 OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; | 110 OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; |
112 OS << Indent << "default: break;\n"; | 111 OS << Indent << "default: break;\n"; |
113 | 112 |
114 for (std::map<char, std::vector<const StringPair*>>::iterator LI = | 113 for (std::map<char, std::vector<const StringPair*>>::iterator LI = |
115 MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { | 114 MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { |
116 // TODO: escape hard stuff (like \n) if we ever care about it. | 115 // TODO: escape hard stuff (like \n) if we ever care about it. |
117 OS << Indent << "case '" << LI->first << "':\t // " | 116 OS << Indent << "case '" << LI->first << "':\t // " |
118 << LI->second.size() << " string"; | 117 << LI->second.size() << " string"; |
119 if (LI->second.size() != 1) OS << 's'; | 118 if (LI->second.size() != 1) OS << 's'; |
120 OS << " to match.\n"; | 119 OS << " to match.\n"; |
121 if (EmitStringMatcherForChar(LI->second, CharNo + 1, IndentCount + 1, | 120 if (EmitStringMatcherForChar(LI->second, CharNo + 1, IndentCount + 1, |
122 IgnoreDuplicates)) | 121 IgnoreDuplicates)) |
123 OS << Indent << " break;\n"; | 122 OS << Indent << " break;\n"; |
124 } | 123 } |
125 | 124 |
126 OS << Indent << "}\n"; | 125 OS << Indent << "}\n"; |
127 return true; | 126 return true; |
128 } | 127 } |
129 | 128 |
130 /// Emit - Top level entry point. | 129 /// Emit - Top level entry point. |
131 /// | 130 /// |
132 void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const { | 131 void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const { |
133 // If nothing to match, just fall through. | 132 // If nothing to match, just fall through. |
134 if (Matches.empty()) return; | 133 if (Matches.empty()) return; |
135 | 134 |
136 // First level categorization: group strings by length. | 135 // First level categorization: group strings by length. |
137 std::map<unsigned, std::vector<const StringPair*>> MatchesByLength; | 136 std::map<unsigned, std::vector<const StringPair*>> MatchesByLength; |
138 | 137 |
139 for (unsigned i = 0, e = Matches.size(); i != e; ++i) | 138 for (unsigned i = 0, e = Matches.size(); i != e; ++i) |
140 MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); | 139 MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); |
141 | 140 |
142 // Output a switch statement on length and categorize the elements within each | 141 // Output a switch statement on length and categorize the elements within each |
143 // bin. | 142 // bin. |
144 OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; | 143 OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; |
145 OS.indent(Indent*2+2) << "default: break;\n"; | 144 OS.indent(Indent*2+2) << "default: break;\n"; |
146 | 145 |
147 for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI = | 146 for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI = |
148 MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { | 147 MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { |
149 OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " | 148 OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " |
150 << LI->second.size() | 149 << LI->second.size() |
151 << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; | 150 << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; |
152 if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates)) | 151 if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates)) |
153 OS.indent(Indent*2+4) << "break;\n"; | 152 OS.indent(Indent*2+4) << "break;\n"; |
154 } | 153 } |
155 | 154 |
156 OS.indent(Indent*2+2) << "}\n"; | 155 OS.indent(Indent*2+2) << "}\n"; |
157 } | 156 } |