CbC/CbC_llvm: lld/ELF/ScriptLexer.cpp annotate

annotate lld/ELF/ScriptLexer.cpp @ 173:0572611fdcc8 llvm10 llvm12

reorgnization done

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Mon, 25 May 2020 11:55:54 +0900
parents	1d019706d866
children	2e18cbf3894f

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 //===- ScriptLexer.cpp ----------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	2 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 // See https://llvm.org/LICENSE.txt for license information.
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	7 //===----------------------------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	9 // This file defines a lexer for the linker script.
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 // The linker script's grammar is not complex but ambiguous due to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	12 // lack of the formal specification of the language. What we are trying to
1d019706d866 LLVM10 anatofuz parents: diff changeset	13 // do in this and other files in LLD is to make a "reasonable" linker
1d019706d866 LLVM10 anatofuz parents: diff changeset	14 // script processor.
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	16 // Among simplicity, compatibility and efficiency, we put the most
1d019706d866 LLVM10 anatofuz parents: diff changeset	17 // emphasis on simplicity when we wrote this lexer. Compatibility with the
1d019706d866 LLVM10 anatofuz parents: diff changeset	18 // GNU linkers is important, but we did not try to clone every tiny corner
1d019706d866 LLVM10 anatofuz parents: diff changeset	19 // case of their lexers, as even ld.bfd and ld.gold are subtly different
1d019706d866 LLVM10 anatofuz parents: diff changeset	20 // in various corner cases. We do not care much about efficiency because
1d019706d866 LLVM10 anatofuz parents: diff changeset	21 // the time spent in parsing linker scripts is usually negligible.
1d019706d866 LLVM10 anatofuz parents: diff changeset	22 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 // Our grammar of the linker script is LL(2), meaning that it needs at
1d019706d866 LLVM10 anatofuz parents: diff changeset	24 // most two-token lookahead to parse. The only place we need two-token
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 // lookahead is labels in version scripts, where we need to parse "local :"
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 // as if "local:".
1d019706d866 LLVM10 anatofuz parents: diff changeset	27 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 // Overall, this lexer works fine for most linker scripts. There might
1d019706d866 LLVM10 anatofuz parents: diff changeset	29 // be room for improving compatibility, but that's probably not at the
1d019706d866 LLVM10 anatofuz parents: diff changeset	30 // top of our todo list.
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 //===----------------------------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	33
1d019706d866 LLVM10 anatofuz parents: diff changeset	34 #include "ScriptLexer.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 #include "lld/Common/ErrorHandler.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	36 #include "llvm/ADT/Twine.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	37
1d019706d866 LLVM10 anatofuz parents: diff changeset	38 using namespace llvm;
173 0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	39 using namespace lld;
0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	40 using namespace lld::elf;
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	41
1d019706d866 LLVM10 anatofuz parents: diff changeset	42 // Returns a whole line containing the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 StringRef ScriptLexer::getLine() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	44 StringRef s = getCurrentMB().getBuffer();
1d019706d866 LLVM10 anatofuz parents: diff changeset	45 StringRef tok = tokens[pos - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	46
1d019706d866 LLVM10 anatofuz parents: diff changeset	47 size_t pos = s.rfind('\n', tok.data() - s.data());
1d019706d866 LLVM10 anatofuz parents: diff changeset	48 if (pos != StringRef::npos)
1d019706d866 LLVM10 anatofuz parents: diff changeset	49 s = s.substr(pos + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	50 return s.substr(0, s.find_first_of("\r\n"));
1d019706d866 LLVM10 anatofuz parents: diff changeset	51 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	52
1d019706d866 LLVM10 anatofuz parents: diff changeset	53 // Returns 1-based line number of the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	54 size_t ScriptLexer::getLineNumber() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	55 StringRef s = getCurrentMB().getBuffer();
1d019706d866 LLVM10 anatofuz parents: diff changeset	56 StringRef tok = tokens[pos - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	57 return s.substr(0, tok.data() - s.data()).count('\n') + 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	58 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	59
1d019706d866 LLVM10 anatofuz parents: diff changeset	60 // Returns 0-based column number of the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	61 size_t ScriptLexer::getColumnNumber() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	62 StringRef tok = tokens[pos - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	63 return tok.data() - getLine().data();
1d019706d866 LLVM10 anatofuz parents: diff changeset	64 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	65
1d019706d866 LLVM10 anatofuz parents: diff changeset	66 std::string ScriptLexer::getCurrentLocation() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	67 std::string filename = std::string(getCurrentMB().getBufferIdentifier());
1d019706d866 LLVM10 anatofuz parents: diff changeset	68 return (filename + ":" + Twine(getLineNumber())).str();
1d019706d866 LLVM10 anatofuz parents: diff changeset	69 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	70
1d019706d866 LLVM10 anatofuz parents: diff changeset	71 ScriptLexer::ScriptLexer(MemoryBufferRef mb) { tokenize(mb); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	72
1d019706d866 LLVM10 anatofuz parents: diff changeset	73 // We don't want to record cascading errors. Keep only the first one.
1d019706d866 LLVM10 anatofuz parents: diff changeset	74 void ScriptLexer::setError(const Twine &msg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	75 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	76 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	77
1d019706d866 LLVM10 anatofuz parents: diff changeset	78 std::string s = (getCurrentLocation() + ": " + msg).str();
1d019706d866 LLVM10 anatofuz parents: diff changeset	79 if (pos)
1d019706d866 LLVM10 anatofuz parents: diff changeset	80 s += "\n>>> " + getLine().str() + "\n>>> " +
1d019706d866 LLVM10 anatofuz parents: diff changeset	81 std::string(getColumnNumber(), ' ') + "^";
1d019706d866 LLVM10 anatofuz parents: diff changeset	82 error(s);
1d019706d866 LLVM10 anatofuz parents: diff changeset	83 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	84
1d019706d866 LLVM10 anatofuz parents: diff changeset	85 // Split S into linker script tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	86 void ScriptLexer::tokenize(MemoryBufferRef mb) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	87 std::vector<StringRef> vec;
1d019706d866 LLVM10 anatofuz parents: diff changeset	88 mbs.push_back(mb);
1d019706d866 LLVM10 anatofuz parents: diff changeset	89 StringRef s = mb.getBuffer();
1d019706d866 LLVM10 anatofuz parents: diff changeset	90 StringRef begin = s;
1d019706d866 LLVM10 anatofuz parents: diff changeset	91
1d019706d866 LLVM10 anatofuz parents: diff changeset	92 for (;;) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	93 s = skipSpace(s);
1d019706d866 LLVM10 anatofuz parents: diff changeset	94 if (s.empty())
1d019706d866 LLVM10 anatofuz parents: diff changeset	95 break;
1d019706d866 LLVM10 anatofuz parents: diff changeset	96
1d019706d866 LLVM10 anatofuz parents: diff changeset	97 // Quoted token. Note that double-quote characters are parts of a token
1d019706d866 LLVM10 anatofuz parents: diff changeset	98 // because, in a glob match context, only unquoted tokens are interpreted
1d019706d866 LLVM10 anatofuz parents: diff changeset	99 // as glob patterns. Double-quoted tokens are literal patterns in that
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 // context.
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 if (s.startswith("\"")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 size_t e = s.find("\"", 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 if (e == StringRef::npos) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 StringRef filename = mb.getBufferIdentifier();
1d019706d866 LLVM10 anatofuz parents: diff changeset	105 size_t lineno = begin.substr(0, s.data() - begin.data()).count('\n');
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 error(filename + ":" + Twine(lineno + 1) + ": unclosed quote");
1d019706d866 LLVM10 anatofuz parents: diff changeset	107 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	109
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 vec.push_back(s.take_front(e + 1));
1d019706d866 LLVM10 anatofuz parents: diff changeset	111 s = s.substr(e + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	114
1d019706d866 LLVM10 anatofuz parents: diff changeset	115 // ">foo" is parsed to ">" and "foo", but ">>" is parsed to ">>".
1d019706d866 LLVM10 anatofuz parents: diff changeset	116 // "|", "||", "&" and "&&" are different operators.
1d019706d866 LLVM10 anatofuz parents: diff changeset	117 if (s.startswith("<<") || s.startswith("<=") || s.startswith(">>") ||
1d019706d866 LLVM10 anatofuz parents: diff changeset	118 s.startswith(">=") || s.startswith("||") || s.startswith("&&")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	119 vec.push_back(s.substr(0, 2));
1d019706d866 LLVM10 anatofuz parents: diff changeset	120 s = s.substr(2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	121 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	122 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	123
1d019706d866 LLVM10 anatofuz parents: diff changeset	124 // Unquoted token. This is more relaxed than tokens in C-like language,
1d019706d866 LLVM10 anatofuz parents: diff changeset	125 // so that you can write "file-name.cpp" as one bare token, for example.
1d019706d866 LLVM10 anatofuz parents: diff changeset	126 size_t pos = s.find_first_not_of(
1d019706d866 LLVM10 anatofuz parents: diff changeset	127 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
1d019706d866 LLVM10 anatofuz parents: diff changeset	128 "0123456789_.$/\\~=+[]*?-!^:");
1d019706d866 LLVM10 anatofuz parents: diff changeset	129
1d019706d866 LLVM10 anatofuz parents: diff changeset	130 // A character that cannot start a word (which is usually a
1d019706d866 LLVM10 anatofuz parents: diff changeset	131 // punctuation) forms a single character token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	132 if (pos == 0)
1d019706d866 LLVM10 anatofuz parents: diff changeset	133 pos = 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	134 vec.push_back(s.substr(0, pos));
1d019706d866 LLVM10 anatofuz parents: diff changeset	135 s = s.substr(pos);
1d019706d866 LLVM10 anatofuz parents: diff changeset	136 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	137
1d019706d866 LLVM10 anatofuz parents: diff changeset	138 tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
1d019706d866 LLVM10 anatofuz parents: diff changeset	139 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	140
1d019706d866 LLVM10 anatofuz parents: diff changeset	141 // Skip leading whitespace characters or comments.
1d019706d866 LLVM10 anatofuz parents: diff changeset	142 StringRef ScriptLexer::skipSpace(StringRef s) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	143 for (;;) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	144 if (s.startswith("/*")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	145 size_t e = s.find("*/", 2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	146 if (e == StringRef::npos) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	147 error("unclosed comment in a linker script");
1d019706d866 LLVM10 anatofuz parents: diff changeset	148 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	149 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	150 s = s.substr(e + 2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	151 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	152 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	153 if (s.startswith("#")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	154 size_t e = s.find('\n', 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	155 if (e == StringRef::npos)
1d019706d866 LLVM10 anatofuz parents: diff changeset	156 e = s.size() - 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	157 s = s.substr(e + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	158 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	159 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	160 size_t size = s.size();
1d019706d866 LLVM10 anatofuz parents: diff changeset	161 s = s.ltrim();
1d019706d866 LLVM10 anatofuz parents: diff changeset	162 if (s.size() == size)
1d019706d866 LLVM10 anatofuz parents: diff changeset	163 return s;
1d019706d866 LLVM10 anatofuz parents: diff changeset	164 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	165 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	166
1d019706d866 LLVM10 anatofuz parents: diff changeset	167 // An erroneous token is handled as if it were the last token before EOF.
1d019706d866 LLVM10 anatofuz parents: diff changeset	168 bool ScriptLexer::atEOF() { return errorCount() || tokens.size() == pos; }
1d019706d866 LLVM10 anatofuz parents: diff changeset	169
1d019706d866 LLVM10 anatofuz parents: diff changeset	170 // Split a given string as an expression.
1d019706d866 LLVM10 anatofuz parents: diff changeset	171 // This function returns "3", "*" and "5" for "3*5" for example.
1d019706d866 LLVM10 anatofuz parents: diff changeset	172 static std::vector<StringRef> tokenizeExpr(StringRef s) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	173 StringRef ops = "+-*/:!~=<>"; // List of operators
1d019706d866 LLVM10 anatofuz parents: diff changeset	174
1d019706d866 LLVM10 anatofuz parents: diff changeset	175 // Quoted strings are literal strings, so we don't want to split it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	176 if (s.startswith("\""))
1d019706d866 LLVM10 anatofuz parents: diff changeset	177 return {s};
1d019706d866 LLVM10 anatofuz parents: diff changeset	178
1d019706d866 LLVM10 anatofuz parents: diff changeset	179 // Split S with operators as separators.
1d019706d866 LLVM10 anatofuz parents: diff changeset	180 std::vector<StringRef> ret;
1d019706d866 LLVM10 anatofuz parents: diff changeset	181 while (!s.empty()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	182 size_t e = s.find_first_of(ops);
1d019706d866 LLVM10 anatofuz parents: diff changeset	183
1d019706d866 LLVM10 anatofuz parents: diff changeset	184 // No need to split if there is no operator.
1d019706d866 LLVM10 anatofuz parents: diff changeset	185 if (e == StringRef::npos) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	186 ret.push_back(s);
1d019706d866 LLVM10 anatofuz parents: diff changeset	187 break;
1d019706d866 LLVM10 anatofuz parents: diff changeset	188 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	189
173 0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	190 // Get a token before the operator.
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	191 if (e != 0)
1d019706d866 LLVM10 anatofuz parents: diff changeset	192 ret.push_back(s.substr(0, e));
1d019706d866 LLVM10 anatofuz parents: diff changeset	193
1d019706d866 LLVM10 anatofuz parents: diff changeset	194 // Get the operator as a token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	195 // Keep !=, ==, >=, <=, << and >> operators as a single tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	196 if (s.substr(e).startswith("!=") || s.substr(e).startswith("==") ||
1d019706d866 LLVM10 anatofuz parents: diff changeset	197 s.substr(e).startswith(">=") || s.substr(e).startswith("<=") ||
1d019706d866 LLVM10 anatofuz parents: diff changeset	198 s.substr(e).startswith("<<") || s.substr(e).startswith(">>")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	199 ret.push_back(s.substr(e, 2));
1d019706d866 LLVM10 anatofuz parents: diff changeset	200 s = s.substr(e + 2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	201 } else {
1d019706d866 LLVM10 anatofuz parents: diff changeset	202 ret.push_back(s.substr(e, 1));
1d019706d866 LLVM10 anatofuz parents: diff changeset	203 s = s.substr(e + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	204 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	205 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	206 return ret;
1d019706d866 LLVM10 anatofuz parents: diff changeset	207 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	208
1d019706d866 LLVM10 anatofuz parents: diff changeset	209 // In contexts where expressions are expected, the lexer should apply
1d019706d866 LLVM10 anatofuz parents: diff changeset	210 // different tokenization rules than the default one. By default,
1d019706d866 LLVM10 anatofuz parents: diff changeset	211 // arithmetic operator characters are regular characters, but in the
1d019706d866 LLVM10 anatofuz parents: diff changeset	212 // expression context, they should be independent tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	213 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	214 // For example, "foo*3" should be tokenized to "foo", "*" and "3" only
1d019706d866 LLVM10 anatofuz parents: diff changeset	215 // in the expression context.
1d019706d866 LLVM10 anatofuz parents: diff changeset	216 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	217 // This function may split the current token into multiple tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	218 void ScriptLexer::maybeSplitExpr() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	219 if (!inExpr || errorCount() || atEOF())
1d019706d866 LLVM10 anatofuz parents: diff changeset	220 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	221
1d019706d866 LLVM10 anatofuz parents: diff changeset	222 std::vector<StringRef> v = tokenizeExpr(tokens[pos]);
1d019706d866 LLVM10 anatofuz parents: diff changeset	223 if (v.size() == 1)
1d019706d866 LLVM10 anatofuz parents: diff changeset	224 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	225 tokens.erase(tokens.begin() + pos);
1d019706d866 LLVM10 anatofuz parents: diff changeset	226 tokens.insert(tokens.begin() + pos, v.begin(), v.end());
1d019706d866 LLVM10 anatofuz parents: diff changeset	227 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	228
1d019706d866 LLVM10 anatofuz parents: diff changeset	229 StringRef ScriptLexer::next() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	230 maybeSplitExpr();
1d019706d866 LLVM10 anatofuz parents: diff changeset	231
1d019706d866 LLVM10 anatofuz parents: diff changeset	232 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	233 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	234 if (atEOF()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	235 setError("unexpected EOF");
1d019706d866 LLVM10 anatofuz parents: diff changeset	236 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	237 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	238 return tokens[pos++];
1d019706d866 LLVM10 anatofuz parents: diff changeset	239 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	240
1d019706d866 LLVM10 anatofuz parents: diff changeset	241 StringRef ScriptLexer::peek() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	242 StringRef tok = next();
1d019706d866 LLVM10 anatofuz parents: diff changeset	243 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	244 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	245 pos = pos - 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	246 return tok;
1d019706d866 LLVM10 anatofuz parents: diff changeset	247 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	248
1d019706d866 LLVM10 anatofuz parents: diff changeset	249 StringRef ScriptLexer::peek2() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	250 skip();
1d019706d866 LLVM10 anatofuz parents: diff changeset	251 StringRef tok = next();
1d019706d866 LLVM10 anatofuz parents: diff changeset	252 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	253 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	254 pos = pos - 2;
1d019706d866 LLVM10 anatofuz parents: diff changeset	255 return tok;
1d019706d866 LLVM10 anatofuz parents: diff changeset	256 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	257
1d019706d866 LLVM10 anatofuz parents: diff changeset	258 bool ScriptLexer::consume(StringRef tok) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	259 if (peek() == tok) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	260 skip();
1d019706d866 LLVM10 anatofuz parents: diff changeset	261 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	262 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	263 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	264 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	265
1d019706d866 LLVM10 anatofuz parents: diff changeset	266 // Consumes Tok followed by ":". Space is allowed between Tok and ":".
1d019706d866 LLVM10 anatofuz parents: diff changeset	267 bool ScriptLexer::consumeLabel(StringRef tok) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	268 if (consume((tok + ":").str()))
1d019706d866 LLVM10 anatofuz parents: diff changeset	269 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	270 if (tokens.size() >= pos + 2 && tokens[pos] == tok &&
1d019706d866 LLVM10 anatofuz parents: diff changeset	271 tokens[pos + 1] == ":") {
1d019706d866 LLVM10 anatofuz parents: diff changeset	272 pos += 2;
1d019706d866 LLVM10 anatofuz parents: diff changeset	273 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	274 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	275 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	276 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	277
1d019706d866 LLVM10 anatofuz parents: diff changeset	278 void ScriptLexer::skip() { (void)next(); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	279
1d019706d866 LLVM10 anatofuz parents: diff changeset	280 void ScriptLexer::expect(StringRef expect) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	281 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	282 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	283 StringRef tok = next();
1d019706d866 LLVM10 anatofuz parents: diff changeset	284 if (tok != expect)
1d019706d866 LLVM10 anatofuz parents: diff changeset	285 setError(expect + " expected, but got " + tok);
1d019706d866 LLVM10 anatofuz parents: diff changeset	286 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	287
1d019706d866 LLVM10 anatofuz parents: diff changeset	288 // Returns true if S encloses T.
1d019706d866 LLVM10 anatofuz parents: diff changeset	289 static bool encloses(StringRef s, StringRef t) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	290 return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
1d019706d866 LLVM10 anatofuz parents: diff changeset	291 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	292
1d019706d866 LLVM10 anatofuz parents: diff changeset	293 MemoryBufferRef ScriptLexer::getCurrentMB() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	294 // Find input buffer containing the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	295 assert(!mbs.empty() && pos > 0);
1d019706d866 LLVM10 anatofuz parents: diff changeset	296 for (MemoryBufferRef mb : mbs)
1d019706d866 LLVM10 anatofuz parents: diff changeset	297 if (encloses(mb.getBuffer(), tokens[pos - 1]))
1d019706d866 LLVM10 anatofuz parents: diff changeset	298 return mb;
1d019706d866 LLVM10 anatofuz parents: diff changeset	299 llvm_unreachable("getCurrentMB: failed to find a token");
1d019706d866 LLVM10 anatofuz parents: diff changeset	300 }

Mercurial > hg > CbC > CbC_llvm

annotate lld/ELF/ScriptLexer.cpp @ 173:0572611fdcc8 llvm10 llvm12