CbC/CbC_llvm: lld/ELF/ScriptLexer.cpp annotate

annotate lld/ELF/ScriptLexer.cpp @ 221:79ff65ed7e25

LLVM12 Original

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Tue, 15 Jun 2021 19:15:29 +0900
parents	0572611fdcc8
children	5f17cb93ff66

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 //===- ScriptLexer.cpp ----------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	2 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 // See https://llvm.org/LICENSE.txt for license information.
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	7 //===----------------------------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	9 // This file defines a lexer for the linker script.
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 // The linker script's grammar is not complex but ambiguous due to the
1d019706d866 LLVM10 anatofuz parents: diff changeset	12 // lack of the formal specification of the language. What we are trying to
1d019706d866 LLVM10 anatofuz parents: diff changeset	13 // do in this and other files in LLD is to make a "reasonable" linker
1d019706d866 LLVM10 anatofuz parents: diff changeset	14 // script processor.
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	16 // Among simplicity, compatibility and efficiency, we put the most
1d019706d866 LLVM10 anatofuz parents: diff changeset	17 // emphasis on simplicity when we wrote this lexer. Compatibility with the
1d019706d866 LLVM10 anatofuz parents: diff changeset	18 // GNU linkers is important, but we did not try to clone every tiny corner
1d019706d866 LLVM10 anatofuz parents: diff changeset	19 // case of their lexers, as even ld.bfd and ld.gold are subtly different
1d019706d866 LLVM10 anatofuz parents: diff changeset	20 // in various corner cases. We do not care much about efficiency because
1d019706d866 LLVM10 anatofuz parents: diff changeset	21 // the time spent in parsing linker scripts is usually negligible.
1d019706d866 LLVM10 anatofuz parents: diff changeset	22 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 // Our grammar of the linker script is LL(2), meaning that it needs at
1d019706d866 LLVM10 anatofuz parents: diff changeset	24 // most two-token lookahead to parse. The only place we need two-token
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 // lookahead is labels in version scripts, where we need to parse "local :"
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 // as if "local:".
1d019706d866 LLVM10 anatofuz parents: diff changeset	27 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 // Overall, this lexer works fine for most linker scripts. There might
1d019706d866 LLVM10 anatofuz parents: diff changeset	29 // be room for improving compatibility, but that's probably not at the
1d019706d866 LLVM10 anatofuz parents: diff changeset	30 // top of our todo list.
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 //===----------------------------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	33
1d019706d866 LLVM10 anatofuz parents: diff changeset	34 #include "ScriptLexer.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 #include "lld/Common/ErrorHandler.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	36 #include "llvm/ADT/Twine.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	37
1d019706d866 LLVM10 anatofuz parents: diff changeset	38 using namespace llvm;
173 0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	39 using namespace lld;
0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	40 using namespace lld::elf;
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	41
1d019706d866 LLVM10 anatofuz parents: diff changeset	42 // Returns a whole line containing the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 StringRef ScriptLexer::getLine() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	44 StringRef s = getCurrentMB().getBuffer();
1d019706d866 LLVM10 anatofuz parents: diff changeset	45 StringRef tok = tokens[pos - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	46
1d019706d866 LLVM10 anatofuz parents: diff changeset	47 size_t pos = s.rfind('\n', tok.data() - s.data());
1d019706d866 LLVM10 anatofuz parents: diff changeset	48 if (pos != StringRef::npos)
1d019706d866 LLVM10 anatofuz parents: diff changeset	49 s = s.substr(pos + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	50 return s.substr(0, s.find_first_of("\r\n"));
1d019706d866 LLVM10 anatofuz parents: diff changeset	51 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	52
1d019706d866 LLVM10 anatofuz parents: diff changeset	53 // Returns 1-based line number of the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	54 size_t ScriptLexer::getLineNumber() {
221 79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 173 diff changeset	55 if (pos == 0)
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 173 diff changeset	56 return 1;
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	57 StringRef s = getCurrentMB().getBuffer();
1d019706d866 LLVM10 anatofuz parents: diff changeset	58 StringRef tok = tokens[pos - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	59 return s.substr(0, tok.data() - s.data()).count('\n') + 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	60 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	61
1d019706d866 LLVM10 anatofuz parents: diff changeset	62 // Returns 0-based column number of the current token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	63 size_t ScriptLexer::getColumnNumber() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	64 StringRef tok = tokens[pos - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	65 return tok.data() - getLine().data();
1d019706d866 LLVM10 anatofuz parents: diff changeset	66 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	67
1d019706d866 LLVM10 anatofuz parents: diff changeset	68 std::string ScriptLexer::getCurrentLocation() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	69 std::string filename = std::string(getCurrentMB().getBufferIdentifier());
1d019706d866 LLVM10 anatofuz parents: diff changeset	70 return (filename + ":" + Twine(getLineNumber())).str();
1d019706d866 LLVM10 anatofuz parents: diff changeset	71 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	72
1d019706d866 LLVM10 anatofuz parents: diff changeset	73 ScriptLexer::ScriptLexer(MemoryBufferRef mb) { tokenize(mb); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	74
1d019706d866 LLVM10 anatofuz parents: diff changeset	75 // We don't want to record cascading errors. Keep only the first one.
1d019706d866 LLVM10 anatofuz parents: diff changeset	76 void ScriptLexer::setError(const Twine &msg) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	77 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	78 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	79
1d019706d866 LLVM10 anatofuz parents: diff changeset	80 std::string s = (getCurrentLocation() + ": " + msg).str();
1d019706d866 LLVM10 anatofuz parents: diff changeset	81 if (pos)
1d019706d866 LLVM10 anatofuz parents: diff changeset	82 s += "\n>>> " + getLine().str() + "\n>>> " +
1d019706d866 LLVM10 anatofuz parents: diff changeset	83 std::string(getColumnNumber(), ' ') + "^";
1d019706d866 LLVM10 anatofuz parents: diff changeset	84 error(s);
1d019706d866 LLVM10 anatofuz parents: diff changeset	85 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	86
1d019706d866 LLVM10 anatofuz parents: diff changeset	87 // Split S into linker script tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	88 void ScriptLexer::tokenize(MemoryBufferRef mb) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	89 std::vector<StringRef> vec;
1d019706d866 LLVM10 anatofuz parents: diff changeset	90 mbs.push_back(mb);
1d019706d866 LLVM10 anatofuz parents: diff changeset	91 StringRef s = mb.getBuffer();
1d019706d866 LLVM10 anatofuz parents: diff changeset	92 StringRef begin = s;
1d019706d866 LLVM10 anatofuz parents: diff changeset	93
1d019706d866 LLVM10 anatofuz parents: diff changeset	94 for (;;) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	95 s = skipSpace(s);
1d019706d866 LLVM10 anatofuz parents: diff changeset	96 if (s.empty())
1d019706d866 LLVM10 anatofuz parents: diff changeset	97 break;
1d019706d866 LLVM10 anatofuz parents: diff changeset	98
1d019706d866 LLVM10 anatofuz parents: diff changeset	99 // Quoted token. Note that double-quote characters are parts of a token
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 // because, in a glob match context, only unquoted tokens are interpreted
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 // as glob patterns. Double-quoted tokens are literal patterns in that
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 // context.
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 if (s.startswith("\"")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 size_t e = s.find("\"", 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	105 if (e == StringRef::npos) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 StringRef filename = mb.getBufferIdentifier();
1d019706d866 LLVM10 anatofuz parents: diff changeset	107 size_t lineno = begin.substr(0, s.data() - begin.data()).count('\n');
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 error(filename + ":" + Twine(lineno + 1) + ": unclosed quote");
1d019706d866 LLVM10 anatofuz parents: diff changeset	109 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	111
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 vec.push_back(s.take_front(e + 1));
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 s = s.substr(e + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	114 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	115 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	116
1d019706d866 LLVM10 anatofuz parents: diff changeset	117 // ">foo" is parsed to ">" and "foo", but ">>" is parsed to ">>".
1d019706d866 LLVM10 anatofuz parents: diff changeset	118 // "\|", "\|\|", "&" and "&&" are different operators.
1d019706d866 LLVM10 anatofuz parents: diff changeset	119 if (s.startswith("<<") \|\| s.startswith("<=") \|\| s.startswith(">>") \|\|
1d019706d866 LLVM10 anatofuz parents: diff changeset	120 s.startswith(">=") \|\| s.startswith("\|\|") \|\| s.startswith("&&")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	121 vec.push_back(s.substr(0, 2));
1d019706d866 LLVM10 anatofuz parents: diff changeset	122 s = s.substr(2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	123 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	124 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	125
1d019706d866 LLVM10 anatofuz parents: diff changeset	126 // Unquoted token. This is more relaxed than tokens in C-like language,
1d019706d866 LLVM10 anatofuz parents: diff changeset	127 // so that you can write "file-name.cpp" as one bare token, for example.
1d019706d866 LLVM10 anatofuz parents: diff changeset	128 size_t pos = s.find_first_not_of(
1d019706d866 LLVM10 anatofuz parents: diff changeset	129 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
1d019706d866 LLVM10 anatofuz parents: diff changeset	130 "0123456789_.$/\\~=+[]*?-!^:");
1d019706d866 LLVM10 anatofuz parents: diff changeset	131
1d019706d866 LLVM10 anatofuz parents: diff changeset	132 // A character that cannot start a word (which is usually a
1d019706d866 LLVM10 anatofuz parents: diff changeset	133 // punctuation) forms a single character token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	134 if (pos == 0)
1d019706d866 LLVM10 anatofuz parents: diff changeset	135 pos = 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	136 vec.push_back(s.substr(0, pos));
1d019706d866 LLVM10 anatofuz parents: diff changeset	137 s = s.substr(pos);
1d019706d866 LLVM10 anatofuz parents: diff changeset	138 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	139
1d019706d866 LLVM10 anatofuz parents: diff changeset	140 tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
1d019706d866 LLVM10 anatofuz parents: diff changeset	141 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	142
1d019706d866 LLVM10 anatofuz parents: diff changeset	143 // Skip leading whitespace characters or comments.
1d019706d866 LLVM10 anatofuz parents: diff changeset	144 StringRef ScriptLexer::skipSpace(StringRef s) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	145 for (;;) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	146 if (s.startswith("/*")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	147 size_t e = s.find("*/", 2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	148 if (e == StringRef::npos) {
221 79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 173 diff changeset	149 setError("unclosed comment in a linker script");
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	150 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	151 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	152 s = s.substr(e + 2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	153 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	154 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	155 if (s.startswith("#")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	156 size_t e = s.find('\n', 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	157 if (e == StringRef::npos)
1d019706d866 LLVM10 anatofuz parents: diff changeset	158 e = s.size() - 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	159 s = s.substr(e + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	160 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	161 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	162 size_t size = s.size();
1d019706d866 LLVM10 anatofuz parents: diff changeset	163 s = s.ltrim();
1d019706d866 LLVM10 anatofuz parents: diff changeset	164 if (s.size() == size)
1d019706d866 LLVM10 anatofuz parents: diff changeset	165 return s;
1d019706d866 LLVM10 anatofuz parents: diff changeset	166 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	167 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	168
1d019706d866 LLVM10 anatofuz parents: diff changeset	169 // An erroneous token is handled as if it were the last token before EOF.
1d019706d866 LLVM10 anatofuz parents: diff changeset	170 bool ScriptLexer::atEOF() { return errorCount() \|\| tokens.size() == pos; }
1d019706d866 LLVM10 anatofuz parents: diff changeset	171
1d019706d866 LLVM10 anatofuz parents: diff changeset	172 // Split a given string as an expression.
1d019706d866 LLVM10 anatofuz parents: diff changeset	173 // This function returns "3", "" and "5" for "35" for example.
1d019706d866 LLVM10 anatofuz parents: diff changeset	174 static std::vector<StringRef> tokenizeExpr(StringRef s) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	175 StringRef ops = "+-*/:!~=<>"; // List of operators
1d019706d866 LLVM10 anatofuz parents: diff changeset	176
1d019706d866 LLVM10 anatofuz parents: diff changeset	177 // Quoted strings are literal strings, so we don't want to split it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	178 if (s.startswith("\""))
1d019706d866 LLVM10 anatofuz parents: diff changeset	179 return {s};
1d019706d866 LLVM10 anatofuz parents: diff changeset	180
1d019706d866 LLVM10 anatofuz parents: diff changeset	181 // Split S with operators as separators.
1d019706d866 LLVM10 anatofuz parents: diff changeset	182 std::vector<StringRef> ret;
1d019706d866 LLVM10 anatofuz parents: diff changeset	183 while (!s.empty()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	184 size_t e = s.find_first_of(ops);
1d019706d866 LLVM10 anatofuz parents: diff changeset	185
1d019706d866 LLVM10 anatofuz parents: diff changeset	186 // No need to split if there is no operator.
1d019706d866 LLVM10 anatofuz parents: diff changeset	187 if (e == StringRef::npos) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	188 ret.push_back(s);
1d019706d866 LLVM10 anatofuz parents: diff changeset	189 break;
1d019706d866 LLVM10 anatofuz parents: diff changeset	190 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	191
173 0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	192 // Get a token before the operator.
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	193 if (e != 0)
1d019706d866 LLVM10 anatofuz parents: diff changeset	194 ret.push_back(s.substr(0, e));
1d019706d866 LLVM10 anatofuz parents: diff changeset	195
1d019706d866 LLVM10 anatofuz parents: diff changeset	196 // Get the operator as a token.
1d019706d866 LLVM10 anatofuz parents: diff changeset	197 // Keep !=, ==, >=, <=, << and >> operators as a single tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	198 if (s.substr(e).startswith("!=") \|\| s.substr(e).startswith("==") \|\|
1d019706d866 LLVM10 anatofuz parents: diff changeset	199 s.substr(e).startswith(">=") \|\| s.substr(e).startswith("<=") \|\|
1d019706d866 LLVM10 anatofuz parents: diff changeset	200 s.substr(e).startswith("<<") \|\| s.substr(e).startswith(">>")) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	201 ret.push_back(s.substr(e, 2));
1d019706d866 LLVM10 anatofuz parents: diff changeset	202 s = s.substr(e + 2);
1d019706d866 LLVM10 anatofuz parents: diff changeset	203 } else {
1d019706d866 LLVM10 anatofuz parents: diff changeset	204 ret.push_back(s.substr(e, 1));
1d019706d866 LLVM10 anatofuz parents: diff changeset	205 s = s.substr(e + 1);
1d019706d866 LLVM10 anatofuz parents: diff changeset	206 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	207 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	208 return ret;
1d019706d866 LLVM10 anatofuz parents: diff changeset	209 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	210
1d019706d866 LLVM10 anatofuz parents: diff changeset	211 // In contexts where expressions are expected, the lexer should apply
1d019706d866 LLVM10 anatofuz parents: diff changeset	212 // different tokenization rules than the default one. By default,
1d019706d866 LLVM10 anatofuz parents: diff changeset	213 // arithmetic operator characters are regular characters, but in the
1d019706d866 LLVM10 anatofuz parents: diff changeset	214 // expression context, they should be independent tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	215 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	216 // For example, "foo3" should be tokenized to "foo", "" and "3" only
1d019706d866 LLVM10 anatofuz parents: diff changeset	217 // in the expression context.
1d019706d866 LLVM10 anatofuz parents: diff changeset	218 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	219 // This function may split the current token into multiple tokens.
1d019706d866 LLVM10 anatofuz parents: diff changeset	220 void ScriptLexer::maybeSplitExpr() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	221 if (!inExpr \|\| errorCount() \|\| atEOF())
1d019706d866 LLVM10 anatofuz parents: diff changeset	222 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	223
1d019706d866 LLVM10 anatofuz parents: diff changeset	224 std::vector<StringRef> v = tokenizeExpr(tokens[pos]);
1d019706d866 LLVM10 anatofuz parents: diff changeset	225 if (v.size() == 1)
1d019706d866 LLVM10 anatofuz parents: diff changeset	226 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	227 tokens.erase(tokens.begin() + pos);
1d019706d866 LLVM10 anatofuz parents: diff changeset	228 tokens.insert(tokens.begin() + pos, v.begin(), v.end());
1d019706d866 LLVM10 anatofuz parents: diff changeset	229 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	230
1d019706d866 LLVM10 anatofuz parents: diff changeset	231 StringRef ScriptLexer::next() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	232 maybeSplitExpr();
1d019706d866 LLVM10 anatofuz parents: diff changeset	233
1d019706d866 LLVM10 anatofuz parents: diff changeset	234 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	235 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	236 if (atEOF()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	237 setError("unexpected EOF");
1d019706d866 LLVM10 anatofuz parents: diff changeset	238 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	239 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	240 return tokens[pos++];
1d019706d866 LLVM10 anatofuz parents: diff changeset	241 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	242
1d019706d866 LLVM10 anatofuz parents: diff changeset	243 StringRef ScriptLexer::peek() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	244 StringRef tok = next();
1d019706d866 LLVM10 anatofuz parents: diff changeset	245 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	246 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	247 pos = pos - 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	248 return tok;
1d019706d866 LLVM10 anatofuz parents: diff changeset	249 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	250
1d019706d866 LLVM10 anatofuz parents: diff changeset	251 StringRef ScriptLexer::peek2() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	252 skip();
1d019706d866 LLVM10 anatofuz parents: diff changeset	253 StringRef tok = next();
1d019706d866 LLVM10 anatofuz parents: diff changeset	254 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	255 return "";
1d019706d866 LLVM10 anatofuz parents: diff changeset	256 pos = pos - 2;
1d019706d866 LLVM10 anatofuz parents: diff changeset	257 return tok;
1d019706d866 LLVM10 anatofuz parents: diff changeset	258 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	259
1d019706d866 LLVM10 anatofuz parents: diff changeset	260 bool ScriptLexer::consume(StringRef tok) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	261 if (peek() == tok) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	262 skip();
1d019706d866 LLVM10 anatofuz parents: diff changeset	263 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	264 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	265 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	266 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	267
1d019706d866 LLVM10 anatofuz parents: diff changeset	268 // Consumes Tok followed by ":". Space is allowed between Tok and ":".
1d019706d866 LLVM10 anatofuz parents: diff changeset	269 bool ScriptLexer::consumeLabel(StringRef tok) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	270 if (consume((tok + ":").str()))
1d019706d866 LLVM10 anatofuz parents: diff changeset	271 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	272 if (tokens.size() >= pos + 2 && tokens[pos] == tok &&
1d019706d866 LLVM10 anatofuz parents: diff changeset	273 tokens[pos + 1] == ":") {
1d019706d866 LLVM10 anatofuz parents: diff changeset	274 pos += 2;
1d019706d866 LLVM10 anatofuz parents: diff changeset	275 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	276 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	277 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	278 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	279
1d019706d866 LLVM10 anatofuz parents: diff changeset	280 void ScriptLexer::skip() { (void)next(); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	281
1d019706d866 LLVM10 anatofuz parents: diff changeset	282 void ScriptLexer::expect(StringRef expect) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	283 if (errorCount())
1d019706d866 LLVM10 anatofuz parents: diff changeset	284 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	285 StringRef tok = next();
1d019706d866 LLVM10 anatofuz parents: diff changeset	286 if (tok != expect)
1d019706d866 LLVM10 anatofuz parents: diff changeset	287 setError(expect + " expected, but got " + tok);
1d019706d866 LLVM10 anatofuz parents: diff changeset	288 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	289
1d019706d866 LLVM10 anatofuz parents: diff changeset	290 // Returns true if S encloses T.
1d019706d866 LLVM10 anatofuz parents: diff changeset	291 static bool encloses(StringRef s, StringRef t) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	292 return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
1d019706d866 LLVM10 anatofuz parents: diff changeset	293 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	294
1d019706d866 LLVM10 anatofuz parents: diff changeset	295 MemoryBufferRef ScriptLexer::getCurrentMB() {
1d019706d866 LLVM10 anatofuz parents: diff changeset	296 // Find input buffer containing the current token.
221 79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 173 diff changeset	297 assert(!mbs.empty());
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 173 diff changeset	298 if (pos == 0)
79ff65ed7e25 LLVM12 Original Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 173 diff changeset	299 return mbs.back();
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	300 for (MemoryBufferRef mb : mbs)
1d019706d866 LLVM10 anatofuz parents: diff changeset	301 if (encloses(mb.getBuffer(), tokens[pos - 1]))
1d019706d866 LLVM10 anatofuz parents: diff changeset	302 return mb;
1d019706d866 LLVM10 anatofuz parents: diff changeset	303 llvm_unreachable("getCurrentMB: failed to find a token");
1d019706d866 LLVM10 anatofuz parents: diff changeset	304 }

Mercurial > hg > CbC > CbC_llvm

annotate lld/ELF/ScriptLexer.cpp @ 221:79ff65ed7e25