//===- ScriptLexer.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a lexer for the linker script.
//
// The linker script's grammar is not complex, but it is ambiguous due to
// the lack of a formal specification of the language. What we are trying
// to do in this and other files in LLD is to make a "reasonable" linker
// script processor.
//
// Among simplicity, compatibility and efficiency, we put the most
// emphasis on simplicity when we wrote this lexer. Compatibility with the
// GNU linkers is important, but we did not try to clone every tiny corner
// case of their lexers, as even ld.bfd and ld.gold are subtly different
// in various corner cases. We do not care much about efficiency because
// the time spent parsing linker scripts is usually negligible.
//
// Our grammar of the linker script is LL(2), meaning that it needs at
// most two-token lookahead to parse. The only place we need two-token
// lookahead is labels in version scripts, where we need to parse "local :"
// as if it were "local:".
//
// Overall, this lexer works fine for most linker scripts. There might
// be room for improving compatibility, but that's probably not at the
// top of our todo list.
//
//===----------------------------------------------------------------------===//

#include "ScriptLexer.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cstring> // for strchr

using namespace llvm;
using namespace lld;
using namespace lld::elf;

// Returns a whole line containing the current token.
StringRef ScriptLexer::getLine() {
  StringRef s = getCurrentMB().getBuffer();
  StringRef tok = tokens[pos - 1];

  size_t pos = s.rfind('\n', tok.data() - s.data());
  if (pos != StringRef::npos)
    s = s.substr(pos + 1);
  return s.substr(0, s.find_first_of("\r\n"));
}

// Returns 1-based line number of the current token.
size_t ScriptLexer::getLineNumber() {
  if (pos == 0)
    return 1;
  StringRef s = getCurrentMB().getBuffer();
  StringRef tok = tokens[pos - 1];
  const size_t tokOffset = tok.data() - s.data();

  // For the first token, or when going backwards, start from the beginning of
  // the buffer. If this token is after the previous token, start from the
  // previous token.
  size_t line = 1;
  size_t start = 0;
  if (lastLineNumberOffset > 0 && tokOffset >= lastLineNumberOffset) {
    start = lastLineNumberOffset;
    line = lastLineNumber;
  }

  line += s.substr(start, tokOffset - start).count('\n');

  // Store the line number of this token for reuse.
  lastLineNumberOffset = tokOffset;
  lastLineNumber = line;

  return line;
}

// Returns 0-based column number of the current token.
size_t ScriptLexer::getColumnNumber() {
  StringRef tok = tokens[pos - 1];
  return tok.data() - getLine().data();
}

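// Returns the current location as "<filename>:<line number>" for use in
// error messages.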
std::string ScriptLexer::getCurrentLocation() {
  std::string filename = std::string(getCurrentMB().getBufferIdentifier());
  return (filename + ":" + Twine(getLineNumber())).str();
}

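// Creates the lexer and tokenizes the given input buffer up front.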
ScriptLexer::ScriptLexer(MemoryBufferRef mb) { tokenize(mb); }

// We don't want to record cascading errors. Keep only the first one.
void ScriptLexer::setError(const Twine &msg) {
  if (errorCount())
    return;

  std::string s = (getCurrentLocation() + ": " + msg).str();
  if (pos)
    s += "\n>>> " + getLine().str() + "\n>>> " +
         std::string(getColumnNumber(), ' ') + "^";
  error(s);
}

// Splits the given buffer into linker script tokens.
void ScriptLexer::tokenize(MemoryBufferRef mb) {
  std::vector<StringRef> vec;
  mbs.push_back(mb);
  StringRef s = mb.getBuffer();
  StringRef begin = s;

  for (;;) {
    s = skipSpace(s);
    if (s.empty())
      break;

    // Quoted token. Note that double-quote characters are parts of a token
    // because, in a glob match context, only unquoted tokens are interpreted
    // as glob patterns. Double-quoted tokens are literal patterns in that
    // context.
    if (s.starts_with("\"")) {
      size_t e = s.find("\"", 1);
      if (e == StringRef::npos) {
        StringRef filename = mb.getBufferIdentifier();
        size_t lineno = begin.substr(0, s.data() - begin.data()).count('\n');
        error(filename + ":" + Twine(lineno + 1) + ": unclosed quote");
        return;
      }

      vec.push_back(s.take_front(e + 1));
      s = s.substr(e + 1);
      continue;
    }

    // Some operators form separate tokens.
    if (s.starts_with("<<=") || s.starts_with(">>=")) {
      vec.push_back(s.substr(0, 3));
      s = s.substr(3);
      continue;
    }
    if (s.size() > 1 && ((s[1] == '=' && strchr("*/+-<>&^|", s[0])) ||
                         (s[0] == s[1] && strchr("<>&|", s[0])))) {
      vec.push_back(s.substr(0, 2));
      s = s.substr(2);
      continue;
    }

    // Unquoted token. This is more relaxed than tokens in C-like languages,
    // so that you can write "file-name.cpp" as one bare token, for example.
    size_t pos = s.find_first_not_of(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "0123456789_.$/\\~=+[]*?-!^:");

    // A character that cannot start a word (usually a punctuation character)
    // forms a single-character token.
    if (pos == 0)
      pos = 1;
    vec.push_back(s.substr(0, pos));
    s = s.substr(pos);
  }

  tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
}

// Skip leading whitespace characters or comments.
StringRef ScriptLexer::skipSpace(StringRef s) {
  for (;;) {
    if (s.starts_with("/*")) {
      size_t e = s.find("*/", 2);
      if (e == StringRef::npos) {
        setError("unclosed comment in a linker script");
        return "";
      }
      s = s.substr(e + 2);
      continue;
    }
    if (s.starts_with("#")) {
      size_t e = s.find('\n', 1);
      if (e == StringRef::npos)
        e = s.size() - 1;
      s = s.substr(e + 1);
      continue;
    }
    size_t size = s.size();
    s = s.ltrim();
    if (s.size() == size)
      return s;
  }
}

// An erroneous token is handled as if it were the last token before EOF.
bool ScriptLexer::atEOF() { return errorCount() || tokens.size() == pos; }

// Split a given string as an expression.
// This function returns "3", "*" and "5" for "3*5", for example.
static std::vector<StringRef> tokenizeExpr(StringRef s) {
  StringRef ops = "!~*/+-<>?^:="; // List of operators

  // Quoted strings are literal strings, so we don't want to split them.
  if (s.starts_with("\""))
    return {s};

  // Split S with operators as separators.
  std::vector<StringRef> ret;
  while (!s.empty()) {
    size_t e = s.find_first_of(ops);

    // No need to split if there is no operator.
    if (e == StringRef::npos) {
      ret.push_back(s);
      break;
    }

    // Get a token before the operator.
    if (e != 0)
      ret.push_back(s.substr(0, e));

    // Get the operator as a token.
    // Keep !=, ==, >=, <=, << and >> operators as single tokens.
    if (s.substr(e).starts_with("!=") || s.substr(e).starts_with("==") ||
        s.substr(e).starts_with(">=") || s.substr(e).starts_with("<=") ||
        s.substr(e).starts_with("<<") || s.substr(e).starts_with(">>")) {
      ret.push_back(s.substr(e, 2));
      s = s.substr(e + 2);
    } else {
      ret.push_back(s.substr(e, 1));
      s = s.substr(e + 1);
    }
  }
  return ret;
}

// In contexts where expressions are expected, the lexer should apply
// different tokenization rules than the default ones. By default,
// arithmetic operator characters are regular characters, but in the
// expression context, they should be independent tokens.
//
// For example, "foo*3" should be tokenized to "foo", "*" and "3" only
// in the expression context.
//
// This function may split the current token into multiple tokens.
void ScriptLexer::maybeSplitExpr() {
  if (!inExpr || errorCount() || atEOF())
    return;

  std::vector<StringRef> v = tokenizeExpr(tokens[pos]);
  if (v.size() == 1)
    return;
  tokens.erase(tokens.begin() + pos);
  tokens.insert(tokens.begin() + pos, v.begin(), v.end());
}

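// Returns the current token and advances to the next one. In the expression
// context the token may first be split into operator and operand tokens.
// Reports an error at EOF.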
StringRef ScriptLexer::next() {
  maybeSplitExpr();

  if (errorCount())
    return "";
  if (atEOF()) {
    setError("unexpected EOF");
    return "";
  }
  return tokens[pos++];
}

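// Returns the current token without consuming it.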
StringRef ScriptLexer::peek() {
  StringRef tok = next();
  if (errorCount())
    return "";
  pos = pos - 1;
  return tok;
}

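// Returns the token after the current one without consuming either. This
// provides the two-token lookahead used for labels in version scripts.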
StringRef ScriptLexer::peek2() {
  skip();
  StringRef tok = next();
  if (errorCount())
    return "";
  pos = pos - 2;
  return tok;
}

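// Consumes the current token if it equals tok and returns true; otherwise
// returns false and leaves the position unchanged.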
bool ScriptLexer::consume(StringRef tok) {
  if (peek() == tok) {
    skip();
    return true;
  }
  return false;
}

// Consumes tok followed by ":". Space is allowed between tok and ":".
bool ScriptLexer::consumeLabel(StringRef tok) {
  if (consume((tok + ":").str()))
    return true;
  if (tokens.size() >= pos + 2 && tokens[pos] == tok &&
      tokens[pos + 1] == ":") {
    pos += 2;
    return true;
  }
  return false;
}

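// Reads and discards the current token.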
void ScriptLexer::skip() { (void)next(); }

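// Reads the next token and reports an error if it does not match the
// expected string.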
void ScriptLexer::expect(StringRef expect) {
  if (errorCount())
    return;
  StringRef tok = next();
  if (tok != expect)
    setError(expect + " expected, but got " + tok);
}

// Returns true if S encloses T.
static bool encloses(StringRef s, StringRef t) {
  return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
}

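// Returns the memory buffer that contains the current token. A linker script
// may include other scripts, so tokens can originate from more than one
// buffer.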
MemoryBufferRef ScriptLexer::getCurrentMB() {
  // Find input buffer containing the current token.
  assert(!mbs.empty());
  if (pos == 0)
    return mbs.back();
  for (MemoryBufferRef mb : mbs)
    if (encloses(mb.getBuffer(), tokens[pos - 1]))
      return mb;
  llvm_unreachable("getCurrentMB: failed to find a token");
}