comparison lib/Support/YAMLParser.cpp @ 77:54457678186b LLVM3.6

LLVM 3.6
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Mon, 08 Sep 2014 22:06:00 +0900
parents 95c75e76d11b
children 60c9769439b8
comparison
equal deleted inserted replaced
34:e874dbf0ad9d 77:54457678186b
257 namespace llvm { 257 namespace llvm {
258 namespace yaml { 258 namespace yaml {
259 /// @brief Scans YAML tokens from a MemoryBuffer. 259 /// @brief Scans YAML tokens from a MemoryBuffer.
260 class Scanner { 260 class Scanner {
261 public: 261 public:
262 Scanner(const StringRef Input, SourceMgr &SM); 262 Scanner(StringRef Input, SourceMgr &SM);
263 Scanner(MemoryBuffer *Buffer, SourceMgr &SM_); 263 Scanner(MemoryBufferRef Buffer, SourceMgr &SM_);
264 264
265 /// @brief Parse the next token and return it without popping it. 265 /// @brief Parse the next token and return it without popping it.
266 Token &peekNext(); 266 Token &peekNext();
267 267
268 /// @brief Parse the next token and pop it from the queue. 268 /// @brief Parse the next token and pop it from the queue.
292 bool failed() { 292 bool failed() {
293 return Failed; 293 return Failed;
294 } 294 }
295 295
296 private: 296 private:
297 void init(MemoryBufferRef Buffer);
298
297 StringRef currentInput() { 299 StringRef currentInput() {
298 return StringRef(Current, End - Current); 300 return StringRef(Current, End - Current);
299 } 301 }
300 302
301 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting 303 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
376 /// 378 ///
377 /// @returns A StringRef starting at Cur which covers the longest contiguous 379 /// @returns A StringRef starting at Cur which covers the longest contiguous
378 /// sequence of ns-uri-char. 380 /// sequence of ns-uri-char.
379 StringRef scan_ns_uri_char(); 381 StringRef scan_ns_uri_char();
380 382
381 /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
382 StringRef scan_ns_plain_one_line();
383
384 /// @brief Consume a minimal well-formed code unit subsequence starting at 383 /// @brief Consume a minimal well-formed code unit subsequence starting at
385 /// \a Cur. Return false if it is not the same Unicode scalar value as 384 /// \a Cur. Return false if it is not the same Unicode scalar value as
386 /// \a Expected. This updates \a Column. 385 /// \a Expected. This updates \a Column.
387 bool consume(uint32_t Expected); 386 bool consume(uint32_t Expected);
388 387
470 469
471 /// @brief The SourceMgr used for diagnostics and buffer management. 470 /// @brief The SourceMgr used for diagnostics and buffer management.
472 SourceMgr &SM; 471 SourceMgr &SM;
473 472
474 /// @brief The original input. 473 /// @brief The original input.
475 MemoryBuffer *InputBuffer; 474 MemoryBufferRef InputBuffer;
476 475
477 /// @brief The current position of the scanner. 476 /// @brief The current position of the scanner.
478 StringRef::iterator Current; 477 StringRef::iterator Current;
479 478
480 /// @brief The end of the input (one past the last character). 479 /// @brief The end of the input (one past the last character).
700 EscapedInput.push_back(*i); 699 EscapedInput.push_back(*i);
701 } 700 }
702 return EscapedInput; 701 return EscapedInput;
703 } 702 }
704 703
// Scanner construction, shown as a two-column comparison. Every row is
// "<left line no> <left text> <right line no> <right text>"; a row with a
// single number exists on that side only.
//
// Left column (705-733): two constructors, each setting all scanner state in
// its member-initializer list. The StringRef overload wraps Input with
// MemoryBuffer::getMemBuffer(Input, "YAML"); the MemoryBuffer* overload stores
// the pointer it is given. Both pass InputBuffer to SM.AddNewSourceBuffer.
//
// Right column (704-726): both constructors bind only SM and forward to
// init(MemoryBufferRef). init() stores the buffer reference, points
// Current/End at its start/end, assigns the same initial values the left
// column used (Indent = -1; Column, Line, FlowLevel = 0; IsStartOfStream and
// IsSimpleKeyAllowed = true; Failed = false), then moves a
// std::unique_ptr<MemoryBuffer> obtained from MemoryBuffer::getMemBuffer(Buffer)
// into SM.AddNewSourceBuffer.
705 Scanner::Scanner(StringRef Input, SourceMgr &sm) 704 Scanner::Scanner(StringRef Input, SourceMgr &sm) : SM(sm) {
706 : SM(sm) 705 init(MemoryBufferRef(Input, "YAML"));
707 , Indent(-1) 706 }
708 , Column(0) 707
709 , Line(0) 708 Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_) : SM(SM_) {
710 , FlowLevel(0) 709 init(Buffer);
711 , IsStartOfStream(true) 710 }
712 , IsSimpleKeyAllowed(true) 711
713 , Failed(false) { 712 void Scanner::init(MemoryBufferRef Buffer) {
714 InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); 713 InputBuffer = Buffer;
715 SM.AddNewSourceBuffer(InputBuffer, SMLoc()); 714 Current = InputBuffer.getBufferStart();
716 Current = InputBuffer->getBufferStart(); 715 End = InputBuffer.getBufferEnd();
717 End = InputBuffer->getBufferEnd(); 716 Indent = -1;
718 } 717 Column = 0;
719 718 Line = 0;
720 Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_) 719 FlowLevel = 0;
721 : SM(SM_) 720 IsStartOfStream = true;
722 , InputBuffer(Buffer) 721 IsSimpleKeyAllowed = true;
723 , Current(InputBuffer->getBufferStart()) 722 Failed = false;
724 , End(InputBuffer->getBufferEnd()) 723 std::unique_ptr<MemoryBuffer> InputBufferOwner =
725 , Indent(-1) 724 MemoryBuffer::getMemBuffer(Buffer);
726 , Column(0) 725 SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
727 , Line(0)
728 , FlowLevel(0)
729 , IsStartOfStream(true)
730 , IsSimpleKeyAllowed(true)
731 , Failed(false) {
732 SM.AddNewSourceBuffer(InputBuffer, SMLoc());
733 } 726 }
734 727
735 Token &Scanner::peekNext() { 728 Token &Scanner::peekNext() {
736 // If the current token is a possible simple key, keep parsing until we 729 // If the current token is a possible simple key, keep parsing until we
737 // can confirm. 730 // can confirm.
869 ++Column; 862 ++Column;
870 } else 863 } else
871 break; 864 break;
872 } 865 }
873 return StringRef(Start, Current - Start); 866 return StringRef(Start, Current - Start);
874 }
875
// Scanner::scan_ns_plain_one_line -- exists only in the left-hand revision:
// rows 876-909 carry a single line number. On the final row the left "910 }"
// closes this function, while the right "867 }" closes the function that
// precedes it in the right-hand revision.
//
// Steps over the first character unconditionally (the caller is expected to
// have checked it), then advances Current and Column until the end of input
// or a character that terminates the run. Returns the StringRef covering
// [start, Current).
876 StringRef Scanner::scan_ns_plain_one_line() {
877 StringRef::iterator start = Current;
878 // The first character must already be verified.
879 ++Current;
880 while (true) {
881 if (Current == End) {
882 break;
883 } else if (*Current == ':') {
// A ':' is consumed only as a pair with the character after it; a ':' at the
// very end of input, or one not followed by an ns-char, ends the run.
884 // Check if the next character is a ns-char.
885 if (Current + 1 == End)
886 break;
887 StringRef::iterator i = skip_ns_char(Current + 1);
888 if (Current + 1 != i) {
889 Current = i;
890 Column += 2; // Consume both the ':' and ns-char.
891 } else
892 break;
893 } else if (*Current == '#') {
// A '#' stays in the run only when the byte before it has its high bit set or
// skip_ns_char accepts that byte; otherwise the run stops before the '#'.
894 // Check if the previous character was a ns-char.
895 // The & 0x80 check is to check for the trailing byte of a utf-8
896 if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
897 ++Current;
898 ++Column;
899 } else
900 break;
901 } else {
// Any other character: advance via skip_nb_char and stop as soon as it makes
// no progress. Column goes up by one per accepted step.
902 StringRef::iterator i = skip_nb_char(Current);
903 if (i == Current)
904 break;
905 Current = i;
906 ++Column;
907 }
908 }
909 return StringRef(start, Current - start);
910 } 867 }
911 868
912 bool Scanner::consume(uint32_t Expected) { 869 bool Scanner::consume(uint32_t Expected) {
913 if (Expected >= 0x80) 870 if (Expected >= 0x80)
914 report_fatal_error("Not dealing with this yet"); 871 report_fatal_error("Not dealing with this yet");
1559 setError("Unrecognized character while tokenizing."); 1516 setError("Unrecognized character while tokenizing.");
1560 return false; 1517 return false;
1561 } 1518 }
1562 1519
// Stream constructors, two-column comparison. Each overload allocates a
// Scanner over the given input and SourceMgr and starts with an empty
// CurrentDoc.
// Left column (1563-1569): overloads take StringRef and MemoryBuffer*, and
// initialise CurrentDoc with the literal 0.
// Right column (1520-1524): overloads take StringRef and MemoryBufferRef, and
// value-initialise CurrentDoc with `CurrentDoc()`.
// The columns are offset here: left row 1566 is blank while right row 1523
// already begins the second overload.
1563 Stream::Stream(StringRef Input, SourceMgr &SM) 1520 Stream::Stream(StringRef Input, SourceMgr &SM)
1564 : scanner(new Scanner(Input, SM)) 1521 : scanner(new Scanner(Input, SM)), CurrentDoc() {}
1565 , CurrentDoc(0) {} 1522
1566 1523 Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM)
1567 Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM) 1524 : scanner(new Scanner(InputBuffer, SM)), CurrentDoc() {}
1568 : scanner(new Scanner(InputBuffer, SM))
1569 , CurrentDoc(0) {}
1570 1525
// Stream destructor: defined out of line with an empty body; identical in
// both columns.
1571 Stream::~Stream() {} 1526 Stream::~Stream() {}
1572 1527
// Stream::failed: forwards to the owned scanner's failed() and returns its
// result; identical in both columns.
1573 bool Stream::failed() { return scanner->failed(); } 1528 bool Stream::failed() { return scanner->failed(); }
1574 1529
// Stream::skip: walks every document from begin() to end() and calls skip()
// on each one; identical in both columns.
1599 void Stream::skip() { 1554 void Stream::skip() {
1600 for (document_iterator i = begin(), e = end(); i != e; ++i) 1555 for (document_iterator i = begin(), e = end(); i != e; ++i)
1601 i->skip(); 1556 i->skip();
1602 } 1557 }
1603 1558
// Node constructor, two-column comparison.
// Stores the owning-document reference, the node type id, the anchor and the
// tag, then sets SourceRange to an empty range (begin == end) positioned at
// the start of the next token returned by peekNext().
// Left column (1604-1611): D is an OwningPtr<Document>&, one initializer per row.
// Right column (1559-1564): D is a std::unique_ptr<Document>&, initializers on
// one row. The constructor body is the same on both sides.
1604 Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A, StringRef T) 1559 Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1605 : Doc(D) 1560 StringRef T)
1606 , TypeID(Type) 1561 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1607 , Anchor(A)
1608 , Tag(T) {
1609 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); 1562 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1610 SourceRange = SMRange(Start, Start); 1563 SourceRange = SMRange(Start, Start);
1611 } 1564 }
1612 1565
1613 std::string Node::getVerbatimTag() const { 1566 std::string Node::getVerbatimTag() const {
1615 if (!Raw.empty() && Raw != "!") { 1568 if (!Raw.empty() && Raw != "!") {
1616 std::string Ret; 1569 std::string Ret;
1617 if (Raw.find_last_of('!') == 0) { 1570 if (Raw.find_last_of('!') == 0) {
1618 Ret = Doc->getTagMap().find("!")->second; 1571 Ret = Doc->getTagMap().find("!")->second;
1619 Ret += Raw.substr(1); 1572 Ret += Raw.substr(1);
1620 return llvm_move(Ret); 1573 return std::move(Ret);
1621 } else if (Raw.startswith("!!")) { 1574 } else if (Raw.startswith("!!")) {
1622 Ret = Doc->getTagMap().find("!!")->second; 1575 Ret = Doc->getTagMap().find("!!")->second;
1623 Ret += Raw.substr(2); 1576 Ret += Raw.substr(2);
1624 return llvm_move(Ret); 1577 return std::move(Ret);
1625 } else { 1578 } else {
1626 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); 1579 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1627 std::map<StringRef, StringRef>::const_iterator It = 1580 std::map<StringRef, StringRef>::const_iterator It =
1628 Doc->getTagMap().find(TagHandle); 1581 Doc->getTagMap().find(TagHandle);
1629 if (It != Doc->getTagMap().end()) 1582 if (It != Doc->getTagMap().end())
1633 T.Kind = Token::TK_Tag; 1586 T.Kind = Token::TK_Tag;
1634 T.Range = TagHandle; 1587 T.Range = TagHandle;
1635 setError(Twine("Unknown tag handle ") + TagHandle, T); 1588 setError(Twine("Unknown tag handle ") + TagHandle, T);
1636 } 1589 }
1637 Ret += Raw.substr(Raw.find_last_of('!') + 1); 1590 Ret += Raw.substr(Raw.find_last_of('!') + 1);
1638 return llvm_move(Ret); 1591 return std::move(Ret);
1639 } 1592 }
1640 } 1593 }
1641 1594
1642 switch (getType()) { 1595 switch (getType()) {
1643 case NK_Null: 1596 case NK_Null:
1917 } 1870 }
1918 1871
// MappingNode::increment -- advance the mapping iterator by one entry.
// Two-column comparison; the visible rows differ only in how the null entry
// is written (`CurrentEntry = 0` on the left, `CurrentEntry = nullptr` on the
// right).
// Rows 1935-1936 and 1954-1955 (right: 1888-1889 and 1907-1908) are elided
// from this comparison, so the body of the Key/Scalar branch and the body of
// the TK_FlowEntry case are not shown here.
//
// Visible flow: a failed parse, or an MT_Inline mapping that has already
// yielded an entry, puts the iterator in its end state (IsAtEnd = true, null
// CurrentEntry). Otherwise the current entry is skipped, the next token is
// peeked, and the token kind selects between continuing, ending, or reporting
// an error.
1919 void MappingNode::increment() { 1872 void MappingNode::increment() {
1920 if (failed()) { 1873 if (failed()) {
1921 IsAtEnd = true; 1874 IsAtEnd = true;
1922 CurrentEntry = 0; 1875 CurrentEntry = nullptr;
1923 return; 1876 return;
1924 } 1877 }
1925 if (CurrentEntry) { 1878 if (CurrentEntry) {
1926 CurrentEntry->skip(); 1879 CurrentEntry->skip();
1927 if (Type == MT_Inline) { 1880 if (Type == MT_Inline) {
1928 IsAtEnd = true; 1881 IsAtEnd = true;
1929 CurrentEntry = 0; 1882 CurrentEntry = nullptr;
1930 return; 1883 return;
1931 } 1884 }
1932 } 1885 }
1933 Token T = peekNext(); 1886 Token T = peekNext();
1934 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { 1887 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
1937 } else if (Type == MT_Block) { 1890 } else if (Type == MT_Block) {
1938 switch (T.Kind) { 1891 switch (T.Kind) {
1939 case Token::TK_BlockEnd: 1892 case Token::TK_BlockEnd:
1940 getNext(); 1893 getNext();
1941 IsAtEnd = true; 1894 IsAtEnd = true;
1942 CurrentEntry = 0; 1895 CurrentEntry = nullptr;
1943 break; 1896 break;
// No break after setError: the default case falls through into TK_Error so
// that an unexpected token also leaves the iterator in its end state.
1944 default: 1897 default:
1945 setError("Unexpected token. Expected Key or Block End", T); 1898 setError("Unexpected token. Expected Key or Block End", T);
1946 case Token::TK_Error: 1899 case Token::TK_Error:
1947 IsAtEnd = true; 1900 IsAtEnd = true;
1948 CurrentEntry = 0; 1901 CurrentEntry = nullptr;
1949 } 1902 }
1950 } else { 1903 } else {
1951 switch (T.Kind) { 1904 switch (T.Kind) {
1952 case Token::TK_FlowEntry: 1905 case Token::TK_FlowEntry:
1953 // Eat the flow entry and recurse. 1906 // Eat the flow entry and recurse.
// TK_FlowMappingEnd consumes its token and then falls through into TK_Error,
// sharing the end-state assignments below.
1956 case Token::TK_FlowMappingEnd: 1909 case Token::TK_FlowMappingEnd:
1957 getNext(); 1910 getNext();
1958 case Token::TK_Error: 1911 case Token::TK_Error:
1959 // Set this to end iterator. 1912 // Set this to end iterator.
1960 IsAtEnd = true; 1913 IsAtEnd = true;
1961 CurrentEntry = 0; 1914 CurrentEntry = nullptr;
1962 break; 1915 break;
1963 default: 1916 default:
1964 setError( "Unexpected token. Expected Key, Flow Entry, or Flow " 1917 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
1965 "Mapping End." 1918 "Mapping End."
1966 , T); 1919 , T);
1967 IsAtEnd = true; 1920 IsAtEnd = true;
1968 CurrentEntry = 0; 1921 CurrentEntry = nullptr;
1969 } 1922 }
1970 } 1923 }
1971 } 1924 }
1972 1925
// SequenceNode::increment -- advance the sequence iterator by one entry.
// Two-column comparison; the visible rows differ only in null spelling:
// `CurrentEntry = 0` / `CurrentEntry == 0` on the left versus
// `CurrentEntry = nullptr` / `!CurrentEntry` on the right.
// Rows 2023-2025 and 2051-2053 (right: 1976-1978 and 2004-2006) are elided
// from this comparison, so the body of the TK_FlowEntry case and the body of
// the final `if (!CurrentEntry)` are not shown here.
//
// Visible flow: a failed parse puts the iterator in its end state
// (IsAtEnd = true, null CurrentEntry). Otherwise the current entry is skipped,
// the next token is peeked, and handling depends on SeqType (ST_Block,
// ST_Indentless or ST_Flow) and on the token kind.
1973 void SequenceNode::increment() { 1926 void SequenceNode::increment() {
1974 if (failed()) { 1927 if (failed()) {
1975 IsAtEnd = true; 1928 IsAtEnd = true;
1976 CurrentEntry = 0; 1929 CurrentEntry = nullptr;
1977 return; 1930 return;
1978 } 1931 }
1979 if (CurrentEntry) 1932 if (CurrentEntry)
1980 CurrentEntry->skip(); 1933 CurrentEntry->skip();
1981 Token T = peekNext(); 1934 Token T = peekNext();
1982 if (SeqType == ST_Block) { 1935 if (SeqType == ST_Block) {
1983 switch (T.Kind) { 1936 switch (T.Kind) {
1984 case Token::TK_BlockEntry: 1937 case Token::TK_BlockEntry:
1985 getNext(); 1938 getNext();
1986 CurrentEntry = parseBlockNode(); 1939 CurrentEntry = parseBlockNode();
1987 if (CurrentEntry == 0) { // An error occurred. 1940 if (!CurrentEntry) { // An error occurred.
1988 IsAtEnd = true; 1941 IsAtEnd = true;
1989 CurrentEntry = 0; 1942 CurrentEntry = nullptr;
1990 } 1943 }
1991 break; 1944 break;
1992 case Token::TK_BlockEnd: 1945 case Token::TK_BlockEnd:
1993 getNext(); 1946 getNext();
1994 IsAtEnd = true; 1947 IsAtEnd = true;
1995 CurrentEntry = 0; 1948 CurrentEntry = nullptr;
1996 break; 1949 break;
// No break after setError: the default case falls through into TK_Error so
// that an unexpected token also leaves the iterator in its end state.
1997 default: 1950 default:
1998 setError( "Unexpected token. Expected Block Entry or Block End." 1951 setError( "Unexpected token. Expected Block Entry or Block End."
1999 , T); 1952 , T);
2000 case Token::TK_Error: 1953 case Token::TK_Error:
2001 IsAtEnd = true; 1954 IsAtEnd = true;
2002 CurrentEntry = 0; 1955 CurrentEntry = nullptr;
2003 } 1956 }
2004 } else if (SeqType == ST_Indentless) { 1957 } else if (SeqType == ST_Indentless) {
2005 switch (T.Kind) { 1958 switch (T.Kind) {
2006 case Token::TK_BlockEntry: 1959 case Token::TK_BlockEntry:
2007 getNext(); 1960 getNext();
2008 CurrentEntry = parseBlockNode(); 1961 CurrentEntry = parseBlockNode();
2009 if (CurrentEntry == 0) { // An error occurred. 1962 if (!CurrentEntry) { // An error occurred.
2010 IsAtEnd = true; 1963 IsAtEnd = true;
2011 CurrentEntry = 0; 1964 CurrentEntry = nullptr;
2012 } 1965 }
2013 break; 1966 break;
// In an indentless sequence any token other than a block entry ends the
// sequence without an error being reported here.
2014 default: 1967 default:
2015 case Token::TK_Error: 1968 case Token::TK_Error:
2016 IsAtEnd = true; 1969 IsAtEnd = true;
2017 CurrentEntry = 0; 1970 CurrentEntry = nullptr;
2018 } 1971 }
2019 } else if (SeqType == ST_Flow) { 1972 } else if (SeqType == ST_Flow) {
2020 switch (T.Kind) { 1973 switch (T.Kind) {
2021 case Token::TK_FlowEntry: 1974 case Token::TK_FlowEntry:
2022 // Eat the flow entry and recurse. 1975 // Eat the flow entry and recurse.
// TK_FlowSequenceEnd consumes its token and then falls through into TK_Error,
// sharing the end-state assignments below.
2026 case Token::TK_FlowSequenceEnd: 1979 case Token::TK_FlowSequenceEnd:
2027 getNext(); 1980 getNext();
2028 case Token::TK_Error: 1981 case Token::TK_Error:
2029 // Set this to end iterator. 1982 // Set this to end iterator.
2030 IsAtEnd = true; 1983 IsAtEnd = true;
2031 CurrentEntry = 0; 1984 CurrentEntry = nullptr;
2032 break; 1985 break;
2033 case Token::TK_StreamEnd: 1986 case Token::TK_StreamEnd:
2034 case Token::TK_DocumentEnd: 1987 case Token::TK_DocumentEnd:
2035 case Token::TK_DocumentStart: 1988 case Token::TK_DocumentStart:
2036 setError("Could not find closing ]!", T); 1989 setError("Could not find closing ]!", T);
2037 // Set this to end iterator. 1990 // Set this to end iterator.
2038 IsAtEnd = true; 1991 IsAtEnd = true;
2039 CurrentEntry = 0; 1992 CurrentEntry = nullptr;
2040 break; 1993 break;
2041 default: 1994 default:
2042 if (!WasPreviousTokenFlowEntry) { 1995 if (!WasPreviousTokenFlowEntry) {
2043 setError("Expected , between entries!", T); 1996 setError("Expected , between entries!", T);
2044 IsAtEnd = true; 1997 IsAtEnd = true;
2045 CurrentEntry = 0; 1998 CurrentEntry = nullptr;
2046 break; 1999 break;
2047 } 2000 }
2048 // Otherwise it must be a flow entry. 2001 // Otherwise it must be a flow entry.
2049 CurrentEntry = parseBlockNode(); 2002 CurrentEntry = parseBlockNode();
2050 if (!CurrentEntry) { 2003 if (!CurrentEntry) {
2054 break; 2007 break;
2055 } 2008 }
2056 } 2009 }
2057 } 2010 }
2058 2011
2059 Document::Document(Stream &S) : stream(S), Root(0) { 2012 Document::Document(Stream &S) : stream(S), Root(nullptr) {
2060 // Tag maps starts with two default mappings. 2013 // Tag maps starts with two default mappings.
2061 TagMap["!"] = "!"; 2014 TagMap["!"] = "!";
2062 TagMap["!!"] = "tag:yaml.org,2002:"; 2015 TagMap["!!"] = "tag:yaml.org,2002:";
2063 2016
2064 if (parseDirectives()) 2017 if (parseDirectives())
2111 getNext(); 2064 getNext();
2112 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); 2065 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2113 case Token::TK_Anchor: 2066 case Token::TK_Anchor:
2114 if (AnchorInfo.Kind == Token::TK_Anchor) { 2067 if (AnchorInfo.Kind == Token::TK_Anchor) {
2115 setError("Already encountered an anchor for this node!", T); 2068 setError("Already encountered an anchor for this node!", T);
2116 return 0; 2069 return nullptr;
2117 } 2070 }
2118 AnchorInfo = getNext(); // Consume TK_Anchor. 2071 AnchorInfo = getNext(); // Consume TK_Anchor.
2119 T = peekNext(); 2072 T = peekNext();
2120 goto parse_property; 2073 goto parse_property;
2121 case Token::TK_Tag: 2074 case Token::TK_Tag:
2122 if (TagInfo.Kind == Token::TK_Tag) { 2075 if (TagInfo.Kind == Token::TK_Tag) {
2123 setError("Already encountered a tag for this node!", T); 2076 setError("Already encountered a tag for this node!", T);
2124 return 0; 2077 return nullptr;
2125 } 2078 }
2126 TagInfo = getNext(); // Consume TK_Tag. 2079 TagInfo = getNext(); // Consume TK_Tag.
2127 T = peekNext(); 2080 T = peekNext();
2128 goto parse_property; 2081 goto parse_property;
2129 default: 2082 default:
2187 default: 2140 default:
2188 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not 2141 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2189 // !!null null. 2142 // !!null null.
2190 return new (NodeAllocator) NullNode(stream.CurrentDoc); 2143 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2191 case Token::TK_Error: 2144 case Token::TK_Error:
2192 return 0; 2145 return nullptr;
2193 } 2146 }
2194 llvm_unreachable("Control flow shouldn't reach here."); 2147 llvm_unreachable("Control flow shouldn't reach here.");
2195 return 0; 2148 return nullptr;
2196 } 2149 }
2197 2150
2198 bool Document::parseDirectives() { 2151 bool Document::parseDirectives() {
2199 bool isDirective = false; 2152 bool isDirective = false;
2200 while (true) { 2153 while (true) {