#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include "scanscalar.h"
#include "scantag.h"
#include "tag.h"
// NOTE(review): the target of the original system #include was lost when this
// file was extracted; <string> is what the code below visibly requires.
// Confirm against the project history.
#include <string>

namespace YAML
{
    ///////////////////////////////////////////////////////////////////////
    // Specialization for scanning specific tokens

    // Directive
    // . Reads the directive name into the token's value and every following
    //   whitespace-separated word into the token's params, stopping at a line
    //   break, a comment, or the end of input.
    // . Note: no semantic checking is done here (that's for the parser to do)
    void Scanner::ScanDirective()
    {
        // pop indents and simple keys
        PopAllIndents();
        PopAllSimpleKeys();

        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = false;

        // store pos and eat indicator
        Token token(Token::DIRECTIVE, INPUT.mark());
        INPUT.eat(1);

        // read name
        while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
            token.value += INPUT.get();

        // read parameters
        while(true) {
            // first get rid of whitespace
            while(Exp::Blank().Matches(INPUT))
                INPUT.eat(1);

            // break on newline or comment
            if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
                break;

            // now read parameter
            std::string param;
            while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
                param += INPUT.get();

            token.params.push_back(param);
        }

        m_tokens.push(token);
    }

    // DocStart
    // . Pops all indents and simple keys, eats the three-character indicator
    //   and queues a DOC_START token marked at the indicator's position.
    void Scanner::ScanDocStart()
    {
        PopAllIndents();
        PopAllSimpleKeys();
        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = false;

        // eat
        Mark mark = INPUT.mark();
        INPUT.eat(3);
        m_tokens.push(Token(Token::DOC_START, mark));
    }

    // DocEnd
    // . Pops all indents and simple keys, eats the three-character indicator
    //   and queues a DOC_END token marked at the indicator's position.
    void Scanner::ScanDocEnd()
    {
        PopAllIndents();
        PopAllSimpleKeys();
        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = false;

        // eat
        Mark mark = INPUT.mark();
        INPUT.eat(3);
        m_tokens.push(Token(Token::DOC_END, mark));
    }

    // FlowStart
    // . Eats the opening flow indicator, pushes the matching marker onto
    //   m_flows and queues FLOW_SEQ_START or FLOW_MAP_START.
    void Scanner::ScanFlowStart()
    {
        // flows can be simple keys
        InsertPotentialSimpleKey();
        m_simpleKeyAllowed = true;
        m_canBeJSONFlow = false;

        // eat
        Mark mark = INPUT.mark();
        const char ch = INPUT.get();

        // anything that is not the sequence opener is treated as a map opener
        const FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
        m_flows.push(flowType);

        const Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
        m_tokens.push(Token(type, mark));
    }

    // FlowEnd
    // . Eats the closing flow indicator, pops the flow it closes and queues
    //   FLOW_SEQ_END or FLOW_MAP_END.
    // . Throws ParserException(ErrorMsg::FLOW_END) when scanned in the block
    //   context, or when the indicator does not match the innermost open flow.
    void Scanner::ScanFlowEnd()
    {
        if(InBlockContext())
            throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);

        // we might have a solo entry in the flow context
        if(InFlowContext()) {
            if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
                m_tokens.push(Token(Token::VALUE, INPUT.mark()));
            else if(m_flows.top() == FLOW_SEQ)
                InvalidateSimpleKey();
        }

        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = true;

        // eat
        Mark mark = INPUT.mark();
        const char ch = INPUT.get();

        // check that it matches the start
        const FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
        if(m_flows.top() != flowType)
            throw ParserException(mark, ErrorMsg::FLOW_END);
        m_flows.pop();

        // compare against the enumerator explicitly (as ScanFlowStart does)
        // rather than relying on the numeric value of FLOW_MARKER
        const Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
        m_tokens.push(Token(type, mark));
    }

    // FlowEntry
    // . Eats the one-character entry indicator and queues a FLOW_ENTRY token.
    void Scanner::ScanFlowEntry()
    {
        // we might have a solo entry in the flow context
        if(InFlowContext()) {
            if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
                m_tokens.push(Token(Token::VALUE, INPUT.mark()));
            else if(m_flows.top() == FLOW_SEQ)
                InvalidateSimpleKey();
        }

        m_simpleKeyAllowed = true;
        m_canBeJSONFlow = false;

        // eat
        Mark mark = INPUT.mark();
        INPUT.eat(1);
        m_tokens.push(Token(Token::FLOW_ENTRY, mark));
    }

    // BlockEntry
    // . Pushes a SEQ indent at the current column, eats the one-character
    //   indicator and queues a BLOCK_ENTRY token.
    // . Throws ParserException(ErrorMsg::BLOCK_ENTRY) in the flow context or
    //   when a simple key is not allowed at this position.
    void Scanner::ScanBlockEntry()
    {
        // we better be in the block context!
        if(InFlowContext())
            throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);

        // can we put it here?
        if(!m_simpleKeyAllowed)
            throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);

        PushIndentTo(INPUT.column(), IndentMarker::SEQ);
        m_simpleKeyAllowed = true;
        m_canBeJSONFlow = false;

        // eat
        Mark mark = INPUT.mark();
        INPUT.eat(1);
        m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
    }

    // Key
    // . Eats the one-character key indicator and queues a KEY token.
    // . Throws ParserException(ErrorMsg::MAP_KEY) in the block context when a
    //   simple key is not allowed at this position.
    void Scanner::ScanKey()
    {
        // handle keys differently in the block context (and manage indents)
        if(InBlockContext()) {
            if(!m_simpleKeyAllowed)
                throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);

            PushIndentTo(INPUT.column(), IndentMarker::MAP);
        }

        // can only put a simple key here if we're in block context
        m_simpleKeyAllowed = InBlockContext();

        // eat
        Mark mark = INPUT.mark();
        INPUT.eat(1);
        m_tokens.push(Token(Token::KEY, mark));
    }

    // Value
    // . Eats the one-character value indicator and queues a VALUE token.
    // . Throws ParserException(ErrorMsg::MAP_VALUE) when no simple key was
    //   verified, we are in the block context, and a simple key is not allowed
    //   at this position.
    void Scanner::ScanValue()
    {
        // check whether a simple key was verified for this value
        const bool isSimpleKey = VerifySimpleKey();
        m_canBeJSONFlow = false;

        if(isSimpleKey) {
            // can't follow a simple key with another simple key (dunno why, though - it seems fine)
            m_simpleKeyAllowed = false;
        } else {
            // handle values differently in the block context (and manage indents)
            if(InBlockContext()) {
                if(!m_simpleKeyAllowed)
                    throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);

                PushIndentTo(INPUT.column(), IndentMarker::MAP);
            }

            // can only put a simple key here if we're in block context
            m_simpleKeyAllowed = InBlockContext();
        }

        // eat
        Mark mark = INPUT.mark();
        INPUT.eat(1);
        m_tokens.push(Token(Token::VALUE, mark));
    }

    // AnchorOrAlias
    // . Eats the indicator, reads the alphanumeric name that follows and
    //   queues an ALIAS or ANCHOR token whose value is that name.
    // . Throws ParserException when the name is empty (ALIAS_NOT_FOUND /
    //   ANCHOR_NOT_FOUND) or is followed by a character that Exp::AnchorEnd()
    //   does not accept (CHAR_IN_ALIAS / CHAR_IN_ANCHOR).
    void Scanner::ScanAnchorOrAlias()
    {
        // insert a potential simple key
        InsertPotentialSimpleKey();
        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = false;

        // eat the indicator
        Mark mark = INPUT.mark();
        const char indicator = INPUT.get();
        const bool alias = (indicator == Keys::Alias);

        // now eat the content
        std::string name;
        while(Exp::AlphaNumeric().Matches(INPUT))
            name += INPUT.get();

        // we need to have read SOMETHING!
        if(name.empty())
            throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);

        // and needs to end correctly
        if(INPUT && !Exp::AnchorEnd().Matches(INPUT))
            throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);

        // and we're done
        Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
        token.value = name;
        m_tokens.push(token);
    }

    // Tag
    // . Eats the tag indicator and queues a TAG token. token.data records the
    //   form that was read: VERBATIM, PRIMARY_HANDLE, SECONDARY_HANDLE or
    //   NAMED_HANDLE (in which case the suffix is stored in token.params).
    void Scanner::ScanTag()
    {
        // insert a potential simple key
        InsertPotentialSimpleKey();
        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = false;

        Token token(Token::TAG, INPUT.mark());

        // eat the indicator
        INPUT.get();

        if(INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
            token.value = ScanVerbatimTag(INPUT);
            token.data = Tag::VERBATIM;
        } else {
            // initialized defensively; ScanTagHandle receives it by reference
            bool canBeHandle = false;
            token.value = ScanTagHandle(INPUT, canBeHandle);
            token.data = (token.value.empty() ? Tag::SECONDARY_HANDLE : Tag::PRIMARY_HANDLE);

            // is there a suffix?
            if(canBeHandle && INPUT.peek() == Keys::Tag) {
                // eat the indicator
                INPUT.get();
                token.params.push_back(ScanTagSuffix(INPUT));
                token.data = Tag::NAMED_HANDLE;
            }
        }

        m_tokens.push(token);
    }

    // PlainScalar
    // . Scans an unquoted scalar with ScanScalar and queues it as a SCALAR
    //   token marked at the scalar's first character.
    void Scanner::ScanPlainScalar()
    {
        // set up the scanning parameters
        ScanScalarParams params;
        params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment());
        params.eatEnd = false;
        params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
        params.fold = FOLD_FLOW;
        params.eatLeadingWhitespace = true;
        params.trimTrailingSpaces = true;
        params.chomp = STRIP;
        params.onDocIndicator = BREAK;
        params.onTabInIndentation = THROW;

        // insert a potential simple key
        InsertPotentialSimpleKey();

        Mark mark = INPUT.mark();
        const std::string scalar = ScanScalar(INPUT, params);

        // can have a simple key only if we ended the scalar by starting a new line
        m_simpleKeyAllowed = params.leadingSpaces;
        m_canBeJSONFlow = false;

        // finally, check and see if we ended on an illegal character
        //if(Exp::IllegalCharInScalar.Matches(INPUT))
        //	throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);

        Token token(Token::SCALAR, mark);
        token.value = scalar;
        m_tokens.push(token);
    }

    // QuotedScalar
    // . Scans a single- or double-quoted scalar with ScanScalar and queues it
    //   as a SCALAR token marked at the opening quote.
    void Scanner::ScanQuotedScalar()
    {
        // peek at single or double quote (don't eat because we need to preserve (for the time being) the input position)
        const char quote = INPUT.peek();
        const bool single = (quote == '\'');

        // setup the scanning parameters
        ScanScalarParams params;
        params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
        params.eatEnd = true;
        params.escape = (single ? '\'' : '\\');
        params.indent = 0;
        params.fold = FOLD_FLOW;
        params.eatLeadingWhitespace = true;
        params.trimTrailingSpaces = false;
        params.chomp = CLIP;
        params.onDocIndicator = THROW;

        // insert a potential simple key
        InsertPotentialSimpleKey();

        Mark mark = INPUT.mark();

        // now eat that opening quote
        INPUT.get();

        // and scan
        const std::string scalar = ScanScalar(INPUT, params);
        m_simpleKeyAllowed = false;
        m_canBeJSONFlow = true;

        Token token(Token::SCALAR, mark);
        token.value = scalar;
        m_tokens.push(token);
    }

    // BlockScalarToken
    // . These need a little extra processing beforehand.
    // . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
    //   and then we need to figure out what level of indentation we'll be using.
    void Scanner::ScanBlockScalar()
    {
        ScanScalarParams params;
        params.indent = 1;
        params.detectIndent = true;

        // eat block indicator ('|' or '>')
        Mark mark = INPUT.mark();
        const char indicator = INPUT.get();
        params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);

        // eat chomping/indentation indicators
        params.chomp = CLIP;
        const int n = Exp::Chomp().Match(INPUT);

        // NOTE(review): the text from this loop's condition through the
        // GetTopIndent() comparison below was lost when the file was extracted
        // and has been reconstructed; the names KEEP, Exp::Digit,
        // ErrorMsg::ZERO_INDENT_IN_BLOCK and ErrorMsg::CHAR_IN_BLOCK are
        // assumed to exist in the project headers. Confirm against the
        // project history before relying on it.
        for(int i = 0; i < n; i++) {
            const char ch = INPUT.get();
            if(ch == '+')
                params.chomp = KEEP;
            else if(ch == '-')
                params.chomp = STRIP;
            else if(Exp::Digit().Matches(ch)) {
                if(ch == '0')
                    throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);

                // an explicit indentation indicator overrides auto-detection
                params.indent = ch - '0';
                params.detectIndent = false;
            }
        }

        // now eat whitespace
        while(Exp::Blank().Matches(INPUT))
            INPUT.eat(1);

        // and comments to the end of the line
        if(Exp::Comment().Matches(INPUT))
            while(INPUT && !Exp::Break().Matches(INPUT))
                INPUT.eat(1);

        // if it's not a line break, then we ran into a bad character inline
        if(INPUT && !Exp::Break().Matches(INPUT))
            throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);

        // set the initial indentation
        if(GetTopIndent() >= 0)
            params.indent += GetTopIndent();
        // (end of reconstructed span)

        params.eatLeadingWhitespace = false;
        params.trimTrailingSpaces = false;
        params.onTabInIndentation = THROW;

        const std::string scalar = ScanScalar(INPUT, params);

        // simple keys always ok after block scalars (since we're gonna start a new line anyways)
        m_simpleKeyAllowed = true;
        m_canBeJSONFlow = false;

        Token token(Token::SCALAR, mark);
        token.value = scalar;
        m_tokens.push(token);
    }
}