gremlin/libs/yaml-cpp/src/scantoken.cpp
2011-01-24 23:52:12 +01:00

435 lines
11 KiB
C++

#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include "scanscalar.h"
#include "scantag.h"
#include "tag.h"
#include <sstream>
namespace YAML
{
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// Directive
// . Note: no semantic checking is done here (that's for the parser to do)
void Scanner::ScanDirective()
{
std::string name;
std::vector <std::string> params;
// pop indents and simple keys
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// store pos and eat indicator
Token token(Token::DIRECTIVE, INPUT.mark());
INPUT.eat(1);
// read name
while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
token.value += INPUT.get();
// read parameters
while(1) {
// first get rid of whitespace
while(Exp::Blank().Matches(INPUT))
INPUT.eat(1);
// break on newline or comment
if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
break;
// now read parameter
std::string param;
while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
param += INPUT.get();
token.params.push_back(param);
}
m_tokens.push(token);
}
// DocStart
void Scanner::ScanDocStart()
{
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(3);
m_tokens.push(Token(Token::DOC_START, mark));
}
// DocEnd
void Scanner::ScanDocEnd()
{
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(3);
m_tokens.push(Token(Token::DOC_END, mark));
}
// FlowStart
void Scanner::ScanFlowStart()
{
// flows can be simple keys
InsertPotentialSimpleKey();
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
char ch = INPUT.get();
FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
m_flows.push(flowType);
Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
m_tokens.push(Token(type, mark));
}
// FlowEnd
void Scanner::ScanFlowEnd()
{
if(InBlockContext())
throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
// we might have a solo entry in the flow context
if(InFlowContext()) {
if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
m_tokens.push(Token(Token::VALUE, INPUT.mark()));
else if(m_flows.top() == FLOW_SEQ)
InvalidateSimpleKey();
}
m_simpleKeyAllowed = false;
m_canBeJSONFlow = true;
// eat
Mark mark = INPUT.mark();
char ch = INPUT.get();
// check that it matches the start
FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
if(m_flows.top() != flowType)
throw ParserException(mark, ErrorMsg::FLOW_END);
m_flows.pop();
Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
m_tokens.push(Token(type, mark));
}
// FlowEntry
void Scanner::ScanFlowEntry()
{
// we might have a solo entry in the flow context
if(InFlowContext()) {
if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
m_tokens.push(Token(Token::VALUE, INPUT.mark()));
else if(m_flows.top() == FLOW_SEQ)
InvalidateSimpleKey();
}
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::FLOW_ENTRY, mark));
}
// BlockEntry
void Scanner::ScanBlockEntry()
{
// we better be in the block context!
if(InFlowContext())
throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
// can we put it here?
if(!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
PushIndentTo(INPUT.column(), IndentMarker::SEQ);
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
}
// Key
void Scanner::ScanKey()
{
// handle keys diffently in the block context (and manage indents)
if(InBlockContext()) {
if(!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
PushIndentTo(INPUT.column(), IndentMarker::MAP);
}
// can only put a simple key here if we're in block context
m_simpleKeyAllowed = InBlockContext();
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::KEY, mark));
}
// Value
void Scanner::ScanValue()
{
// and check that simple key
bool isSimpleKey = VerifySimpleKey();
m_canBeJSONFlow = false;
if(isSimpleKey) {
// can't follow a simple key with another simple key (dunno why, though - it seems fine)
m_simpleKeyAllowed = false;
} else {
// handle values diffently in the block context (and manage indents)
if(InBlockContext()) {
if(!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
PushIndentTo(INPUT.column(), IndentMarker::MAP);
}
// can only put a simple key here if we're in block context
m_simpleKeyAllowed = InBlockContext();
}
// eat
Mark mark = INPUT.mark();
INPUT.eat(1);
m_tokens.push(Token(Token::VALUE, mark));
}
// AnchorOrAlias
void Scanner::ScanAnchorOrAlias()
{
bool alias;
std::string name;
// insert a potential simple key
InsertPotentialSimpleKey();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
// eat the indicator
Mark mark = INPUT.mark();
char indicator = INPUT.get();
alias = (indicator == Keys::Alias);
// now eat the content
while(Exp::AlphaNumeric().Matches(INPUT))
name += INPUT.get();
// we need to have read SOMETHING!
if(name.empty())
throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
// and needs to end correctly
if(INPUT && !Exp::AnchorEnd().Matches(INPUT))
throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
// and we're done
Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
token.value = name;
m_tokens.push(token);
}
// Tag
void Scanner::ScanTag()
{
// insert a potential simple key
InsertPotentialSimpleKey();
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
Token token(Token::TAG, INPUT.mark());
// eat the indicator
INPUT.get();
if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){
std::string tag = ScanVerbatimTag(INPUT);
token.value = tag;
token.data = Tag::VERBATIM;
} else {
bool canBeHandle;
token.value = ScanTagHandle(INPUT, canBeHandle);
token.data = (token.value.empty() ? Tag::SECONDARY_HANDLE : Tag::PRIMARY_HANDLE);
// is there a suffix?
if(canBeHandle && INPUT.peek() == Keys::Tag) {
// eat the indicator
INPUT.get();
token.params.push_back(ScanTagSuffix(INPUT));
token.data = Tag::NAMED_HANDLE;
}
}
m_tokens.push(token);
}
// PlainScalar
void Scanner::ScanPlainScalar()
{
std::string scalar;
// set up the scanning parameters
ScanScalarParams params;
params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment());
params.eatEnd = false;
params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
params.fold = FOLD_FLOW;
params.eatLeadingWhitespace = true;
params.trimTrailingSpaces = true;
params.chomp = STRIP;
params.onDocIndicator = BREAK;
params.onTabInIndentation = THROW;
// insert a potential simple key
InsertPotentialSimpleKey();
Mark mark = INPUT.mark();
scalar = ScanScalar(INPUT, params);
// can have a simple key only if we ended the scalar by starting a new line
m_simpleKeyAllowed = params.leadingSpaces;
m_canBeJSONFlow = false;
// finally, check and see if we ended on an illegal character
//if(Exp::IllegalCharInScalar.Matches(INPUT))
// throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
Token token(Token::SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
// QuotedScalar
void Scanner::ScanQuotedScalar()
{
std::string scalar;
// peek at single or double quote (don't eat because we need to preserve (for the time being) the input position)
char quote = INPUT.peek();
bool single = (quote == '\'');
// setup the scanning parameters
ScanScalarParams params;
params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
params.eatEnd = true;
params.escape = (single ? '\'' : '\\');
params.indent = 0;
params.fold = FOLD_FLOW;
params.eatLeadingWhitespace = true;
params.trimTrailingSpaces = false;
params.chomp = CLIP;
params.onDocIndicator = THROW;
// insert a potential simple key
InsertPotentialSimpleKey();
Mark mark = INPUT.mark();
// now eat that opening quote
INPUT.get();
// and scan
scalar = ScanScalar(INPUT, params);
m_simpleKeyAllowed = false;
m_canBeJSONFlow = true;
Token token(Token::SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
// BlockScalarToken
// . These need a little extra processing beforehand.
// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
// and then we need to figure out what level of indentation we'll be using.
void Scanner::ScanBlockScalar()
{
std::string scalar;
ScanScalarParams params;
params.indent = 1;
params.detectIndent = true;
// eat block indicator ('|' or '>')
Mark mark = INPUT.mark();
char indicator = INPUT.get();
params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
// eat chomping/indentation indicators
params.chomp = CLIP;
int n = Exp::Chomp().Match(INPUT);
for(int i=0;i<n;i++) {
char ch = INPUT.get();
if(ch == '+')
params.chomp = KEEP;
else if(ch == '-')
params.chomp = STRIP;
else if(Exp::Digit().Matches(ch)) {
if(ch == '0')
throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
params.indent = ch - '0';
params.detectIndent = false;
}
}
// now eat whitespace
while(Exp::Blank().Matches(INPUT))
INPUT.eat(1);
// and comments to the end of the line
if(Exp::Comment().Matches(INPUT))
while(INPUT && !Exp::Break().Matches(INPUT))
INPUT.eat(1);
// if it's not a line break, then we ran into a bad character inline
if(INPUT && !Exp::Break().Matches(INPUT))
throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
// set the initial indentation
if(GetTopIndent() >= 0)
params.indent += GetTopIndent();
params.eatLeadingWhitespace = false;
params.trimTrailingSpaces = false;
params.onTabInIndentation = THROW;
scalar = ScanScalar(INPUT, params);
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
Token token(Token::SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
}