blob: 22422c9a469b12f8bca21691f040efb138cc1bc2 [file] [log] [blame] [edit]
// Package impl edits text proto files, applies standard formatting
// and preserves comments.
package impl
import (
"bufio"
"bytes"
"fmt"
"strconv"
"strings"
"google.golang.org/protobuf/reflect/protoreflect"
"github.com/protocolbuffers/txtpbfmt/ast"
"github.com/protocolbuffers/txtpbfmt/config"
"github.com/protocolbuffers/txtpbfmt/descriptor"
"github.com/protocolbuffers/txtpbfmt/quote"
"github.com/protocolbuffers/txtpbfmt/sort"
"github.com/protocolbuffers/txtpbfmt/wrap"
)
// parser holds the input buffer and the cursor state used while parsing a
// text proto.
type parser struct {
// in is the raw input being parsed.
in []byte
// index is the byte offset into 'in' of the next unread character.
index int
// length is len(in), recorded when the parser is constructed.
length int
// Maps the index of '{' characters on 'in' that have the matching '}' on
// the same line to 'true'.
bracketSameLine map[int]bool
// config carries the parsing and formatting options in effect.
config config.Config
line, column int // current position, 1-based.
}
// defConfig is the zero-value configuration that Parse passes to ParseWithConfig.
var defConfig = config.Config{}
// bracketState tracks the lexical context (comment, string, template) of the
// scan performed by sameLineBrackets, so that brackets appearing inside
// comments, string literals or templates are not counted.
type bracketState struct {
	insideComment            bool   // between a '#' and the end of the line
	insideString             bool   // between an opening quote and its matching close
	insideTemplate           bool   // between a pair of '%' characters
	insideTripleQuotedString bool   // the open string uses a """ or ''' delimiter
	stringDelimiter          string // delimiter that opened the current string
	isEscapedChar            bool   // the previous character was an escaping '\' inside a string
}

// processChar updates the comment, template and string flags for the
// character c found at in[i]. Newlines and backslash escapes are not handled
// here; sameLineBrackets takes care of those.
func (s *bracketState) processChar(c byte, i int, in []byte, allowTripleQuotedStrings bool) {
	switch c {
	case '#':
		if !s.insideString {
			s.insideComment = true
		}
	case '%':
		if !s.insideComment && !s.insideString {
			s.insideTemplate = !s.insideTemplate
		}
	case '"', '\'':
		if s.insideComment {
			return
		}
		s.handleQuotes(c, i, in, allowTripleQuotedStrings)
	}
}

// handleQuotes opens or closes a string literal for the quote character c at
// in[i]. When allowTripleQuotedStrings is set and in[i:i+3] is `"""` or `'''`,
// the three-character sequence is used as the delimiter. A string is closed
// only by the delimiter that opened it; in a regular (non triple-quoted)
// string an escaped quote does not close it.
func (s *bracketState) handleQuotes(c byte, i int, in []byte, allowTripleQuotedStrings bool) {
	delim := string(c)
	tripleQuoted := false
	if allowTripleQuotedStrings && i+3 <= len(in) {
		triple := string(in[i : i+3])
		if triple == `"""` || triple == `'''` {
			delim = triple
			tripleQuoted = true
		}
	}
	if s.insideString {
		if s.stringDelimiter == delim && (s.insideTripleQuotedString || !s.isEscapedChar) {
			s.insideString = false
			s.insideTripleQuotedString = false
		}
	} else {
		s.insideString = true
		s.insideTripleQuotedString = tripleQuoted
		s.stringDelimiter = delim
	}
}

// Return the byte-positions of each bracket which has the corresponding close on the
// same line as a set.
//
// Both '{'...'}' and '<'...'>' are treated as brackets. Brackets inside
// comments, string literals and '%' templates are ignored. An error is
// returned when a closing bracket has no matching opener, or when the input
// ends inside a string literal.
func sameLineBrackets(in []byte, allowTripleQuotedStrings bool) (map[int]bool, error) {
	line := 1
	type bracket struct {
		index int
		line  int
	}
	var open []bracket // Stack.
	res := map[int]bool{}
	state := bracketState{}
	for i, c := range in {
		state.processChar(c, i, in, allowTripleQuotedStrings)
		ignored := state.insideComment || state.insideString || state.insideTemplate
		switch c {
		case '\n':
			line++
			state.insideComment = false
		case '{', '<':
			if !ignored {
				open = append(open, bracket{index: i, line: line})
			}
		case '}', '>':
			if !ignored {
				if len(open) == 0 {
					return nil, fmt.Errorf("too many '}' or '>' at line %d, index %d", line, i)
				}
				last := len(open) - 1
				br := open[last]
				open = open[:last]
				if br.line == line {
					res[br.index] = true
				}
			}
		}
		// The escape bookkeeping must run for every character, including
		// ignored brackets: otherwise a sequence such as `\>` inside a string
		// would leave isEscapedChar set and the closing quote that follows
		// would be mistaken for an escaped one.
		if state.isEscapedChar {
			state.isEscapedChar = false
		} else if c == '\\' && state.insideString && !state.insideTripleQuotedString {
			state.isEscapedChar = true
		}
	}
	if state.insideString {
		return nil, fmt.Errorf("unterminated string literal")
	}
	return res, nil
}
var (
// spaceSeparators lists the bytes treated as blank space (see isBlankSep).
spaceSeparators = []byte(" \t\n\r")
// valueSeparators lists the bytes that end a field name or an unquoted
// value (see isValueSep).
valueSeparators = []byte(" \t\n\r{}:,[]<>;#")
)
// Parse returns a tree representation of a textproto file.
// It calls ParseWithConfig with the zero-value configuration, so MetaComments
// present in the input are still applied.
func Parse(in []byte) ([]*ast.Node, error) {
return ParseWithConfig(in, defConfig)
}
// ParseWithConfig functions similar to Parse, but allows the user to pass in
// additional configuration options.
// MetaComments found in the input are applied to the local copy c first (see
// AddMetaCommentsToConfig); any error from that step is returned before
// parsing starts.
func ParseWithConfig(in []byte, c config.Config) ([]*ast.Node, error) {
if err := AddMetaCommentsToConfig(in, &c); err != nil {
return nil, err
}
return ParseWithMetaCommentConfig(in, c)
}
// ParseWithMetaCommentConfig parses in textproto with MetaComments already added to configuration.
//
// When c.SortFieldsByFieldNumber is set, c.ProtoDescriptor must be non-empty;
// the root message descriptor named by c.MessageFullName is loaded from it and
// used to annotate nodes with field numbers. After parsing, the registered
// ast formatters, string wrapping and sorting are applied in that order. The
// first error encountered is returned; errors from the descriptor loader are
// wrapped so callers can inspect them with errors.Is / errors.As.
func ParseWithMetaCommentConfig(in []byte, c config.Config) ([]*ast.Node, error) {
	p, err := newParser(in, c)
	if err != nil {
		return nil, err
	}
	// Load descriptor if field number sorting is enabled
	var rootDesc protoreflect.MessageDescriptor
	if c.SortFieldsByFieldNumber {
		if c.ProtoDescriptor == "" {
			return nil, fmt.Errorf("proto_descriptor is required when using sort_fields_by_field_number")
		}
		loader, err := descriptor.NewLoader(c.ProtoDescriptor)
		if err != nil {
			return nil, fmt.Errorf("failed to create descriptor loader: %w", err)
		}
		// Get root message descriptor
		rootDesc, err = loader.GetRootMessageDescriptor(c.MessageFullName)
		if err != nil {
			return nil, fmt.Errorf("failed to get root message descriptor: %w", err)
		}
	}
	if p.config.InfoLevel() {
		p.config.Infof("p.in: %q", string(p.in))
		p.config.Infof("p.length: %v", p.length)
	}
	// Although unnamed nodes aren't strictly allowed, some formats represent a
	// list of protos as a list of unnamed top-level nodes.
	nodes, _, err := p.parse( /*isRoot=*/ true, rootDesc)
	if err != nil {
		return nil, err
	}
	// parse returns early when it meets a closing bracket, so leftover input
	// means there was an unmatched closer at the top level.
	if p.index < p.length {
		return nil, fmt.Errorf("parser didn't consume all input. Stopped at %s", p.errorContext())
	}
	for _, f := range ast.GetFormatters() {
		if err := f(nodes); err != nil {
			return nil, err
		}
	}
	if err := wrap.Strings(nodes, 0, c); err != nil {
		return nil, err
	}
	if err := sort.Process( /*parent=*/ nil, nodes, c); err != nil {
		return nil, err
	}
	return nodes, nil
}
// addToConfig applies a single MetaComment to c.
//
// There are two types of MetaComment, one in the format of <key>=<val> and the other one doesn't
// have the equal sign. Currently there are only two MetaComments that are in the former format:
//
//	"sort_repeated_fields_by_subfield": If this appears multiple times, then they will all be added
//	to the config and the order is preserved.
//	"wrap_strings_at_column": The <val> is expected to be an integer. If it is not, an error is
//	returned. If this appears multiple times, only the last one is saved.
//
// An error is also returned for an unrecognized MetaComment, and when one of
// the two <key>=<val> MetaComments is given without the "=<val>" part. For all
// other keys any "=<val>" suffix is not inspected.
func addToConfig(metaComment string, c *config.Config) error {
	// Test if a MetaComment is in the format of <key>=<val>.
	key, val, hasEqualSign := strings.Cut(metaComment, "=")
	switch key {
	case "allow_triple_quoted_strings":
		c.AllowTripleQuotedStrings = true
	case "allow_unnamed_nodes_everywhere":
		c.AllowUnnamedNodesEverywhere = true
	case "disable":
		c.Disable = true
	case "expand_all_children":
		c.ExpandAllChildren = true
	case "preserve_angle_brackets":
		c.PreserveAngleBrackets = true
	case "remove_duplicate_values_for_repeated_fields":
		c.RemoveDuplicateValuesForRepeatedFields = true
	case "skip_all_colons":
		c.SkipAllColons = true
	case "smartquotes":
		c.SmartQuotes = true
	case "sort_fields_by_field_name":
		c.SortFieldsByFieldName = true
	case "sort_repeated_fields_by_content":
		c.SortRepeatedFieldsByContent = true
	case "sort_repeated_fields_by_subfield":
		// Take all the subfields and the subfields in order as tie breakers.
		if !hasEqualSign {
			return fmt.Errorf("format should be %s=<string>, got: %s", key, metaComment)
		}
		c.SortRepeatedFieldsBySubfield = append(c.SortRepeatedFieldsBySubfield, val)
	case "reverse_sort":
		c.ReverseSort = true
	case "dns_sort_order":
		c.DNSSortOrder = true
	case "wrap_strings_at_column":
		// If multiple of this MetaComment exists in the file, take the last one.
		if !hasEqualSign {
			return fmt.Errorf("format should be %s=<int>, got: %s", key, metaComment)
		}
		i, err := strconv.Atoi(strings.TrimSpace(val))
		if err != nil {
			return fmt.Errorf("error parsing %s value %q (skipping): %w", key, val, err)
		}
		c.WrapStringsAtColumn = i
	case "wrap_html_strings":
		c.WrapHTMLStrings = true
	case "wrap_strings_after_newlines":
		c.WrapStringsAfterNewlines = true
	case "wrap_strings_without_wordwrap":
		c.WrapStringsWithoutWordwrap = true
	case "use_short_repeated_primitive_fields":
		c.UseShortRepeatedPrimitiveFields = true
	case "on": // This doesn't change the overall config.
	case "off": // This doesn't change the overall config.
	default:
		return fmt.Errorf("unrecognized MetaComment: %s", metaComment)
	}
	return nil
}
// AddMetaCommentsToConfig parses MetaComments and adds them to the configuration.
//
// Only the leading block of the input is inspected: scanning stops at the
// first non-empty line that does not start with '#'. Within that block, every
// line of the form
//
//	# txtpbfmt: <MetaComment 1>[, <MetaComment 2> ...]
//
// has each of its comma-separated MetaComments applied to c. The first error
// reported for a MetaComment is returned.
func AddMetaCommentsToConfig(in []byte, c *config.Config) error {
	scanner := bufio.NewScanner(bytes.NewReader(in))
	// The default Scanner limit (bufio.MaxScanTokenSize) would make Scan stop
	// silently at an over-long line and drop any MetaComments after it. No
	// line can be longer than the input itself, so size the limit accordingly.
	scanner.Buffer(nil, len(in)+1)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}
		if line[0] != byte('#') {
			break // only process the leading comment block
		}
		// Look for comment lines in the format of "<key>:<value>", and process the lines with <key>
		// equals to "txtpbfmt". It's assumed that the MetaComments are given in the format of:
		// # txtpbfmt: <MetaComment 1>[, <MetaComment 2> ...]
		key, value, hasColon := strings.Cut(line[1:], ":") // Ignore the first '#'.
		if hasColon && strings.TrimSpace(key) == "txtpbfmt" {
			for _, s := range strings.Split(strings.TrimSpace(value), ",") {
				metaComment := strings.TrimSpace(s)
				if err := addToConfig(metaComment, c); err != nil {
					return err
				}
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("scanning for MetaComments: %w", err)
	}
	return nil
}
// newParser builds a parser positioned at the start of in.
//
// Unless c.ExpandAllChildren is set, the input is first scanned with
// sameLineBrackets and any error from that scan is returned. A non-empty
// input that does not end in '\n' is parsed with one appended; the append is
// done on a capacity-limited slice so the caller's backing array is never
// written to.
func newParser(in []byte, c config.Config) (*parser, error) {
	var bracketSameLine map[int]bool
	if c.ExpandAllChildren {
		bracketSameLine = map[int]bool{}
	} else {
		var err error
		if bracketSameLine, err = sameLineBrackets(in, c.AllowTripleQuotedStrings); err != nil {
			return nil, err
		}
	}
	if len(in) > 0 && in[len(in)-1] != '\n' {
		// The three-index slice forces append to allocate, so spare capacity
		// in the caller's slice is not overwritten.
		in = append(in[:len(in):len(in)], '\n')
	}
	return &parser{
		in:              in,
		index:           0,
		length:          len(in),
		bracketSameLine: bracketSameLine,
		config:          c,
		line:            1,
		column:          1,
	}, nil
}
// getFieldNumber looks up fieldName (by its text-format name) in desc and
// returns its field number. It returns 0 when desc is nil or the field is not
// found.
func getFieldNumber(desc protoreflect.MessageDescriptor, fieldName string) int32 {
	if desc != nil {
		if fd := desc.Fields().ByTextName(fieldName); fd != nil {
			return int32(fd.Number())
		}
	}
	return 0
}
// findChildDescriptor finds the descriptor for a nested message field.
//
// fieldName is looked up in desc by its text-format name. The descriptor of
// the field's message type is returned for message-typed and group-typed
// fields; nil is returned when desc is nil, the field is unknown, or the field
// is of any other kind.
func (p *parser) findChildDescriptor(desc protoreflect.MessageDescriptor, fieldName string) protoreflect.MessageDescriptor {
	if desc == nil {
		return nil
	}
	field := desc.Fields().ByTextName(fieldName)
	if field == nil {
		return nil
	}
	switch field.Kind() {
	case protoreflect.MessageKind, protoreflect.GroupKind:
		// Group fields carry a message descriptor too; without this their
		// children would never receive field numbers.
		return field.Message()
	}
	return nil
}
// nextInputIs reports whether there is unread input and its next byte is b.
// It does not advance the parser.
func (p *parser) nextInputIs(b byte) bool {
return p.index < p.length && p.in[p.index] == b
}
// consume advances past the next byte if it equals b and reports whether it
// did so. Consuming '\n' moves the position to column 1 of the next line;
// any other byte moves one column to the right.
func (p *parser) consume(b byte) bool {
	if p.index >= p.length || p.in[p.index] != b {
		return false
	}
	p.index++
	if b == '\n' {
		p.line++
		p.column = 1
	} else {
		p.column++
	}
	return true
}
// consumeString advances past s if the unread input starts with it and
// reports whether it did so. s should not have any newlines, because only the
// column (not the line) is updated.
func (p *parser) consumeString(s string) bool {
	end := p.index + len(s)
	if end > p.length || string(p.in[p.index:end]) != s {
		return false
	}
	p.index = end
	p.column += len(s)
	return true
}
// loopDetector detects if the parser is in an infinite loop (ie failing to
// make progress).
type loopDetector struct {
// lastIndex is the parser index observed by the most recent iter call that saw progress.
lastIndex int
// count is the number of consecutive iter calls that saw no progress.
count int
// parser is the parser being monitored.
parser *parser
}
// getLoopDetector returns a loopDetector for p that starts from the current
// index.
func (p *parser) getLoopDetector() *loopDetector {
return &loopDetector{lastIndex: p.index, parser: p}
}
// iter is meant to be called once per loop iteration. It returns an error
// when the parser index has not moved for two consecutive calls, and nil
// otherwise.
func (l *loopDetector) iter() error {
	if cur := l.parser.index; cur != l.lastIndex {
		// Progress was made: remember the new index and start counting afresh.
		l.lastIndex = cur
		l.count = 0
		return nil
	}
	l.count++
	if l.count >= 2 {
		return fmt.Errorf("parser failed to make progress at %s", l.parser.errorContext())
	}
	return nil
}
// errorContext describes the current parser position for use in error
// messages: the (clamped) byte index, the line/column position, and up to 20
// bytes of input before and after the index.
//
// It uses a pointer receiver like every other parser method, which avoids
// copying the parser, and is safe to call on an empty input.
func (p *parser) errorContext() string {
	index := p.index
	if index >= p.length {
		index = p.length - 1
	}
	if index < 0 {
		index = 0 // empty input
	}
	// Provide the surrounding input as context.
	lastContentIndex := index + 20
	if lastContentIndex >= p.length {
		lastContentIndex = p.length - 1
	}
	if lastContentIndex < index {
		lastContentIndex = index // empty input
	}
	previousContentIndex := index - 20
	if previousContentIndex < 0 {
		previousContentIndex = 0
	}
	before := string(p.in[previousContentIndex:index])
	after := string(p.in[index:lastContentIndex])
	return fmt.Sprintf("index %v\nposition %+v\nbefore: %q\nafter: %q\nbefore+after: %q", index, p.position(), before, after, before+after)
}
// position snapshots the parser's current byte index, line and column.
func (p *parser) position() ast.Position {
	var pos ast.Position
	pos.Byte = uint32(p.index)
	pos.Line = int32(p.line)
	pos.Column = int32(p.column)
	return pos
}
// rollbackPosition rewinds the parser to pos, restoring index, line and
// column. pos is normally a snapshot taken earlier with position().
func (p *parser) rollbackPosition(pos ast.Position) {
	p.index, p.line, p.column = int(pos.Byte), int(pos.Line), int(pos.Column)
}
// consumeOptionalSeparator skips an optional ';' and then an optional ','.
// Before that, if the previous byte is not blank and the next byte is ':',
// the ':' is consumed and an error is returned: an unnamed field directly
// after non-whitespace needs a separator first (key_one:,:value_two rather
// than key_one::value_two).
func (p *parser) consumeOptionalSeparator() error {
	afterNonBlank := p.index > 0 && !p.isBlankSep(p.index-1)
	if afterNonBlank && p.consume(':') {
		return fmt.Errorf("parser encountered unexpected character ':' (should be whitespace, ',', or ';')")
	}
	p.consume(';') // Optional; absence is fine.
	p.consume(',') // Optional; absence is fine.
	return nil
}
// parse parses a text proto.
// It assumes the text to be either conformant with the standard text proto
// (i.e. passes proto.UnmarshalText() without error) or the alternative textproto
// format (sequence of messages, each of which passes proto.UnmarshalText()).
// endPos is the position of the first character on the first line
// after parsed nodes: that's the position to append more children.
//
// isRoot is forwarded to parseFieldName, where it permits nodes without a
// field name. desc, when non-nil, is the descriptor of the message being
// parsed and is used to look up each child's field number.
//
// Parsing stops at the end of the input or when handleEndOfMessage consumes a
// closing '}', '>' or ']'. The returned slice is never nil on success.
func (p *parser) parse(isRoot bool, desc protoreflect.MessageDescriptor) (result []*ast.Node, endPos ast.Position, err error) {
var res []*ast.Node
res = []*ast.Node{} // empty children is different from nil children
// The loop detector turns a stalled parser into an error instead of an
// endless loop.
for ld := p.getLoopDetector(); p.index < p.length; {
if err := ld.iter(); err != nil {
return nil, ast.Position{}, err
}
// p.parse is often invoked with the index pointing at the newline character
// after the previous item. We should still report that this item starts in
// the next line.
p.consume('\r')
p.consume('\n')
startPos := p.position()
// A "# txtpbfmt: off" ... "# txtpbfmt: on" block is kept verbatim as a
// node that only has Raw text.
fmtDisabled, err := p.readFormatterDisabledBlock()
if err != nil {
return nil, startPos, err
}
if len(fmtDisabled) > 0 {
res = append(res, &ast.Node{
Start: startPos,
Raw: fmtDisabled,
})
continue
}
// Read PreComments.
comments, blankLines := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
// Handle blank lines.
if blankLines > 0 {
if p.config.InfoLevel() {
p.config.Infof("blankLines: %v", blankLines)
}
// Here we collapse the leading blank lines into one blank line.
comments = append([]string{""}, comments...)
}
// Templates ('%...%') found before the field are kept as pre-comments,
// together with any comment that follows them on the same line.
for p.nextInputIs('%') {
comments = append(comments, p.readTemplate())
c, _ := p.skipWhiteSpaceAndReadComments(false)
comments = append(comments, c...)
}
// A closing bracket ends this (sub)message; control returns to the caller.
if end, endPos, err := p.handleEndOfMessage(startPos, comments, &res); end {
return res, endPos, err
}
nd := &ast.Node{
Start: startPos,
PreComments: comments,
}
if p.config.InfoLevel() {
p.config.Infof("PreComments: %q", strings.Join(nd.PreComments, "\n"))
}
// Skip white-space other than '\n', which is handled below.
for p.consume(' ') || p.consume('\t') {
}
// Handle multiple comment blocks.
// <example>
// # comment block 1
// # comment block 1
//
// # comment block 2
// # comment block 2
// </example>
// Each block that ends on an empty line (instead of a field) gets its own
// 'empty' node.
if p.nextInputIs('\n') {
res = append(res, nd)
continue
}
// Handle end of file.
if end, err := p.handleEndOfFile(nd, &res); end {
if err != nil {
return nil, ast.Position{}, err
}
break
}
if err := p.parseFieldName(nd, isRoot); err != nil {
return nil, ast.Position{}, err
}
// Set field number from descriptor if available
nd.FieldNumber = getFieldNumber(desc, nd.Name)
// Skip separator.
preCommentsBeforeColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
nd.SkipColon = !p.consume(':')
// previousPos is where parseFieldValue rewinds to for plain values, so
// that comments after the colon are read again by readValues.
previousPos := p.position()
preCommentsAfterColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
if err := p.parseFieldValue(nd, desc, preCommentsBeforeColon, preCommentsAfterColon, previousPos); err != nil {
return nil, ast.Position{}, err
}
if p.config.InfoLevel() && p.index < p.length {
p.config.Infof("p.in[p.index]: %q", string(p.in[p.index]))
}
res = append(res, nd)
}
return res, p.position(), nil
}
// parseFieldValue parses what follows a field name (and optional colon) into
// nd. A '{' or '<' starts a nested message, a '[' starts a list, and anything
// else is read as one or more plain values after rewinding to previousPos so
// that comments skipped by the caller are read again with the values.
func (p *parser) parseFieldValue(nd *ast.Node, desc protoreflect.MessageDescriptor, preCommentsBeforeColon, preCommentsAfterColon []string, previousPos ast.Position) error {
	switch {
	case p.consume('{') || p.consume('<'):
		return p.parseMessage(nd, desc)
	case p.consume('['):
		return p.parseList(nd, preCommentsBeforeColon, preCommentsAfterColon)
	}
	// Plain value(s): rewind comments, then read.
	p.rollbackPosition(previousPos)
	var err error
	if nd.Values, err = p.readValues(); err != nil {
		return err
	}
	return p.consumeOptionalSeparator()
}
// handleEndOfFile reports whether the input is exhausted. If it is, nd.End is
// set to the current position and nd is appended to *res when it carries any
// pre-comments. The returned error is always nil.
func (p *parser) handleEndOfFile(nd *ast.Node, res *[]*ast.Node) (bool, error) {
	if p.index < p.length {
		return false, nil
	}
	nd.End = p.position()
	if len(nd.PreComments) != 0 {
		*res = append(*res, nd)
	}
	return true, nil
}
// handleEndOfMessage checks for a closing '}', '>' or ']' at the current
// position. If none is present it returns false and consumes nothing.
// Otherwise it consumes the bracket, appends a comment-only node to *res when
// comments is non-empty, consumes an optional separator, and returns true
// with the position where further children could be appended.
func (p *parser) handleEndOfMessage(startPos ast.Position, comments []string, res *[]*ast.Node) (bool, ast.Position, error) {
	closePos := p.position()
	if !(p.consume('}') || p.consume('>') || p.consume(']')) {
		return false, ast.Position{}, nil
	}
	// Handle comments after last child.
	if len(comments) > 0 {
		trailing := &ast.Node{Start: startPos, PreComments: comments}
		*res = append(*res, trailing)
	}
	// closePos points at the closing brace; step back over the spaces that
	// precede it so it marks the first character after the previous item.
	for closePos.Byte > 0 && p.in[closePos.Byte-1] == ' ' {
		closePos.Byte--
		closePos.Column--
	}
	if err := p.consumeOptionalSeparator(); err != nil {
		return true, ast.Position{}, err
	}
	// Done parsing children.
	return true, closePos, nil
}
// parseFieldName reads the name of the next field into nd.Name. A name that
// starts with '[' is read as a proto extension and stored with surrounding
// brackets. An empty plain name is an error unless isRoot is true or the
// config allows unnamed nodes everywhere.
func (p *parser) parseFieldName(nd *ast.Node, isRoot bool) error {
	if !p.consume('[') {
		// Read Name.
		nd.Name = p.readFieldName()
		if nd.Name == "" && !isRoot && !p.config.AllowUnnamedNodesEverywhere {
			return fmt.Errorf("Failed to find a FieldName at %s", p.errorContext())
		}
	} else {
		// Read Name (of proto extension).
		nd.Name = "[" + p.readExtension() + "]"
		p.consume(']') // The closing ']' is skipped when present.
	}
	if p.config.InfoLevel() {
		p.config.Infof("name: %q", nd.Name)
	}
	return nil
}
// parseMessage parses the body of a nested message into nd. It must be called
// right after the opening '{' or '<' was consumed, since it inspects the byte
// just before the current index.
func (p *parser) parseMessage(nd *ast.Node, desc protoreflect.MessageDescriptor) error {
	if p.config.SkipAllColons {
		nd.SkipColon = true
	}
	openIdx := p.index - 1 // index of the opening bracket
	nd.ChildrenSameLine = p.bracketSameLine[openIdx]
	nd.IsAngleBracket = p.config.PreserveAngleBrackets && p.in[openIdx] == '<'
	// Recursive call to parse child nodes.
	children, endPos, err := p.parse( /*isRoot=*/ false, p.findChildDescriptor(desc, nd.Name))
	if err != nil {
		return err
	}
	nd.Children, nd.End = children, endPos
	nd.ClosingBraceComment = p.readInlineComment()
	return nil
}
// parseList parses a '[...]' list into nd; the opening '[' must already be
// consumed. Comments found before the colon, after the colon and after the
// '[' are concatenated in that order and handed to the element parser. The
// list is parsed as a list of nodes when its first element starts with '{',
// and as a list of values otherwise.
func (p *parser) parseList(nd *ast.Node, preCommentsBeforeColon, preCommentsAfterColon []string) error {
	openBracketLine := p.line
	// Skip separator.
	afterOpen := p.readContinuousBlocksOfComments()
	preComments := append([]string(nil), preCommentsBeforeColon...)
	preComments = append(preComments, preCommentsAfterColon...)
	preComments = append(preComments, afterOpen...)
	if !p.nextInputIs('{') {
		// Handle list of values.
		return p.parseListOfValues(nd, preComments, openBracketLine)
	}
	// Handle list of nodes.
	return p.parseListOfNodes(nd, preComments, openBracketLine)
}
// parseListOfNodes parses the elements of a list whose items are messages and
// stores them as nd.Children. preComments, gathered before the first element,
// replace that element's own pre-comments.
func (p *parser) parseListOfNodes(nd *ast.Node, preComments []string, openBracketLine int) error {
	nd.ChildrenAsList = true
	children, endPos, err := p.parse( /*isRoot=*/ true, nil)
	if err != nil {
		return err
	}
	if len(children) != 0 {
		children[0].PreComments = preComments
	}
	nd.Children, nd.End = children, endPos
	nd.ClosingBraceComment = p.readInlineComment()
	// Same-line means the parser is still on the line of the opening '['.
	nd.ChildrenSameLine = p.line == openBracketLine
	return nil
}
// parseListOfValues parses the elements of a list of plain values into
// nd.Values, up to and including the closing ']'. preComments are the
// comments gathered before the first element. Each element must be a single
// value; adjacent string literals without a comma are rejected.
func (p *parser) parseListOfValues(nd *ast.Node, preComments []string, openBracketLine int) error {
nd.ValuesAsList = true // We found values in list - keep it as list.
// The loop condition itself consumes the closing ']' when it is reached.
for ld := p.getLoopDetector(); !p.consume(']') && p.index < p.length; {
if err := ld.iter(); err != nil {
return err
}
// Read each value in the list.
vals, err := p.readValues()
if err != nil {
return err
}
if len(vals) != 1 {
return fmt.Errorf("multiple-string value not supported (%v). Please add comma explicitly, see http://b/162070952", vals)
}
if len(preComments) > 0 {
// If we read preComments before readValues(), they should go first,
// but avoid copy overhead if there are none.
vals[0].PreComments = append(preComments, vals[0].PreComments...)
}
// Skip separator.
_, _ = p.skipWhiteSpaceAndReadComments(false /* multiLine */)
if p.consume(',') {
vals[0].InlineComment = p.readInlineComment()
}
nd.Values = append(nd.Values, vals...)
// Comments read here belong to the next element, or to the end of the
// list when no element follows.
preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */)
}
nd.ChildrenSameLine = openBracketLine == p.line
// Handle comments after last line (or for empty list)
nd.PostValuesComments = preComments
nd.ClosingBraceComment = p.readInlineComment()
if err := p.consumeOptionalSeparator(); err != nil {
return err
}
return nil
}
// readFieldName consumes and returns the input up to (not including) the next
// value separator or the end of the input.
func (p *parser) readFieldName() string {
	end := p.index
	for end < p.length && !p.isValueSep(end) {
		end++
	}
	return p.advance(end)
}
// readExtension consumes input up to the next value separator that is not
// blank space (or the end of the input) and returns it with all blank
// characters removed.
func (p *parser) readExtension() string {
	end := p.index
	for end < p.length {
		if p.isValueSep(end) && !p.isBlankSep(end) {
			break
		}
		end++
	}
	return removeBlanks(p.advance(end))
}
// removeBlanks returns in with every byte listed in spaceSeparators removed.
func removeBlanks(in string) string {
	kept := make([]byte, 0, len(in))
	for i := 0; i < len(in); i++ {
		if bytes.IndexByte(spaceSeparators, in[i]) < 0 {
			kept = append(kept, in[i])
		}
	}
	return string(kept)
}
// readContinuousBlocksOfComments reads consecutive comment blocks until a read
// yields no comment, and returns all their lines. A block that was preceded by
// blank lines is separated from the earlier blocks by one empty string.
func (p *parser) readContinuousBlocksOfComments() []string {
	var all []string
	for {
		block, blanks := p.skipWhiteSpaceAndReadComments(true)
		if len(block) == 0 {
			return all
		}
		if blanks > 0 && len(all) > 0 {
			all = append(all, "")
		}
		all = append(all, block...)
	}
}
// consumeWhitespace consumes blank characters starting at the current index.
// It returns the index of the last newline consumed, or the index at entry
// when no newline was consumed. An error is returned for a blank character
// that none of the branches below consumes.
func (p *parser) consumeWhitespace() (int, error) {
start := p.index
for p.index < p.length && p.isBlankSep(p.index) {
// NOTE(review): a '\r' that is not followed by '\n' is still consumed by
// the first condition before control falls through to the next branches.
if p.consume('\n') || (p.consume('\r') && p.consume('\n')) {
// Include up to one blank line before the 'off' directive.
start = p.index - 1
} else if p.consume(' ') || p.consume('\t') {
// Do nothing. Side-effect is to advance p.index.
} else {
return 0, fmt.Errorf("unhandled isBlankSep at %s", p.errorContext())
}
}
return start, nil
}
// Returns the exact text within the block flanked by "# txtpbfmt: off" and "# txtpbfmt: on".
// The 'off' directive must be on its own line, and it cannot be preceded by a comment line. Any
// preceding whitespace on this line and up to one blank line will be retained.
// The 'on' directive must be followed by a line break. Only full nodes of an AST can be
// within this block. Partially disabled sections, like just the first line of a for loop without
// body or closing brace, are not supported. Value lists are not supported. No parsing happens
// within this block, and as parsing errors will be ignored, please exercise caution.
//
// When no 'off' directive is found at the current position, the parser is
// rolled back and ("", nil) is returned. The block is skipped with advance so
// that the parser's line and column stay correct for whatever follows it.
func (p *parser) readFormatterDisabledBlock() (string, error) {
	const (
		offDirective = "# txtpbfmt: off"
		onDirective  = "# txtpbfmt: on"
	)
	previousPos := p.position()
	start, err := p.consumeWhitespace()
	if err != nil {
		return "", err
	}
	if !p.consumeString(offDirective) {
		// Directive not found. Rollback to start.
		p.rollbackPosition(previousPos)
		return "", nil
	}
	if !p.consume('\n') {
		return "", fmt.Errorf("txtpbfmt off should be followed by newline at %s", p.errorContext())
	}
	offset := bytes.Index(p.in[p.index:p.length], []byte(onDirective))
	if offset < 0 {
		// We reached the end of the file without finding the 'on' directive.
		p.rollbackPosition(previousPos)
		return "", fmt.Errorf("unterminated txtpbfmt off at %s", p.errorContext())
	}
	// Move past the disabled text and the 'on' directive, counting newlines.
	p.advance(p.index + offset + len(onDirective))
	if !p.consume('\n') {
		return "", fmt.Errorf("txtpbfmt on should be followed by newline at %s", p.errorContext())
	}
	// Retain up to one blank line.
	p.consume('\n')
	return string(p.in[start:p.index]), nil
}
// skipWhiteSpaceAndReadComments has multiple cases:
// - (1) reading a block of comments followed by a blank line
// - (2) reading a block of comments followed by non-blank content
// - (3) reading the inline comments between the current char and the end of
// the current line
//
// In both cases (1) and (2), there can also be blank lines before the comment
// starts.
//
// Lines of comments and number of blank lines before the comment will be
// returned. If there is no comment, the returned slice will be empty.
//
// With multiLine set to false the scan stops at the first newline. The parser
// is advanced to where the scan stopped.
func (p *parser) skipWhiteSpaceAndReadComments(multiLine bool) ([]string, int) {
i := p.index
// foundComment: a '#' has been seen at some point during this call.
// insideComment: i is currently between a '#' and the end of its line.
var foundComment, insideComment bool
commentBegin := 0
var comments []string
// Number of blanks lines *before* the comment (if any) starts.
blankLines := 0
for ; i < p.length; i++ {
if p.in[i] == '#' && !insideComment {
insideComment = true
foundComment = true
commentBegin = i
} else if p.in[i] == '\n' {
if insideComment {
comments = append(comments, string(p.in[commentBegin:i])) // Exclude the '\n'.
insideComment = false
} else if foundComment {
i-- // Put back the last '\n' so the caller can detect that we're on case (1).
break
} else {
blankLines++
}
if !multiLine {
break
}
}
// Outside a comment, the first non-blank byte ends the scan.
if !insideComment && !p.isBlankSep(i) {
break
}
}
sep := p.advance(i)
if p.config.InfoLevel() {
p.config.Infof("sep: %q\np.index: %v", string(sep), p.index)
if p.index < p.length {
p.config.Infof("p.in[p.index]: %q", string(p.in[p.index]))
}
}
return comments, blankLines
}
// isBlankSep reports whether the input byte at index i is one of
// spaceSeparators. Callers are expected to pass an index within the input.
func (p *parser) isBlankSep(i int) bool {
return bytes.Contains(spaceSeparators, p.in[i:i+1])
}
// isValueSep reports whether the input byte at index i is one of
// valueSeparators. Callers are expected to pass an index within the input.
func (p *parser) isValueSep(i int) bool {
return bytes.Contains(valueSeparators, p.in[i:i+1])
}
// advance moves the parser to index i (clamped to the input length) and
// returns the text it skipped over. Line and column are updated according to
// the newlines contained in that text.
func (p *parser) advance(i int) string {
	if i > p.length {
		i = p.length
	}
	consumed := string(p.in[p.index:i])
	p.index = i
	if lastNewline := strings.LastIndex(consumed, "\n"); lastNewline < 0 {
		p.column += len(consumed)
	} else {
		p.line += strings.Count(consumed, "\n")
		// Column is 1-based: one past the number of bytes after the last newline.
		p.column = len(consumed) - lastNewline
	}
	return consumed
}
// readValues reads the value(s) at the current position. It accepts, in this
// order, an optional '%' template, an optional triple-quoted string (when
// enabled in the config), and any number of consecutive quoted strings. If
// none of those was read, a single unquoted value is read instead.
func (p *parser) readValues() ([]*ast.Value, error) {
var values []*ast.Value
// previousPos is the position right after the last value read; it stays at
// the zero Position while no template or string value has been read.
var previousPos ast.Position
preComments, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
if p.nextInputIs('%') {
values = append(values, p.populateValue(p.readTemplate(), nil))
previousPos = p.position()
}
if v, err := p.readTripleQuotedStringValue(); err != nil {
return nil, err
} else {
if v != nil {
values = append(values, v)
previousPos = p.position()
}
}
for p.consume('"') || p.consume('\'') {
// Handle string value.
v, err := p.readSingleQuotedStringValue(preComments)
if err != nil {
return nil, err
}
values = append(values, v)
previousPos = p.position()
// Look ahead for another adjacent string; the comments read here become
// its pre-comments, or are rewound below if no string follows.
preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */)
}
if previousPos != (ast.Position{}) {
// Rewind comments.
p.rollbackPosition(previousPos)
} else {
i := p.index
// Handle other values.
values = append(values, p.readOtherValue(i, preComments))
}
if p.config.InfoLevel() {
p.config.Infof("values: %v", values)
}
return values, nil
}
// readTripleQuotedStringValue reads a triple-quoted string at the current
// position when the config allows them. It returns (nil, nil) when they are
// not allowed.
func (p *parser) readTripleQuotedStringValue() (*ast.Value, error) {
	if p.config.AllowTripleQuotedStrings {
		return p.readTripleQuotedString()
	}
	return nil, nil
}
// readSingleQuotedStringValue reads a quoted string whose opening quote has
// just been consumed, and returns it as a value carrying preComments. The
// string ends at the next unescaped occurrence of the opening quote
// character. An unescaped newline inside the string, or reaching the end of
// the input before the closing quote, is an error.
//
// On success the returned value is never nil.
func (p *parser) readSingleQuotedStringValue(preComments []string) (*ast.Value, error) {
	stringBegin := p.index - 1 // Index of the quote.
	for i := p.index; i < p.length; i++ {
		if p.in[i] == '\\' {
			i++ // Skip escaped char.
			continue
		}
		if p.in[i] == '\n' {
			p.index = i
			return nil, fmt.Errorf("found literal (unescaped) new line in string at %s", p.errorContext())
		}
		if p.in[i] == p.in[stringBegin] {
			var vl string
			if p.config.SmartQuotes {
				vl = quote.Smart(p.advance(i))
			} else {
				vl = quote.Fix(p.advance(i))
			}
			_ = p.advance(i + 1) // Skip the quote.
			return p.populateValue(vl, preComments), nil
		}
	}
	// The loop only ends without returning when the closing quote is missing.
	// Skipping an escaped character may step one past the end of the input,
	// so the index is clamped rather than compared against p.length.
	p.index = p.length
	return nil, fmt.Errorf("unfinished string at %s", p.errorContext())
}
// readOtherValue reads an unquoted value: it scans from index i to the next
// value separator (or the end of the input), advances the parser there and
// returns the consumed text as a value carrying preComments.
func (p *parser) readOtherValue(i int, preComments []string) *ast.Value {
	end := i
	for end < p.length && !p.isValueSep(end) {
		end++
	}
	return p.populateValue(p.advance(end), preComments)
}
// readTripleQuotedString reads a string delimited by `"""` or `'''` at the
// current position. It returns (nil, nil) without consuming anything when the
// input does not start with either delimiter. The returned value holds the
// string verbatim, delimiters included; no escape processing is done, so the
// string ends at the first occurrence of the opening delimiter.
//
// The string may span several lines, so it is skipped with advance, which
// keeps the parser's line and column correct. If the closing delimiter is
// missing, the parser is rolled back to the start of the string and an error
// is returned.
func (p *parser) readTripleQuotedString() (*ast.Value, error) {
	startPos := p.position()
	start := p.index
	delimiter := `"""`
	if !p.consumeString(delimiter) {
		delimiter = `'''`
		if !p.consumeString(delimiter) {
			return nil, nil
		}
	}
	offset := bytes.Index(p.in[p.index:p.length], []byte(delimiter))
	if offset < 0 {
		p.rollbackPosition(startPos)
		return nil, fmt.Errorf("unfinished string at %s", p.errorContext())
	}
	p.advance(p.index + offset + len(delimiter))
	v := p.populateValue(string(p.in[start:p.index]), nil)
	return v, nil
}
// populateValue builds a value from vl and preComments, and reads the comment
// that follows on the same line (if any) as its inline comment.
func (p *parser) populateValue(vl string, preComments []string) *ast.Value {
	if p.config.InfoLevel() {
		p.config.Infof("value: %q", vl)
	}
	inline := p.readInlineComment()
	return &ast.Value{
		Value:         vl,
		InlineComment: inline,
		PreComments:   preComments,
	}
}
// readInlineComment skips blank space on the current line and returns the
// comment found there, or "" when there is none.
func (p *parser) readInlineComment() string {
	found, _ := p.skipWhiteSpaceAndReadComments(false /* multiLine */)
	if p.config.InfoLevel() {
		p.config.Infof("inlineComment: %q", strings.Join(found, "\n"))
	}
	if len(found) == 0 {
		return ""
	}
	return found[0]
}
// readStringInTemplate scans a quoted string inside a template. i is the
// index just after the opening quote. It returns the index just after the
// matching closing quote; a backslash skips the character that follows it.
// If no closing quote is found, the returned index is at or past the end of
// the input. The parser itself is not advanced.
func (p *parser) readStringInTemplate(i int) int {
	opening := p.in[i-1] // The quote that started the string.
	for i < p.length {
		switch p.in[i] {
		case '\\':
			i += 2 // Skip the backslash and the escaped char.
		case opening:
			return i + 1 // Skip end quote.
		default:
			i++
		}
	}
	return i
}
// readTemplate consumes and returns a template starting at the current
// position: the text from the opening '%' through the next '%' (inclusive)
// that is not inside a quoted string, or through the end of the input when no
// such '%' exists. It returns "" without consuming anything when the next
// byte is not '%'.
func (p *parser) readTemplate() string {
if !p.nextInputIs('%') {
return ""
}
i := p.index + 1
for ; i < p.length; i++ {
// Quoted strings are skipped as a whole so that a '%' inside them does not
// end the template.
if p.in[i] == '"' || p.in[i] == '\'' {
i++
i = p.readStringInTemplate(i)
}
if i < p.length && p.in[i] == '%' {
i++
break
}
}
// advance clamps i to the input length.
return p.advance(i)
}