Просмотр исходного кода

Properly handle foot comments ending blocks.

This means being able to associate loose comments at the bottom
of the block with the original values that were at the same
indentation level earlier on.

For example, this is now handled properly:

  ka:
    kb:
      kc: vc
      # Foot of kc
    # Foot of kb
  # Foot of ka
Gustavo Niemeyer 6 лет назад
Родитель
Коммит
117fdf03f4
7 измененных файлов с 986 добавлено и 179 удалено
  1. 23 2
      decode.go
  2. 36 14
      emitterc.go
  3. 29 15
      encode.go
  4. 667 20
      node_test.go
  5. 30 8
      parserc.go
  6. 181 112
      scannerc.go
  7. 20 8
      yamlh.go

+ 23 - 2
decode.go

@@ -152,8 +152,10 @@ func (p *parser) parse() *Node {
 	case yaml_STREAM_END_EVENT:
 		// Happens when attempting to decode an empty buffer.
 		return nil
+	case yaml_TAIL_COMMENT_EVENT:
+		panic("internal error: unexpected tail comment event (please report)")
 	default:
-		panic("attempted to parse unknown event: " + p.event.typ.String())
+		panic("internal error: attempted to parse unknown event (please report): " + p.event.typ.String())
 	}
 }
 
@@ -256,21 +258,40 @@ func (p *parser) sequence() *Node {
 
 func (p *parser) mapping() *Node {
 	n := p.node(MappingNode, mapTag, string(p.event.tag), "")
+	block := true
 	if p.event.mapping_style()&yaml_FLOW_MAPPING_STYLE != 0 {
+		block = false
 		n.Style |= FlowStyle
 	}
 	p.anchor(n, p.event.anchor)
 	p.expect(yaml_MAPPING_START_EVENT)
 	for p.peek() != yaml_MAPPING_END_EVENT {
 		k := p.parseChild(n)
+		if block && k.FootComment != "" {
+			// Must be a foot comment for the prior value when being dedented.
+			if len(n.Content) > 2 {
+				n.Content[len(n.Content)-3].FootComment = k.FootComment
+				k.FootComment = ""
+			}
+		}
 		v := p.parseChild(n)
-		if v.FootComment != "" {
+		if k.FootComment == "" && v.FootComment != "" {
 			k.FootComment = v.FootComment
 			v.FootComment = ""
 		}
+		if p.peek() == yaml_TAIL_COMMENT_EVENT {
+			if k.FootComment == "" {
+				k.FootComment = string(p.event.foot_comment)
+			}
+			p.expect(yaml_TAIL_COMMENT_EVENT)
+		}
 	}
 	n.LineComment = string(p.event.line_comment)
 	n.FootComment = string(p.event.foot_comment)
+	if n.Style&FlowStyle == 0 && n.FootComment != "" && len(n.Content) > 1 {
+		n.Content[len(n.Content)-2].FootComment = n.FootComment
+		n.FootComment = ""
+	}
 	p.expect(yaml_MAPPING_END_EVENT)
 	return n
 }

+ 36 - 14
emitterc.go

@@ -539,13 +539,15 @@ func yaml_emitter_emit_flow_sequence_item(emitter *yaml_emitter_t, event *yaml_e
 	}
 
 	if event.typ == yaml_SEQUENCE_END_EVENT {
-		emitter.flow_level--
-		emitter.indent = emitter.indents[len(emitter.indents)-1]
-		emitter.indents = emitter.indents[:len(emitter.indents)-1]
-		if emitter.canonical && !first {
+		if emitter.canonical && !first && !trail {
 			if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) {
 				return false
 			}
+		}
+		emitter.flow_level--
+		emitter.indent = emitter.indents[len(emitter.indents)-1]
+		emitter.indents = emitter.indents[:len(emitter.indents)-1]
+		if emitter.column == 0 || emitter.canonical && !first {
 			if !yaml_emitter_write_indent(emitter) {
 				return false
 			}
@@ -585,7 +587,7 @@ func yaml_emitter_emit_flow_sequence_item(emitter *yaml_emitter_t, event *yaml_e
 			return false
 		}
 	}
-	if len(emitter.line_comment) > 0 || len(emitter.foot_comment) > 0 {
+	if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 {
 		emitter.states = append(emitter.states, yaml_EMIT_FLOW_SEQUENCE_TRAIL_ITEM_STATE)
 	} else {
 		emitter.states = append(emitter.states, yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE)
@@ -593,7 +595,7 @@ func yaml_emitter_emit_flow_sequence_item(emitter *yaml_emitter_t, event *yaml_e
 	if !yaml_emitter_emit_node(emitter, event, false, true, false, false) {
 		return false
 	}
-	if len(emitter.line_comment) > 0 || len(emitter.foot_comment) > 0 {
+	if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 {
 		if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) {
 			return false
 		}
@@ -620,13 +622,18 @@ func yaml_emitter_emit_flow_mapping_key(emitter *yaml_emitter_t, event *yaml_eve
 	}
 
 	if event.typ == yaml_MAPPING_END_EVENT {
+		if (emitter.canonical || len(emitter.head_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0) && !first && !trail {
+			if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) {
+				return false
+			}
+		}
+		if !yaml_emitter_process_head_comment(emitter) {
+			return false
+		}
 		emitter.flow_level--
 		emitter.indent = emitter.indents[len(emitter.indents)-1]
 		emitter.indents = emitter.indents[:len(emitter.indents)-1]
 		if emitter.canonical && !first {
-			if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) {
-				return false
-			}
 			if !yaml_emitter_write_indent(emitter) {
 				return false
 			}
@@ -654,6 +661,7 @@ func yaml_emitter_emit_flow_mapping_key(emitter *yaml_emitter_t, event *yaml_eve
 	if !yaml_emitter_process_head_comment(emitter) {
 		return false
 	}
+
 	if emitter.column == 0 {
 		if !yaml_emitter_write_indent(emitter) {
 			return false
@@ -693,7 +701,7 @@ func yaml_emitter_emit_flow_mapping_value(emitter *yaml_emitter_t, event *yaml_e
 			return false
 		}
 	}
-	if len(emitter.line_comment) > 0 || len(emitter.foot_comment) > 0 {
+	if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 {
 		emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_TRAIL_KEY_STATE)
 	} else {
 		emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_KEY_STATE)
@@ -701,7 +709,7 @@ func yaml_emitter_emit_flow_mapping_value(emitter *yaml_emitter_t, event *yaml_e
 	if !yaml_emitter_emit_node(emitter, event, false, false, true, false) {
 		return false
 	}
-	if len(emitter.line_comment) > 0 || len(emitter.foot_comment) > 0 {
+	if len(emitter.line_comment)+len(emitter.foot_comment)+len(emitter.tail_comment) > 0 {
 		if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) {
 			return false
 		}
@@ -765,6 +773,9 @@ func yaml_emitter_emit_block_mapping_key(emitter *yaml_emitter_t, event *yaml_ev
 			return false
 		}
 	}
+	if !yaml_emitter_process_head_comment(emitter) {
+		return false
+	}
 	if event.typ == yaml_MAPPING_END_EVENT {
 		emitter.indent = emitter.indents[len(emitter.indents)-1]
 		emitter.indents = emitter.indents[:len(emitter.indents)-1]
@@ -772,9 +783,6 @@ func yaml_emitter_emit_block_mapping_key(emitter *yaml_emitter_t, event *yaml_ev
 		emitter.states = emitter.states[:len(emitter.states)-1]
 		return true
 	}
-	if !yaml_emitter_process_head_comment(emitter) {
-		return false
-	}
 	if !yaml_emitter_write_indent(emitter) {
 		return false
 	}
@@ -1081,9 +1089,20 @@ func yaml_emitter_process_scalar(emitter *yaml_emitter_t) bool {
 
 // Write a head comment.
 func yaml_emitter_process_head_comment(emitter *yaml_emitter_t) bool {
+	if len(emitter.tail_comment) > 0 {
+		if !yaml_emitter_write_indent(emitter) {
+			return false
+		}
+		if !yaml_emitter_write_comment(emitter, emitter.tail_comment) {
+			return false
+		}
+		emitter.tail_comment = emitter.tail_comment[:0]
+	}
+
 	if len(emitter.head_comment) == 0 {
 		return true
 	}
+
 	space_above := emitter.space_above
 	if !emitter.indention {
 		if !put_break(emitter) {
@@ -1379,6 +1398,9 @@ func yaml_emitter_analyze_event(emitter *yaml_emitter_t, event *yaml_event_t) bo
 	if len(event.foot_comment) > 0 {
 		emitter.foot_comment = event.foot_comment
 	}
+	if len(event.tail_comment) > 0 {
+		emitter.tail_comment = event.tail_comment
+	}
 
 	switch event.typ {
 	case yaml_ALIAS_EVENT:

+ 29 - 15
encode.go

@@ -332,7 +332,7 @@ func (e *encoder) stringv(tag string, in reflect.Value) {
 	default:
 		style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
 	}
-	e.emitScalar(s, "", tag, style, nil, nil, nil)
+	e.emitScalar(s, "", tag, style, nil, nil, nil, nil)
 }
 
 func (e *encoder) boolv(tag string, in reflect.Value) {
@@ -342,23 +342,23 @@ func (e *encoder) boolv(tag string, in reflect.Value) {
 	} else {
 		s = "false"
 	}
-	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil)
+	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil)
 }
 
 func (e *encoder) intv(tag string, in reflect.Value) {
 	s := strconv.FormatInt(in.Int(), 10)
-	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil)
+	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil)
 }
 
 func (e *encoder) uintv(tag string, in reflect.Value) {
 	s := strconv.FormatUint(in.Uint(), 10)
-	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil)
+	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil)
 }
 
 func (e *encoder) timev(tag string, in reflect.Value) {
 	t := in.Interface().(time.Time)
 	s := t.Format(time.RFC3339Nano)
-	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil)
+	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil)
 }
 
 func (e *encoder) floatv(tag string, in reflect.Value) {
@@ -377,14 +377,14 @@ func (e *encoder) floatv(tag string, in reflect.Value) {
 	case "NaN":
 		s = ".nan"
 	}
-	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil)
+	e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil)
 }
 
 func (e *encoder) nilv() {
-	e.emitScalar("null", "", "", yaml_PLAIN_SCALAR_STYLE, nil, nil, nil)
+	e.emitScalar("null", "", "", yaml_PLAIN_SCALAR_STYLE, nil, nil, nil, nil)
 }
 
-func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_t, head, line, foot []byte) {
+func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_t, head, line, foot, tail []byte) {
 	// TODO Kill this function. Replace all initialize calls by their underlining Go literals.
 	implicit := tag == ""
 	if !implicit {
@@ -394,14 +394,15 @@ func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_
 	e.event.head_comment = head
 	e.event.line_comment = line
 	e.event.foot_comment = foot
+	e.event.tail_comment = tail
 	e.emit()
 }
 
 func (e *encoder) nodev(in reflect.Value) {
-	e.node(in.Interface().(*Node))
+	e.node(in.Interface().(*Node), "")
 }
 
-func (e *encoder) node(node *Node) {
+func (e *encoder) node(node *Node, tail string) {
 	// If the tag was not explicitly requested, and dropping it won't change the
 	// implicit tag of the value, don't include it in the presentation.
 	var tag = node.Tag
@@ -440,7 +441,7 @@ func (e *encoder) node(node *Node) {
 		e.event.head_comment = []byte(node.HeadComment)
 		e.emit()
 		for _, node := range node.Content {
-			e.node(node)
+			e.node(node, "")
 		}
 		yaml_document_end_event_initialize(&e.event, true)
 		e.event.foot_comment = []byte(node.FootComment)
@@ -455,7 +456,7 @@ func (e *encoder) node(node *Node) {
 		e.event.head_comment = []byte(node.HeadComment)
 		e.emit()
 		for _, node := range node.Content {
-			e.node(node)
+			e.node(node, "")
 		}
 		e.must(yaml_sequence_end_event_initialize(&e.event))
 		e.event.line_comment = []byte(node.LineComment)
@@ -468,15 +469,28 @@ func (e *encoder) node(node *Node) {
 			style = yaml_FLOW_MAPPING_STYLE
 		}
 		yaml_mapping_start_event_initialize(&e.event, []byte(node.Anchor), []byte(tag), tag == "", style)
+		e.event.tail_comment = []byte(tail)
 		e.event.head_comment = []byte(node.HeadComment)
 		e.emit()
 
+		// The tail logic below moves the foot comment of prior keys to the following key,
+		// since the value for each key may be a nested structure and the foot needs to be
+		// processed only after the entirety of the value is streamed. The last tail is processed
+		// with the mapping end event.
+		var tail string
 		for i := 0; i+1 < len(node.Content); i += 2 {
-			e.node(node.Content[i])
-			e.node(node.Content[i+1])
+			k := node.Content[i]
+			foot := k.FootComment
+			k.FootComment = ""
+			e.node(k, tail)
+			tail = foot
+
+			v := node.Content[i+1]
+			e.node(v, "")
 		}
 
 		yaml_mapping_end_event_initialize(&e.event)
+		e.event.tail_comment = []byte(tail)
 		e.event.line_comment = []byte(node.LineComment)
 		e.event.foot_comment = []byte(node.FootComment)
 		e.emit()
@@ -519,6 +533,6 @@ func (e *encoder) node(node *Node) {
 			style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
 		}
 
-		e.emitScalar(value, node.Anchor, tag, style, []byte(node.HeadComment), []byte(node.LineComment), []byte(node.FootComment))
+		e.emitScalar(value, node.Anchor, tag, style, []byte(node.HeadComment), []byte(node.LineComment), []byte(node.FootComment), []byte(tail))
 	}
 }

+ 667 - 20
node_test.go

@@ -17,10 +17,12 @@ package yaml_test
 
 import (
 	"bytes"
+	"fmt"
 	"os"
 
 	. "gopkg.in/check.v1"
 	"gopkg.in/yaml.v3"
+	"io"
 	"strings"
 )
 
@@ -935,6 +937,553 @@ var nodeTests = []struct {
 				}},
 			}},
 		},
+	}, {
+		"# DH1\n\n# HA1\nka:\n  # HB1\n  kb:\n  # HC1\n  # HC2\n  - lc # IC\n  # FC1\n  # FC2\n\n  # HD1\n  - ld # ID\n  # FD1\nke: ve\n\n# DF1\n",
+		yaml.Node{
+			Kind:        yaml.DocumentNode,
+			Line:        4,
+			Column:      1,
+			HeadComment: "# DH1",
+			FootComment: "# DF1",
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   4,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Line:        4,
+					Column:      1,
+					Value:       "ka",
+					HeadComment: "# HA1",
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   6,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Line:        6,
+						Column:      3,
+						Value:       "kb",
+						HeadComment: "# HB1",
+					}, {
+						Kind:   yaml.SequenceNode,
+						Line:   9,
+						Column: 3,
+						Tag:    "!!seq",
+						Content: []*yaml.Node{{
+							Kind:        yaml.ScalarNode,
+							Tag:         "!!str",
+							Line:        9,
+							Column:      5,
+							Value:       "lc",
+							HeadComment: "# HC1\n# HC2",
+							LineComment: "# IC",
+							FootComment: "# FC1\n# FC2",
+						}, {
+							Kind:        yaml.ScalarNode,
+							Tag:         "!!str",
+							Line:        14,
+							Column:      5,
+							Value:       "ld",
+							HeadComment: "# HD1",
+							LineComment: "# ID",
+							FootComment: "# FD1",
+						}},
+					}},
+				}, {
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Line:   16,
+					Column: 1,
+					Value:  "ke",
+				}, {
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Line:   16,
+					Column: 5,
+					Value:  "ve",
+				}},
+			}},
+		},
+	}, {
+		"# DH1\n\n# DH2\n\n# HA1\n# HA2\nka:\n  # HB1\n  # HB2\n  kb:\n" +
+			"    # HC1\n    # HC2\n    kc:\n      # HD1\n      # HD2\n      kd: vd\n      # FD1\n      # FD2\n" +
+			"    # FC1\n    # FC2\n  # FB1\n  # FB2\n# FA1\n# FA2\n\n# HE1\n# HE2\nke: ve\n# FE1\n# FE2\n\n# DF1\n\n# DF2\n",
+		yaml.Node{
+			Kind:        yaml.DocumentNode,
+			HeadComment: "# DH1\n\n# DH2",
+			FootComment: "# DF1\n\n# DF2",
+			Line:        7,
+			Column:      1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   7,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1\n# HA2",
+					FootComment: "# FA1\n# FA2",
+					Line:        7,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   10,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1\n# HB2",
+						FootComment: "# FB1\n# FB2",
+						Line:        10,
+						Column:      3,
+					}, {
+						Kind:   yaml.MappingNode,
+						Tag:    "!!map",
+						Line:   13,
+						Column: 5,
+						Content: []*yaml.Node{{
+							Kind:        yaml.ScalarNode,
+							Tag:         "!!str",
+							Value:       "kc",
+							HeadComment: "# HC1\n# HC2",
+							FootComment: "# FC1\n# FC2",
+							Line:        13,
+							Column:      5,
+						}, {
+							Kind:   yaml.MappingNode,
+							Tag:    "!!map",
+							Line:   16,
+							Column: 7,
+							Content: []*yaml.Node{{
+								Kind:        yaml.ScalarNode,
+								Tag:         "!!str",
+								Value:       "kd",
+								HeadComment: "# HD1\n# HD2",
+								FootComment: "# FD1\n# FD2",
+								Line:        16,
+								Column:      7,
+							}, {
+								Kind:   yaml.ScalarNode,
+								Tag:    "!!str",
+								Value:  "vd",
+								Line:   16,
+								Column: 11,
+							}},
+						}},
+					}},
+				}, {
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ke",
+					HeadComment: "# HE1\n# HE2",
+					FootComment: "# FE1\n# FE2",
+					Line:        28,
+					Column:      1,
+				}, {
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Value:  "ve",
+					Line:   28,
+					Column: 5,
+				}},
+			}},
+		},
+	}, {
+		// Same as above but indenting ke in so it's also part of ka's value.
+		"# DH1\n\n# DH2\n\n# HA1\n# HA2\nka:\n  # HB1\n  # HB2\n  kb:\n" +
+			"    # HC1\n    # HC2\n    kc:\n      # HD1\n      # HD2\n      kd: vd\n      # FD1\n      # FD2\n" +
+			"    # FC1\n    # FC2\n  # FB1\n  # FB2\n\n  # HE1\n  # HE2\n  ke: ve\n  # FE1\n  # FE2\n# FA1\n# FA2\n\n# DF1\n\n# DF2\n",
+		yaml.Node{
+			Kind:        yaml.DocumentNode,
+			HeadComment: "# DH1\n\n# DH2",
+			FootComment: "# DF1\n\n# DF2",
+			Line:        7,
+			Column:      1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   7,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1\n# HA2",
+					FootComment: "# FA1\n# FA2",
+					Line:        7,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   10,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1\n# HB2",
+						FootComment: "# FB1\n# FB2",
+						Line:        10,
+						Column:      3,
+					}, {
+						Kind:   yaml.MappingNode,
+						Tag:    "!!map",
+						Line:   13,
+						Column: 5,
+						Content: []*yaml.Node{{
+							Kind:        yaml.ScalarNode,
+							Tag:         "!!str",
+							Value:       "kc",
+							HeadComment: "# HC1\n# HC2",
+							FootComment: "# FC1\n# FC2",
+							Line:        13,
+							Column:      5,
+						}, {
+							Kind:   yaml.MappingNode,
+							Tag:    "!!map",
+							Line:   16,
+							Column: 7,
+							Content: []*yaml.Node{{
+								Kind:        yaml.ScalarNode,
+								Tag:         "!!str",
+								Value:       "kd",
+								HeadComment: "# HD1\n# HD2",
+								FootComment: "# FD1\n# FD2",
+								Line:        16,
+								Column:      7,
+							}, {
+								Kind:   yaml.ScalarNode,
+								Tag:    "!!str",
+								Value:  "vd",
+								Line:   16,
+								Column: 11,
+							}},
+						}},
+					}, {
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "ke",
+						HeadComment: "# HE1\n# HE2",
+						FootComment: "# FE1\n# FE2",
+						Line:        26,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "ve",
+						Line:   26,
+						Column: 7,
+					}},
+				}},
+			}},
+		},
+	}, {
+		// Decode only due to lack of newline at the end.
+		"[decode]# HA1\nka:\n  # HB1\n  kb: vb\n  # FB1\n# FA1",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   2,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   2,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1",
+					FootComment: "# FA1",
+					Line:        2,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   4,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1",
+						FootComment: "# FB1",
+						Line:        4,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "vb",
+						Line:   4,
+						Column: 7,
+					}},
+				}},
+			},
+			},
+		},
+	}, {
+		// Same as above, but with newline at the end.
+		"# HA1\nka:\n  # HB1\n  kb: vb\n  # FB1\n# FA1\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   2,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   2,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1",
+					FootComment: "# FA1",
+					Line:        2,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   4,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1",
+						FootComment: "# FB1",
+						Line:        4,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "vb",
+						Line:   4,
+						Column: 7,
+					}},
+				}},
+			},
+			},
+		},
+	}, {
+		// Same as above, but with two newlines at the end. Decode-only for that.
+		"[decode]# HA1\nka:\n  # HB1\n  kb: vb\n  # FB1\n# FA1\n\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   2,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   2,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1",
+					FootComment: "# FA1",
+					Line:        2,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   4,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1",
+						FootComment: "# FB1",
+						Line:        4,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "vb",
+						Line:   4,
+						Column: 7,
+					}},
+				}},
+			},
+			},
+		},
+	}, {
+		"# HA1\nka:\n  # HB1\n  kb: vb\n  # FB1\nkc: vc\n# FC1\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   2,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   2,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1",
+					Line:        2,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   4,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1",
+						FootComment: "# FB1",
+						Line:        4,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "vb",
+						Line:   4,
+						Column: 7,
+					}},
+				}, {
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "kc",
+					FootComment: "# FC1",
+					Line:        6,
+					Column:      1,
+				}, {
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Value:  "vc",
+					Line:   6,
+					Column: 5,
+				}},
+			}},
+		},
+	}, {
+		// Decode only as encoding adds an empty line between ka's value and kc's headers.
+		"[decode]# HA1\nka:\n  # HB1\n  kb: vb\n  # FB1\n# HC1\n# HC2\nkc: vc\n# FC1\n# FC2\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   2,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   2,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1",
+					Line:        2,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   4,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1",
+						FootComment: "# FB1",
+						Line:        4,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "vb",
+						Line:   4,
+						Column: 7,
+					}},
+				}, {
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "kc",
+					HeadComment: "# HC1\n# HC2",
+					FootComment: "# FC1\n# FC2",
+					Line:        8,
+					Column:      1,
+				}, {
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Value:  "vc",
+					Line:   8,
+					Column: 5,
+				}},
+			}},
+		},
+	}, {
+		// Same as above, but with the empty line between ka's value and kc's headers.
+		"# HA1\nka:\n  # HB1\n  kb: vb\n  # FB1\n\n# HC1\n# HC2\nkc: vc\n# FC1\n# FC2\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   2,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   2,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "ka",
+					HeadComment: "# HA1",
+					Line:        2,
+					Column:      1,
+				}, {
+					Kind:   yaml.MappingNode,
+					Tag:    "!!map",
+					Line:   4,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Value:       "kb",
+						HeadComment: "# HB1",
+						FootComment: "# FB1",
+						Line:        4,
+						Column:      3,
+					}, {
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "vb",
+						Line:   4,
+						Column: 7,
+					}},
+				}, {
+					Kind:        yaml.ScalarNode,
+					Tag:         "!!str",
+					Value:       "kc",
+					HeadComment: "# HC1\n# HC2",
+					FootComment: "# FC1\n# FC2",
+					Line:        9,
+					Column:      1,
+				}, {
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Value:  "vc",
+					Line:   9,
+					Column: 5,
+				}},
+			}},
+		},
 	}, {
 		"# H1\n[la, lb] # I\n# F1\n",
 		yaml.Node{
@@ -1037,6 +1586,61 @@ var nodeTests = []struct {
 				}},
 			}},
 		},
+	}, {
+		"ka:\n  kb: [\n    # HA1\n    la,\n    # FA1\n\n    # HB1\n    lb,\n    # FB1\n  ]\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   1,
+			Column: 1,
+			Content: []*yaml.Node{{
+				Kind:   yaml.MappingNode,
+				Tag:    "!!map",
+				Line:   1,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:   yaml.ScalarNode,
+					Tag:    "!!str",
+					Value:  "ka",
+					Line:   1,
+					Column: 1,
+				}, {
+					Kind:   0x4,
+					Tag:    "!!map",
+					Line:   2,
+					Column: 3,
+					Content: []*yaml.Node{{
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Value:  "kb",
+						Line:   2,
+						Column: 3,
+					}, {
+						Kind:   yaml.SequenceNode,
+						Style:  0x20,
+						Tag:    "!!seq",
+						Line:   2,
+						Column: 7,
+						Content: []*yaml.Node{{
+							Kind:        yaml.ScalarNode,
+							Tag:         "!!str",
+							Value:       "la",
+							HeadComment: "# HA1",
+							FootComment: "# FA1",
+							Line:        4,
+							Column:      5,
+						}, {
+							Kind:        yaml.ScalarNode,
+							Tag:         "!!str",
+							Value:       "lb",
+							HeadComment: "# HB1",
+							FootComment: "# FB1",
+							Line:        8,
+							Column:      5,
+						}},
+					}},
+				}},
+			}},
+		},
 	}, {
 		"# DH1\n\n# MH1\n{\n  # HA1\n  ka: va, # IA\n  # FA1\n\n  # HB1\n  kb: vb, # IB\n  # FB1\n}\n# MF1\n\n# DF1\n",
 		yaml.Node{
@@ -1179,6 +1783,12 @@ func (s *S) TestNodeRoundtrip(c *C) {
 	for i, item := range nodeTests {
 		c.Logf("test %d: %q", i, item.yaml)
 
+		if strings.Contains(item.yaml, "#") {
+			var buf bytes.Buffer
+			fprintComments(&buf, &item.node, "    ")
+			c.Logf("  comments:\n%s", buf.Bytes())
+		}
+
 		decode := true
 		encode := true
 
@@ -1237,51 +1847,51 @@ var setStringTests = []struct {
 		"something simple",
 		"something simple\n",
 		yaml.Node{
-			Kind:   yaml.ScalarNode,
-			Value:  "something simple",
-			Tag:    "!!str",
+			Kind:  yaml.ScalarNode,
+			Value: "something simple",
+			Tag:   "!!str",
 		},
 	}, {
 		`"quoted value"`,
 		"'\"quoted value\"'\n",
 		yaml.Node{
-			Kind:   yaml.ScalarNode,
-			Value:  `"quoted value"`,
-			Tag:    "!!str",
+			Kind:  yaml.ScalarNode,
+			Value: `"quoted value"`,
+			Tag:   "!!str",
 		},
 	}, {
 		"multi\nline",
 		"|-\n  multi\n  line\n",
 		yaml.Node{
-			Kind:   yaml.ScalarNode,
-			Value:  "multi\nline",
-			Tag:    "!!str",
-			Style:  yaml.LiteralStyle,
+			Kind:  yaml.ScalarNode,
+			Value: "multi\nline",
+			Tag:   "!!str",
+			Style: yaml.LiteralStyle,
 		},
 	}, {
 		"123",
 		"\"123\"\n",
 		yaml.Node{
-			Kind:   yaml.ScalarNode,
-			Value:  "123",
-			Tag:    "!!str",
+			Kind:  yaml.ScalarNode,
+			Value: "123",
+			Tag:   "!!str",
 		},
 	}, {
 		"multi\nline\n",
 		"|\n  multi\n  line\n",
 		yaml.Node{
-			Kind:   yaml.ScalarNode,
-			Value:  "multi\nline\n",
-			Tag:    "!!str",
-			Style:  yaml.LiteralStyle,
+			Kind:  yaml.ScalarNode,
+			Value: "multi\nline\n",
+			Tag:   "!!str",
+			Style: yaml.LiteralStyle,
 		},
 	}, {
 		"\x80\x81\x82",
 		"!!binary gIGC\n",
 		yaml.Node{
-			Kind:   yaml.ScalarNode,
-			Value:  "gIGC",
-			Tag:    "!!binary",
+			Kind:  yaml.ScalarNode,
+			Value: "gIGC",
+			Tag:   "!!binary",
 		},
 	},
 }
@@ -1317,3 +1927,40 @@ func (s *S) TestSetString(c *C) {
 		c.Assert(str, Equals, item.str)
 	}
 }
+
+func fprintComments(out io.Writer, node *yaml.Node, indent string) {
+	switch node.Kind {
+	case yaml.ScalarNode:
+		fmt.Fprintf(out, "%s<%s> ", indent, node.Value)
+		fprintCommentSet(out, node)
+		fmt.Fprintf(out, "\n")
+	case yaml.DocumentNode:
+		fmt.Fprintf(out, "%s<DOC> ", indent)
+		fprintCommentSet(out, node)
+		fmt.Fprintf(out, "\n")
+		for i := 0; i < len(node.Content); i++ {
+			fprintComments(out, node.Content[i], indent+"  ")
+		}
+	case yaml.MappingNode:
+		fmt.Fprintf(out, "%s<MAP> ", indent)
+		fprintCommentSet(out, node)
+		fmt.Fprintf(out, "\n")
+		for i := 0; i < len(node.Content); i += 2 {
+			fprintComments(out, node.Content[i], indent+"  ")
+			fprintComments(out, node.Content[i+1], indent+"  ")
+		}
+	case yaml.SequenceNode:
+		fmt.Fprintf(out, "%s<SEQ> ", indent)
+		fprintCommentSet(out, node)
+		fmt.Fprintf(out, "\n")
+		for i := 0; i < len(node.Content); i++ {
+			fprintComments(out, node.Content[i], indent+"  ")
+		}
+	}
+}
+
+func fprintCommentSet(out io.Writer, node *yaml.Node) {
+	if len(node.HeadComment)+len(node.LineComment)+len(node.FootComment) > 0 {
+		fmt.Fprintf(out, "%q / %q / %q", node.HeadComment, node.LineComment, node.FootComment)
+	}
+}

+ 30 - 8
parserc.go

@@ -1,17 +1,17 @@
-// 
+//
 // Copyright (c) 2011-2019 Canonical Ltd
 // Copyright (c) 2006-2010 Kirill Simonov
-// 
+//
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files (the "Software"), to deal in
 // the Software without restriction, including without limitation the rights to
 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 // of the Software, and to permit persons to whom the Software is furnished to do
 // so, subject to the following conditions:
-// 
+//
 // The above copyright notice and this permission notice shall be included in all
 // copies or substantial portions of the Software.
-// 
+//
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -78,9 +78,13 @@ func peek_token(parser *yaml_parser_t) *yaml_token_t {
 // comments behind the position of the provided token into the respective
 // top-level comment slices in the parser.
 func yaml_parser_unfold_comments(parser *yaml_parser_t, token *yaml_token_t) {
-	for parser.comments_head < len(parser.comments) && token.start_mark.index >= parser.comments[parser.comments_head].after.index {
+	for parser.comments_head < len(parser.comments) && token.start_mark.index >= parser.comments[parser.comments_head].token_mark.index {
 		comment := &parser.comments[parser.comments_head]
 		if len(comment.head) > 0 {
+			if token.typ == yaml_BLOCK_END_TOKEN {
+				// No heads on ends, so keep comment.head for a follow up token.
+				break
+			}
 			if len(parser.head_comment) > 0 {
 				parser.head_comment = append(parser.head_comment, '\n')
 			}
@@ -359,6 +363,7 @@ func yaml_parser_parse_document_content(parser *yaml_parser_t, event *yaml_event
 	if token == nil {
 		return false
 	}
+
 	if token.typ == yaml_VERSION_DIRECTIVE_TOKEN ||
 		token.typ == yaml_TAG_DIRECTIVE_TOKEN ||
 		token.typ == yaml_DOCUMENT_START_TOKEN ||
@@ -401,10 +406,12 @@ func yaml_parser_parse_document_end(parser *yaml_parser_t, event *yaml_event_t)
 		start_mark: start_mark,
 		end_mark:   end_mark,
 		implicit:   implicit,
-
-		foot_comment: parser.head_comment,
 	}
-	parser.head_comment = nil
+	yaml_parser_set_event_comments(parser, event)
+	if len(event.head_comment) > 0 && len(event.foot_comment) == 0 {
+		event.foot_comment = event.head_comment
+		event.head_comment = nil
+	}
 	return true
 }
 
@@ -415,6 +422,7 @@ func yaml_parser_set_event_comments(parser *yaml_parser_t, event *yaml_event_t)
 	parser.head_comment = nil
 	parser.line_comment = nil
 	parser.foot_comment = nil
+	parser.tail_comment = nil
 }
 
 // Parse the productions:
@@ -775,6 +783,19 @@ func yaml_parser_parse_block_mapping_key(parser *yaml_parser_t, event *yaml_even
 		return false
 	}
 
+	// [Go] A tail comment was left from the prior mapping value processed. Emit an event
+	//      as it needs to be processed with that value and not the following key.
+	if len(parser.tail_comment) > 0 {
+		*event = yaml_event_t{
+			typ:          yaml_TAIL_COMMENT_EVENT,
+			start_mark:   token.start_mark,
+			end_mark:     token.end_mark,
+			foot_comment: parser.tail_comment,
+		}
+		parser.tail_comment = nil
+		return true
+	}
+
 	if token.typ == yaml_KEY_TOKEN {
 		mark := token.end_mark
 		skip_token(parser)
@@ -800,6 +821,7 @@ func yaml_parser_parse_block_mapping_key(parser *yaml_parser_t, event *yaml_even
 			start_mark: token.start_mark,
 			end_mark:   token.end_mark,
 		}
+		yaml_parser_set_event_comments(parser, event)
 		skip_token(parser)
 		return true
 	}

+ 181 - 112
scannerc.go

@@ -1,17 +1,17 @@
-// 
+//
 // Copyright (c) 2011-2019 Canonical Ltd
 // Copyright (c) 2006-2010 Kirill Simonov
-// 
+//
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files (the "Software"), to deal in
 // the Software without restriction, including without limitation the rights to
 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 // of the Software, and to permit persons to whom the Software is furnished to do
 // so, subject to the following conditions:
-// 
+//
 // The above copyright notice and this permission notice shall be included in all
 // copies or substantial portions of the Software.
-// 
+//
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -511,6 +511,9 @@ func cache(parser *yaml_parser_t, length int) bool {
 
 // Advance the buffer pointer.
 func skip(parser *yaml_parser_t) {
+	if !is_blank(parser.buffer, parser.buffer_pos) {
+		parser.newlines = 0
+	}
 	parser.mark.index++
 	parser.mark.column++
 	parser.unread--
@@ -524,17 +527,22 @@ func skip_line(parser *yaml_parser_t) {
 		parser.mark.line++
 		parser.unread -= 2
 		parser.buffer_pos += 2
+		parser.newlines++
 	} else if is_break(parser.buffer, parser.buffer_pos) {
 		parser.mark.index++
 		parser.mark.column = 0
 		parser.mark.line++
 		parser.unread--
 		parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
+		parser.newlines++
 	}
 }
 
 // Copy a character to a string buffer and advance pointers.
 func read(parser *yaml_parser_t, s []byte) []byte {
+	if !is_blank(parser.buffer, parser.buffer_pos) {
+		parser.newlines = 0
+	}
 	w := width(parser.buffer[parser.buffer_pos])
 	if w == 0 {
 		panic("invalid character sequence")
@@ -586,6 +594,7 @@ func read_line(parser *yaml_parser_t, s []byte) []byte {
 	parser.mark.column = 0
 	parser.mark.line++
 	parser.unread--
+	parser.newlines++
 	return s
 }
 
@@ -651,11 +660,11 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
 		// Check if we really need to fetch more tokens.
 		need_more_tokens := false
 
-		// [Go] When parsing flow items, force the queue to have at least
-		// two items so that comments after commas may be associated
-		// with the value being parsed before them.
-		if parser.tokens_head == len(parser.tokens) || parser.flow_level > 0 && parser.tokens_head >= len(parser.tokens)-1 {
-			// Queue is empty or has just one element inside a flow context.
+		// [Go] The comment parsing logic requires a lookahead of one token
+		// in block style or two tokens in flow style so that the foot
+		// comments are parsed in time to be associated with the tokens
+		// that are parsed before them.
+		if parser.tokens_head >= len(parser.tokens)-1 || parser.flow_level > 0 && parser.tokens_head >= len(parser.tokens)-2 {
 			need_more_tokens = true
 		} else {
 			// Check if any potential simple key may occupy the head position.
@@ -698,6 +707,8 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
 		return yaml_parser_fetch_stream_start(parser)
 	}
 
+	scan_mark := parser.mark
+
 	// Eat whitespaces and comments until we reach the next token.
 	if !yaml_parser_scan_to_next_token(parser) {
 		return false
@@ -708,8 +719,12 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
 		return false
 	}
 
+	// [Go] While unrolling indents, transform the head comments of prior
+	// indentation levels observed after scan_mark into foot comments at
+	// the respective indexes.
+
 	// Check the indentation level against the current column.
-	if !yaml_parser_unroll_indent(parser, parser.mark.column) {
+	if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {
 		return false
 	}
 
@@ -755,10 +770,6 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
 			ok = false
 			return
 		}
-		if !yaml_parser_scan_foot_comment(parser, comment_mark) {
-			ok = false
-			return
-		}
 	}()
 
 	// Is it the flow sequence start indicator?
@@ -1001,19 +1012,49 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml
 // Pop indentation levels from the indents stack until the current level
 // becomes less or equal to the column.  For each indentation level, append
 // the BLOCK-END token.
-func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool {
+func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool {
 	// In the flow context, do nothing.
 	if parser.flow_level > 0 {
 		return true
 	}
 
+	block_mark := scan_mark
+	block_mark.index--
+
 	// Loop through the indentation levels in the stack.
 	for parser.indent > column {
+
+		// [Go] Reposition the end token before potential following
+		//      foot comments of parent blocks. For that, search
+		//      backwards for recent comments that were at the same
+		//      indent as the block that is ending now.
+		stop_index := block_mark.index
+		for i := len(parser.comments) - 1; i >= 0; i-- {
+			comment := &parser.comments[i]
+
+			if comment.end_mark.index < stop_index {
+				// Don't go back beyond the start of the comment/whitespace scan, unless column < 0.
+				// If requested indent column is < 0, then the document is over and everything else
+				// is a foot anyway.
+				break
+			}
+			if comment.start_mark.column == parser.indent+1 {
+				// This is a good match. But maybe there's a former comment
+				// at that same indent level, so keep searching.
+				block_mark = comment.start_mark
+			}
+
+			// While the end of the former comment matches with
+			// the start of the following one, we know there's
+			// nothing in between and scanning is still safe.
+			stop_index = comment.scan_mark.index
+		}
+
 		// Create a token and append it to the queue.
 		token := yaml_token_t{
 			typ:        yaml_BLOCK_END_TOKEN,
-			start_mark: parser.mark,
-			end_mark:   parser.mark,
+			start_mark: block_mark,
+			end_mark:   block_mark,
 		}
 		yaml_insert_token(parser, -1, &token)
 
@@ -1060,7 +1101,7 @@ func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
 	}
 
 	// Reset the indentation level.
-	if !yaml_parser_unroll_indent(parser, -1) {
+	if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
 		return false
 	}
 
@@ -1084,7 +1125,7 @@ func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
 // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
 func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
 	// Reset the indentation level.
-	if !yaml_parser_unroll_indent(parser, -1) {
+	if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
 		return false
 	}
 
@@ -1108,7 +1149,7 @@ func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
 // Produce the DOCUMENT-START or DOCUMENT-END token.
 func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
 	// Reset the indentation level.
-	if !yaml_parser_unroll_indent(parser, -1) {
+	if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
 		return false
 	}
 
@@ -1472,6 +1513,8 @@ func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {
 // Eat whitespaces and comments until the next token is found.
 func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
 
+	scan_mark := parser.mark
+
 	// Until the next token is not found.
 	for {
 		// Allow the BOM mark to start a line.
@@ -1500,7 +1543,7 @@ func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
 
 		// Eat a comment until a line break.
 		if parser.buffer[parser.buffer_pos] == '#' {
-			if !yaml_parser_scan_head_comment(parser, parser.mark) {
+			if !yaml_parser_scan_comments(parser, scan_mark) {
 				return false
 			}
 		}
@@ -2738,12 +2781,12 @@ func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) b
 	return true
 }
 
-func yaml_parser_scan_line_comment(parser *yaml_parser_t, after yaml_mark_t) bool {
-	if parser.mark.column == 0 {
+func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
+	if parser.newlines > 0 {
 		return true
 	}
 
-	parser.comments = append(parser.comments, yaml_comment_t{after: after})
+	parser.comments = append(parser.comments, yaml_comment_t{token_mark: token_mark})
 	comment := &parser.comments[len(parser.comments)-1].line
 
 	for peek := 0; peek < 512; peek++ {
@@ -2769,11 +2812,10 @@ func yaml_parser_scan_line_comment(parser *yaml_parser_t, after yaml_mark_t) boo
 			until := parser.buffer_pos + peek
 			for parser.buffer_pos < until {
 				if is_break(parser.buffer, parser.buffer_pos) {
-					//break // Leave the break in the buffer so calling this function twice is safe.
-					if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
-						return false
-					}
-					skip_line(parser)
+					// The break should stay in the buffer so calling this function twice or just
+					// before parsing foot comments works correctly. But this should never happen
+					// anyway given the logic above that stops at the break.
+					panic("internal error: the impossible has just happened!")
 				} else {
 					skip(parser)
 				}
@@ -2784,112 +2826,139 @@ func yaml_parser_scan_line_comment(parser *yaml_parser_t, after yaml_mark_t) boo
 	return true
 }
 
-func yaml_parser_scan_head_comment(parser *yaml_parser_t, after yaml_mark_t) bool {
-	parser.comments = append(parser.comments, yaml_comment_t{after: after})
-	comment := &parser.comments[len(parser.comments)-1].head
-	breaks := false
-	for peek := 0; peek < 512; peek++ {
+func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
+	token := parser.tokens[len(parser.tokens)-1]
+
+	if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
+		token = parser.tokens[len(parser.tokens)-2]
+	}
+
+	var token_mark = token.start_mark
+	var start_mark yaml_mark_t
+
+	var recent_empty = false
+	var first_empty = true
+
+	var line = parser.mark.line
+	var column = parser.mark.column
+
+	var text []byte
+
+	// The foot line is the place where a comment must start to
+	// still be considered as a foot of the prior content.
+	// If there's some content in the currently parsed line, then the foot
+	// is the line below it.
+	var foot_line = parser.mark.line-parser.newlines+1
+	if parser.newlines == 0 && parser.mark.column > 1 {
+		foot_line++
+	}
+
+	var peek = 0
+	for ; peek < 512; peek++ {
 		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
 			break
 		}
-		if parser.buffer[parser.buffer_pos+peek] == 0 {
-			break
-		}
+		column++
 		if is_blank(parser.buffer, parser.buffer_pos+peek) {
 			continue
 		}
-		if is_break(parser.buffer, parser.buffer_pos+peek) {
-			if !breaks {
-				*comment = append(*comment, '\n')
-			}
-			breaks = true
-		} else if parser.buffer[parser.buffer_pos+peek] == '#' {
-			if len(*comment) > 0 {
-				*comment = append(*comment, '\n')
-			}
-			breaks = false
-			for !is_breakz(parser.buffer, parser.buffer_pos+peek) {
-				*comment = append(*comment, parser.buffer[parser.buffer_pos+peek])
-				peek++
-				if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
-					return false
-				}
-			}
-
-			// Skip until after the consumed comment line.
-			until := parser.buffer_pos + peek
-			for parser.buffer_pos < until {
-				if is_break(parser.buffer, parser.buffer_pos) {
-					if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
-						return false
+		c := parser.buffer[parser.buffer_pos+peek]
+		if is_breakz(parser.buffer, parser.buffer_pos+peek) || parser.flow_level > 0 && (c == ']' || c == '}') {
+			// Got line break or terminator.
+			if !recent_empty {
+				if first_empty && (start_mark.line > 0 && start_mark.line == foot_line || start_mark.column-1 < parser.indent) {
+					// This is the first empty line and there were no empty lines before,
+					// so this initial part of the comment is a foot of the prior token
+					// instead of being a head for the following one. Split it up.
+					if len(text) > 0 {
+						parser.comments = append(parser.comments, yaml_comment_t{
+							scan_mark:  scan_mark,
+							token_mark: token_mark,
+							start_mark: start_mark,
+							end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
+							foot:       text,
+						})
+						scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
+						token_mark = scan_mark
+						text = nil
 					}
-					skip_line(parser)
 				} else {
-					skip(parser)
+					if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
+						text = append(text, '\n')
+					}
 				}
 			}
-			peek = 0
-		} else {
-			break
+			if !is_break(parser.buffer, parser.buffer_pos+peek) {
+				break
+			}
+			first_empty = false
+			recent_empty = true
+			column = 0
+			line++
+			continue
 		}
-	}
-	return true
-}
 
-func yaml_parser_scan_foot_comment(parser *yaml_parser_t, after yaml_mark_t) bool {
-	parser.comments = append(parser.comments, yaml_comment_t{after: after})
-	comment := &parser.comments[len(parser.comments)-1].foot
-	original := *comment
-	breaks := false
-	peek := 0
-	for ; peek < 32768; peek++ {
-		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
-			break
+		if len(text) > 0 && column < parser.indent+1 && column != start_mark.column {
+			// The comment at the different indentation is a foot of the
+			// preceding data rather than a head of the upcoming one.
+			parser.comments = append(parser.comments, yaml_comment_t{
+				scan_mark:  scan_mark,
+				token_mark: token_mark,
+				start_mark: start_mark,
+				end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
+				foot:       text,
+			})
+			scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
+			token_mark = scan_mark
+			text = nil
 		}
-		c := parser.buffer[parser.buffer_pos+peek]
-		if c == 0 {
+
+		if parser.buffer[parser.buffer_pos+peek] != '#' {
 			break
 		}
-		if is_blank(parser.buffer, parser.buffer_pos+peek) {
-			continue
+
+		if len(text) == 0 {
+			start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
+		} else {
+			text = append(text, '\n')
 		}
-		if is_break(parser.buffer, parser.buffer_pos+peek) {
-			if breaks {
-				break
-			}
-			breaks = true
-		} else if c == '#' {
-			if len(*comment) > 0 {
-				*comment = append(*comment, '\n')
+
+		// Find the end of the comment line.
+		recent_empty = false
+		for !is_breakz(parser.buffer, parser.buffer_pos+peek) {
+			text = append(text, parser.buffer[parser.buffer_pos+peek])
+			peek++
+			if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
+				return false
 			}
-			for !is_breakz(parser.buffer, parser.buffer_pos+peek) {
-				*comment = append(*comment, parser.buffer[parser.buffer_pos+peek])
-				peek++
-				if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
+		}
+
+		// Skip until after the consumed comment line.
+		until := parser.buffer_pos + peek
+		for parser.buffer_pos < until {
+			if is_break(parser.buffer, parser.buffer_pos) {
+				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
 					return false
 				}
+				skip_line(parser)
+			} else {
+				skip(parser)
 			}
-			breaks = true
-		} else if c == ']' || c == '}' {
-			break
-		} else {
-			// Abort and allow that next line to have the comment as its header.
-			*comment = original
-			return true
 		}
+
+		peek = 0
+		column = 0
+		line = parser.mark.line
 	}
 
-	// Skip until after the consumed comment lines.
-	until := parser.buffer_pos + peek
-	for parser.buffer_pos < until {
-		if is_break(parser.buffer, parser.buffer_pos) {
-			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
-				return false
-			}
-			skip_line(parser)
-		} else {
-			skip(parser)
-		}
+	if len(text) > 0 {
+		parser.comments = append(parser.comments, yaml_comment_t{
+			scan_mark:  scan_mark,
+			token_mark: start_mark,
+			start_mark: start_mark,
+			end_mark:   yaml_mark_t{parser.mark.index + peek - 1, line, column},
+			head:       text,
+		})
 	}
 	return true
 }

+ 20 - 8
yamlh.go

@@ -1,17 +1,17 @@
-// 
+//
 // Copyright (c) 2011-2019 Canonical Ltd
 // Copyright (c) 2006-2010 Kirill Simonov
-// 
+//
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files (the "Software"), to deal in
 // the Software without restriction, including without limitation the rights to
 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 // of the Software, and to permit persons to whom the Software is furnished to do
 // so, subject to the following conditions:
-// 
+//
 // The above copyright notice and this permission notice shall be included in all
 // copies or substantial portions of the Software.
-// 
+//
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -260,6 +260,7 @@ const (
 	yaml_SEQUENCE_END_EVENT   // A SEQUENCE-END event.
 	yaml_MAPPING_START_EVENT  // A MAPPING-START event.
 	yaml_MAPPING_END_EVENT    // A MAPPING-END event.
+	yaml_TAIL_COMMENT_EVENT
 )
 
 var eventStrings = []string{
@@ -274,6 +275,7 @@ var eventStrings = []string{
 	yaml_SEQUENCE_END_EVENT:   "sequence end",
 	yaml_MAPPING_START_EVENT:  "mapping start",
 	yaml_MAPPING_END_EVENT:    "mapping end",
+	yaml_TAIL_COMMENT_EVENT:   "tail comment",
 }
 
 func (e yaml_event_type_t) String() string {
@@ -305,6 +307,7 @@ type yaml_event_t struct {
 	head_comment []byte
 	line_comment []byte
 	foot_comment []byte
+	tail_comment []byte
 
 	// The anchor (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_ALIAS_EVENT).
 	anchor []byte
@@ -581,6 +584,8 @@ type yaml_parser_t struct {
 
 	unread int // The number of unread characters in the buffer.
 
+	newlines int // The number of line breaks since last non-break/non-blank character
+
 	raw_buffer     []byte // The raw buffer.
 	raw_buffer_pos int    // The current position of the buffer.
 
@@ -594,6 +599,7 @@ type yaml_parser_t struct {
 	head_comment []byte // The current head comments
 	line_comment []byte // The current line comments
 	foot_comment []byte // The current foot comments
+	tail_comment []byte // Foot comment that happens at the end of a block.
 
 	comments      []yaml_comment_t // The folded comments for all parsed tokens
 	comments_head int
@@ -631,10 +637,15 @@ type yaml_parser_t struct {
 }
 
 type yaml_comment_t struct {
-	after yaml_mark_t
-	head  []byte
-	line  []byte
-	foot  []byte
+
+	scan_mark  yaml_mark_t // Position where scanning for comments started
+	token_mark yaml_mark_t // Position after which tokens will be associated with this comment
+	start_mark yaml_mark_t // Position of '#' comment mark
+	end_mark   yaml_mark_t // Position where comment terminated
+
+	head []byte
+	line []byte
+	foot []byte
 }
 
 // Emitter Definitions
@@ -771,6 +782,7 @@ type yaml_emitter_t struct {
 	head_comment []byte
 	line_comment []byte
 	foot_comment []byte
+	tail_comment []byte
 
 	// Dumper stuff