markdown.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. //
  11. // Markdown parsing and processing
  12. //
  13. //
  14. // Blackfriday markdown processor.
  15. //
  16. // Translates plain text with simple formatting rules into HTML or LaTeX.
  17. package blackfriday
  18. import (
  19. "bytes"
  20. "fmt"
  21. "strings"
  22. "unicode/utf8"
  23. )
// VERSION is the version string of this package.
const VERSION = "1.5"
// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// Every EXTENSION_* constant is a distinct bit (1 << iota), so a set of
// extensions is an int bit mask that is tested with the & operator.
const (
	EXTENSION_NO_INTRA_EMPHASIS          = 1 << iota // ignore emphasis markers inside words
	EXTENSION_TABLES                                 // render tables
	EXTENSION_FENCED_CODE                            // render fenced code blocks
	EXTENSION_AUTOLINK                               // detect embedded URLs that are not explicitly marked
	EXTENSION_STRIKETHROUGH                          // strikethrough text using ~~test~~
	EXTENSION_LAX_HTML_BLOCKS                        // loosen up HTML block parsing rules
	EXTENSION_SPACE_HEADERS                          // be strict about prefix header rules
	EXTENSION_HARD_LINE_BREAK                        // translate newlines into line breaks
	EXTENSION_TAB_SIZE_EIGHT                         // expand tabs to eight spaces instead of four
	EXTENSION_FOOTNOTES                              // Pandoc-style footnotes
	EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK             // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	EXTENSION_HEADER_IDS                             // specify header IDs with {#id}
	EXTENSION_TITLEBLOCK                             // Titleblock ala pandoc
	EXTENSION_AUTO_HEADER_IDS                        // Create the header ID from the text
	EXTENSION_BACKSLASH_LINE_BREAK                   // translate trailing backslashes into line breaks
	EXTENSION_DEFINITION_LISTS                       // render definition lists
	EXTENSION_JOIN_LINES                             // delete newline and join lines

	// commonHtmlFlags is the set of HTML renderer flags that MarkdownCommon
	// passes to HtmlRenderer.
	commonHtmlFlags = 0 |
		HTML_USE_XHTML |
		HTML_USE_SMARTYPANTS |
		HTML_SMARTYPANTS_FRACTIONS |
		HTML_SMARTYPANTS_DASHES |
		HTML_SMARTYPANTS_LATEX_DASHES

	// commonExtensions is the set of parsing extensions that MarkdownCommon
	// enables.
	commonExtensions = 0 |
		EXTENSION_NO_INTRA_EMPHASIS |
		EXTENSION_TABLES |
		EXTENSION_FENCED_CODE |
		EXTENSION_AUTOLINK |
		EXTENSION_STRIKETHROUGH |
		EXTENSION_SPACE_HEADERS |
		EXTENSION_HEADER_IDS |
		EXTENSION_BACKSLASH_LINE_BREAK |
		EXTENSION_DEFINITION_LISTS
)
// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// The values are consecutive integers starting at zero (plain iota), not bit
// flags. The Renderer.AutoLink callback takes one of them as its kind argument.
const (
	LINK_TYPE_NOT_AUTOLINK = iota
	LINK_TYPE_NORMAL
	LINK_TYPE_EMAIL
)
// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// Each constant is a distinct bit (1 << iota). The footnote rendering in this
// file passes LIST_ITEM_BEGINNING_OF_LIST and LIST_ITEM_CONTAINS_BLOCK to
// Renderer.FootnoteItem as well.
const (
	LIST_TYPE_ORDERED = 1 << iota
	LIST_TYPE_DEFINITION
	LIST_TYPE_TERM
	LIST_ITEM_CONTAINS_BLOCK
	LIST_ITEM_BEGINNING_OF_LIST
	LIST_ITEM_END_OF_LIST
)
// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// Note that TABLE_ALIGNMENT_CENTER is itself defined as the LEFT and RIGHT
// bits combined, so a centered cell has both of those bits set.
const (
	TABLE_ALIGNMENT_LEFT = 1 << iota
	TABLE_ALIGNMENT_RIGHT
	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)
// The size of a tab stop.
//
// firstPass expands tabs to TAB_SIZE_DEFAULT columns unless the
// EXTENSION_TAB_SIZE_EIGHT extension is set, in which case it uses
// TAB_SIZE_EIGHT.
const (
	TAB_SIZE_DEFAULT = 4
	TAB_SIZE_EIGHT   = 8
)
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
//
// The map is used purely as a set: keys are lower-case tag names and the
// values are empty structs.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// Every callback writes its result to the out buffer it is given.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided
type Renderer interface {
	// block-level callbacks
	BlockCode(out *bytes.Buffer, text []byte, lang string)
	BlockQuote(out *bytes.Buffer, text []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags int)
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool)
	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
	TableRow(out *bytes.Buffer, text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	// Footnotes is called once after the document body when footnotes are
	// enabled and at least one was collected; its text callback emits each
	// footnote through FootnoteItem.
	Footnotes(out *bytes.Buffer, text func() bool)
	// FootnoteItem receives the footnote id as name, the rendered footnote
	// contents as text, and LIST_ITEM_* bits as flags.
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlock(out *bytes.Buffer, text []byte)

	// Span-level callbacks
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

	// Low-level callbacks
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Header and footer
	// DocumentHeader is called before any block is rendered and
	// DocumentFooter after all blocks and footnotes have been rendered.
	DocumentHeader(out *bytes.Buffer)
	DocumentFooter(out *bytes.Buffer)

	GetFlags() int
}
// inlineParser is the signature of the callback functions used for inline
// parsing. One such function is defined for each character that triggers a
// response when parsing inline data; they are registered in the parser's
// inlineCallback table, indexed by the triggering byte.
type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int
// parser holds runtime state used while parsing a single document.
// It is constructed by MarkdownOptions.
type parser struct {
	r              Renderer              // output renderer; never nil once constructed
	refOverride    ReferenceOverrideFunc // optional hook consulted by getRef before refs
	refs           map[string]*reference // collected references, keyed by lower-cased id
	inlineCallback [256]inlineParser     // inline handlers indexed by trigger byte; nil when unused
	flags          int                   // EXTENSION_* bit mask
	nesting        int                   // current nesting depth; must be zero again when rendering ends
	maxNesting     int                   // nesting limit (set to 16 by MarkdownOptions)
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference
}
  205. func (p *parser) getRef(refid string) (ref *reference, found bool) {
  206. if p.refOverride != nil {
  207. r, overridden := p.refOverride(refid)
  208. if overridden {
  209. if r == nil {
  210. return nil, false
  211. }
  212. return &reference{
  213. link: []byte(r.Link),
  214. title: []byte(r.Title),
  215. noteId: 0,
  216. hasBlock: false,
  217. text: []byte(r.Text)}, true
  218. }
  219. }
  220. // refs are case insensitive
  221. ref, found = p.refs[strings.ToLower(refid)]
  222. return ref, found
  223. }
  224. //
  225. //
  226. // Public interface
  227. //
  228. //
// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
//
// A Reference returned by a ReferenceOverrideFunc is copied field by field
// into the parser's internal representation.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is reported as not
// found and the default reference logic is skipped.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
// The zero value selects no extensions and no reference override.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// EXTENSION_* flags defined in this package.
	Extensions int

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	// * [link text][refid]
	// * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
  267. // MarkdownBasic is a convenience function for simple rendering.
  268. // It processes markdown input with no extensions enabled.
  269. func MarkdownBasic(input []byte) []byte {
  270. // set up the HTML renderer
  271. htmlFlags := HTML_USE_XHTML
  272. renderer := HtmlRenderer(htmlFlags, "", "")
  273. // set up the parser
  274. return MarkdownOptions(input, renderer, Options{Extensions: 0})
  275. }
  276. // Call Markdown with most useful extensions enabled
  277. // MarkdownCommon is a convenience function for simple rendering.
  278. // It processes markdown input with common extensions enabled, including:
  279. //
  280. // * Smartypants processing with smart fractions and LaTeX dashes
  281. //
  282. // * Intra-word emphasis suppression
  283. //
  284. // * Tables
  285. //
  286. // * Fenced code blocks
  287. //
  288. // * Autolinking
  289. //
  290. // * Strikethrough support
  291. //
  292. // * Strict header parsing
  293. //
  294. // * Custom Header IDs
  295. func MarkdownCommon(input []byte) []byte {
  296. // set up the HTML renderer
  297. renderer := HtmlRenderer(commonHtmlFlags, "", "")
  298. return MarkdownOptions(input, renderer, Options{
  299. Extensions: commonExtensions})
  300. }
  301. // Markdown is the main rendering function.
  302. // It parses and renders a block of markdown-encoded text.
  303. // The supplied Renderer is used to format the output, and extensions dictates
  304. // which non-standard extensions are enabled.
  305. //
  306. // To use the supplied Html or LaTeX renderers, see HtmlRenderer and
  307. // LatexRenderer, respectively.
  308. func Markdown(input []byte, renderer Renderer, extensions int) []byte {
  309. return MarkdownOptions(input, renderer, Options{
  310. Extensions: extensions})
  311. }
  312. // MarkdownOptions is just like Markdown but takes additional options through
  313. // the Options struct.
  314. func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
  315. // no point in parsing if we can't render
  316. if renderer == nil {
  317. return nil
  318. }
  319. extensions := opts.Extensions
  320. // fill in the render structure
  321. p := new(parser)
  322. p.r = renderer
  323. p.flags = extensions
  324. p.refOverride = opts.ReferenceOverride
  325. p.refs = make(map[string]*reference)
  326. p.maxNesting = 16
  327. p.insideLink = false
  328. // register inline parsers
  329. p.inlineCallback['*'] = emphasis
  330. p.inlineCallback['_'] = emphasis
  331. if extensions&EXTENSION_STRIKETHROUGH != 0 {
  332. p.inlineCallback['~'] = emphasis
  333. }
  334. p.inlineCallback['`'] = codeSpan
  335. p.inlineCallback['\n'] = lineBreak
  336. p.inlineCallback['['] = link
  337. p.inlineCallback['<'] = leftAngle
  338. p.inlineCallback['\\'] = escape
  339. p.inlineCallback['&'] = entity
  340. if extensions&EXTENSION_AUTOLINK != 0 {
  341. p.inlineCallback[':'] = autoLink
  342. }
  343. if extensions&EXTENSION_FOOTNOTES != 0 {
  344. p.notes = make([]*reference, 0)
  345. }
  346. first := firstPass(p, input)
  347. second := secondPass(p, first)
  348. return second
  349. }
// firstPass preprocesses the raw input one line at a time and returns the
// text that the second pass renders:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
//
// The result is never empty: it contains at least a single newline.
func firstPass(p *parser, input []byte) []byte {
	var out bytes.Buffer
	tabSize := TAB_SIZE_DEFAULT
	if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
		tabSize = TAB_SIZE_EIGHT
	}
	// beg is the offset of the start of the current line.
	beg := 0
	// lastFencedCodeBlockEnd is the offset just past the most recently
	// detected fenced code block; lines ending before it are inside the block.
	lastFencedCodeBlockEnd := 0
	for beg < len(input) {
		// Find end of this line, then process the line.
		end := beg
		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
			end++
		}

		if p.flags&EXTENSION_FENCED_CODE != 0 {
			// track fenced code block boundaries to suppress tab expansion
			// and reference extraction inside them:
			if beg >= lastFencedCodeBlockEnd {
				// NOTE(review): the final false argument presumably asks
				// fencedCodeBlock to only measure the block without rendering
				// it — confirm against its definition.
				if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
					lastFencedCodeBlockEnd = beg + i
				}
			}
		}

		// add the line body if present
		if end > beg {
			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
				out.Write(input[beg:end])
			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
				// The reference definition is consumed: skip past it without
				// copying it or writing a newline for it here.
				beg += refEnd
				continue
			} else {
				expandTabs(&out, input[beg:end], tabSize)
			}
		}

		// Consume the line terminator ("\r", "\n" or "\r\n") and always emit
		// a single "\n" in its place.
		if end < len(input) && input[end] == '\r' {
			end++
		}
		if end < len(input) && input[end] == '\n' {
			end++
		}
		out.WriteByte('\n')

		beg = end
	}

	// empty input?
	if out.Len() == 0 {
		out.WriteByte('\n')
	}

	return out.Bytes()
}
  404. // second pass: actual rendering
  405. func secondPass(p *parser, input []byte) []byte {
  406. var output bytes.Buffer
  407. p.r.DocumentHeader(&output)
  408. p.block(&output, input)
  409. if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 {
  410. p.r.Footnotes(&output, func() bool {
  411. flags := LIST_ITEM_BEGINNING_OF_LIST
  412. for i := 0; i < len(p.notes); i += 1 {
  413. ref := p.notes[i]
  414. var buf bytes.Buffer
  415. if ref.hasBlock {
  416. flags |= LIST_ITEM_CONTAINS_BLOCK
  417. p.block(&buf, ref.title)
  418. } else {
  419. p.inline(&buf, ref.title)
  420. }
  421. p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags)
  422. flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK
  423. }
  424. return true
  425. })
  426. }
  427. p.r.DocumentFooter(&output)
  428. if p.nesting != 0 {
  429. panic("Nesting level did not end at zero")
  430. }
  431. return output.Bytes()
  432. }
  433. //
  434. // Link references
  435. //
  436. // This section implements support for references that (usually) appear
  437. // as footnotes in a document, and can be referenced anywhere in the document.
  438. // The basic format is:
  439. //
  440. // [1]: http://www.google.com/ "Google"
  441. // [2]: http://www.github.com/ "Github"
  442. //
  443. // Anywhere in the document, the reference can be linked by referring to its
  444. // label, i.e., 1 and 2 in this example, as in:
  445. //
  446. // This library is hosted on [Github][2], a git hosting site.
  447. //
  448. // Actual footnotes as specified in Pandoc and supported by some other Markdown
  449. // libraries such as php-markdown are also taken care of. They look like this:
  450. //
  451. // This sentence needs a bit of further explanation.[^note]
  452. //
  453. // [^note]: This is the explanation.
  454. //
  455. // Footnotes should be placed at the end of the document in an ordered list.
  456. // Inline footnotes such as:
  457. //
  458. // Inline footnotes^[Not supported.] also exist.
  459. //
  460. // are not yet supported.
  461. // References are parsed and stored in this struct.
  462. type reference struct {
  463. link []byte
  464. title []byte
  465. noteId int // 0 if not a footnote ref
  466. hasBlock bool
  467. text []byte
  468. }
  469. func (r *reference) String() string {
  470. return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
  471. r.link, r.title, r.text, r.noteId, r.hasBlock)
  472. }
// isReference checks whether or not data starts with a reference definition
// (a link reference, or a footnote when the footnote extension is enabled).
// If so, it is parsed and stored in the parser's refs map under its
// lower-cased id.
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
// tabSize is only used for footnotes, where it is the indent width handed to
// scanFootnote.
func isReference(p *parser, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteId := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.flags&EXTENSION_FOOTNOTES != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteId = 1
			i++
		}
	}
	// idOffset..idEnd delimit the id text between the brackets (without the
	// leading '^' of a footnote).
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i

	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		// treat "\r\n" as a single newline
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	// nothing follows the spacer, so there is no link or footnote text
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd   int
		titleOffset, titleEnd int
		lineEnd               int
		raw                   []byte
		hasBlock              bool
	)

	if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 {
		// footnote: the end of the footnote block doubles as the line end
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	// a zero lineEnd means the scanner rejected the definition
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteId:   noteId,
		hasBlock: hasBlock,
	}

	if noteId > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		// link and title are sub-slices of data, not copies
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}
  565. func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
  566. // link: whitespace-free sequence, optionally between angle brackets
  567. if data[i] == '<' {
  568. i++
  569. }
  570. linkOffset = i
  571. if i == len(data) {
  572. return
  573. }
  574. for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
  575. i++
  576. }
  577. linkEnd = i
  578. if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
  579. linkOffset++
  580. linkEnd--
  581. }
  582. // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
  583. for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
  584. i++
  585. }
  586. if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
  587. return
  588. }
  589. // compute end-of-line
  590. if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
  591. lineEnd = i
  592. }
  593. if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
  594. lineEnd++
  595. }
  596. // optional (space|tab)* spacer after a newline
  597. if lineEnd > 0 {
  598. i = lineEnd + 1
  599. for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
  600. i++
  601. }
  602. }
  603. // optional title: any non-newline sequence enclosed in '"() alone on its line
  604. if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
  605. i++
  606. titleOffset = i
  607. // look for EOL
  608. for i < len(data) && data[i] != '\n' && data[i] != '\r' {
  609. i++
  610. }
  611. if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
  612. titleEnd = i + 1
  613. } else {
  614. titleEnd = i
  615. }
  616. // step back
  617. i--
  618. for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
  619. i--
  620. }
  621. if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
  622. lineEnd = titleEnd
  623. titleEnd = i
  624. }
  625. }
  626. return
  627. }
// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
//
// i is the offset in data at which the footnote text starts and indentSize is
// the number of spaces that count as one indent level. blockStart is the
// offset of the first non-space byte of the text, and hasBlock reports whether
// at least one indented continuation line was gathered. All results are zero
// when i is 0 or data is empty.
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	blockEnd = i
	// the loop stops with i just past the newline (or at the end of data)
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		// n is the width of this line's indent, or 0 if it is not indented
		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// make sure the extracted text ends with a newline
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}
  691. //
  692. //
  693. // Miscellaneous helper functions
  694. //
  695. //
  696. // Test if a character is a punctuation symbol.
  697. // Taken from a private function in regexp in the stdlib.
  698. func ispunct(c byte) bool {
  699. for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
  700. if c == r {
  701. return true
  702. }
  703. }
  704. return false
  705. }
  706. // Test if a character is a whitespace character.
  707. func isspace(c byte) bool {
  708. return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
  709. }
  710. // Test if a character is letter.
  711. func isletter(c byte) bool {
  712. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
  713. }
  714. // Test if a character is a letter or a digit.
  715. // TODO: check when this is looking for ASCII alnum and when it should use unicode
  716. func isalnum(c byte) bool {
  717. return (c >= '0' && c <= '9') || isletter(c)
  718. }
  719. // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
  720. // always ends output with a newline
  721. func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
  722. // first, check for common cases: no tabs, or only tabs at beginning of line
  723. i, prefix := 0, 0
  724. slowcase := false
  725. for i = 0; i < len(line); i++ {
  726. if line[i] == '\t' {
  727. if prefix == i {
  728. prefix++
  729. } else {
  730. slowcase = true
  731. break
  732. }
  733. }
  734. }
  735. // no need to decode runes if all tabs are at the beginning of the line
  736. if !slowcase {
  737. for i = 0; i < prefix*tabSize; i++ {
  738. out.WriteByte(' ')
  739. }
  740. out.Write(line[prefix:])
  741. return
  742. }
  743. // the slow case: we need to count runes to figure out how
  744. // many spaces to insert for each tab
  745. column := 0
  746. i = 0
  747. for i < len(line) {
  748. start := i
  749. for i < len(line) && line[i] != '\t' {
  750. _, size := utf8.DecodeRune(line[i:])
  751. i += size
  752. column++
  753. }
  754. if i > start {
  755. out.Write(line[start:i])
  756. }
  757. if i >= len(line) {
  758. break
  759. }
  760. for {
  761. out.WriteByte(' ')
  762. column++
  763. if column%tabSize == 0 {
  764. break
  765. }
  766. }
  767. i++
  768. }
  769. }
  770. // Find if a line counts as indented or not.
  771. // Returns number of characters the indent is (0 = not indented).
  772. func isIndented(data []byte, indentSize int) int {
  773. if len(data) == 0 {
  774. return 0
  775. }
  776. if data[0] == '\t' {
  777. return 1
  778. }
  779. if len(data) < indentSize {
  780. return 0
  781. }
  782. for i := 0; i < indentSize; i++ {
  783. if data[i] != ' ' {
  784. return 0
  785. }
  786. }
  787. return indentSize
  788. }
  789. // Create a url-safe slug for fragments
  790. func slugify(in []byte) []byte {
  791. if len(in) == 0 {
  792. return in
  793. }
  794. out := make([]byte, 0, len(in))
  795. sym := false
  796. for _, ch := range in {
  797. if isalnum(ch) {
  798. sym = false
  799. out = append(out, ch)
  800. } else if sym {
  801. continue
  802. } else {
  803. out = append(out, '-')
  804. sym = true
  805. }
  806. }
  807. var a, b int
  808. var ch byte
  809. for a, ch = range out {
  810. if ch != '-' {
  811. break
  812. }
  813. }
  814. for b = len(out) - 1; b > 0; b-- {
  815. if out[b] != '-' {
  816. break
  817. }
  818. }
  819. return out[a : b+1]
  820. }