2/11/2018 - 9:37 AM

A markdown-ish parser written in Swift 2.0

/*
  Markdown-ish parser. Regular expressions are not allowed in here!

  The parser works in several stages:

  1. split up the text into lines
  2. combine lines into logical blocks
  3. tokenize the contents of the blocks

  The result of this process is a tree structure that describes the contents of
  the file. This tree can then be rendered to HTML, for example.
 */

import Foundation

// MARK: - String Methods

extension String {
  /*
   * Returns a copy of this string in which every occurrence of `s1` has been
   * replaced by `s2`.
   */
  func replace(s1: String, with s2: String) -> String {
    return stringByReplacingOccurrencesOfString(s1, withString: s2)
  }

  /*
   * HTML escapes a string: the characters & " ' < > are replaced by their
   * entities so the result can be placed in element content or inside a
   * quoted attribute value.
   */
  func escape() -> String {
    // The ampersand has to be handled first. Otherwise the "&" that starts
    // the entities inserted by the later substitutions would be escaped again.
    let substitutions = [
      ("&", "&amp;"),
      ("\"", "&quot;"),
      ("'", "&#39;"),
      ("<", "&lt;"),
      (">", "&gt;"),
    ]

    var escaped = self
    for (character, entity) in substitutions {
      escaped = escaped.replace(character, with: entity)
    }
    return escaped
  }
}

// MARK: - Types

/*
 * A parsed Markdown-ish document. Holds the original text plus the list of
 * top-level blocks that the parser derived from it.
 */
struct Markdown {
  /*
   * The text from a Fragment is converted to a stream of Tokens. This allows
   * for the separation of actual text from the control characters that modify
   * the appearance of the text.
   */
  enum Token {
    case Spacing                 // any amount of whitespace
    case Text(text: String)      // words
    case Escape(symbol: String)  // --, <, >, ', & and so on
    case Open(symbol: String)    // ", *, **, ~~, ` spans
    case Close(symbol: String)   // the matching end of an Open span
    // A link; the link text is itself tokenized and stored as a Fragment.
    case Link(fragment: Fragment, url: String)
    // An image; the caption is kept as plain, untokenized text.
    case Image(caption: String, url: String)
  }

  /*
   * Roughly speaking, each line in the input document corresponds to a fragment,
   * but the fragment will have any whitespace trimmed off. The last newline is
   * also stripped. If the line starts a new block, such as `- list item` then
   * the `-` is also stripped from the fragment.
   *
   * Note: Source code is not tokenized at the moment, but could be in the future
   * to support syntax highlighting. (Note: .Code blocks consist of only a single
   * fragment, so such fragments may include newlines.)
   */
  enum Fragment {
    case Text(tokens: [Token])   // a tokenized line of text
    case Code(code: String)      // raw source code, copied verbatim from the input
  }

  /*
   * The possible types of top-level blocks.
   */
  enum BlockType {
    case Empty                     // just for parsing
    case Header(level: Int)        // #
    case Text                      // regular paragraph of text
    case Quote                     // >
    case Code(language: String)    // ```language
    case CodeIndented              // 4 spaces or tab
    case ListItem(ordered: Bool)   // 1. or -
  }

  /*
   * Describes a top-level block. The lines from the input document are grouped
   * into such blocks. Each block will have one or more Fragments.
   */
  struct Block {
    var type: BlockType
    var fragments: [Fragment] = []

    // A new block starts out without fragments; the parser appends them.
    init(type: BlockType) {
      self.type = type
    }
  }

  private let input: String          // the Markdown text
  private var blocks: [Block] = []   // the top-level blocks
}

// MARK: - Public API

extension Markdown {
  /*
   * Creates the document and parses it right away: the text is first split
   * into lines, then the lines are grouped into top-level blocks.
   */
  init(string input: String) {
    self.input = input
    let lineEnds = parseLines()
    blocks = parseBlocks(lineEnds)
  }

  /*
   * Returns the top-level blocks of the parsed document.
   */
  func tree() -> [Block] {
    return blocks
  }

  /*
   * Removes the top-level block at the given position.
   */
  mutating func removeBlockAtIndex(index: Int) {
    blocks.removeAtIndex(index)
  }
}

// MARK: - Workarounds for Swift Issues

/*
 * For debugging only. Letting Swift do this automatically doesn't work
 * very dependably yet (Xcode 7 beta 5).
 */
extension Markdown.Token: CustomStringConvertible {
  // A short, human-readable rendering of the token for debug output.
  var description: String {
    switch self {
    case .Spacing:
      return "Spacing"
    case .Text(let text):
      return text
    case .Escape(let symbol):
      return "Escape(" + symbol + ")"
    case .Open(let symbol):
      return "Open(" + symbol + ")"
    case .Close(let symbol):
      return "Close(" + symbol + ")"
    case .Link(let fragment, let url):
      return "Link(\(fragment)," + url + ")"
    case .Image(let caption, let url):
      return "Image(" + caption + "," + url + ")"
    }
  }
}

extension Markdown.BlockType: CustomStringConvertible {
  // A short, human-readable tag for the block type, prefixed with "@".
  var description: String {
    switch self {
    case .Empty:
      return "@Empty"
    case .Header(let level):
      return "@Header:" + String(level)
    case .Text:
      return "@Text"
    case .Quote:
      return "@Quote"
    case .Code(let language):
      return "@Code:" + language
    case .CodeIndented:
      return "@CodeIndented"
    case .ListItem(let ordered):
      return "@ListItem:" + (ordered ? "true" : "false")
    }
  }
}

/*
 * These helper methods are necessary because `if case` cannot be combined with
 * other conditions.
 *
 * In Swift 2.0 you can't write: if case .Quote = foo && bar { ... }
 *                           or: if !case .Empty
 *                           or: if case .Empty || case .Header || case .Code
 *
 * But at least you can now write `if foo.isQuote() && bar { ... }`
 */
private extension Markdown.BlockType {
  // True for .Empty only.
  func isEmpty() -> Bool {
    switch self {
    case .Empty: return true
    default: return false
    }
  }

  // True for a fenced .Code block, whatever its language.
  func isCode() -> Bool {
    switch self {
    case .Code: return true
    default: return false
    }
  }

  // True for .Quote only.
  func isQuote() -> Bool {
    switch self {
    case .Quote: return true
    default: return false
    }
  }

  // True for both ordered and unordered list items.
  func isListItem() -> Bool {
    switch self {
    case .ListItem: return true
    default: return false
    }
  }

  // True only for a list item that belongs to an ordered list.
  func isOrderedList() -> Bool {
    switch self {
    case .ListItem(let ordered): return ordered
    default: return false
    }
  }

  // True for .Empty, .Header and fenced .Code.
  func isEmpty_Header_Code() -> Bool {
    if case .Header = self { return true }
    return isEmpty() || isCode()
  }

  // True for .Text and .CodeIndented.
  func isText_CodeIndented() -> Bool {
    if case .Text = self { return true }
    if case .CodeIndented = self { return true }
    return false
  }

  // Whitespace is significant in both kinds of code, so it is only trimmed
  // from lines of the other block types.
  func shouldTrimWhitespace() -> Bool {
    if case .CodeIndented = self { return false }
    return !isCode()
  }
}

// MARK: - Supporting Methods

private extension Character {
  // True for a space, a tab or a carriage return; a line feed does not count.
  func isWhitespace() -> Bool {
    switch self {
    case " ", "\t", "\r":
      return true
    default:
      return false
    }
  }

  // Like isWhitespace(), but also accepts the line feed.
  func isWhitespaceOrNewline() -> Bool {
    if self == "\n" { return true }
    return isWhitespace()
  }
}

private extension Markdown {
  /*
   * Skips whitespace and newlines at the front of the range.
   *
   * Returns the index of the first character in `startIndex ..< endIndex` that
   * is not whitespace or a newline, or `endIndex` if there is no such character.
   */
  func eatLeadingWhitespace(startIndex: String.Index, _ endIndex: String.Index) -> String.Index {
    var i = startIndex
    while i < endIndex {
      if !input[i].isWhitespaceOrNewline() { return i }
      i = i.successor()
    }
    return endIndex
  }

  /*
   * Skips whitespace and newlines at the back of the range.
   *
   * Returns the index just past the last character in `startIndex ..< endIndex`
   * that is not whitespace or a newline, or `startIndex` if there is no such
   * character (which includes the empty range).
   */
  func eatTrailingWhitespace(startIndex: String.Index, _ endIndex: String.Index) -> String.Index {
    // `i` is always one past the character being examined. Stepping back only
    // while `i > startIndex` guarantees that predecessor() is never called on
    // the first index of the input, which would trap. This matters for empty
    // input, where the range to trim starts and ends at the very first index.
    var i = endIndex
    while i > startIndex {
      let previous = i.predecessor()
      if !input[previous].isWhitespaceOrNewline() { return i }
      i = previous
    }
    return startIndex
  }
}

// MARK: - Splitting Into Lines

private extension Markdown {
  /*
   * Finds where the lines of the input document end. Every returned index is
   * the position just past a line, which is also where the next line begins.
   */
  func parseLines() -> [String.Index] {
    var lineEnds: [String.Index] = []
    var cursor = input.startIndex

    while cursor < input.endIndex {
      let isNewline = (input[cursor] == "\n")
      cursor = cursor.successor()

      // A line ends after a newline, or at the end of the text when the last
      // line has no newline. Either way the last line ends at endIndex.
      if isNewline || cursor == input.endIndex {
        lineEnds.append(cursor)
      }
    }

    // The end of the text is recorded a second time on purpose. The block
    // scanner then sees one final .Empty line and needs no separate check for
    // the end of the text.
    lineEnds.append(input.endIndex)
    return lineEnds
  }
}

// MARK: - Top-Level Blocks

private extension Markdown {
  /*
   * Determine the top-level blocks in the file.
   *
   * `lines` holds the end index of every line, as produced by parseLines();
   * the end of the input is expected to appear twice at the end of the array.
   *
   * The scanner is a small state machine: `block` is the block currently being
   * built, and each iteration looks at the type of the *next* line to decide
   * whether the pending fragment is added to that block, and whether the block
   * is finished and a new one begun.
   */
  func parseBlocks(lines: [String.Index]) -> [Block] {
    var blocks: [Block] = []
    var lineStart = input.startIndex
    // The pending fragment (the text of the previous line or lines).
    var fragmentStart = lineStart
    var fragmentEnd = lineStart
    var block = Block(type: .Empty)
    // Number of empty lines seen inside the current indented code block that
    // have not been turned into fragments yet.
    var count = 0

    for lineEnd in lines {
      // This looks at the next line. If the type of this line is different from
      // the current block, then we may need to end the block and make a new one.
      // Exactly how depends on the particular block type. The "adjusted" start
      // index is for skipping the symbol that identifies the line.
      var (nextType, adjustedLineStart) = identify(lineStart, lineEnd)

      // A fragment should not have leading or trailing whitespace or a newline.
      // (Leading whitespace is kept for code, where it is significant.)
      if nextType.shouldTrimWhitespace() {
        adjustedLineStart = eatLeadingWhitespace(adjustedLineStart, lineEnd)
      }
      let adjustedLineEnd = eatTrailingWhitespace(adjustedLineStart, lineEnd)

      //print("LINE '" + input.substringWithRange(lineStart ..< lineEnd) + "' is type \(nextType)")

      // Because the endIndex appears twice in the array, we can easily detect
      // whether scanning has reached the end of the input. (By the way, the
      // type of that last "line" is .Empty, so when checking for the Empty state
      // we don't need to look at endOfInput also.)
      let endOfInput = (adjustedLineStart == input.endIndex)

      // It may be a bit weird to define inner functions here, but this allows
      // them to use variables such as `nextType` and `block`, without passing
      // those as parameters.

      // Starts a block of the next line's type; its first fragment begins at
      // the (adjusted) start of that line.
      func beginNewBlock() {
        block = Block(type: nextType)
        fragmentStart = adjustedLineStart
        count = 0
      }

      // Tokenizes the pending fragment and adds it to the current block.
      func addTextFragment() {
        let tokenized = tokenize(startIndex: fragmentStart, endIndex: fragmentEnd)
        let fragment = Fragment.Text(tokens: tokenized)
        block.fragments.append(fragment)
      }

      // Adds the pending fragment to the current block as verbatim code.
      func addCodeFragment() {
        let s = input.substringWithRange(fragmentStart ..< fragmentEnd)
        let fragment = Fragment.Code(code: s)
        block.fragments.append(fragment)
      }

      // Appends the current block to the result.
      func finishBlock() {
        blocks.append(block)
      }

      // The current fragment always refers to the previous line(s). It is given
      // by `fragmentStart` and `fragmentEnd`; the latter is the trimmed end of
      // the previous line. So we don't immediately add new fragments, we always
      // want to look at the next line first.

      switch block.type {
      case .Empty:
        if !nextType.isEmpty() {
          beginNewBlock()
        }

      case .Header:
        // A header is always just one line, so we can immediately add this block.
        addTextFragment()
        finishBlock()
        beginNewBlock()

      case .Text:
        addTextFragment()

        // A text block ends when the next line is empty, a header, or code.
        if nextType.isEmpty_Header_Code() {
          finishBlock()
          beginNewBlock()
        } else {
          // Any other line continues the paragraph as-is, including whatever
          // symbol it starts with.
          fragmentStart = lineStart
        }

      case .Quote:
        // Each line in a quote is added as a new fragment, and we strip off
        // the leading > character.
        addTextFragment()

        // A quote ends when the next line is empty, a header, or code.
        if nextType.isEmpty_Header_Code() {
          blocks.append(block)
          beginNewBlock()
        } else {
          // Any other type of line also gets added to the quote. If it
          // starts with >, we strip that off.
          if case .Quote = nextType {
            fragmentStart = adjustedLineStart
          } else {
            fragmentStart = lineStart
          }
        }

      case .Code:
        // A code block ends after a closing line of ``` backticks. Everything
        // between the fences becomes one single fragment. The closing fence
        // itself is consumed, so scanning continues in the .Empty state.
        if nextType.isCode() || endOfInput {
          addCodeFragment()
          finishBlock()
          block = Block(type: .Empty)
        }

      case .CodeIndented:
        // Each line in an indented code block is added as a new fragment,
        // allowing us to strip off the leading spaces/tabs. While empty lines
        // are pending (count > 0) the previous code line was already added.
        if count == 0 {
          addCodeFragment()
        }

        // If the next line is empty, keep going. If the empty line(s) is/are
        // followed by more code, then we'll insert empty fragments.
        if nextType.isEmpty() && !endOfInput {
          ++count
          fragmentStart = lineEnd.predecessor()
        } else if case .CodeIndented = nextType {
          // If the next line is also an indented code block, then keep going.
          // If we've seen empty lines, then add an empty fragment for each line.
          if count > 0 {
            fragmentEnd = fragmentStart
            for _ in 1...count { addCodeFragment() }
            count = 0
          }
          fragmentStart = adjustedLineStart
        } else {
          // If the next line is any other kind of block, then the indented
          // code block has ended. Pending empty lines are dropped.
          finishBlock()
          beginNewBlock()
        }

      case .ListItem:
        // Each line in a list item is added as a new fragment, and we strip
        // off the leading - character.
        addTextFragment()

        // If the next line is text or indented code, then interpret this as
        // another fragment that also belongs to this list item.
        if nextType.isText_CodeIndented() {
          fragmentStart = adjustedLineStart
        } else {
          blocks.append(block)
          beginNewBlock()
        }
      }

      // The line just examined becomes the pending fragment of the next round.
      lineStart = lineEnd
      fragmentEnd = adjustedLineEnd
    }
    return blocks
  }
}

// MARK: - Line Identification

private extension Markdown {
  typealias LineType = (BlockType, String.Index)

  /*
   * Scans the beginning of the line in order to identify what sort of line
   * this is.
   *
   * `startIndex ..< endIndex` is the range of the line, including its newline
   * if it has one.
   *
   * Returns the type of the line plus a new String.Index that points at the
   * beginning of the actual text, having skipped the identifying characters
   * (#, 1., -) but not necessarily any whitespace. For .Empty and .Code lines
   * that index is `endIndex`.
   */
  func identify(startIndex: String.Index, _ endIndex: String.Index) -> LineType {
    var i = startIndex

    // One or more # characters followed by some text. Only called when the
    // character at `i` is a #.
    func header() -> LineType {
      let textStart = i

      // The number of # characters is the level of the header.
      var count = 0
      while i < endIndex && input[i] == "#" {
        count += 1
        i = i.successor()
      }

      // It is only a header if there is something other than whitespace after
      // the # characters. The scan starts at the first character after them,
      // so that a title of a single character ("#T") is recognized too.
      let headerStart = i
      var j = headerStart
      while j < endIndex {
        if !input[j].isWhitespaceOrNewline() {
          return (.Header(level: count), headerStart)
        }
        j = j.successor()
      }
      return (.Text, textStart)  // a # by itself
    }

    // Three backticks, optionally followed by the name of a language.
    func code() -> LineType {
      let textStart = i
      i = i.successor()
      if i < endIndex && input[i] == "`" {
        i = i.successor()
        if i < endIndex && input[i] == "`" {
          i = i.successor()

          // The language is whatever follows the backticks, without the
          // surrounding whitespace. Trimming instead of blindly dropping the
          // last character also gives the right answer on the very last line
          // of the input, which may not end with a newline.
          let languageStart = eatLeadingWhitespace(i, endIndex)
          let languageEnd = eatTrailingWhitespace(languageStart, endIndex)
          let language = input.substringWithRange(languageStart ..< languageEnd)
          return (.Code(language: language), endIndex)
        }
      }
      return (.Text, textStart)
    }

    // A dash followed by whitespace.
    func unorderedListItem() -> LineType {
      let textStart = i
      i = i.successor()
      if i < endIndex && input[i].isWhitespace() {
        return (.ListItem(ordered: false), i)
      }
      return (.Text, textStart)
    }

    // A single digit followed by a period and whitespace.
    func orderedListItem() -> LineType {
      let textStart = i
      i = i.successor()
      if i < endIndex && input[i] == "." {
        i = i.successor()
        if i < endIndex && input[i].isWhitespace() {
          return (.ListItem(ordered: true), i.successor())
        }
      }
      return (.Text, textStart)
    }

    // Decides the type of the line from the character at `i`, which is the
    // first character on the line that is not whitespace.
    func identifier() -> LineType {
      switch input[i] {
      case "#":
        return header()
      case ">":
        return (.Quote, i.successor())
      case "`":
        return code()
      case "-":
        return unorderedListItem()
      case "1", "2", "3", "4", "5", "6", "7", "8", "9":
        return orderedListItem()
      default:
        return (.Text, i)
      }
    }

    // The line starts with whitespace: it is either empty, indented code
    // (indented by the equivalent of 4 or more spaces), or a regular line
    // that happens to be indented a little.
    func whitespace() -> LineType {
      var count = 0
      var codeStart = i
      var foundCodeStart = false

      while i < endIndex {
        switch input[i] {
        case "\n":
          // Nothing but whitespace on this line.
          return (.Empty, endIndex)
        case " ", "\r":
          count += 1
        case "\t":
          count += 4
        default:
          if count >= 4 {
            return (.CodeIndented, codeStart)
          } else {
            return identifier()
          }
        }

        // The code starts right after the character that completes the
        // indentation; anything beyond that belongs to the code itself.
        // A tab can push the count past 4 in one step, so this tests for
        // "4 or more" and remembers only the first time that happens.
        if count >= 4 && !foundCodeStart {
          codeStart = i.successor()
          foundCodeStart = true
        }
        i = i.successor()
      }
      return (.Empty, endIndex)
    }

    if i == endIndex || input[i] == "\n" {
      return (.Empty, endIndex)
    }

    switch input[i] {
    case " ", "\t", "\r":
      return whitespace()
    default:
      return identifier()
    }
  }
}

// MARK: - Tokenization of Fragments

private extension Markdown {
  /*
   * The tokenizer takes in a text fragment and outputs a stream of `Token` objects.
   *
   * For example, the fragment:
   *    aaa  **x** I've
   *
   * becomes the following stream of tokens: Text(aaa) Spacing Open(**) Text(x)
   * Close(**) Spacing Text(I) Escape(') Text(ve)
   *
   * Multiple spaces get combined into a single token.
   *
   * The fragment is the range `startIndex ..< endIndex` of the input. It must
   * not contain a newline; finding one is treated as a programming error.
   */
  func tokenize(startIndex startIndex: String.Index, endIndex: String.Index) -> [Token] {
    var tokens = [Token]()
    var i = startIndex      // the lookahead character

    func addTextToken(s: String) {
      tokens.append(Token.Text(text: s))
    }

    func addEscapeToken(s: String) {
      tokens.append(Token.Escape(symbol: s))
    }

    // Consumes a run of whitespace and emits a single token for it.
    func spacing() {
      while i < endIndex && input[i].isWhitespace() {
        i = i.successor()
      }
      tokens.append(Token.Spacing)
    }

    // Called after one dash has been consumed: -- is an escape, a single
    // dash is just text.
    func escapeDash() {
      if i < endIndex && input[i] == "-" {
        addEscapeToken("--")
        i = i.successor()
      } else {
        addTextToken("-")
      }
    }

    // Called after one period has been consumed: ... is an escape, one or
    // two periods are just text.
    func escapeEllipsis() {
      if i < endIndex && input[i] == "." {
        i = i.successor()
        if i < endIndex && input[i] == "." {
          i = i.successor()
          addEscapeToken("...")
        } else {
          addTextToken("..")
        }
      } else {
        addTextToken(".")
      }
    }

    func escape() {
      let c = input[i]
      i = i.successor()
      if c == "-" {
        escapeDash()
      } else if c == "." {
        escapeEllipsis()
      } else {
        addEscapeToken(String(c))
      }
    }

    // For each span symbol, whether a span of that kind is currently open.
    var seenOpen = [String: Bool]()

    func addOpenOrCloseToken(symbol: String, strict: Bool = false) {
      // The official rules are that *word* and *word1 word2* will work but
      // not * word *, *word *, or * word*. To keep things simple, we only
      // require that the opening * is followed by non-whitespace; where the
      // closing * is doesn't matter.
      if let open = seenOpen[symbol] where open {
        seenOpen[symbol] = false
        tokens.append(Token.Close(symbol: symbol))
      } else if strict && (i == endIndex || input[i].isWhitespace()) {
        addTextToken(symbol)
      } else {
        seenOpen[symbol] = true
        tokens.append(Token.Open(symbol: symbol))
      }
    }

    // Called after one ~ has been consumed.
    func strikethrough() {
      if i < endIndex && input[i] == "~" {
        i = i.successor()
        addOpenOrCloseToken("~~")
      } else {
        addTextToken("~")
      }
    }

    // Note that italics, bold, etc, do not work across multiple lines.
    // That happens because we tokenize each fragment individually and we reset
    // the tokenization state with each new fragment.
    func italicsOrBold() {
      if i < endIndex && input[i] == "*" {
        i = i.successor()
        addOpenOrCloseToken("**", strict: true)
      } else {
        addOpenOrCloseToken("*", strict: true)
      }
    }

    func span() {
      let c = input[i]
      i = i.successor()
      if c == "*" {
        italicsOrBold()
      } else if c == "~" {
        strikethrough()
      } else {
        addOpenOrCloseToken(String(c))
      }
    }

    // A backslash makes the following *, [ or ] literal. In front of anything
    // else the backslash itself is literal text.
    func backslash() {
      i = i.successor()
      if i < endIndex {
        let c = input[i]
        if c == "*" || c == "[" || c == "]" {
          addTextToken(String(c))
          i = i.successor()
          return
        }
      }
      addTextToken("\\")
    }

    // Parses `[text](url)`, where `i` is at the opening bracket.
    //
    // On success, `i` is moved past the closing ) and the trimmed ranges of
    // the text and the URL are returned. On failure, `i` is moved just past
    // the opening bracket and nil is returned; no token is added, so it is up
    // to the caller to emit the characters that turned out to be literal.
    func parseLink() -> ((String.Index, String.Index), (String.Index, String.Index))? {
      let textStart = i.successor()

      // Loop until we find ] followed by ( followed by ). It's not a real link
      // unless it has text and a valid URL, but we're not that picky.
      // To be honest, this is where a regexp is the simpler solution. ;-)
      //
      // The scan uses its own indices and never advances one that is already
      // at `endIndex`, which may be the end of the input, where successor()
      // traps. Each index is advanced exactly once per character, so a ] that
      // directly follows another ] is examined as well.
      var j = textStart
      while j < endIndex {
        if input[j] == "]" {
          let textEnd = j
          let parenIndex = j.successor()
          if parenIndex < endIndex && input[parenIndex] == "(" {
            let urlStart = parenIndex.successor()
            var k = urlStart
            while k < endIndex {
              if input[k] == ")" {
                let urlEnd = k
                i = k.successor()
                return ((eatLeadingWhitespace(textStart, textEnd),
                         eatTrailingWhitespace(textStart, textEnd)),
                        (eatLeadingWhitespace(urlStart, urlEnd),
                         eatTrailingWhitespace(urlStart, urlEnd)))
              }
              k = k.successor()
            }

            // There is no ) in the rest of the fragment, so no later ]( can
            // be completed either.
            break
          }
        }
        j = j.successor()
      }

      // This does not appear to be a validly formatted link.
      i = textStart
      return nil
    }

    func link() {
      if let ((textStart, textEnd), (urlStart, urlEnd)) = parseLink() {
        let tokenized = tokenize(startIndex: textStart, endIndex: textEnd)
        let fragment = Fragment.Text(tokens: tokenized)
        let url = input.substringWithRange(urlStart ..< urlEnd)
        let token = Token.Link(fragment: fragment, url: url)
        tokens.append(token)
      } else {
        // Not a link after all, so the bracket is literal text.
        addTextToken("[")
      }
    }

    func image() {
      i = i.successor()
      if i < endIndex && input[i] == "[" {
        if let ((captionStart, captionEnd), (urlStart, urlEnd)) = parseLink() {
          let caption = input.substringWithRange(captionStart ..< captionEnd)
          let url = input.substringWithRange(urlStart ..< urlEnd)
          let token = Token.Image(caption: caption, url: url)
          tokens.append(token)
        } else {
          // Not an image after all. Both characters that were consumed are
          // literal text, the exclamation mark as well as the bracket.
          addTextToken("!")
          addTextToken("[")
        }
      } else {
        addTextToken("!")
      }
    }

    // These are exactly the characters that the main loop below treats
    // specially; a word runs up to the next one of them.
    func endsWord(c: Character) -> Bool {
      return c == " "  || c == "\t" || c == "\n" || c == "\r" ||                          // whitespace
             c == "'"  || c == "<"  || c == ">"  || c == "&"  || c == "-" || c == "." ||  // escapes
             c == "`"  || c == "\"" || c == "*"  || c == "~"  ||                          // span
             c == "\\" || c == "["  || c == "!"
    }

    func word() {
      let wordStart = i
      while i < endIndex && !endsWord(input[i]) {
        i = i.successor()
      }
      addTextToken(input.substringWithRange(wordStart ..< i))
    }

    while i < endIndex {
      switch input[i] {
      case " ", "\t", "\r":
        spacing()
      case "'", "<", ">", "&", "-", ".":
        escape()
      case "`", "\"", "*", "~":
        span()
      case "\\":
        backslash()
      case "[":
        link()
      case "!":
        image()
      case "\n":
        fatalError("fragments should not contain newlines")
      default:
        word()
      }
    }
    return tokens
  }
}

// MARK: - HTML Rendering

// Maps the symbol of an .Escape token to the HTML that is rendered for it.
// Rendering force-unwraps the lookup, so every symbol that the tokenizer can
// emit as an escape must have an entry here.
// NOTE(review): the tokenizer appears to emit a single dash as a .Text token
// rather than an .Escape, in which case the "-" entry is never used -- confirm.
private let escapeTable = [
  "'": "&#8217;",
  "<": "&lt;",
  ">": "&gt;",
  "&": "&amp;",
  "-": "&#8211;",
  "--": "&#8212;",
  "...": "&#8230;",
]

// Maps the symbol of an .Open token to the HTML that begins the span.
private let openTable = [
  "`": "<code>",
  "\"": "&#8220;",
  "*": "<em>",
  "**": "<strong>",
  "~~": "<del>",
]

// Maps the symbol of a .Close token to the HTML that ends the span. This has
// the same keys as `openTable`.
private let closeTable = [
  "`": "</code>",
  "\"": "&#8221;",
  "*": "</em>",
  "**": "</strong>",
  "~~": "</del>",
]

private extension Markdown.Token {
  /*
   * Renders a single token as HTML. The symbols of escapes and spans are
   * looked up in the tables; a symbol without an entry is a programming error
   * and traps.
   */
  func toHTML() -> String {
    switch self {
    case .Spacing:
      return " "

    case .Text(let text):
      return text

    case .Escape(let symbol):
      return escapeTable[symbol]!

    case .Open(let symbol):
      return openTable[symbol]!

    case .Close(let symbol):
      return closeTable[symbol]!

    case .Link(let fragment, let url):
      let href = url.escape()
      return "<a href=\"" + href + "\">" + fragment.toHTML() + "</a>"

    case .Image(let caption, let url):
      let src = url.escape()
      let alt = caption.escape()
      return "<img src=\"" + src + "\" alt=\"" + alt + "\">"
    }
  }
}

private extension Markdown.Fragment {
  /*
   * Renders the fragment as HTML: text is the concatenation of its rendered
   * tokens, code is the HTML-escaped source.
   */
  func toHTML() -> String {
    switch self {
    case .Text(let tokens):
      return tokens.reduce("") { (html, token) in html + token.toHTML() }
    case .Code(let string):
      return string.escape()
    }
  }

  // If the text block contains only one image token, then we turn it into
  // a <figure> instead of a <p> paragraph.
  func isImageOnly() -> Bool {
    guard case .Text(let tokens) = self where tokens.count == 1 else {
      return false
    }
    if case .Image = tokens[0] {
      return true
    }
    return false
  }
}

private extension Markdown.Block {
  // True if the block consists of a single fragment holding a single image.
  func isImageOnly() -> Bool {
    return fragments.count == 1 && fragments[0].isImageOnly()
  }

  /*
   * Renders all fragments of the block, with `separator` in between.
   * The block must have at least one fragment.
   */
  func formatFragments(separator: String = "<br>\n") -> String {
    precondition(fragments.count > 0)
    // Joining a single element yields just that element, so the block with
    // one fragment needs no special treatment.
    let rendered = fragments.map { $0.toHTML() }
    return separator.join(rendered)
  }

  /*
   * Renders the block as HTML. The elements that surround a run of list
   * items or quotes are not part of this; the caller has to add them.
   */
  func toHTML() -> String {
    switch type {
    case .Empty:
      fatalError("should not happen")

    case .Header(let level):
      let tag = "h\(level)"
      return "<" + tag + ">" + formatFragments() + "</" + tag + ">\n\n"

    case .Text:
      if isImageOnly() {
        return "<figure class=\"image\">" + formatFragments() + "</figure>\n\n"
      }
      return "<p>" + formatFragments("\n") + "</p>\n\n"

    case .Quote:
      return "<p>" + formatFragments() + "</p>\n"

    case .Code(let language):
      // The language, if any, becomes the class of the <code> element.
      let classAttribute = language.isEmpty ? "" : " class=\"\(language.escape())\""
      return "<figure class=\"code\"><pre><code" + classAttribute + ">"
           + formatFragments() + "</code></pre></figure>\n\n"

    case .CodeIndented:
      return "<figure class=\"code\"><pre><code>" + formatFragments("\n") + "</code></pre></figure>\n\n"

    case .ListItem:
      return "<li>" + formatFragments() + "</li>\n"
    }
  }
}

extension Markdown {
  /*
   * Renders the document as HTML.
   *
   * Every block renders itself. What is added here are the elements that
   * surround a run of consecutive blocks: <blockquote> around quotes, and
   * <ol> or <ul> around list items.
   */
  func renderHTML() -> String {
    var s = ""
    var insideQuote = false
    var insideList = false
    var listIsOrdered = false

    func closeQuote() {
      s += "</blockquote>\n\n"
      insideQuote = false
    }

    func closeList() {
      s += listIsOrdered ? "</ol>\n\n" : "</ul>\n\n"
      insideList = false
    }

    for block in blocks {
      let isQuote = block.type.isQuote()
      let isListItem = block.type.isListItem()
      let isOrdered = block.type.isOrderedList()

      // Close whatever the previous blocks left open before opening anything
      // new, so that the elements nest properly when a quote directly follows
      // a list or the other way around. A list also ends when the next item
      // is of the other kind: an unordered item does not belong in an <ol>.
      if insideQuote && !isQuote {
        closeQuote()
      }
      if insideList && (!isListItem || isOrdered != listIsOrdered) {
        closeList()
      }

      if isQuote && !insideQuote {
        s += "<blockquote>\n"
        insideQuote = true
      }
      if isListItem && !insideList {
        listIsOrdered = isOrdered
        s += listIsOrdered ? "<ol>\n" : "<ul>\n"
        insideList = true
      }

      s += block.toHTML()
    }

    // The document may end with a quote or a list, in which case there is no
    // following block that would have closed it.
    if insideQuote {
      closeQuote()
    }
    if insideList {
      closeList()
    }
    return s
  }
}

Barfdown.markdown

Rendered
Source

Barfdown: a Markdown-ish Parser Written in Swift

Goals for this project:

Parse a simplified version of Markdown that is good enough for writing my blog posts.
Be reasonably efficient. This means the parser shouldn't copy substrings around if not necessary. This is done by storing all the elements as indexes into the original text.
Be small and therefore be easy to understand.
No regular expressions. They are the lazy person's solution to parsing. ;-)

This is just a toy project for me to experiment with writing parsers in Swift. Because why not?

There may be bugs.

Supported Markdown

The Markdown tags that are currently supported:

# Headers

*italics*
**bold**
~~strikethrough~~
`code`

1. numbered
2. list

- unordered
- list

> quote

[Link text](http://url)

![Alt text](images/image.png)

    source code (indented 1 tab or 4 spaces)

 ```language

\*   literal asterisk
\[   literal [
\]   literal ]

The following are not supported:

Tables
Horizontal rules
Nested lists
Unordered lists starting with *
Headers that are underlined
Line breaks (line ending in two spaces)
Probably lots of other stuff...

This could really do with a good test suite. ;-)

How to use it

Create a new Markdown instance and give it a String. The parser creates a tree structure that describes the Markdown document. You can either step through that tree yourself or simply call renderHTML() to convert it to HTML.

if let data = NSData(contentsOfFile: path) {
  if let text = NSString(data: data, encoding: NSUTF8StringEncoding) {
    let m = Markdown(string: text as String)
    let s = m.renderHTML()
    print("<!DOCTYPE html><html><head><meta charset=\"utf-8\"/></head><body>\n")
    print(s, appendNewline: false)
    print("</body></html>\n")
  }
}

Why not regexps?

They definitely make parsing easier but I don't like throwing a handful of regexps at a parsing problem.

When you use a regular expression, it is turned into a state machine by the regex parser. Here I've basically "unrolled" all those state machines by hand.

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

A markdown-ish parser written in Swift 2.0

Barfdown: a Markdown-ish Parser Written in Swift

Supported Markdown

How to use it

Why not regexps?