| -- #!/usr/bin/env lua
|
|
|
| --[[
|
| # markdown.lua -- version 0.32
|
|
|
| <http://www.frykholm.se/files/markdown.lua>
|
|
|
| **Author:** Niklas Frykholm, <niklas@frykholm.se>
|
| **Date:** 31 May 2008
|
|
|
| This is an implementation of the popular text markup language Markdown in pure Lua.
|
| Markdown can convert documents written in a simple and easy to read text format
|
| to well-formatted HTML. For a more thourough description of Markdown and the Markdown
|
| syntax, see <http://daringfireball.net/projects/markdown>.
|
|
|
| The original Markdown source is written in Perl and makes heavy use of advanced
|
| regular expression techniques (such as negative look-ahead, etc) which are not available
|
| in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground
|
| up. It is probably not completely bug free. If you notice any bugs, please report them to
|
| me. A unit test that exposes the error is helpful.
|
|
|
| ## Usage
|
|
|
| require "markdown"
|
| markdown(source)
|
|
|
| ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the
|
| Markdown transformation to the specified string.
|
|
|
| ``markdown.lua`` can also be used directly from the command line:
|
|
|
| lua markdown.lua test.md
|
|
|
| Creates a file ``test.html`` with the converted content of ``test.md``. Run:
|
|
|
| lua markdown.lua -h
|
|
|
| For a description of the command-line options.
|
|
|
| ``markdown.lua`` uses the same license as Lua, the MIT license.
|
|
|
| ## License
|
|
|
| Copyright © 2008 Niklas Frykholm.
|
|
|
| Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
| software and associated documentation files (the "Software"), to deal in the Software
|
| without restriction, including without limitation the rights to use, copy, modify, merge,
|
| publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
| to whom the Software is furnished to do so, subject to the following conditions:
|
|
|
| The above copyright notice and this permission notice shall be included in all copies
|
| or substantial portions of the Software.
|
|
|
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
| THE SOFTWARE.
|
|
|
| ## Version history
|
|
|
| - **0.32** -- 31 May 2008
|
| - Fix for links containing brackets
|
| - **0.31** -- 1 Mar 2008
|
| - Fix for link definitions followed by spaces
|
| - **0.30** -- 25 Feb 2008
|
| - Consistent behavior with Markdown when the same link reference is reused
|
| - **0.29** -- 24 Feb 2008
|
| - Fix for <pre> blocks with spaces in them
|
| - **0.28** -- 18 Feb 2008
|
| - Fix for link encoding
|
| - **0.27** -- 14 Feb 2008
|
| - Fix for link database links with ()
|
| - **0.26** -- 06 Feb 2008
|
| - Fix for nested italic and bold markers
|
| - **0.25** -- 24 Jan 2008
|
| - Fix for encoding of naked <
|
| - **0.24** -- 21 Jan 2008
|
| - Fix for link behavior.
|
| - **0.23** -- 10 Jan 2008
|
| - Fix for a regression bug in longer expressions in italic or bold.
|
| - **0.22** -- 27 Dec 2007
|
| - Fix for crash when processing blocks with a percent sign in them.
|
| - **0.21** -- 27 Dec 2007
|
| - Fix for combined strong and emphasis tags
|
| - **0.20** -- 13 Oct 2007
|
| - Fix for < as well in image titles, now matches Dingus behavior
|
| - **0.19** -- 28 Sep 2007
|
| - Fix for quotation marks " and ampersands & in link and image titles.
|
| - **0.18** -- 28 Jul 2007
|
| - Does not crash on unmatched tags (behaves like standard markdown)
|
| - **0.17** -- 12 Apr 2007
|
| - Fix for links with %20 in them.
|
| - **0.16** -- 12 Apr 2007
|
| - Do not require arg global to exist.
|
| - **0.15** -- 28 Aug 2006
|
| - Better handling of links with underscores in them.
|
| - **0.14** -- 22 Aug 2006
|
| - Bug for *`foo()`*
|
| - **0.13** -- 12 Aug 2006
|
| - Added -l option for including stylesheet inline in document.
|
| - Fixed bug in -s flag.
|
| - Fixed emphasis bug.
|
| - **0.12** -- 15 May 2006
|
| - Fixed several bugs to comply with MarkdownTest 1.0 <http://six.pairlist.net/pipermail/markdown-discuss/2004-December/000909.html>
|
| - **0.11** -- 12 May 2006
|
| - Fixed bug for escaping `*` and `_` inside code spans.
|
| - Added license terms.
|
| - Changed join() to table.concat().
|
| - **0.10** -- 3 May 2006
|
| - Initial public release.
|
|
|
| // Niklas
|
| ]]
|
|
|
|
|
| -- Set up a table for holding local functions to avoid polluting the global namespace
|
| local M = {}
|
| local MT = {__index = _G}
|
| setmetatable(M, MT)
|
| setfenv(1, M)
|
|
|
| ----------------------------------------------------------------------
|
| -- Utility functions
|
| ----------------------------------------------------------------------
|
|
|
| -- Locks table t from changes, writes an error if someone attempts to change the table.
|
| -- This is useful for detecting variables that have "accidently" been made global. Something
|
| -- I tend to do all too much.
|
| function lock(t)
|
| function lock_new_index(t, k, v)
|
| error("module has been locked -- " .. k .. " must be declared local", 2)
|
| end
|
|
|
| local mt = {__newindex = lock_new_index}
|
| if getmetatable(t) then mt.__index = getmetatable(t).__index end
|
| setmetatable(t, mt)
|
| end
|
|
|
| -- Returns the result of mapping the values in table t through the function f
|
| function map(t, f)
|
| local out = {}
|
| for k,v in pairs(t) do out[k] = f(v,k) end
|
| return out
|
| end
|
|
|
| -- The identity function, useful as a placeholder.
|
| function identity(text) return text end
|
|
|
| -- Functional style if statement. (NOTE: no short circuit evaluation)
|
| function iff(t, a, b) if t then return a else return b end end
|
|
|
| -- Splits the text into an array of separate lines.
|
| function split(text, sep)
|
| sep = sep or "\n"
|
| local lines = {}
|
| local pos = 1
|
| while true do
|
| local b,e = text:find(sep, pos)
|
| if not b then table.insert(lines, text:sub(pos)) break end
|
| table.insert(lines, text:sub(pos, b-1))
|
| pos = e + 1
|
| end
|
| return lines
|
| end
|
|
|
| -- Converts tabs to spaces
|
| function detab(text)
|
| local tab_width = 4
|
| local function rep(match)
|
| local spaces = -match:len()
|
| while spaces<1 do spaces = spaces + tab_width end
|
| return match .. string.rep(" ", spaces)
|
| end
|
| text = text:gsub("([^\n]-)\t", rep)
|
| return text
|
| end
|
|
|
| -- Applies string.find for every pattern in the list and returns the first match
|
| function find_first(s, patterns, index)
|
| local res = {}
|
| for _,p in ipairs(patterns) do
|
| local match = {s:find(p, index)}
|
| if #match>0 and (#res==0 or match[1] < res[1]) then res = match end
|
| end
|
| return unpack(res)
|
| end
|
|
|
| -- If a replacement array is specified, the range [start, stop] in the array is replaced
|
| -- with the replacement array and the resulting array is returned. Without a replacement
|
| -- array the section of the array between start and stop is returned.
|
| function splice(array, start, stop, replacement)
|
| if replacement then
|
| local n = stop - start + 1
|
| while n > 0 do
|
| table.remove(array, start)
|
| n = n - 1
|
| end
|
| for i,v in ipairs(replacement) do
|
| table.insert(array, start, v)
|
| end
|
| return array
|
| else
|
| local res = {}
|
| for i = start,stop do
|
| table.insert(res, array[i])
|
| end
|
| return res
|
| end
|
| end
|
|
|
| -- Outdents the text one step.
|
| function outdent(text)
|
| text = "\n" .. text
|
| text = text:gsub("\n ? ? ?", "\n")
|
| text = text:sub(2)
|
| return text
|
| end
|
|
|
| -- Indents the text one step.
|
| function indent(text)
|
| text = text:gsub("\n", "\n ")
|
| return text
|
| end
|
|
|
| -- Does a simple tokenization of html data. Returns the data as a list of tokens.
|
| -- Each token is a table with a type field (which is either "tag" or "text") and
|
| -- a text field (which contains the original token data).
|
| function tokenize_html(html)
|
| local tokens = {}
|
| local pos = 1
|
| while true do
|
| local start = find_first(html, {"<!%-%-", "<[a-z/!$]", "<%?"}, pos)
|
| if not start then
|
| table.insert(tokens, {type="text", text=html:sub(pos)})
|
| break
|
| end
|
| if start ~= pos then table.insert(tokens, {type="text", text = html:sub(pos, start-1)}) end
|
|
|
| local _, stop
|
| if html:match("^<!%-%-", start) then
|
| _,stop = html:find("%-%->", start)
|
| elseif html:match("^<%?", start) then
|
| _,stop = html:find("?>", start)
|
| else
|
| _,stop = html:find("%b<>", start)
|
| end
|
| if not stop then
|
| -- error("Could not match html tag " .. html:sub(start,start+30))
|
| table.insert(tokens, {type="text", text=html:sub(start, start)})
|
| pos = start + 1
|
| else
|
| table.insert(tokens, {type="tag", text=html:sub(start, stop)})
|
| pos = stop + 1
|
| end
|
| end
|
| return tokens
|
| end
|
|
|
| ----------------------------------------------------------------------
|
| -- Hash
|
| ----------------------------------------------------------------------
|
|
|
| -- This is used to "hash" data into alphanumeric strings that are unique
|
| -- in the document. (Note that this is not cryptographic hash, the hash
|
| -- function is not one-way.) The hash procedure is used to protect parts
|
| -- of the document from further processing.
|
|
|
| local HASH = {
|
| -- Has the hash been inited.
|
| inited = false,
|
|
|
| -- The unique string prepended to all hash values. This is to ensure
|
| -- that hash values do not accidently coincide with an actual existing
|
| -- string in the document.
|
| identifier = "",
|
|
|
| -- Counter that counts up for each new hash instance.
|
| counter = 0,
|
|
|
| -- Hash table.
|
| table = {}
|
| }
|
|
|
| -- Inits hashing. Creates a hash_identifier that doesn't occur anywhere
|
| -- in the text.
|
| function init_hash(text)
|
| HASH.inited = true
|
| HASH.identifier = ""
|
| HASH.counter = 0
|
| HASH.table = {}
|
|
|
| local s = "HASH"
|
| local counter = 0
|
| local id
|
| while true do
|
| id = s .. counter
|
| if not text:find(id, 1, true) then break end
|
| counter = counter + 1
|
| end
|
| HASH.identifier = id
|
| end
|
|
|
| -- Returns the hashed value for s.
|
| function hash(s)
|
| assert(HASH.inited)
|
| if not HASH.table[s] then
|
| HASH.counter = HASH.counter + 1
|
| local id = HASH.identifier .. HASH.counter .. "X"
|
| HASH.table[s] = id
|
| end
|
| return HASH.table[s]
|
| end
|
|
|
| ----------------------------------------------------------------------
|
| -- Protection
|
| ----------------------------------------------------------------------
|
|
|
| -- The protection module is used to "protect" parts of a document
|
| -- so that they are not modified by subsequent processing steps.
|
| -- Protected parts are saved in a table for later unprotection
|
|
|
| -- Protection data
|
| local PD = {
|
| -- Saved blocks that have been converted
|
| blocks = {},
|
|
|
| -- Block level tags that will be protected
|
| tags = {"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
|
| "pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset",
|
| "iframe", "math", "ins", "del"}
|
| }
|
|
|
| -- Pattern for matching a block tag that begins and ends in the leftmost
|
| -- column and may contain indented subtags, i.e.
|
| -- <div>
|
| -- A nested block.
|
| -- <div>
|
| -- Nested data.
|
| -- </div>
|
| -- </div>
|
| function block_pattern(tag)
|
| return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n"
|
| end
|
|
|
| -- Pattern for matching a block tag that begins and ends with a newline
|
| function line_pattern(tag)
|
| return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n"
|
| end
|
|
|
| -- Protects the range of characters from start to stop in the text and
|
| -- returns the protected string.
|
| function protect_range(text, start, stop)
|
| local s = text:sub(start, stop)
|
| local h = hash(s)
|
| PD.blocks[h] = s
|
| text = text:sub(1,start) .. h .. text:sub(stop)
|
| return text
|
| end
|
|
|
| -- Protect every part of the text that matches any of the patterns. The first
|
| -- matching pattern is protected first, etc.
|
| function protect_matches(text, patterns)
|
| while true do
|
| local start, stop = find_first(text, patterns)
|
| if not start then break end
|
| text = protect_range(text, start, stop)
|
| end
|
| return text
|
| end
|
|
|
| -- Protects blocklevel tags in the specified text
|
| function protect(text)
|
| -- First protect potentially nested block tags
|
| text = protect_matches(text, map(PD.tags, block_pattern))
|
| -- Then protect block tags at the line level.
|
| text = protect_matches(text, map(PD.tags, line_pattern))
|
| -- Protect <hr> and comment tags
|
| text = protect_matches(text, {"\n<hr[^>]->[ \t]*\n"})
|
| text = protect_matches(text, {"\n<!%-%-.-%-%->[ \t]*\n"})
|
| return text
|
| end
|
|
|
| -- Returns true if the string s is a hash resulting from protection
|
| function is_protected(s)
|
| return PD.blocks[s]
|
| end
|
|
|
| -- Unprotects the specified text by expanding all the nonces
|
| function unprotect(text)
|
| for k,v in pairs(PD.blocks) do
|
| v = v:gsub("%%", "%%%%")
|
| text = text:gsub(k, v)
|
| end
|
| return text
|
| end
|
|
|
|
|
| ----------------------------------------------------------------------
|
| -- Block transform
|
| ----------------------------------------------------------------------
|
|
|
| -- The block transform functions transform the text on the block level.
|
| -- They work with the text as an array of lines rather than as individual
|
| -- characters.
|
|
|
| -- Returns true if the line is a ruler of (char) characters.
|
| -- The line must contain at least three char characters and contain only spaces and
|
| -- char characters.
|
| function is_ruler_of(line, char)
|
| if not line:match("^[ %" .. char .. "]*$") then return false end
|
| if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end
|
| return true
|
| end
|
|
|
| -- Identifies the block level formatting present in the line
|
| function classify(line)
|
| local info = {line = line, text = line}
|
|
|
| if line:match("^ ") then
|
| info.type = "indented"
|
| info.outdented = line:sub(5)
|
| return info
|
| end
|
|
|
| for _,c in ipairs({'*', '-', '_', '='}) do
|
| if is_ruler_of(line, c) then
|
| info.type = "ruler"
|
| info.ruler_char = c
|
| return info
|
| end
|
| end
|
|
|
| if line == "" then
|
| info.type = "blank"
|
| return info
|
| end
|
|
|
| if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then
|
| local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
|
| info.type = "header"
|
| info.level = m1:len()
|
| info.text = m2
|
| return info
|
| end
|
|
|
| if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then
|
| local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
|
| info.type = "list_item"
|
| info.list_type = "numeric"
|
| info.number = 0 + number
|
| info.text = text
|
| return info
|
| end
|
|
|
| if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then
|
| local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
|
| info.type = "list_item"
|
| info.list_type = "bullet"
|
| info.bullet = bullet
|
| info.text= text
|
| return info
|
| end
|
|
|
| if line:match("^>[ \t]?(.*)") then
|
| info.type = "blockquote"
|
| info.text = line:match("^>[ \t]?(.*)")
|
| return info
|
| end
|
|
|
| if is_protected(line) then
|
| info.type = "raw"
|
| info.html = unprotect(line)
|
| return info
|
| end
|
|
|
| info.type = "normal"
|
| return info
|
| end
|
|
|
| -- Find headers constisting of a normal line followed by a ruler and converts them to
|
| -- header entries.
|
| function headers(array)
|
| local i = 1
|
| while i <= #array - 1 do
|
| if array[i].type == "normal" and array[i+1].type == "ruler" and
|
| (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then
|
| local info = {line = array[i].line}
|
| info.text = info.line
|
| info.type = "header"
|
| info.level = iff(array[i+1].ruler_char == "=", 1, 2)
|
| table.remove(array, i+1)
|
| array[i] = info
|
| end
|
| i = i + 1
|
| end
|
| return array
|
| end
|
|
|
| -- Find list blocks and convert them to protected data blocks
|
| function lists(array, sublist)
|
| local function process_list(arr)
|
| local function any_blanks(arr)
|
| for i = 1, #arr do
|
| if arr[i].type == "blank" then return true end
|
| end
|
| return false
|
| end
|
|
|
| local function split_list_items(arr)
|
| local acc = {arr[1]}
|
| local res = {}
|
| for i=2,#arr do
|
| if arr[i].type == "list_item" then
|
| table.insert(res, acc)
|
| acc = {arr[i]}
|
| else
|
| table.insert(acc, arr[i])
|
| end
|
| end
|
| table.insert(res, acc)
|
| return res
|
| end
|
|
|
| local function process_list_item(lines, block)
|
| while lines[#lines].type == "blank" do
|
| table.remove(lines)
|
| end
|
|
|
| local itemtext = lines[1].text
|
| for i=2,#lines do
|
| itemtext = itemtext .. "\n" .. outdent(lines[i].line)
|
| end
|
| if block then
|
| itemtext = block_transform(itemtext, true)
|
| if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
|
| return " <li>" .. itemtext .. "</li>"
|
| else
|
| local lines = split(itemtext)
|
| lines = map(lines, classify)
|
| lines = lists(lines, true)
|
| lines = blocks_to_html(lines, true)
|
| itemtext = table.concat(lines, "\n")
|
| if not itemtext:find("<pre>") then itemtext = indent(itemtext) end
|
| return " <li>" .. itemtext .. "</li>"
|
| end
|
| end
|
|
|
| local block_list = any_blanks(arr)
|
| local items = split_list_items(arr)
|
| local out = ""
|
| for _, item in ipairs(items) do
|
| out = out .. process_list_item(item, block_list) .. "\n"
|
| end
|
| if arr[1].list_type == "numeric" then
|
| return "<ol>\n" .. out .. "</ol>"
|
| else
|
| return "<ul>\n" .. out .. "</ul>"
|
| end
|
| end
|
|
|
| -- Finds the range of lines composing the first list in the array. A list
|
| -- starts with (^ list_item) or (blank list_item) and ends with
|
| -- (blank* $) or (blank normal).
|
| --
|
| -- A sublist can start with just (list_item) does not need a blank...
|
| local function find_list(array, sublist)
|
| local function find_list_start(array, sublist)
|
| if array[1].type == "list_item" then return 1 end
|
| if sublist then
|
| for i = 1,#array do
|
| if array[i].type == "list_item" then return i end
|
| end
|
| else
|
| for i = 1, #array-1 do
|
| if array[i].type == "blank" and array[i+1].type == "list_item" then
|
| return i+1
|
| end
|
| end
|
| end
|
| return nil
|
| end
|
| local function find_list_end(array, start)
|
| local pos = #array
|
| for i = start, #array-1 do
|
| if array[i].type == "blank" and array[i+1].type ~= "list_item"
|
| and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then
|
| pos = i-1
|
| break
|
| end
|
| end
|
| while pos > start and array[pos].type == "blank" do
|
| pos = pos - 1
|
| end
|
| return pos
|
| end
|
|
|
| local start = find_list_start(array, sublist)
|
| if not start then return nil end
|
| return start, find_list_end(array, start)
|
| end
|
|
|
| while true do
|
| local start, stop = find_list(array, sublist)
|
| if not start then break end
|
| local text = process_list(splice(array, start, stop))
|
| local info = {
|
| line = text,
|
| type = "raw",
|
| html = text
|
| }
|
| array = splice(array, start, stop, {info})
|
| end
|
|
|
| -- Convert any remaining list items to normal
|
| for _,line in ipairs(array) do
|
| if line.type == "list_item" then line.type = "normal" end
|
| end
|
|
|
| return array
|
| end
|
|
|
| -- Find and convert blockquote markers.
|
| function blockquotes(lines)
|
| local function find_blockquote(lines)
|
| local start
|
| for i,line in ipairs(lines) do
|
| if line.type == "blockquote" then
|
| start = i
|
| break
|
| end
|
| end
|
| if not start then return nil end
|
|
|
| local stop = #lines
|
| for i = start+1, #lines do
|
| if lines[i].type == "blank" or lines[i].type == "blockquote" then
|
| elseif lines[i].type == "normal" then
|
| if lines[i-1].type == "blank" then stop = i-1 break end
|
| else
|
| stop = i-1 break
|
| end
|
| end
|
| while lines[stop].type == "blank" do stop = stop - 1 end
|
| return start, stop
|
| end
|
|
|
| local function process_blockquote(lines)
|
| local raw = lines[1].text
|
| for i = 2,#lines do
|
| raw = raw .. "\n" .. lines[i].text
|
| end
|
| local bt = block_transform(raw)
|
| if not bt:find("<pre>") then bt = indent(bt) end
|
| return "<blockquote>\n " .. bt ..
|
| "\n</blockquote>"
|
| end
|
|
|
| while true do
|
| local start, stop = find_blockquote(lines)
|
| if not start then break end
|
| local text = process_blockquote(splice(lines, start, stop))
|
| local info = {
|
| line = text,
|
| type = "raw",
|
| html = text
|
| }
|
| lines = splice(lines, start, stop, {info})
|
| end
|
| return lines
|
| end
|
|
|
| -- Find and convert codeblocks.
|
| function codeblocks(lines)
|
| local function find_codeblock(lines)
|
| local start
|
| for i,line in ipairs(lines) do
|
| if line.type == "indented" then start = i break end
|
| end
|
| if not start then return nil end
|
|
|
| local stop = #lines
|
| for i = start+1, #lines do
|
| if lines[i].type ~= "indented" and lines[i].type ~= "blank" then
|
| stop = i-1
|
| break
|
| end
|
| end
|
| while lines[stop].type == "blank" do stop = stop - 1 end
|
| return start, stop
|
| end
|
|
|
| local function process_codeblock(lines)
|
| local raw = detab(encode_code(outdent(lines[1].line)))
|
| for i = 2,#lines do
|
| raw = raw .. "\n" .. detab(encode_code(outdent(lines[i].line)))
|
| end
|
| return "<pre><code>" .. raw .. "\n</code></pre>"
|
| end
|
|
|
| while true do
|
| local start, stop = find_codeblock(lines)
|
| if not start then break end
|
| local text = process_codeblock(splice(lines, start, stop))
|
| local info = {
|
| line = text,
|
| type = "raw",
|
| html = text
|
| }
|
| lines = splice(lines, start, stop, {info})
|
| end
|
| return lines
|
| end
|
|
|
| -- Convert lines to html code
|
| function blocks_to_html(lines, no_paragraphs)
|
| local out = {}
|
| local i = 1
|
| while i <= #lines do
|
| local line = lines[i]
|
| if line.type == "ruler" then
|
| table.insert(out, "<hr/>")
|
| elseif line.type == "raw" then
|
| table.insert(out, line.html)
|
| elseif line.type == "normal" then
|
| local s = line.line
|
|
|
| while i+1 <= #lines and lines[i+1].type == "normal" do
|
| i = i + 1
|
| s = s .. "\n" .. lines[i].line
|
| end
|
|
|
| if no_paragraphs then
|
| table.insert(out, span_transform(s))
|
| else
|
| table.insert(out, "<p>" .. span_transform(s) .. "</p>")
|
| end
|
| elseif line.type == "header" then
|
| local s = "<h" .. line.level .. ">" .. span_transform(line.text) .. "</h" .. line.level .. ">"
|
| table.insert(out, s)
|
| else
|
| table.insert(out, line.line)
|
| end
|
| i = i + 1
|
| end
|
| return out
|
| end
|
|
|
| -- Perform all the block level transforms
|
| function block_transform(text, sublist)
|
| local lines = split(text)
|
| lines = map(lines, classify)
|
| lines = headers(lines)
|
| lines = lists(lines, sublist)
|
| lines = codeblocks(lines)
|
| lines = blockquotes(lines)
|
| lines = blocks_to_html(lines)
|
| local text = table.concat(lines, "\n")
|
| return text
|
| end
|
|
|
| -- Debug function for printing a line array to see the result
|
| -- of partial transforms.
|
| function print_lines(lines)
|
| for i, line in ipairs(lines) do
|
| print(i, line.type, line.text or line.line)
|
| end
|
| end
|
|
|
| ----------------------------------------------------------------------
|
| -- Span transform
|
| ----------------------------------------------------------------------
|
|
|
| -- Functions for transforming the text at the span level.
|
|
|
| -- These characters may need to be escaped because they have a special
|
| -- meaning in markdown.
|
| escape_chars = "'\\`*_{}[]()>#+-.!'"
|
| escape_table = {}
|
|
|
| function init_escape_table()
|
| escape_table = {}
|
| for i = 1,#escape_chars do
|
| local c = escape_chars:sub(i,i)
|
| escape_table[c] = hash(c)
|
| end
|
| end
|
|
|
| -- Adds a new escape to the escape table.
|
| function add_escape(text)
|
| if not escape_table[text] then
|
| escape_table[text] = hash(text)
|
| end
|
| return escape_table[text]
|
| end
|
|
|
| -- Escape characters that should not be disturbed by markdown.
|
| function escape_special_chars(text)
|
| local tokens = tokenize_html(text)
|
|
|
| local out = ""
|
| for _, token in ipairs(tokens) do
|
| local t = token.text
|
| if token.type == "tag" then
|
| -- In tags, encode * and _ so they don't conflict with their use in markdown.
|
| t = t:gsub("%*", escape_table["*"])
|
| t = t:gsub("%_", escape_table["_"])
|
| else
|
| t = encode_backslash_escapes(t)
|
| end
|
| out = out .. t
|
| end
|
| return out
|
| end
|
|
|
| -- Encode backspace-escaped characters in the markdown source.
|
| function encode_backslash_escapes(t)
|
| for i=1,escape_chars:len() do
|
| local c = escape_chars:sub(i,i)
|
| t = t:gsub("\\%" .. c, escape_table[c])
|
| end
|
| return t
|
| end
|
|
|
| -- Unescape characters that have been encoded.
|
| function unescape_special_chars(t)
|
| local tin = t
|
| for k,v in pairs(escape_table) do
|
| k = k:gsub("%%", "%%%%")
|
| t = t:gsub(v,k)
|
| end
|
| if t ~= tin then t = unescape_special_chars(t) end
|
| return t
|
| end
|
|
|
| -- Encode/escape certain characters inside Markdown code runs.
|
| -- The point is that in code, these characters are literals,
|
| -- and lose their special Markdown meanings.
|
| function encode_code(s)
|
| s = s:gsub("%&", "&")
|
| s = s:gsub("<", "<")
|
| s = s:gsub(">", ">")
|
| for k,v in pairs(escape_table) do
|
| s = s:gsub("%"..k, v)
|
| end
|
| return s
|
| end
|
|
|
| -- Handle backtick blocks.
|
| function code_spans(s)
|
| s = s:gsub("\\\\", escape_table["\\"])
|
| s = s:gsub("\\`", escape_table["`"])
|
|
|
| local pos = 1
|
| while true do
|
| local start, stop = s:find("`+", pos)
|
| if not start then return s end
|
| local count = stop - start + 1
|
| -- Find a matching numbert of backticks
|
| local estart, estop = s:find(string.rep("`", count), stop+1)
|
| local brstart = s:find("\n", stop+1)
|
| if estart and (not brstart or estart < brstart) then
|
| local code = s:sub(stop+1, estart-1)
|
| code = code:gsub("^[ \t]+", "")
|
| code = code:gsub("[ \t]+$", "")
|
| code = code:gsub(escape_table["\\"], escape_table["\\"] .. escape_table["\\"])
|
| code = code:gsub(escape_table["`"], escape_table["\\"] .. escape_table["`"])
|
| code = "<code>" .. encode_code(code) .. "</code>"
|
| code = add_escape(code)
|
| s = s:sub(1, start-1) .. code .. s:sub(estop+1)
|
| pos = start + code:len()
|
| else
|
| pos = stop + 1
|
| end
|
| end
|
| return s
|
| end
|
|
|
| -- Encode alt text... enodes &, and ".
|
| function encode_alt(s)
|
| if not s then return s end
|
| s = s:gsub('&', '&')
|
| s = s:gsub('"', '"')
|
| s = s:gsub('<', '<')
|
| return s
|
| end
|
|
|
| -- Handle image references
|
| function images(text)
|
| local function reference_link(alt, id)
|
| alt = encode_alt(alt:match("%b[]"):sub(2,-2))
|
| id = id:match("%[(.*)%]"):lower()
|
| if id == "" then id = text:lower() end
|
| link_database[id] = link_database[id] or {}
|
| if not link_database[id].url then return nil end
|
| local url = link_database[id].url or id
|
| url = encode_alt(url)
|
| local title = encode_alt(link_database[id].title)
|
| if title then title = " title=\"" .. title .. "\"" else title = "" end
|
| return add_escape ('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>")
|
| end
|
|
|
| local function inline_link(alt, link)
|
| alt = encode_alt(alt:match("%b[]"):sub(2,-2))
|
| local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
|
| url = url or link:match("%(<?(.-)>?%)")
|
| url = encode_alt(url)
|
| title = encode_alt(title)
|
| if title then
|
| return add_escape('<img src="' .. url .. '" alt="' .. alt .. '" title="' .. title .. '"/>')
|
| else
|
| return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
|
| end
|
| end
|
|
|
| text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
|
| text = text:gsub("!(%b[])(%b())", inline_link)
|
| return text
|
| end
|
|
|
| -- Handle anchor references
|
| function anchors(text)
|
| local function reference_link(text, id)
|
| text = text:match("%b[]"):sub(2,-2)
|
| id = id:match("%b[]"):sub(2,-2):lower()
|
| if id == "" then id = text:lower() end
|
| link_database[id] = link_database[id] or {}
|
| if not link_database[id].url then return nil end
|
| local url = link_database[id].url or id
|
| url = encode_alt(url)
|
| local title = encode_alt(link_database[id].title)
|
| if title then title = " title=\"" .. title .. "\"" else title = "" end
|
| return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>")
|
| end
|
|
|
| local function inline_link(text, link)
|
| text = text:match("%b[]"):sub(2,-2)
|
| local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
|
| title = encode_alt(title)
|
| url = url or link:match("%(<?(.-)>?%)") or ""
|
| url = encode_alt(url)
|
| if title then
|
| return add_escape("<a href=\"" .. url .. "\" title=\"" .. title .. "\">") .. text .. "</a>"
|
| else
|
| return add_escape("<a href=\"" .. url .. "\">") .. text .. add_escape("</a>")
|
| end
|
| end
|
|
|
| text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
|
| text = text:gsub("(%b[])(%b())", inline_link)
|
| return text
|
| end
|
|
|
| -- Handle auto links, i.e. <http://www.google.com/>.
|
| function auto_links(text)
|
| local function link(s)
|
| return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>"
|
| end
|
| -- Encode chars as a mix of dec and hex entitites to (perhaps) fool
|
| -- spambots.
|
| local function encode_email_address(s)
|
| -- Use a deterministic encoding to make unit testing possible.
|
| -- Code 45% hex, 45% dec, 10% plain.
|
| local hex = {code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end, count = 1, rate = 0.45}
|
| local dec = {code = function(c) return "&#" .. c:byte() .. ";" end, count = 0, rate = 0.45}
|
| local plain = {code = function(c) return c end, count = 0, rate = 0.1}
|
| local codes = {hex, dec, plain}
|
| local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end
|
|
|
| local out = ""
|
| for i = 1,s:len() do
|
| for _,code in ipairs(codes) do code.count = code.count + code.rate end
|
| if codes[1].count < codes[2].count then swap(codes,1,2) end
|
| if codes[2].count < codes[3].count then swap(codes,2,3) end
|
| if codes[1].count < codes[2].count then swap(codes,1,2) end
|
|
|
| local code = codes[1]
|
| local c = s:sub(i,i)
|
| -- Force encoding of "@" to make email address more invisible.
|
| if c == "@" and code == plain then code = codes[2] end
|
| out = out .. code.code(c)
|
| code.count = code.count - 1
|
| end
|
| return out
|
| end
|
| local function mail(s)
|
| s = unescape_special_chars(s)
|
| local address = encode_email_address("mailto:" .. s)
|
| local text = encode_email_address(s)
|
| return add_escape("<a href=\"" .. address .. "\">") .. text .. "</a>"
|
| end
|
| -- links
|
| text = text:gsub("<(https?:[^'\">%s]+)>", link)
|
| text = text:gsub("<(ftp:[^'\">%s]+)>", link)
|
|
|
| -- mail
|
| text = text:gsub("<mailto:([^'\">%s]+)>", mail)
|
| text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail)
|
| return text
|
| end
|
|
|
| -- Encode free standing amps (&) and angles (<)... note that this does not
|
| -- encode free >.
|
| function amps_and_angles(s)
|
| -- encode amps not part of &..; expression
|
| local pos = 1
|
| while true do
|
| local amp = s:find("&", pos)
|
| if not amp then break end
|
| local semi = s:find(";", amp+1)
|
| local stop = s:find("[ \t\n&]", amp+1)
|
| if not semi or (stop and stop < semi) or (semi - amp) > 15 then
|
| s = s:sub(1,amp-1) .. "&" .. s:sub(amp+1)
|
| pos = amp+1
|
| else
|
| pos = amp+1
|
| end
|
| end
|
|
|
| -- encode naked <'s
|
| s = s:gsub("<([^a-zA-Z/?$!])", "<%1")
|
| s = s:gsub("<$", "<")
|
|
|
| -- what about >, nothing done in the original markdown source to handle them
|
| return s
|
| end
|
|
|
| -- Handles emphasis markers (* and _) in the text.
|
| function emphasis(text)
|
| for _, s in ipairs {"%*%*", "%_%_"} do
|
| text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>")
|
| text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>")
|
| end
|
| for _, s in ipairs {"%*", "%_"} do
|
| text = text:gsub(s .. "([^%s_])" .. s, "<em>%1</em>")
|
| text = text:gsub(s .. "(<strong>[^%s_]</strong>)" .. s, "<em>%1</em>")
|
| text = text:gsub(s .. "([^%s_][^<>_]-[^%s_])" .. s, "<em>%1</em>")
|
| text = text:gsub(s .. "([^<>_]-<strong>[^<>_]-</strong>[^<>_]-)" .. s, "<em>%1</em>")
|
| end
|
| return text
|
| end
|
|
|
| -- Handles line break markers in the text.
|
| function line_breaks(text)
|
| return text:gsub(" +\n", " <br/>\n")
|
| end
|
|
|
| -- Perform all span level transforms.
|
| function span_transform(text)
|
| text = code_spans(text)
|
| text = escape_special_chars(text)
|
| text = images(text)
|
| text = anchors(text)
|
| text = auto_links(text)
|
| text = amps_and_angles(text)
|
| text = emphasis(text)
|
| text = line_breaks(text)
|
| return text
|
| end
|
|
|
| ----------------------------------------------------------------------
|
| -- Markdown
|
| ----------------------------------------------------------------------
|
|
|
| -- Cleanup the text by normalizing some possible variations to make further
|
| -- processing easier.
|
| function cleanup(text)
|
| -- Standardize line endings
|
| text = text:gsub("\r\n", "\n") -- DOS to UNIX
|
| text = text:gsub("\r", "\n") -- Mac to UNIX
|
|
|
| -- Convert all tabs to spaces
|
| text = detab(text)
|
|
|
| -- Strip lines with only spaces and tabs
|
| while true do
|
| local subs
|
| text, subs = text:gsub("\n[ \t]+\n", "\n\n")
|
| if subs == 0 then break end
|
| end
|
|
|
| return "\n" .. text .. "\n"
|
| end
|
|
|
| -- Strips link definitions from the text and stores the data in a lookup table.
|
| function strip_link_definitions(text)
|
| local linkdb = {}
|
|
|
| local function link_def(id, url, title)
|
| id = id:match("%[(.+)%]"):lower()
|
| linkdb[id] = linkdb[id] or {}
|
| linkdb[id].url = url or linkdb[id].url
|
| linkdb[id].title = title or linkdb[id].title
|
| return ""
|
| end
|
|
|
| local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*<?([^%s>]+)>?[ \t]*"
|
| local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
|
| local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
|
| local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*"
|
|
|
| text = text:gsub(def_title1, link_def)
|
| text = text:gsub(def_title2, link_def)
|
| text = text:gsub(def_title3, link_def)
|
| text = text:gsub(def_no_title, link_def)
|
| return text, linkdb
|
| end
|
|
|
| link_database = {}
|
|
|
| -- Main markdown processing function
|
| function markdown(text)
|
| init_hash(text)
|
| init_escape_table()
|
|
|
| text = cleanup(text)
|
| text = protect(text)
|
| text, link_database = strip_link_definitions(text)
|
| text = block_transform(text)
|
| text = unescape_special_chars(text)
|
| return text
|
| end
|
|
|
| ----------------------------------------------------------------------
|
| -- End of module
|
| ----------------------------------------------------------------------
|
|
|
| setfenv(1, _G)
|
| M.lock(M)
|
|
|
| -- Expose markdown function to the world
|
| markdown = M.markdown
|
|
|
| -- Class for parsing command-line options
|
| local OptionParser = {}
|
| OptionParser.__index = OptionParser
|
|
|
| -- Creates a new option parser
|
| function OptionParser:new()
|
| local o = {short = {}, long = {}}
|
| setmetatable(o, self)
|
| return o
|
| end
|
|
|
| -- Calls f() whenever a flag with specified short and long name is encountered
|
| function OptionParser:flag(short, long, f)
|
| local info = {type = "flag", f = f}
|
| if short then self.short[short] = info end
|
| if long then self.long[long] = info end
|
| end
|
|
|
| -- Calls f(param) whenever a parameter flag with specified short and long name is encountered
|
| function OptionParser:param(short, long, f)
|
| local info = {type = "param", f = f}
|
| if short then self.short[short] = info end
|
| if long then self.long[long] = info end
|
| end
|
|
|
| -- Calls f(v) for each non-flag argument
|
| function OptionParser:arg(f)
|
| self.arg = f
|
| end
|
|
|
| -- Runs the option parser for the specified set of arguments. Returns true if all arguments
|
| -- where successfully parsed and false otherwise.
|
| function OptionParser:run(args)
|
| local pos = 1
|
| while pos <= #args do
|
| local arg = args[pos]
|
| if arg == "--" then
|
| for i=pos+1,#args do
|
| if self.arg then self.arg(args[i]) end
|
| return true
|
| end
|
| end
|
| if arg:match("^%-%-") then
|
| local info = self.long[arg:sub(3)]
|
| if not info then print("Unknown flag: " .. arg) return false end
|
| if info.type == "flag" then
|
| info.f()
|
| pos = pos + 1
|
| else
|
| param = args[pos+1]
|
| if not param then print("No parameter for flag: " .. arg) return false end
|
| info.f(param)
|
| pos = pos+2
|
| end
|
| elseif arg:match("^%-") then
|
| for i=2,arg:len() do
|
| local c = arg:sub(i,i)
|
| local info = self.short[c]
|
| if not info then print("Unknown flag: -" .. c) return false end
|
| if info.type == "flag" then
|
| info.f()
|
| else
|
| if i == arg:len() then
|
| param = args[pos+1]
|
| if not param then print("No parameter for flag: -" .. c) return false end
|
| info.f(param)
|
| pos = pos + 1
|
| else
|
| param = arg:sub(i+1)
|
| info.f(param)
|
| end
|
| break
|
| end
|
| end
|
| pos = pos + 1
|
| else
|
| if self.arg then self.arg(arg) end
|
| pos = pos + 1
|
| end
|
| end
|
| return true
|
| end
|
|
|
| -- Handles the case when markdown is run from the command line
|
| local function run_command_line(arg)
|
| -- Generate output for input s given options
|
| local function run(s, options)
|
| s = markdown(s)
|
| if not options.wrap_header then return s end
|
| local header = ""
|
| if options.header then
|
| local f = io.open(options.header) or error("Could not open file: " .. options.header)
|
| header = f:read("*a")
|
| f:close()
|
| else
|
| header = [[
|
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
| <html>
|
| <head>
|
| <meta http-equiv="content-type" content="text/html; charset=CHARSET" />
|
| <title>TITLE</title>
|
| <link rel="stylesheet" type="text/css" href="STYLESHEET" />
|
| </head>
|
| <body>
|
| ]]
|
| local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or
|
| s:match("<h3>(.-)</h3>") or "Untitled"
|
| header = header:gsub("TITLE", title)
|
| if options.inline_style then
|
| local style = ""
|
| local f = io.open(options.stylesheet)
|
| if f then
|
| style = f:read("*a") f:close()
|
| else
|
| error("Could not include style sheet " .. options.stylesheet .. ": File not found")
|
| end
|
| header = header:gsub('<link rel="stylesheet" type="text/css" href="STYLESHEET" />',
|
| "<style type=\"text/css\"><!--\n" .. style .. "\n--></style>")
|
| else
|
| header = header:gsub("STYLESHEET", options.stylesheet)
|
| end
|
| header = header:gsub("CHARSET", options.charset)
|
| end
|
| local footer = "</body></html>"
|
| if options.footer then
|
| local f = io.open(options.footer) or error("Could not open file: " .. options.footer)
|
| footer = f:read("*a")
|
| f:close()
|
| end
|
| return header .. s .. footer
|
| end
|
|
|
| -- Generate output path name from input path name given options.
|
| local function outpath(path, options)
|
| if options.append then return path .. ".html" end
|
| local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end
|
| m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end
|
| return path .. ".html"
|
| end
|
|
|
| -- Default commandline options
|
| local options = {
|
| wrap_header = true,
|
| header = nil,
|
| footer = nil,
|
| charset = "utf-8",
|
| title = nil,
|
| stylesheet = "default.css",
|
| inline_style = false
|
| }
|
| local help = [[
|
| Usage: markdown.lua [OPTION] [FILE]
|
| Runs the markdown text markup to HTML converter on each file specified on the
|
| command line. If no files are specified, runs on standard input.
|
|
|
| No header:
|
| -n, --no-wrap Don't wrap the output in <html>... tags.
|
| Custom header:
|
| -e, --header FILE Use content of FILE for header.
|
| -f, --footer FILE Use content of FILE for footer.
|
| Generated header:
|
| -c, --charset SET Specifies charset (default utf-8).
|
| -i, --title TITLE Specifies title (default from first <h1> tag).
|
| -s, --style STYLE Specifies style sheet file (default default.css).
|
| -l, --inline-style Include the style sheet file inline in the header.
|
| Generated files:
|
| -a, --append Append .html extension (instead of replacing).
|
| Other options:
|
| -h, --help Print this help text.
|
| -t, --test Run the unit tests.
|
| ]]
|
|
|
| local run_stdin = true
|
| local op = OptionParser:new()
|
| op:flag("n", "no-wrap", function () options.wrap_header = false end)
|
| op:param("e", "header", function (x) options.header = x end)
|
| op:param("f", "footer", function (x) options.footer = x end)
|
| op:param("c", "charset", function (x) options.charset = x end)
|
| op:param("i", "title", function(x) options.title = x end)
|
| op:param("s", "style", function(x) options.stylesheet = x end)
|
| op:flag("l", "inline-style", function(x) options.inline_style = true end)
|
| op:flag("a", "append", function() options.append = true end)
|
| op:flag("t", "test", function()
|
| local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua")
|
| local f = io.open(n)
|
| if f then
|
| f:close() dofile(n)
|
| else
|
| error("Cannot find markdown-tests.lua")
|
| end
|
| run_stdin = false
|
| end)
|
| op:flag("h", "help", function() print(help) run_stdin = false end)
|
| op:arg(function(path)
|
| local file = io.open(path) or error("Could not open file: " .. path)
|
| local s = file:read("*a")
|
| file:close()
|
| s = run(s, options)
|
| file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options))
|
| file:write(s)
|
| file:close()
|
| run_stdin = false
|
| end
|
| )
|
|
|
| if not op:run(arg) then
|
| print(help)
|
| run_stdin = false
|
| end
|
|
|
| if run_stdin then
|
| local s = io.read("*a")
|
| s = run(s, options)
|
| io.write(s)
|
| end
|
| end
|
|
|
| -- If we are being run from the command-line, act accordingly
|
| if arg and arg[0]:find("markdown%.lua$") then
|
| run_command_line(arg)
|
| else
|
| return markdown
|
| end
|