blob: c739a54cad076ed85f0664a01d6dcb1295978be6 [file] [log] [blame]
--- Simple Input Patterns (SIP). <p>
-- SIP patterns start with '$', then a
-- one-letter type, and then an optional variable in curly braces. <p>
-- Example:
-- <pre class=example>
-- sip.match('$v=$q','name="dolly"',res)
-- ==> res=={'name','dolly'}
-- sip.match('($q{first},$q{second})','("john","smith")',res)
-- ==> res=={second='smith',first='john'}
-- </pre>
-- <pre>
-- <b>Type names</b>
-- v identifier
-- i integer
-- f floating-point
-- q quoted string
-- ([{&lt; match up to closing bracket
-- </pre>
-- <p>
-- See <a href="../../index.html#sip">the Guide</a>
-- @class module
-- @name pl.sip
local append,concat = table.insert,table.concat
local concat = table.concat
local ipairs,loadstring,type,unpack = ipairs,loadstring,type,unpack
local io,_G = io,_G
local print,rawget = print,rawget
local patterns = {
FLOAT = '[%+%-%d]%d*%.?%d*[eE]?[%+%-]?%d*',
INTEGER = '[+%-%d]%d*',
IDEN = '[%a_][%w_]*',
FILE = '[%a%.\\][:%][%w%._%-\\]*'
}
local function assert_arg(idx,val,tp)
if type(val) ~= tp then
error("argument "..idx.." must be "..tp, 2)
end
end
--[[
module ('pl.sip',utils._module)
]]
local sip = {}
local brackets = {['<'] = '>', ['('] = ')', ['{'] = '}', ['['] = ']' }
local stdclasses = {a=1,c=0,d=1,l=1,p=0,u=1,w=1,x=1,s=0}
local _patterns = {}
local function group(s)
return '('..s..')'
end
-- escape all magic characters except $, which has special meaning
-- Also, un-escape any characters after $, so $( passes through as is.
local function escape (spec)
--_G.print('spec',spec)
local res = spec:gsub('[%-%.%+%[%]%(%)%^%%%?%*]','%%%1'):gsub('%$%%(%S)','$%1')
--_G.print('res',res)
return res
end
local function imcompressible (s)
return s:gsub('%s+','\001')
end
-- [handling of spaces in patterns]
-- spaces may be 'compressed' (i.e will match zero or more spaces)
-- unless this occurs within a number or an identifier. So we mark
-- the four possible imcompressible patterns first and then replace.
-- The possible alnum patterns are v,f,a,d,x,l and u.
local function compress_spaces (s)
s = s:gsub('%$[vifadxlu]%s+%$[vfadxlu]',imcompressible)
s = s:gsub('[%w_]%s+[%w_]',imcompressible)
s = s:gsub('[%w_]%s+%$[vfadxlu]',imcompressible)
s = s:gsub('%$[vfadxlu]%s+[%w_]',imcompressible)
s = s:gsub('%s+','%%s*')
s = s:gsub('\001',' ')
return s
end
--- convert a SIP pattern into the equivalent Lua string pattern.
-- @param spec a SIP pattern
-- @param options a table; only the <code>at_start</code> field is
-- currently meaningful and esures that the pattern is anchored
-- at the start of the string.
-- @return a Lua string pattern.
function sip.create_pattern (spec,options)
assert_arg(1,spec,'string')
local fieldnames,fieldtypes = {},{}
if type(spec) == 'string' then
spec = escape(spec)
else
local res = {}
for i,s in ipairs(spec) do
res[i] = escape(s)
end
spec = concat(res,'.-')
end
local kount = 1
local function addfield (name,type)
if not name then name = kount end
if fieldnames then append(fieldnames,name) end
if fieldtypes then fieldtypes[name] = type end
kount = kount + 1
end
local named_vars, pattern
named_vars = spec:find('{%a+}')
pattern = '%$%S'
if options and options.at_start then
spec = '^'..spec
end
if spec:sub(-1,-1) == '$' then
spec = spec:sub(1,-2)..'$r'
if named_vars then spec = spec..'{rest}' end
end
local names
if named_vars then
names = {}
spec = spec:gsub('{(%a+)}',function(name)
append(names,name)
return ''
end)
end
spec = compress_spaces(spec)
local k = 1
local err
local r = (spec:gsub(pattern,function(s)
local type,name
type = s:sub(2,2)
if names then name = names[k]; k=k+1 end
-- this kludge is necessary because %q generates two matches, and
-- we want to ignore the first. Not a problem for named captures.
if not names and type == 'q' then
addfield(nil,'Q')
else
addfield(name,type)
end
local res
if type == 'v' then
res = group(patterns.IDEN)
elseif type == 'i' then
res = group(patterns.INTEGER)
elseif type == 'f' then
res = group(patterns.FLOAT)
elseif type == 'r' then
res = '(%S.*)'
elseif type == 'q' then
-- some Lua pattern matching voodoo; we want to match '...' as
-- well as "...", and can use the fact that %n will match a
-- previous capture. Adding the extra field above comes from needing
-- to accomodate the extra spurious match (which is either ' or ")
addfield(name,type)
res = '(["\'])(.-)%'..(kount-2)
elseif type == 'p' then
res = '([%a]?[:]?[\\/%.%w_]+)'
else
local endbracket = brackets[type]
if endbracket then
res = '(%b'..type..endbracket..')'
elseif stdclasses[type] or stdclasses[type:lower()] then
res = '(%'..type..'+)'
else
err = "unknown format type or character class"
end
end
return res
end))
--print(r,err)
if err then
return nil,err
else
return r,fieldnames,fieldtypes
end
end
local function tnumber (s)
return s == 'd' or s == 'i' or s == 'f'
end
function sip.create_spec_fun(spec,options)
local fieldtypes,fieldnames
local ls = {}
spec,fieldnames,fieldtypes = sip.create_pattern(spec,options)
if not spec then return spec,fieldnames end
local named_vars = type(fieldnames[1]) == 'string'
for i = 1,#fieldnames do
append(ls,'mm'..i)
end
local fun = ('return (function(s,res)\n\tlocal %s = s:match(%q)\n'):format(concat(ls,','),spec)
fun = fun..'\tif not mm1 then return false end\n'
local k=1
for i,f in ipairs(fieldnames) do
if f ~= '_' then
local var = 'mm'..i
if tnumber(fieldtypes[f]) then
var = 'tonumber('..var..')'
elseif brackets[fieldtypes[f]] then
var = var..':sub(2,-2)'
end
if named_vars then
fun = ('%s\tres.%s = %s\n'):format(fun,f,var)
else
if fieldtypes[f] ~= 'Q' then -- we skip the string-delim capture
fun = ('%s\tres[%d] = %s\n'):format(fun,k,var)
k = k + 1
end
end
end
end
return fun..'\treturn true\nend)\n', named_vars
end
--- convert a SIP pattern into a matching function.
-- The returned function takes two arguments, the line and an empty table.
-- If the line matched the pattern, then this function return true
-- and the table is filled with field-value pairs.
-- @param spec a SIP pattern
-- @param options optional table; {anywhere=true} will stop pattern anchoring at start
-- @return a function if successful, or nil,<error>
function sip.compile(spec,options)
assert_arg(1,spec,'string')
local fun,names = sip.create_spec_fun(spec,options)
if not fun then return nil,names end
if rawget(_G,'_DEBUG') then print(fun) end
local chunk,err = loadstring(fun,'tmp')
if err then return nil,err end
return chunk(),names
end
local cache = {}
--- match a SIP pattern against a string.
-- @param spec a SIP pattern
-- @param line a string
-- @param res a table to receive values
-- @param options (optional) option table
-- @return true or false
function sip.match (spec,line,res,options)
assert_arg(1,spec,'string')
assert_arg(2,line,'string')
assert_arg(3,res,'table')
if not cache[spec] then
cache[spec] = sip.compile(spec,options)
end
return cache[spec](line,res)
end
--- match a SIP pattern against the start of a string.
-- @param spec a SIP pattern
-- @param line a string
-- @param res a table to receive values
-- @return true or false
function sip.match_at_start (spec,line,res)
return sip.match(spec,line,res,{at_start=true})
end
--- given a pattern and a file object, return an iterator over the results
-- @param spec a SIP pattern
-- @param f a file - use standard input if not specified.
function sip.fields (spec,f)
assert_arg(1,spec,'string')
f = f or io.stdin
local fun,err = sip.compile(spec)
if not fun then return nil,err end
local res = {}
return function()
while true do
local line = f:read()
if not line then return end
if fun(line,res) then
local values = res
res = {}
return unpack(values)
end
end
end
end
--- register a match which will be used in the read function.
-- @param spec a SIP pattern
-- @param fun a function to be called with the results of the match
-- @see read
function sip.pattern (spec,fun)
assert_arg(1,spec,'string')
local pat,named = sip.compile(spec)
append(_patterns,{pat=pat,named=named,callback=fun or false})
end
--- enter a loop which applies all registered matches to the input file.
-- @param f a file object; if nil, then io.stdin is assumed.
function sip.read (f)
local owned,err
f = f or io.stdin
if type(f) == 'string' then
f,err = io.open(f)
if not f then return nil,err end
owned = true
end
local res = {}
for line in f:lines() do
for _,item in ipairs(_patterns) do
if item.pat(line,res) then
if item.callback then
if item.named then
item.callback(res)
else
item.callback(unpack(res))
end
end
res = {}
break
end
end
end
if owned then f:close() end
end
return sip