| --- Simple Input Patterns (SIP). <p> |
| -- SIP patterns start with '$', then a |
| -- one-letter type, and then an optional variable in curly braces. <p> |
| -- Example: |
| -- <pre class=example> |
| -- sip.match('$v=$q','name="dolly"',res) |
| -- ==> res=={'name','dolly'} |
| -- sip.match('($q{first},$q{second})','("john","smith")',res) |
| -- ==> res=={second='smith',first='john'} |
| -- </pre> |
| -- <pre> |
| -- <b>Type names</b> |
| -- v identifier |
| -- i integer |
| -- f floating-point |
| -- q quoted string |
| -- ([{< match up to closing bracket |
| -- </pre> |
| -- <p> |
| -- See <a href="../../index.html#sip">the Guide</a> |
| -- @class module |
| -- @name pl.sip |
| |
-- Cache the library functions used by this module in locals.
local append,concat = table.insert,table.concat
local ipairs,type = ipairs,type
-- `loadstring` and `unpack` are globals only in Lua 5.1/LuaJIT; from Lua 5.2
-- on the same functionality is provided by `load` and `table.unpack`.
local loadstring = loadstring or load
local unpack = unpack or table.unpack
local io,_G = io,_G
local print,rawget = print,rawget
| |
-- Lua string patterns for the basic SIP field types.
local patterns = {}
-- a sign or digit, more digits, then an optional '.', fraction and exponent
patterns.FLOAT = '[%+%-%d]%d*%.?%d*[eE]?[%+%-]?%d*'
-- a sign or digit followed by any number of digits
patterns.INTEGER = '[+%-%d]%d*'
-- a letter or underscore followed by letters, digits and underscores
patterns.IDEN = '[%a_][%w_]*'
-- NOTE(review): not referenced by the code visible in this file; presumably a
-- file-name pattern - confirm before relying on it
patterns.FILE = '[%a%.\\][:%][%w%._%-\\]*'
| |
-- Raise an error unless `val` has the Lua type named by `tp`.
-- `idx` is the argument position quoted in the message. The error is raised
-- at level 2, so it is attributed to the function that called assert_arg.
local function assert_arg(idx,val,tp)
    if type(val) == tp then return end
    error("argument "..idx.." must be "..tp, 2)
end
| |
| |
| --[[ |
| module ('pl.sip',utils._module) |
| ]] |
| |
-- the table returned as the module
local sip = {}

-- opening bracket of a bracketed field type -> the matching closing bracket
local brackets = {
    ['('] = ')',
    ['['] = ']',
    ['{'] = '}',
    ['<'] = '>',
}

-- one-letter Lua character classes accepted after '$'; the code in this file
-- only tests whether a key is present
local stdclasses = {
    a = 1, d = 1, l = 1, u = 1, w = 1, x = 1,
    c = 0, p = 0, s = 0,
}

-- matchers registered with sip.pattern, applied in order by sip.read
local _patterns = {}
| |
| |
-- Wrap a pattern fragment in parentheses, turning it into a capture.
local function group(s)
    local open,close = '(',')'
    return open..s..close
end
| |
-- Escape the Lua-pattern magic characters of a SIP spec. '$' is left alone
-- because it introduces a SIP field. A non-space character directly after '$'
-- is un-escaped again, so that a field such as $( passes through unchanged.
local function escape (spec)
    local magic = '[%-%.%+%[%]%(%)%^%%%?%*]'
    local escaped = spec:gsub(magic,'%%%1')
    local restored = escaped:gsub('%$%%(%S)','$%1')
    return restored
end
| |
-- Replace each run of whitespace in s with the placeholder byte \001.
-- Returns the results of gsub unchanged (the new string and the count).
local function imcompressible (s)
    local marker = '\001'
    return s:gsub('%s+',marker)
end
| |
-- [handling of spaces in patterns]
-- Whitespace in a spec is normally 'compressed': it is rewritten to %s* and
-- therefore matches zero or more spaces. Whitespace that separates two
-- alphanumeric items (literal word characters or one of the fields
-- v,f,a,d,x,l,u) must not be compressed, so those runs are marked first and
-- turned back into a single literal space at the end.
-- NOTE(review): 'i' is listed only in the left-hand class of the first
-- pattern; confirm whether integer fields were meant to appear in the others.
local function compress_spaces (s)
    local protected = {
        '%$[vifadxlu]%s+%$[vfadxlu]',   -- field, spaces, field
        '[%w_]%s+[%w_]',                -- word char, spaces, word char
        '[%w_]%s+%$[vfadxlu]',          -- word char, spaces, field
        '%$[vfadxlu]%s+[%w_]',          -- field, spaces, word char
    }
    for i = 1,#protected do
        s = s:gsub(protected[i],imcompressible)
    end
    -- every remaining run of whitespace becomes optional
    s = s:gsub('%s+','%%s*')
    -- the marked runs become one literal space
    s = s:gsub('\001',' ')
    return s
end
| |
--- convert a SIP pattern into the equivalent Lua string pattern.
-- @param spec a SIP pattern (a string)
-- @param options a table; only the <code>at_start</code> field is
-- currently meaningful and ensures that the pattern is anchored
-- at the start of the string.
-- @return a Lua string pattern, or nil if the spec contains an unknown
-- field type
-- @return a list with one entry per capture: the field name, or the capture
-- index for an unnamed field. If the first value is nil, an error message.
-- @return a table mapping each entry of that list to its one-letter type
function sip.create_pattern (spec,options)
    assert_arg(1,spec,'string')
    local fieldnames,fieldtypes = {},{}

    -- assert_arg has already rejected anything that is not a string
    spec = escape(spec)

    -- index of the next capture in the generated pattern
    local kount = 1

    -- record one capture; an unnamed capture is keyed by its index
    local function addfield (name,ftype)
        if not name then name = kount end
        append(fieldnames,name)
        fieldtypes[name] = ftype
        kount = kount + 1
    end

    local named_vars = spec:find('{%a+}')
    local pattern = '%$%S'

    if options and options.at_start then
        spec = '^'..spec
    end
    -- a trailing '$' is shorthand for a final $r field
    if spec:sub(-1,-1) == '$' then
        spec = spec:sub(1,-2)..'$r'
        if named_vars then spec = spec..'{rest}' end
    end

    -- strip the {name} suffixes, remembering the names in order of appearance
    local names
    if named_vars then
        names = {}
        spec = spec:gsub('{(%a+)}',function(name)
            append(names,name)
            return ''
        end)
    end
    spec = compress_spaces(spec)

    local k = 1
    local err
    local r = (spec:gsub(pattern,function(s)
        local ftype = s:sub(2,2)
        local name
        if names then name = names[k]; k=k+1 end
        -- this kludge is necessary because $q generates two captures, and
        -- we want to ignore the first. Not a problem for named captures,
        -- where both captures are registered under the same name.
        if not names and ftype == 'q' then
            addfield(nil,'Q')
        else
            addfield(name,ftype)
        end
        local res
        if ftype == 'v' then
            res = group(patterns.IDEN)
        elseif ftype == 'i' then
            res = group(patterns.INTEGER)
        elseif ftype == 'f' then
            res = group(patterns.FLOAT)
        elseif ftype == 'r' then
            res = '(%S.*)'
        elseif ftype == 'q' then
            -- match '...' as well as "...": the first capture is the opening
            -- quote and %n refers back to it. kount-2 is the index of that
            -- quote capture once the second field has been added.
            addfield(name,ftype)
            res = '(["\'])(.-)%'..(kount-2)
        elseif ftype == 'p' then
            res = '([%a]?[:]?[\\/%.%w_]+)'
        else
            local endbracket = brackets[ftype]
            if endbracket then
                -- balanced match from the bracket up to its closing partner
                res = '(%b'..ftype..endbracket..')'
            elseif stdclasses[ftype] or stdclasses[ftype:lower()] then
                -- one or more characters of a standard character class
                res = '(%'..ftype..'+)'
            else
                err = "unknown format type or character class"
            end
        end
        -- a nil result leaves the matched text in place
        return res
    end))
    if err then
        return nil,err
    else
        return r,fieldnames,fieldtypes
    end
end
| |
| |
-- Is s one of the field types ('d', 'i' or 'f') treated as numeric?
local function tnumber (s)
    if s == 'd' or s == 'i' then
        return true
    end
    return s == 'f'
end
| |
--- build the Lua source of a matching function for a SIP pattern.
-- The source is a chunk returning <code>function(s,res)</code>. That function
-- matches s against the generated Lua pattern; it returns false if there is
-- no match, otherwise it stores the captures in res and returns true.
-- @param spec a SIP pattern
-- @param options option table, passed on to create_pattern
-- @return the source text, or nil if the pattern could not be created
-- @return true if the fields are named, false otherwise; if the first value
-- is nil, the error message
function sip.create_spec_fun(spec,options)
    local fieldtypes,fieldnames
    spec,fieldnames,fieldtypes = sip.create_pattern(spec,options)
    if not spec then return spec,fieldnames end
    local named_vars = type(fieldnames[1]) == 'string'
    local ls = {}
    for i = 1,#fieldnames do
        ls[i] = 'mm'..i
    end
    -- a spec without fields has no captures; string.match then returns the
    -- whole match, which must still be bound to mm1 for the test below
    ls[1] = ls[1] or 'mm1'
    local code = {
        ('return (function(s,res)\n\tlocal %s = s:match(%q)\n'):format(concat(ls,','),spec),
        '\tif not mm1 then return false end\n',
    }
    local k = 1
    for i,f in ipairs(fieldnames) do
        if f ~= '_' then
            local var = 'mm'..i
            local ftype = fieldtypes[f]
            if tnumber(ftype) then
                var = 'tonumber('..var..')'
            elseif brackets[ftype] then
                -- drop the enclosing brackets from a bracketed capture
                var = var..':sub(2,-2)'
            end
            if named_vars then
                -- index with an explicit key rather than res.name, so that
                -- reserved words (e.g. 'end') and the numeric keys of
                -- unnamed fields still produce valid Lua
                if type(f) == 'string' then
                    append(code,('\tres[%q] = %s\n'):format(f,var))
                else
                    append(code,('\tres[%d] = %s\n'):format(f,var))
                end
            elseif ftype ~= 'Q' then -- we skip the string-delim capture
                append(code,('\tres[%d] = %s\n'):format(k,var))
                k = k + 1
            end
        end
    end
    append(code,'\treturn true\nend)\n')
    return concat(code), named_vars
end
| |
--- convert a SIP pattern into a matching function.
-- The returned function takes two arguments, the line and a table.
-- If the line matches the pattern, the function returns true and the
-- table is filled with field-value pairs; otherwise it returns false.
-- When the global _DEBUG is set, the generated source is printed.
-- @param spec a SIP pattern
-- @param options optional table, passed on to create_spec_fun;
-- {at_start=true} anchors the pattern at the start of the line
-- @return a function if successful, or nil,<error>
-- @return true if the pattern uses named fields, false otherwise
function sip.compile(spec,options)
    assert_arg(1,spec,'string')
    local source,named = sip.create_spec_fun(spec,options)
    if not source then
        return nil,named
    end
    if rawget(_G,'_DEBUG') then
        print(source)
    end
    local chunk,err = loadstring(source,'tmp')
    if err then
        return nil,err
    end
    -- the chunk evaluates to the matching function itself
    return chunk(),named
end
| |
-- Compiled matchers, keyed by spec. The anchored and the unanchored matcher
-- for the same spec are different functions, so each kind has its own table.
local cache = {}
local cache_at_start = {}

--- match a SIP pattern against a string.
-- Compiled patterns are cached, so a spec is only compiled on first use.
-- Raises an error if the pattern cannot be compiled.
-- @param spec a SIP pattern
-- @param line a string
-- @param res a table to receive values
-- @param options (optional) option table; {at_start=true} anchors the
-- pattern at the start of the line
-- @return true or false
function sip.match (spec,line,res,options)
    assert_arg(1,spec,'string')
    assert_arg(2,line,'string')
    assert_arg(3,res,'table')
    local compiled = (options and options.at_start) and cache_at_start or cache
    local fun = compiled[spec]
    if not fun then
        local err
        fun,err = sip.compile(spec,options)
        if not fun then
            -- report the compile error instead of calling a nil value
            error('invalid SIP pattern: '..tostring(err),2)
        end
        compiled[spec] = fun
    end
    return fun(line,res)
end
| |
--- match a SIP pattern against the start of a string.
-- Shorthand for sip.match with the <code>at_start</code> option set.
-- @param spec a SIP pattern
-- @param line a string
-- @param res a table to receive values
-- @return true or false
function sip.match_at_start (spec,line,res)
    local anchored = {at_start=true}
    return sip.match(spec,line,res,anchored)
end
| |
--- given a pattern and a file object, return an iterator over the results.
-- Lines which do not match the pattern are skipped.
-- @param spec a SIP pattern
-- @param f a file - use standard input if not specified.
-- @return an iterator function, or nil,<error> if the pattern does not
-- compile. Each call reads up to the next matching line and returns its
-- captured values (a single table of field-value pairs if the pattern uses
-- named fields); it returns nothing once the input is exhausted.
function sip.fields (spec,f)
    assert_arg(1,spec,'string')
    f = f or io.stdin
    -- on failure the second value of compile is the error message
    local fun,named = sip.compile(spec)
    if not fun then return nil,named end
    local res = {}
    return function()
        while true do
            local line = f:read()
            if not line then return end
            if fun(line,res) then
                local values = res
                -- hand out a fresh table per match
                res = {}
                if named then
                    -- named fields are not in the array part, so unpack
                    -- would return nothing and end the caller's loop
                    return values
                end
                return unpack(values)
            end
        end
    end
end
| |
--- register a match which will be used in the read function.
-- @param spec a SIP pattern
-- @param fun a function to be called with the results of the match
-- @return true if the pattern was registered, or nil,<error> if it could
-- not be compiled
-- @see read
function sip.pattern (spec,fun)
    assert_arg(1,spec,'string')
    -- on failure the second value of compile is the error message
    local pat,named = sip.compile(spec)
    -- never register a matcher that failed to compile: sip.read calls every
    -- registered matcher and would fail on a nil one
    if not pat then return nil,named end
    append(_patterns,{pat=pat,named=named,callback=fun or false})
    return true
end
| |
--- enter a loop which applies all registered matches to the input file.
-- Each line is tried against the registered patterns in order of
-- registration; the callback of the first pattern that matches is called,
-- with the result table for named fields and the unpacked values otherwise.
-- @param f a file object or a file name; if nil, then io.stdin is assumed.
-- A file opened here from a name is closed again after the loop.
-- @return nil,<error> if a named file cannot be opened
function sip.read (f)
    local input,opened_here = f or io.stdin,false
    if type(input) == 'string' then
        local handle,err = io.open(input)
        if not handle then return nil,err end
        input,opened_here = handle,true
    end
    local res = {}
    for line in input:lines() do
        for _,item in ipairs(_patterns) do
            if item.pat(line,res) then
                local callback = item.callback
                if callback then
                    if item.named then
                        callback(res)
                    else
                        callback(unpack(res))
                    end
                end
                -- start the next line with an empty result table
                res = {}
                break
            end
        end
    end
    if opened_here then input:close() end
end
| |
| return sip |