--[==[
LUA MODULE

  luabalanced v$(_VERSION) - Functions for matching delimited snippets
  of Lua code in a string

SYNOPSIS

  local LB = require "luabalanced"

  -- Extract Lua expression starting at position 4.
  print(LB.match_expression("if x^2 + x > 5 then print(x) end", 4))
  --> x^2 + x > 5     16

  -- Extract Lua string starting at (default) position 1.
  print(LB.match_string([["test\"123" .. "more"]]))
  --> "test\"123"     12

  -- Break Lua code into code types.
  LB.gsub([[
    local x = 1  -- test
    print("x=", x)
  ]], function(u, s)
    print(u .. '[' .. s .. ']')
  end)
  --[[output:
    e[  local x = 1  ]
    c[-- test
    ]
    e[  print(]
    s["x="]
    e[, x)
    ]
  ]]

DESCRIPTION

  This module can, for example, match a Lua string, Lua comment, or Lua
  expression.  It is useful in particular for source filters or parsing
  Lua snippets embedded in another language.  It is inspired by Damian
  Conway's Text::Balanced [2] in Perl.  The unique feature of this
  implementation is that it does not rigorously lex and parse the Lua
  grammar.  It doesn't need to.  It assumes during the parse that the Lua
  code is syntactically correct (which can be verified later using
  loadstring).  By assuming this, extraction of delimited sequences is
  significantly simplified yet can still be robust, and it also supports
  supersets of the Lua grammar.  The code, which is written entirely in
  Lua, is just under 200 lines of Lua code (compare to Yueliang used in
  MetaLua, where the lexer alone is a few hundred lines).

API

  LB.match_string(s, pos) --> string, posnew

    Match Lua string in string starting at position `pos`.
    Returns `string`, `posnew`, where `string` is the matched
    string (or nil on no match) and `posnew` is the character
    following the match (or `pos` on no match).
    Supports all Lua string syntax: "...", '...', [[...]], [=[...]=], etc.

  LB.match_bracketed(s, pos) --> string, posnew

    Match bracketed Lua expression, e.g. "(...)", "{...}", "[...]",
    "[[...]]", [=[...]=], etc.
    Function interface is similar to `match_string`.

  LB.match_comment(s, pos) --> string, posnew

    Match Lua comment, e.g. "--...\n", "--[[...]]", "--[=[...]=]", etc.
    Function interface is similar to `match_string`.

  LB.match_expression(s, pos) --> string, posnew

    Match Lua expression, e.g. "a + b * c[e]".
    Function interface is similar to match_string.

  LB.match_namelist(s, pos) --> array, posnew

    Match name list (zero or more names).  E.g. "a,b,c"
    Function interface is similar to match_string,
    but returns array as match.

  LB.match_explist(s, pos) --> array, posnew

    Match expression list (zero or more expressions).  E.g. "a+b,b*c".
    Function interface is similar to match_string,
    but returns array as match.

  LB.gsub(s, f)

    Replace snippets of code in Lua code string `s`
    using replacement function `f(u,sin) --> sout`.
    `u` is the type of snippet ('c' = comment, 's' = string,
    'e' = any other code).
    Snippet is replaced with `sout` (unless `sout` is `nil` or `false`, in
    which case the original snippet is kept).
    This is somewhat analogous to `string.gsub`.

DEPENDENCIES

  None (other than Lua 5.1 or 5.2).

HOME PAGE

  http://lua-users.org/wiki/LuaBalanced
  https://github.com/davidm/lua-balanced

DOWNLOAD/INSTALL

  If using LuaRocks:
    luarocks install lua-balanced

  Otherwise, download and unzip.
  Alternately, if using git:
    git clone git://github.com/davidm/lua-balanced.git
    cd lua-balanced
  Optionally unpack:
    ./util.mk
  or unpack and install in LuaRocks:
    ./util.mk install

REFERENCES

  [1] http://lua-users.org/wiki/LuaBalanced
  [2] http://search.cpan.org/dist/Text-Balanced/lib/Text/Balanced.pm

LICENSE

  (c) 2008-2011 David Manura.  Licensed under the same terms as Lua (MIT).

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
  (end license)

--]==]---------------------------------------------------------------------

local M = {_TYPE='module', _NAME='luabalanced', _VERSION='0.1.20111203'}

local assert = assert

-- Map opening bracket -> closing bracket, and the inverse map.
local ends = { ['('] = ')', ['{'] = '}', ['['] = ']' }
local begins = {}
for k, v in pairs(ends) do begins[v] = k end


-- Match a Lua string literal in `s` starting at position `pos` (default 1).
-- Returns `string, posnew`: the matched literal including its delimiters and
-- the position just past it, or `nil, pos` if no string starts at `pos`.
-- Handles "...", '...' (with backslash escapes) and long brackets
-- [[...]], [=[...]=], etc.
-- Raises an error if a string starts at `pos` but is never terminated.
local function match_string(s, pos)
  pos = pos or 1
  local posa = pos
  local c = s:sub(pos, pos)
  if c == '"' or c == "'" then
    pos = pos + 1
    while true do
      -- Next interesting character: the closing quote or a backslash.
      pos = assert(s:find("[" .. c .. "\\]", pos), 'syntax error')
      if s:sub(pos, pos) == c then
        return s:sub(posa, pos), pos + 1
      end
      pos = pos + 2  -- backslash: skip it together with the escaped char
    end
  end
  -- Long bracket: capture the '=' run so the closer has the same level.
  local sc = s:match("^%[(=*)%[", pos)
  if not sc then
    return nil, pos
  end
  local _, pose = s:find("%]" .. sc .. "%]", pos)
  assert(pose, 'syntax error: long string not terminated')
  return s:sub(posa, pose), pose + 1
end
M.match_string = match_string


-- Match a Lua comment in `s` starting at position `pos` (default 1).
-- Returns `comment, posnew`, or `nil, pos` if `s` does not have "--" at `pos`.
-- A long comment (--[[...]], --[=[...]=]) is matched up to its closing
-- bracket; a line comment is matched up to and including the newline
-- (or to the end of `s` if there is none).
local function match_comment(s, pos)
  pos = pos or 1
  if s:sub(pos, pos+1) ~= '--' then
    return nil, pos
  end
  pos = pos + 2
  -- "--" followed by a long bracket is a long comment.  A quote right after
  -- "--" must NOT be treated as a string, hence match_string is only
  -- consulted for its long-bracket result below.
  local partt, post = match_string(s, pos)
  if partt then
    return '--' .. partt, post
  end
  local part; part, pos = s:match('^([^\n]*\n?)()', pos)
  return '--' .. part, pos
end
M.match_comment = match_comment  -- documented in the API; export it


-- Match a bracketed snippet in `s` starting at position `pos` (default 1):
-- "(...)", "{...}", "[...]", or a long string "[[...]]", "[=[...]=]".
-- Returns `string, posnew`, or `nil, pos` if `s` does not have an opening
-- bracket at `pos`.  Nested brackets are balanced with a stack; quoted
-- strings, long strings and comments inside are skipped so that brackets
-- or quotes appearing in them are not misinterpreted.
-- Raises an error on unbalanced or unterminated input.
local function match_bracketed(s, pos)
  pos = pos or 1
  local posa = pos
  local ca = s:sub(pos, pos)
  if not ends[ca] then
    return nil, pos
  end
  local stack = {}
  while true do
    pos = s:find('[%(%{%[%)%}%]\"\'%-]', pos)
    assert(pos, 'syntax error: unbalanced')
    local c = s:sub(pos, pos)
    if c == '"' or c == "'" then
      local part; part, pos = match_string(s, pos)
      assert(part)
    elseif c == '-' then
      if s:sub(pos+1, pos+1) == '--' or s:sub(pos, pos+1) == '--' then
        -- Comment: skip it entirely (it may contain quotes or brackets).
        local _; _, pos = match_comment(s, pos)
      else
        pos = pos + 1  -- plain minus operator
      end
    elseif ends[c] then  -- open
      local mid, posb
      if c == '[' then mid, posb = s:match('^%[(=*)%[()', pos) end
      if mid then
        -- Long string: jump to just past its closing bracket.
        pos = s:match('%]' .. mid .. '%]()', posb)
        assert(pos, 'syntax error: long string not terminated')
        if #stack == 0 then
          -- The long string itself was the bracketed item being matched.
          return s:sub(posa, pos-1), pos
        end
      else
        stack[#stack+1] = c
        pos = pos + 1
      end
    else  -- close
      assert(stack[#stack] == assert(begins[c]), 'syntax error: unbalanced')
      stack[#stack] = nil
      if #stack == 0 then
        return s:sub(posa, pos), pos+1
      end
      pos = pos + 1
    end
  end
end
M.match_bracketed = match_bracketed


-- Word operators: an identifier following one of these continues the
-- expression rather than starting a new statement.
local wordop = {['and']=true, ['or']=true, ['not']=true}

-- Characters that, when immediately followed by '=', form a comparison
-- operator (>=, <=, ~=) rather than an assignment.
local is_compare = {['>']=true, ['<']=true, ['~']=true}

-- Match a Lua expression in `s` starting at position `pos` (default 1),
-- e.g. "a + b * c[e]".
-- Returns `string, posnew`: the text from `pos` up to (not including) the
-- point where the expression ends, and that position.  The expression ends
-- at an unmatched closing bracket, ';', ',', an assignment '=', at an
-- identifier/number that directly follows a complete operand (i.e. two
-- operands with no operator between them), or at the end of `s`.
-- Raises an error on unterminated strings or unbalanced brackets.
local function match_expression(s, pos)
  pos = pos or 1
  local posa = pos
  local lastident
  local poscs, posce  -- start / end of the most recent run of comments
  while pos do
    local c = s:sub(pos, pos)
    if c == '"' or c == "'" or c == '[' and s:find('^[=%[]', pos+1) then
      local part; part, pos = match_string(s, pos)
      assert(part, 'syntax error')
    elseif c == '-' and s:sub(pos+1, pos+1) == '-' then
      -- note: handle adjacent comments in loop to properly support
      -- backtracing (poscs/posce).
      poscs = pos
      while s:sub(pos, pos+1) == '--' do
        local part; part, pos = match_comment(s, pos)
        assert(part)
        pos = s:match('^%s*()', pos)
        posce = pos
      end
    elseif c == '(' or c == '{' or c == '[' then
      local _; _, pos = match_bracketed(s, pos)
    elseif c == '=' and s:sub(pos+1, pos+1) == '=' then
      pos = pos + 2  -- skip over two-char op containing '='
    elseif c == '=' and is_compare[s:sub(pos-1, pos-1)] then
      pos = pos + 1  -- skip over two-char op containing '='
    elseif c:match'^[%)%}%];,=]' then
      return s:sub(posa, pos-1), pos
    elseif c:match'^[%w_]' then
      local newident, newpos = s:match('^([%w_]+)()', pos)
      if pos ~= posa and not wordop[newident] then  -- non-first ident
        -- Look back (over any comment run and whitespace) at the last
        -- significant character before this identifier.  The scan is
        -- bounded by `posa` so that text before the expression start is
        -- never inspected and the index never wraps below 1.
        local pose = ((posce == pos) and poscs or pos) - 1
        while pose >= posa and s:match('^%s', pose) do pose = pose - 1 end
        if pose >= posa then
          local ce = s:sub(pose, pose)
          if ce:match'[%)%}\'\"%]]' or
             ce:match'[%w_]' and not wordop[lastident]
          then
            -- Previous token was a complete operand: expression ends here.
            return s:sub(posa, pos-1), pos
          end
        end
      end
      lastident, pos = newident, newpos
    else
      pos = pos + 1
    end
    pos = s:find('[%(%{%[%)%}%]\"\';,=%w_%-]', pos)
  end
  return s:sub(posa, #s), #s+1
end
M.match_expression = match_expression


-- Match a comma-separated list of names (zero or more) in `s` starting at
-- position `pos` (default 1), e.g. "a,b,c".
-- Returns `array, posnew`: the names found and the position after the last
-- name and any whitespace trailing it (`pos` unchanged if none matched).
local function match_namelist(s, pos)
  pos = pos or 1
  local list = {}
  while true do
    -- Every name but the first must be preceded by a comma.
    local prefix = #list == 0 and '^' or '^%s*,%s*'
    local item, post = s:match(prefix .. '([%a_][%w_]*)%s*()', pos)
    if not item then break end
    pos = post
    list[#list+1] = item
  end
  return list, pos
end
M.match_namelist = match_namelist


-- Match a comma-separated list of expressions in `s` starting at position
-- `pos` (default 1), e.g. "a+b,b*c".
-- Returns `array, posnew`: the expression strings (as returned by
-- `match_expression`) and the position after the last one.
local function match_explist(s, pos)
  pos = pos or 1
  local list = {}
  while true do
    if #list ~= 0 then
      -- Subsequent expressions require a separating comma.
      local post = s:match('^%s*,%s*()', pos)
      if not post then break end
      pos = post
    end
    local item; item, pos = match_expression(s, pos)
    assert(item, 'syntax error')
    list[#list+1] = item
  end
  return list, pos
end
M.match_explist = match_explist


-- Replace snippets of Lua code string `s` using function `f(u, sin) --> sout`.
-- `u` is the snippet type: 'c' = comment, 's' = string, 'e' = any other
-- code.  Each snippet is replaced by `sout`, or kept unchanged when `sout`
-- is nil or false.  Returns the resulting string.
-- Raises an error if `s` contains an unterminated string.
local function gsub(s, f)
  local pos = 1   -- scan position
  local posa = 1  -- start of the pending (not yet emitted) 'e' snippet
  -- Pieces are collected in a buffer and joined once at the end, which
  -- avoids quadratic cost from repeated string concatenation.
  local out, n = {}, 0
  local function emit(u, text)
    n = n + 1
    out[n] = f(u, text) or text
  end
  while true do
    pos = s:find('[%-\'\"%[]', pos)
    if not pos then break end
    if s:match('^%-%-', pos) then
      local exp = s:sub(posa, pos-1)
      if #exp > 0 then emit('e', exp) end
      local comment; comment, pos = match_comment(s, pos)
      emit('c', assert(comment))
      posa = pos
    else
      local posb = s:find('^[\'\"%[]', pos)
      local str
      if posb then str, pos = match_string(s, posb) end
      if str then
        local exp = s:sub(posa, posb-1)
        if #exp > 0 then emit('e', exp) end
        emit('s', str)
        posa = pos
      else
        pos = pos + 1  -- lone '-' or '[' that starts neither comment nor string
      end
    end
  end
  local exp = s:sub(posa)
  if #exp > 0 then emit('e', exp) end
  return table.concat(out)
end
M.gsub = gsub


return M