Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- eval.lua: functions for parsing and evaluating blogme code.
-- This file:
--   http://angg.twu.net/blogme4/eval.lua.html
--   http://angg.twu.net/blogme4/eval.lua
--   (find-blogme4file "eval.lua")
-- Author: Eduardo Ochs <[email protected]>
-- Version: 2011feb17
-- License: GPL3
--
-- «.control-flow»         (to "control-flow")
-- «.parse_pattern»        (to "parse_pattern")
-- «.tlongwords»           (to "tlongwords")
-- «.qlongwords»           (to "qlongwords")
-- «.tlongword_mapconcat»  (to "tlongword_mapconcat")
-- «.longwords»            (to "longwords")
-- «.readvword»            (to "readvword")
-- «.readvrest»            (to "readvrest")
-- «.readqword»            (to "readqword")
-- «.readqrest»            (to "readqrest")
-- «._A»                   (to "_A")
-- «.with_subj»            (to "with_subj")
-- «.blogme_eval»          (to "blogme_eval")
-- «.test-tlongword»       (to "test-tlongword")
-- «.test-parse»           (to "test-parse")
-- «.test-read»            (to "test-read")

-- «control-flow»  (to ".control-flow")
-- The difficult part of evaluation is dealing with the [] "blocks".
-- The execution flow involves this very nasty recursion:
--
--   blogme_eval(subj)
--       v
--   with_subj(subj, readverylongword)
--       v
--   readverylongword() <------------------------------\
--       v                                             |
--   parse_verylongword(blogme_evalblock)              |
--       v                                             |
--   /-> tlongword_mapconcat(blogme_evalblock, T, "")  |
--   |       :                                         |
--   |       v                                         |
--   |   blogme_evalblock(s, e)                        |
--   |       v                                         |
--   |   with_pos_endpos(s+1, e-1, blogme__eval)       |
--   |       v                                         |
--   |   blogme__eval()                                |
--   |       v                                         |
--   |   _A[argp]()                                    |
--   |       :                                         |
--   |       v                                         |
--   |   readvvvrest()                                 |
--   |     |   |        \                              |
--   |     v   v          v                            |
--   |   readvword()  readvrest()                      |
--   |       v            |                            |
--   |   readlongword()   \----------------------------/
--   |       v
--   \-- parse_longword(blogme_evalblock)

-- «parse_pattern»  (to ".parse_pattern")
-- Conventions for the "parse*" functions
-- ======================================
-- They operate on these four globals:
--   subj    (read-only),
--   pos     (advanced when parsing succeeds, unchanged when not),
--   oldpos  (gets the old value of pos on success, garbage on failure),
--   result  (discussed below; relevant on success, garbage on failure),
-- and they return true if they succeed, nil if they fail.
-- They are all called either "parse__blah" or "parse_blah".
-- The "parse__blah" functions just store "oldpos" in "result".
-- The "parse_blah" functions are more complex, and they produce less
-- trivial "result"s. In most cases - mainly the cases implemented
-- with "parse_pattern" - their results are the substring of subj
-- between oldpos and pos; the cases involving "longwords" will be
-- described below.
--
-- The "blah" in the names of the "parse__blah" and "parse_blah"
-- functions indicates what those functions try to parse, and
-- (sometimes) how the result is calculated from the parsed region:
--   "spaces": one or more whitespace chars
--   "block":  a region enclosed in balanced "[]"s
--   "wchars": one or more "word chars", i.e., which are neither
--             whitespace nor "[]"s
--   "rchars": one or more "regular chars", i.e., those which are
--             not "[]"s.
--

-- parse_pattern(pat): try to match the Lua pattern "pat" against the
-- global "subj", starting at the global "pos". The pattern is expected
-- to yield two captures: the value to store in "result" and the
-- position just after the match.
-- On success: oldpos <- old pos, result <- first capture,
--             pos <- second capture; returns true.
-- On failure: pos is left unchanged, oldpos holds that same pos,
--             result is whatever the match gave back; returns nothing.
function parse_pattern (pat)
  local startpos = pos
  local captured, newpos = subj:match(pat, startpos)
  oldpos, result = startpos, captured
  if newpos then
    pos = newpos
    return true
  end
  -- No match: "pos" was never touched, so there is nothing to undo.
end

-- Whitespace: the "__" variant stores the start position in "result",
-- the "_" variant stores the matched text.
function parse__spaces ()
  return parse_pattern("^()[ \t\n]+()")
end
function parse_spaces ()
  return parse_pattern("^([ \t\n]+)()")
end

-- Word chars: anything except whitespace and "[" / "]".
function parse__wchars ()
  return parse_pattern("^()[^ \t\n%[%]]+()")
end
function parse_wchars ()
  return parse_pattern("^([^ \t\n%[%]]+)()")
end

-- Regular chars: anything except "[" / "]".
function parse__rchars ()
  return parse_pattern("^()[^%[%]]+()")
end
function parse_rchars ()
  return parse_pattern("^([^%[%]]+)()")
end

-- Balanced "[...]" blocks (marked "slow" by the original author).
function parse__block ()
  return parse_pattern("^()%b[]()")      -- slow
end
function parse_block ()
  return parse_pattern("^(%b[])()")      -- slow
end

-- «tlongwords»  (to ".tlongwords")
-- A "longword" is something of the form "(wchars | block)+", and a
-- "verylongword" is something of the form "(rchars | block)+".
-- A string like "a[+ 1 2][+ 3 4]b c[+ 5 6]d" is two longwords,
-- separated by a space, and is a single verylongword; verylongwords
-- end at "]"s or the end of the string, while longwords can also end
-- at whitespace.
-- Usually we want the "value" of a longword/verylongword; the "value"
-- is calculated by replacing each "[]" in the {very}longword by its
-- result - for example, the "value" of "a[+ 1 2][+ 3 4]b" is "a37b".
-- To calculate these "values" we need a nasty recursion, so here we
-- start with something simpler.
--
-- A "tlongword" is an array of strings and {begpos, endpos} pairs,
-- where begpos is the position of the "[" and endpos is the position
-- just after the matching "]".
-- For example, the value "as a tlongword" of the string
--
--                    11111111112222
--           12345678901234567890123
--   subj = "e[+ 1 [+ 2 3]]f[+ 4 5]"
-- is:
--   {"e", {2, 15}, "f", {16, 23}}
-- Note that:
--   subj:sub(2, 15-1) == "[+ 1 [+ 2 3]]"
--

-- parse__xcharsandblocks(parse_xchars): parse a maximal run of
-- "(xchars | block)+" starting at "pos", where "parse_xchars" is the
-- parser for the non-block pieces (parse_wchars or parse_rchars).
-- Always sets result to the table of pieces (strings and
-- {begpos, endpos} pairs) and oldpos to the starting position;
-- returns true if at least one piece was parsed, false otherwise.
function parse__xcharsandblocks (parse_xchars)
  local startpos = pos
  local pieces, n = {}, 0
  -- Consume every block sitting at the current position, recording
  -- each one as a {begpos, endpos} pair.
  local function collect_blocks ()
    while parse__block() do
      n = n + 1
      pieces[n] = {oldpos, pos}
    end
  end
  collect_blocks()
  while parse_xchars() do
    n = n + 1
    pieces[n] = result          -- the chars just parsed, as a string
    collect_blocks()
  end
  result, oldpos = pieces, startpos
  return n > 0
end

-- A tlongword stops at whitespace; a tverylongword only at "]" or at
-- the end of the string.
function parse_tlongword ()
  return parse__xcharsandblocks(parse_wchars)
end
function parse_tverylongword ()
  return parse__xcharsandblocks(parse_rchars)
end

-- «qlongwords»  (to ".qlongwords")
-- Quoted longwords.
-- These are used by blogme words like "#" and "lua:".
-- parse__qlongword: parse a longword without evaluating it; on
-- success "result" is the position where it starts.
function parse__qlongword ()
  if not parse_tlongword() then return end
  result = oldpos
  return true
end

-- parse_qlongword: parse a longword without evaluating it; on
-- success "result" is its source text, subj:sub(oldpos, pos-1).
function parse_qlongword ()
  if not parse_tlongword() then return end
  result = subj:sub(oldpos, pos-1)
  return true
end

-- parse__qverylongword: take everything from "pos" up to (but not
-- including) "endpos", unevaluated. Succeeds only when pos < endpos;
-- on success "result" is the starting position and pos jumps to endpos.
function parse__qverylongword ()
  if not (pos < endpos) then return end
  local startpos = pos
  oldpos, result = startpos, startpos
  pos = endpos
  return true
end

-- parse_qverylongword: same as above, but "result" is the raw text
-- subj:sub(startpos, endpos-1).
function parse_qverylongword ()
  if not (pos < endpos) then return end
  local startpos = pos
  local text = subj:sub(startpos, endpos-1)
  oldpos, pos, result = startpos, endpos, text
  return true
end

-- «tlongword_mapconcat»  (to ".tlongword_mapconcat")
-- This is the function that we use to evaluate tlongwords.
-- The function "f" is usually "blogme_evalblock", so this ends up
-- being recursive.
-- See: (find-blogme4 "eval.lua" "blogme_eval")
--      (find-elnode "Mapping Functions" "Function: mapconcat")
-- In blogme3 I implemented a special behavior for tlongwords of
-- length 1 - I skipped the concatenation step. This should be done
-- here too, I think (for HLIST and friends?).
--
-- tlongword_mapconcat(f, T, sep):
--   f    called as f(begpos, endpos) for each pair in T; a nil/false
--        return value is replaced by "".
--   T    a tlongword (array of strings and {begpos, endpos} pairs),
--        or nil/false, in which case nil is returned.
--   sep  separator passed to table.concat.
-- Returns T[1] untouched by concatenation when T has exactly one
-- element, otherwise the concatenation of all elements.
function tlongword_mapconcat (f, T, sep)
  if not T then return nil end
  local n = #T
  for i = 1, n do
    local piece = T[i]
    if type(piece) == "table" then
      -- Note that *we change the table T*!!!
      T[i] = f(piece[1], piece[2]) or ""
    end
  end
  if n == 1 then return T[1] end
  return table.concat(T, sep)
end

-- «longwords»  (to ".longwords")
-- These functions are similar to the ones that return tlongwords, but
-- here we run tlongword_mapconcat to return the "values" of these
-- tlongwords.
-- (I think that they destroy endpos... is that important?)
-- parse_longword(eval_block): parse a tlongword and replace "result"
-- by its value, computed by tlongword_mapconcat with "eval_block" as
-- the block evaluator and "" as the separator.
-- Returns true on success, nil when no longword could be parsed.
function parse_longword (eval_block)
  if parse_tlongword() then
    result = tlongword_mapconcat(eval_block, result, "")
    return true
  end
  return nil
end

-- parse_verylongword(eval_block): same, for a tverylongword.
function parse_verylongword (eval_block)
  if parse_tverylongword() then
    result = tlongword_mapconcat(eval_block, result, "")
    return true
  end
  return nil
end

-- «readvword»  (to ".readvword")
-- «readvrest»  (to ".readvrest")
-- The "read*" functions are high-level functions used to parse
-- arguments for blogme "calls"; they follow conventions that are
-- quite different from the "parse*" functions.
-- For example, the argparser for "HREF" has to parse a longword
-- and a verylongword; in the evaluation process for
--   "[HREF http://foo/ bar plic]"
-- we get:
--   HREF(readvvrest())
-- that becomes:
--   HREF("http://foo/", "bar plic")
-- but
--   "[HREF http://foo/ bar plic]"
-- becomes:
--   HREF("http://foo/", "")
-- NOTE(review): the two example strings above are now textually
-- identical; the second one presumably differed in whitespace that
-- was lost in the htmlized copy - check against the original file.
--
-- Shorthands: a "vword" is the value of a longword; a "vrest" (used
-- to obtain the "rest of the arguments", as &rest in Lisp) is the
-- value of a verylongword. Additional "v"s in the prefix mean vwords;
-- for example, a "vvvrest" is a vword, then another vword, then a
-- vrest.
--
-- Remember that the "parse*" functions returned a flag, and stored
-- the "result" of the parsed region in the global variable "result".
-- The "read*" functions return their "results" straight away, and in
-- the case of failure (i.e., of parsing nothing) they return the
-- empty string. Also, they parse (and discard) spaces before each
-- vword and vrest.
-- readlongword / readverylongword: evaluate the {very}longword at
-- "pos" (blocks are evaluated with blogme_evalblock) and return its
-- value, or "" when nothing could be parsed.
function readlongword ()
  local ok = parse_longword(blogme_evalblock)
  if not ok then return "" end
  return result
end
function readverylongword ()
  local ok = parse_verylongword(blogme_evalblock)
  if not ok then return "" end
  return result
end

-- readvword / readvrest: skip leading whitespace, then read a vword
-- (value of a longword) or a vrest (value of a verylongword).
function readvword ()
  parse__spaces()
  return readlongword()
end
function readvrest ()
  parse__spaces()
  return readverylongword()
end

-- readv...vrest: one vword per extra "v", then a vrest. The first
-- word is read into a local before the recursive call is made.
function readvvrest ()
  local w = readvword()
  return w, readvrest()
end
function readvvvrest ()
  local w = readvword()
  return w, readvvrest()
end
function readvvvvrest ()
  local w = readvword()
  return w, readvvvrest()
end
function readvvvvvrest ()
  local w = readvword()
  return w, readvvvvrest()
end

-- «readqword»  (to ".readqword")
-- «readqrest»  (to ".readqrest")
-- Quoted counterparts of the readers above: they return the source
-- text of the {very}longword instead of its value ("" on failure).
function readqlongword ()
  local ok = parse_qlongword()
  if not ok then return "" end
  return result
end
function readqverylongword ()
  local ok = parse_qverylongword()
  if not ok then return "" end
  return result
end
function readqword ()
  parse__spaces()
  return readqlongword()
end
function readqrest ()
  parse__spaces()
  return readqverylongword()
end
function readqqrest ()
  local w = readqword()
  return w, readqrest()
end
function readqqqrest ()
  local w = readqword()
  return w, readqqrest()
end
function readqqqqrest ()
  local w = readqword()
  return w, readqqqrest()
end
function readqqqqqrest ()
  local w = readqword()
  return w, readqqqqrest()
end

-- «_A»  (to "._A")
-- (find-blogme3 "definers.lua" "_AA")
-- (find-blogme3 "brackets.lua" "readvword")
-- (find-blogme3 "anggdefs.lua" "basic-special-words" "lua:")
-- _A maps argparser codes (strings) to the functions that read the
-- arguments; an existing _A table is extended rather than replaced.
_A = _A or {}
do
  local A = _A
  A["0"]  = nop            -- "nop" is not defined in this file
  A["1"]  = readvrest
  A["2"]  = readvvrest
  A["3"]  = readvvvrest
  A["4"]  = readvvvvrest
  A["5"]  = readvvvvvrest
  A["1Q"] = readqrest
  A["2Q"] = readqqrest
  A["3Q"] = readqqqrest
  A["4Q"] = readqqqqrest
  A["5Q"] = readqqqqqrest
end

-- «with_subj»  (to ".with_subj")
-- with_pos_endpos(pos_, endpos_, f): run f(pos_, endpos_) with the
-- globals pos/endpos temporarily set to pos_/endpos_, then put the
-- previous values back. Returns the first value returned by f.
-- NOTE(review): if f raises an error the previous values are not
-- restored.
function with_pos_endpos (pos_, endpos_, f)
  local saved_pos, saved_endpos = pos, endpos
  pos, endpos = pos_, endpos_
  local r = f(pos_, endpos_)
  pos, endpos = saved_pos, saved_endpos
  return r
end

-- with_subj(subj_, f): run f(1, #subj_+1) with the globals
-- subj/pos/endpos temporarily set to subj_, 1, #subj_+1, then put the
-- previous values back. Returns the first value returned by f.
-- NOTE(review): as above, nothing is restored if f raises an error.
function with_subj (subj_, f)
  local saved_subj, saved_pos, saved_endpos = subj, pos, endpos
  local first, past_end = 1, #subj_+1
  subj, pos, endpos = subj_, first, past_end
  local r = f(first, past_end)
  subj, pos, endpos = saved_subj, saved_pos, saved_endpos
  return r
end

-- «blogme_eval»  (to ".blogme_eval")
-- "blogme__eval" (with a double "__") is a very low-level function,
-- that does the heavy work for both "blogme_evalblock" and
-- "blogme_eval". It takes as its "input" the global variables subj,
-- pos and endpos, parses a word, and then returns the result of
-- wordf(argpf()). Here is a typical example of how it runs. If:
--   subj = "ab [HREF http://foo/ bar] cd"
--   pos = 5
--   endpos = 25
-- then "word" is "HREF", "argp" is "2", and the result of argpf()
-- is the sequence "http://foo/", "bar"; then blogme__eval will
-- return the result of HREF("http://foo/", "bar").
-- Note that blogme__eval uses the table _B of blogmewords and
-- the table _A of argparser codes. See:
--   (find-blogme4 "def.lua" "BlogmeWord")
--
-- "blogme_evalblock" is used to run a blogme "call" inside "[]s" (as
-- in the example above).
--
-- "blogme_eval" is used to evaluate all the blogme calls inside a
-- string, replacing each one by its result; for example,
--   blogme_eval "ab [HREF http://foo/ bar] cd"
-- returns
--   "ab " .. HREF("http://foo/", "bar") .. " cd".
--
-- blogme__eval(): evaluate the blogme call that starts at "pos" in
-- "subj". It skips leading spaces, reads the word, looks it up in _B,
-- resolves the word's argument parser and returns wordf(argpf()).
-- The argument parser "bword.argp" may be:
--   * a string: a code looked up in _A and then, failing that, in _G;
--   * any other non-false value: used directly as the parser.
-- Raises "Empty word!" when there is no word at pos, "Unknown blogme
-- word: ..." when the word is not in _B, and "Unknown arglist parser:
-- ..." when the parser is missing or its code cannot be resolved
-- (previously an unresolved code was itself called as if it were a
-- function, and a missing parser crashed while building the message).
blogme__eval = function ()
    parse__spaces()
    if not parse_wchars() then error("Empty word!") end
    local word  = result
    local bword = _B[word] or error("Unknown blogme word: "..word)
    local wordf = bword.fun
    local argp  = bword.argp
    local argpf = argp
    if type(argp) == "string" then
      -- A string is only a code; it must resolve to an actual parser.
      argpf = _A[argp] or _G[argp]
    end
    if not argpf then
      error("Unknown arglist parser: "..tostring(argp))
    end
    return wordf(argpf())
  end

-- blogme_evalblock(s, e): evaluate the block whose "[" is at position
-- s and whose end position (just after the "]") is e.
blogme_evalblock = function (s, e)
    return with_pos_endpos(s+1, e-1, blogme__eval)  -- skip the '[' and the ']'
  end

-- blogme_eval(subj_): evaluate every blogme call inside the string
-- subj_ by reading it as a single verylongword.
blogme_eval = function (subj_)
    return with_subj(subj_, readverylongword)
  end

-- dump-to: tests
--[===[
-- «test-tlongword»  (to ".test-tlongword")
-- (find-blogme4 "argparsers.lua")
-- (find-blogme4 "brackets.lua")
-- (find-blogme4 "def.lua")
 * (eepitch-lua51)
 * (eepitch-kill)
 * (eepitch-lua51)
userocks()
ee_dofile "~/blogme4/blogme4-all.lua"
-- These tests should make clear what a tlongword is.
-- NOTE(review): the column rulers and the exact spacing inside the
-- test strings below were lost in the htmlized copy (runs of spaces
-- were collapsed), so the positions quoted in the expected outputs
-- may not match the strings as shown here.
-- /-----------------------------------------------------------\
-- | /-----\ /-----------------\/-------\ |
str = "[HREF http://[+ 1 2]/ bar [* [+ 1 2] [+ 3 4]][* 10 10]plic]"
-- ^ ^ ^ ^ ^ ^^
-- 1 7 14 20 ^24 29 4748 ^56 ^61
angf = function (s, e) return "<"..subj:sub(s, e-1)..">" end
test = function (p, f, ...)
    subj, pos = str, p
    PP(f(...), oldpos, pos, result)
  end
test(7, parse_tlongword)
--> <true> 7 22 {"http://", {14, 21}, "/"}
-- bool oldpos pos result
test(24, parse_tverylongword)
--> <true> 24 61 {"bar ", {29, 48}, {48, 57}, "plic"}
-- bool oldpos pos result
test(24, parse_tverylongword)                 -- reset "result"
PP(tlongword_mapconcat(angf, result, ".."))   -- this changes "result"
--> "bar ..<[* [+ 1 2] [+ 3 4]]>..<[* 10 10]>..plic"
PP(result)
--> {1="bar ", 2="<[* [+ 1 2] [+ 3 4]]>", 3="<[* 10 10]>", 4="plic"}
test(7, parse_tlongword)
--> <true> 7 22 {1="http://", 2={1=14, 2=21}, 3="/"}
test(7, parse_longword, angf)
--> <true> 7 22 "http://<[+ 1 2]>/"
test(24, parse_verylongword, angf)
--> <true> 24 61 "bar <[* [+ 1 2] [+ 3 4]]><[* 10 10]>plic"
def [[ HREF 2 url,str "<a href=\"$url\">$str</a>" ]]
def [[ * 2 a,b a*b ]]
def [[ + 2 a,b a+b ]]
-- /-----------------------------------------------------------\
-- | /-----\ /-----------------\/-------\ |
str = "[HREF http://[+ 1 2]/ bar [* [+ 1 2] [+ 3 4]][* 10 10]plic]"
-- ^ ^ ^ ^ ^ ^^
-- 1 7 14 20 ^24 29 4748 ^56 ^61
subj = str
= blogme_evalblock(14, 20)
= blogme_evalblock(29, 48)
= blogme_evalblock( 1, 61)
= blogme_eval(str)

-- «test-parse»  (to ".test-parse")
-- «test-read»  (to ".test-read")
-- High-level tests.
-- "be" tests evaluating a string using blogme_eval,
-- "tp" tests a "parse_*" word,
-- "tr" tests a "read*" word.
 * (eepitch-lua51)
 * (eepitch-kill)
 * (eepitch-lua51)
-- (find-blogme4 "blogme4-all.lua")
ee_dofile "~/blogme4/blogme4-all.lua"
def [[ pp 1 body "<"..body..">", print("<"..body..">") ]]
def [[ pq 1Q body "<"..body..">", print("<"..body..">") ]]
comp = function (f, g) return function (...) return f(g(...)) end end
be = function (str) PP(blogme_eval(str)) end
tp = function (f) return function (subj) PP(with_subj(subj, f), result) end end
tr = function (f) return function (subj) with_subj(subj, comp(PP, f)) end end
tp(parse_spaces) [==[ ab cd ef ]==]
tp(parse_rchars) [==[ ab cd ef gh ]==]
tp(parse_rchars) [==[ ab cd [pp ef] gh ]==]
tp(parse_qverylongword) [==[ ab cd [pp ef] gh ]==]
tp(parse_tverylongword) [==[ ab cd [pp ef] gh ]==]
tp(parse_tlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse__qlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse_qlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse_qlongword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readverylongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvvvrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqlongword) [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqlongword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqverylongword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqword) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqqqrest) [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
be [==[ ab[ pp cd ]ef [pq [pp gh] ij] kl ]==]
--]===]

-- Local Variables:
-- coding: raw-text-unix
-- ee-anchor-format: "«%s»"
-- End: