Warning: this is an htmlized version!
The original is here, and the conversion rules are here.
####### # # E-scripts on lpeg. # # Note 1: use the eev command (defined in eev.el) and the # ee alias (in my .zshrc) to execute parts of this file. # Executing this file as a whole makes no sense. # An introduction to eev can be found here: # # (find-eev-quick-intro) # http://angg.twu.net/eev-intros/find-eev-quick-intro.html # # Note 2: be VERY careful and make sure you understand what # you're doing. # # Note 3: If you use a shell other than zsh things like |& # and the for loops may not work. # # Note 4: I always run as root. # # Note 5: some parts are too old and don't work anymore. Some # never worked. # # Note 6: the definitions for the find-xxxfile commands are on my # .emacs. # # Note 7: if you see a strange command check my .zshrc -- it may # be defined there as a function or an alias. # # Note 8: the sections without dates are always older than the # sections with dates. # # This file is at <http://angg.twu.net/e/lpeg.e> # or at <http://angg.twu.net/e/lpeg.e.html>. # See also <http://angg.twu.net/emacs.html>, # <http://angg.twu.net/.emacs[.html]>, # <http://angg.twu.net/.zshrc[.html]>, # <http://angg.twu.net/escripts.html>, # and <http://angg.twu.net/>. 
# ####### # «.lpeg-debian» (to "lpeg-debian") # «.metatable» (to "metatable") # «.lpeg-source» (to "lpeg-source") # «.lpeg-debug-mode» (to "lpeg-debug-mode") # «.LpegDebugMode1» (to "LpegDebugMode1") # «.globals» (to "globals") # «.lpeg-quickref» (to "lpeg-quickref") # «.lpeg-Cs-and-C» (to "lpeg-Cs-and-C") # «.lpeg-matchtime» (to "lpeg-matchtime") # «.lpeg-matchtime-suchthat» (to "lpeg-matchtime-suchthat") # «.lpeg-named» (to "lpeg-named") # «.lpeg-unnamed» (to "lpeg-unnamed") # «.lpeg-unnamed-email» (to "lpeg-unnamed-email") # «.lpeg-back» (to "lpeg-back") # «.lpeg-Carg» (to "lpeg-Carg") # «.re-quickref» (to "re-quickref") # «.Re» (to "Re") # «.Re-balanced» (to "Re-balanced") # «.eoo-re» (to "eoo-re") # «.pegdebug0» (to "pegdebug0") # «.pegdebug» (to "pegdebug") # «.LpegDebug» (to "LpegDebug") # «.lulpeg» (to "lulpeg") # «.lpeglabel» (to "lpeglabel") # «.lpegrex» (to "lpegrex") # «.lpegrex-keywords» (to "lpegrex-keywords") # «.lpegrex-json» (to "lpegrex-json") # «.lpegrex-tag» (to "lpegrex-tag") # «.lpegrex-grammar» (to "lpegrex-grammar") # «.folding» (to "folding") # «.lpeg-table-captures» (to "lpeg-table-captures") # «.tati-2018» (to "tati-2018") # «.railroad-diagrams» (to "railroad-diagrams") # «.capts-question» (to "capts-question") # «.capts-question-2» (to "capts-question-2") # «.capts-subpat-analogy» (to "capts-subpat-analogy") # «.associativity» (to "associativity") # «.capts-cbcgct» (to "capts-cbcgct") # «.capts-lpeg.C» (to "capts-lpeg.C") # «.capts-current-table» (to "capts-current-table") # «.apr-capts-prep» (to "apr-capts-prep") # «.apr-capts» (to "apr-capts") # «.lpeg.Cobeying» (to "lpeg.Cobeying") # «.lpeg.Cfromthere» (to "lpeg.Cfromthere") # «.lpeg.Ccall» (to "lpeg.Ccall") # «.compare-pm» (to "compare-pm") # «.emacsconf2023» (to "emacsconf2023") # «.bapl» (to "bapl") # «.replace-printfs-1» (to "replace-printfs-1") # «.replace-printfs-2» (to "replace-printfs-2") # «.long-strings» (to "long-strings") # «.marpa» (to "marpa") # (find-es "lua5" 
"lpeg-quickref") # (find-es "lua5" "lpeg-re-finch") # (find-es "lua-intro" "lpeg-re-infix-1") # (find-es "lua-intro" "lpeg-re-1") # http://lua-users.org/wiki/LpegTutorial ##### # # lpeg-debian # 2021sep26 # ##### # «lpeg-debian» (to ".lpeg-debian") # (find-status "lua-lpeg") # (find-vldifile "lua-lpeg:amd64.list") # (find-udfile "lua-lpeg/") # (find-fline "/usr/share/lua/5.1/re.lua") # (find-fline "/usr/share/lua/5.2/re.lua") # (find-fline "/usr/share/lua/5.3/re.lua") -- (find-fline "/usr/share/lua/5.1/re.lua") -- (find-fline "/usr/share/lua/5.1/re.lua" ".__div") m = require"lpeg" mm = m mt = getmetatable(mm.P(0)) PPV(mt) PPV(mt.__index) # (find-status "lua-lpeg-dev") # (find-vldifile "lua-lpeg-dev:amd64.list") # (find-udfile "lua-lpeg-dev/") # (find-status "lua-lpeg-patterns") # (find-vldifile "lua-lpeg-patterns.list") # (find-udfile "lua-lpeg-patterns/") ##### # # metatable # 2023aug19 # ##### # «metatable» (to ".metatable") # (find-angg "LUA/lua50init.lua" "eoo") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) lpeg = require"lpeg" pat = lpeg.P(1) mt = getmetatable(pat) PPV(mt) PPV(mt.__index) PPV(lpeg) = (getmetatable(pat).__index == lpeg) = pat = pat:type() otype_metatables = {} otype = function (o) local mt = getmetatable(o) return mt and (otype_metatables[mt] or mt.type) or type(o) end otype_metatables[getmetatable(lpeg.P(1))] = "lpeg" = otype(pat) ##### # # lpeg-source # 2021sep26 # ##### # «lpeg-source» (to ".lpeg-source") # (to "lpeg-debug-mode") # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#download # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.2.tar.gz * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) rm -Rv ~/usrc/lpeg-1.0.2/ mkdir ~/usrc/lpeg-1.0.2/ tar -C ~/usrc/ -xvzf $S/http/www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.2.tar.gz cd ~/usrc/lpeg-1.0.2/ rm -fv *.o rm -fv *.so rm -fv om # (find-fline "~/emlua/Makefile") # (find-fline "~/emlua/Makefile" "-I$(LUA_INCLUDE_DIR)") make LUADIR=/usr/include/lua5.1 COPT="-O2 -DNDEBUG" |& tee om # 
(code-c-d "lpegsrc" "~/usrc/lpeg-1.0.2/") # (find-lpegsrcfile "") # (find-lpegsrcfile "lptree.c" "{\"ptree\", lp_printtree},") # (find-lpegsrcfile "lptree.c" "{\"pcode\", lp_printcode}") # (find-lpegsrctag "lp_printtree") # (find-lpegsrctag "lp_printcode") # (find-lpegsrctag "printktable") # (find-lpegsrctag "printtree") # (find-lpegsrcgrep "grep --color=auto -niH --null -e debug *") # (find-lpegsrcgrep "grep --color=auto -niH --null -e 'only implemented in debug mode' *") # (find-lpegsrcgrep "grep --color=auto -nH --null -e lp_divcapture *") # (find-lpegsrcgrep "grep --color=auto -nH --null -e Cb *.c") # (find-lpegsrcgrep "grep --color=auto -nH --null -e backref *.c") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) -- (find-blogme3file "htmlize-utf8.lua") require "lpeg" AlphaNumeric = lpeg.R("AZ", "az", "09") AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~") = AnchorChar:pcode() = AnchorChar:ptree() ##### # # lpeg-debug-mode # 2022jun30 # ##### # «lpeg-debug-mode» (to ".lpeg-debug-mode") # (to "LpegDebugMode1") # (to "replace-printfs-1") # (to "lpeg-source") # http://lua-users.org/lists/lua-l/2023-04/msg00030.html # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#download # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.1.0.tar.gz # # (code-c-d "lpegsrc" "~/usrc/lpeg-1.1.0/" :tags) # (find-lpegsrcfile "") # (find-lpegsrcfile "makefile") # (find-lpegsrcfile "makefile" "linux:") # (find-lpegsrcfile "makefile" "macosx:") # (find-lpegsrcfile "makefile" "macosx:" "-undefined dynamic_lookup") # (find-lpegsrcfile "makefile" "-I$(LUADIR)") # (find-lpegsrcgrep "grep --color=auto -nH --null -e LPEG_DEBUG *.h *.c") # (find-fline "~/emlua/Makefile") # (find-fline "~/emlua/Makefile" "-I$(LUA_INCLUDE_DIR)") * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) rm -Rv ~/usrc/lpeg-1.1.0/ mkdir ~/usrc/lpeg-1.1.0/ tar -C ~/usrc/ -xvzf $S/http/www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.1.0.tar.gz cd ~/usrc/lpeg-1.1.0/ ls *.c *.h | tee .files.ch etags $(cat .files.ch) 
rm -fv *.o rm -fv *.so rm -fv om make LUADIR=/usr/include/lua5.1 COPT="-O2 -DLPEG_DEBUG" |& tee om * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) -- (find-blogme3file "htmlize-utf8.lua") -- Path.prependtocpath "~/usrc/lpeg-1.1.0/?.so" -- = package.cpath package.cpath = "/home/edrx/usrc/lpeg-1.1.0/?.so;" .. package.cpath require "lpeg" AlphaNumeric = lpeg.R("AZ", "az", "09") AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~") = AnchorChar:pcode() = AnchorChar:ptree() AnchorChar:pcode() AnchorChar:ptree() 2023apr15: Better ways to use -DLPEG_DEBUG, :pcode(), and :ptree()? Hi list, I know that I can examine the internal representation of (compiled) lpeg patterns by compiling lpeg with -DLPEG_DEBUG and then running some methods like :pcode() and :ptree(), as in the code below... but these methods use printfs, and so they send their output to stdout. People, do you have any tricks - wrappers, maybe? - to display these debugging outputs in other ways? Here is some code that works, but that is clumsy: --snip--snip-- rm -Rv /tmp/lpeg-1.0.2* cd /tmp/ wget https://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.2.tar.gz tar -xvzf lpeg-1.0.2.tar.gz cd /tmp/lpeg-1.0.2/ make LUADIR=/usr/include/lua5.1 COPT="-O2 -DLPEG_DEBUG" cat > o.lua <<'%%%' require "lpeg" AlphaNumeric = lpeg.R("AZ", "az", "09") AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~") AnchorChar:pcode() AnchorChar:ptree() %%% lua5.1 o.lua | tee o.out --snip--snip-- Thanks in advance! 
Eduardo Ochs http://anggtwu.net/luaforth.html # (find-fline "/tmp/lpeg-1.0.2/o.out") ##### # # LpegDebugMode1 # 2024jan30 # ##### # «LpegDebugMode1» (to ".LpegDebugMode1") # (to "lpeg-debug-mode") # (find-angg "LUA/LpegDebugMode1.lua") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) Path.addLUAtopath() require "LpegDebugMode1" -- (find-angg "LUA/LpegDebugMode1.lua") = run " P(1):pcode() " = run " P(2):pcode() " = run " P('a'):pcode() " = run " C(1):pcode() " = run " C('a'):pcode() " = run " (C('a') * C('b')):pcode() " = run " ((C('a') * C('b'))^-4):ptree() " = run " ((C('a') * C('b'))^-4):pcode() " ##### # # globals # 2022mar17 # ##### # «globals» (to ".globals") # (to "compare-pm") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" = mapconcat(id, sorted(keys(lpeg)), " ") B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end ##### # # lpeg-quickref # 2021sep27 # ##### # «lpeg-quickref» (to ".lpeg-quickref") # From: (find-es "lua-intro" "lpeg-quickref") # http://www.inf.puc-rio.br/~roberto/lpeg.html # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html # file:///usr/share/doc/lua-lpeg-dev/lpeg.html # file:///usr/share/doc/lua-lpeg-dev/lpeg.html#grammar # file:///home/edrx/usrc/lpeg-1.0.2/lpeg.html lpeg.P(str) matches str lpeg.P(0) matches always lpeg.P(1) matches one char (".") lpeg.P(2) matches two chars ("..") lpeg.P(-1) matches if there isn't another char (end of string) lpeg.P(-2) matches if there aren't two more chars lpeg.P(p) return the pattern p unmodified lpeg.P(f) if f(subj, pos) returns a valid newpos then matches until it lpeg.P(gra) matches the first entry (gra[1]) in the grammar gra (a table) lpeg.V(2) matches the second entry (gra[2]) in the grammar above this lpeg.R("az", "AZ") matches any char in ranges - like "[A-Za-z]" lpeg.S("abd") matches any char in 
set - like "[abd]" #patt like patt, but without consuming input -patt like #patt, but negating the result p1 * p2 concatenation: "p1p2" p1 + p2 bounded prioritized choice - roughly like "p1|p2" p1 - p2 equivalent to -p2 * p1 1 - cset a char not in cset patt ^ 0 at least 0 occurrences of patt - like "patt*" patt ^ 1 at least 1 occurrence of patt - like "patt+" patt ^ 2 at least 2 occurrences of patt patt ^ -1 at most 1 occurrence of patt - like "patt?" patt ^ -2 at most 2 occurrences of patt lpeg.C(patt) the substring matched by patt (then patt's other captures) lpeg.Ca(patt) if patt captures a,f,g,h then h(g(f(a))) <--- WRONG! lpeg.Cc(v) matches the empty string, captures the value v lpeg.Cp() matches the empty string, captures the current position lpeg.Cs(patt) the substring matched by patt, after substitutions lpeg.Ct(patt) all captures made by patt as a table patt / f if patt captured a,b,c then f(a, b, c) patt / "_%2_" if patt captured "foo" and "bar" then "_bar_" patt / tbl if patt captured a and b then tbl[a] lpeg.Cg(patt, name) group capture lpeg.Cf(patt, f) a folding of the captures from patt lpeg.Cmt(patt, f) the returns of f applied to the captures of patt; match-time (find-es "lua5" "lpeg.Cmt") (find-es "lua5" "lpeg.Cmt-2") ##### # # lpeg.Cs and lpeg.C # 2023apr14 # ##### # «lpeg-Cs-and-C» (to ".lpeg-Cs-and-C") # lpeg.Cs substitutes submatches. # lpeg.C returns the region and after that the other captures. 
* (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end (P"a" * Cs"b" * P"c") : ptmatch("abcd") (P"a" * (Cs"b"/"BB") * P"c") : ptmatch("abcd") (P"a" * (Cs"b"/"BB") * P"c"):Cs() : ptmatch("abcd") (P"a" * (Cs"b"/"BB") * P"c"):C() : ptmatch("abcd") ##### # # lpeg-matchtime # 2023mar29 # ##### # «lpeg-matchtime» (to ".lpeg-matchtime") # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#matchtime # file:///usr/share/doc/lua-lpeg-dev/lpeg.html # file:///usr/share/doc/lua-lpeg-dev/lpeg.html#matchtime # (find-lpegmanual "#matchtime") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP("Outside:", pat:Ct():match(str)) end pc = Cs(P(1)) f1 = function (...) PP("Inside:", ...); return true end f2 = function (...) PP("Inside:", ...); return false end f3 = function (subj,pos,c) PP("subj,pos,c:", subj,pos,c); return pos end f4 = function (subj,pos,c) PP("subj,pos,c:", subj,pos,c); return pos+1 end f5 = function (subj,pos,c) PP("subj,pos,c:", subj,pos,c); return 6, "!", "!" end (pc*pc*pc:Cmt(f1)*pc):ptmatch("abcdef") (pc*pc*pc:Cmt(f2)*pc):ptmatch("abcdef") (pc*pc*pc:Cmt(f3)*pc):ptmatch("abcdef") (pc*pc*pc:Cmt(f4)*pc):ptmatch("abcdef") (pc*pc*pc:Cmt(f5)*pc):ptmatch("abcdef") # «lpeg-matchtime-suchthat» (to ".lpeg-matchtime-suchthat") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" -- (find-angg "LUA/Code.lua" "Code-tests") Code.__index.expr = function (c) return expr(c.code) end Code.__index.suchthat_fmt = [[ function (subj, pos, ...) local a,b = ... if a+b <= 3 then return true,... 
end end ]] Code.suchthat = function (src) local c = Code.from(src) return c:setcode(c.suchthat_fmt) end PP (Code.suchthat [[ a,b => a+b<=3 ]]) f = Code.suchthat [[ a,b => a+b<=3 ]]:expr() PP(f("s", "p", 2, 0, 4)) PP(f("s", "p", 2, 10, 4)) ##### # # Named group captures (in table captures) # 2023apr10 # ##### # «lpeg-named» (to ".lpeg-named") # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-g # file:///usr/share/doc/lua-lpeg-dev/lpeg.html#cap-g # (find-lpegmanual "#cap-c") # (find-lpegmanual "#cap-g") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end -- Why ptmatch: -- = (Cc"a") :match("xyz") = (Cc"a" * Cc"b":Cg"c") :match("xyz") PP((Cc"a" * Cc"b":Cg"c"):Ct() :match("xyz")) (Cc"a" * Cc"b":Cg"c") :ptmatch("xyz") -- A named capture can store a list of captures, -- and we can retrieve that list with Cb(name): -- (Cc("a","b") ) :ptmatch("xyz") (Cc("a","b"):Cg"c" ) :ptmatch("xyz") (Cc("a","b"):Cg"c" * Cb"c" ) :ptmatch("xyz") (Cc("a","b"):Cg"c" * Cb"c" * Cc(nil):Cg"c") :ptmatch("xyz") -- ...but it can't store named captures. 
-- Note that the "d"="c" disappears: -- ( Cc("a","b")*Cc"c":Cg"d" ) :ptmatch("xyz") ((Cc("a","b")*Cc"c":Cg"d"):Cg"e" ) :ptmatch("xyz") ((Cc("a","b")*Cc"c":Cg"d"):Cg"e" * Cb"e" ) :ptmatch("xyz") ((Cc("a","b")*Cc"c":Cg"d"):Cg"e" * Cb"e" * Cc(nil):Cg"e") :ptmatch("xyz") (Cc"a") :ptmatch("xyz") pa = Cs(P"a"^1):Cg("b") pc = Cs(P"c"^1):Cg("d") (pa * pc):ptmatch("aaacccc") ##### # # Unnamed group captures # 2023sep08 # ##### # «lpeg-unnamed» (to ".lpeg-unnamed") # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-g # file:///usr/share/doc/lua-lpeg-dev/lpeg.html#cap-g # http://lua-users.org/wiki/LpegTutorial # (find-lpegmanual "#cap-c") # (find-lpegmanual "#cap-g") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str or "")) end char = C(1) char2 = C(1)*C(1) char2g = Cg(C(1)*C(1)) f = function (...) 
return "("..table.concat({...}, ",")..")" end (char * char ^0):Cf(f):pm("abcde") --> "((((a,b),c),d),e)" (char * char2 ^0):Cf(f):pm("abcde") --> "((((a,b),c),d),e)" (char * char2g^0):Cf(f):pm("abcde") --> "((a,b,c),d,e)" (char2g * char2g^0):Cf(f):pm("abcdef") --> "((a,c,d),e,f)" (char * char ^0):Ct():pm("abcde") --> {1="a", 2="b", 3="c", 4="d", 5="e"} (char * char2 ^0):Ct():pm("abcde") --> {1="a", 2="b", 3="c", 4="d", 5="e"} (char * char2g^0):Ct():pm("abcde") --> {1="a", 2="b", 3="c", 4="d", 5="e"} (char2g * char2g^0):Ct():pm("abcdef") --> {1="a", 2="b", 3="c", 4="d", 5="e", 6="f"} ((char * char ^0) / 2):pm("abcde") --> "b" ((char * char2 ^0) / 2):pm("abcde") --> "b" ((char * char2g^0) / 2):pm("abcde") --> "b" ((char2g * char2g^0) / 2):pm("abcdef") --> "b" ##### # # lpeg-unnamed-email # 2023sep08 # ##### # «lpeg-unnamed-email» (to ".lpeg-unnamed-email") # A question about lpeg.Cg() and anonymous groups # http://lua-users.org/lists/lua-l/2023-09/msg00017.html Edrx 1 # http://lua-users.org/lists/lua-l/2023-09/msg00022.html Roberto 2 Hi list, I was trying to understand what exactly lpeg.Cg does when it creates anonymous group captures and I found something weird... well, at least on lpeg-1.0.2, but let me ask it anyway. This is the code; PP is my favorite pretty-printing function and the results of the tests are after the "-->"s: require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str or "")) end char = C(1) char2 = C(1)*C(1) char2g = Cg(C(1)*C(1)) f = function (...) 
return "("..table.concat({...}, ",")..")" end (char * char ^0):pm("abcde") --> "a" "b" "c" "d" "e" (char * char2 ^0):pm("abcde") --> "a" "b" "c" "d" "e" (char * char2g^0):pm("abcde") --> "a" "b" "c" "d" "e" (char2g * char2g^0):pm("abcdef") --> "a" "b" "c" "d" "e" "f" (char * char ^0):Cf(f):pm("abcde") --> "((((a,b),c),d),e)" (char * char2 ^0):Cf(f):pm("abcde") --> "((((a,b),c),d),e)" (char * char2g^0):Cf(f):pm("abcde") --> "((a,b,c),d,e)" (char2g * char2g^0):Cf(f):pm("abcdef") --> "((a,c,d),e,f)" (char * char ^0):Ct():pm("abcde") --> {1="a", 2="b", 3="c", 4="d", 5="e"} (char * char2 ^0):Ct():pm("abcde") --> {1="a", 2="b", 3="c", 4="d", 5="e"} (char * char2g^0):Ct():pm("abcde") --> {1="a", 2="b", 3="c", 4="d", 5="e"} (char2g * char2g^0):Ct():pm("abcdef") --> {1="a", 2="b", 3="c", 4="d", 5="e", 6="f"} ((char * char ^0) / 2):pm("abcde") --> "b" ((char * char2 ^0) / 2):pm("abcde") --> "b" ((char * char2g^0) / 2):pm("abcde") --> "b" ((char2g * char2g^0) / 2):pm("abcdef") --> "b" Some of the patterns at the left in the tests above produce "a" "b" "c" "d" "e" as five separate captures, some produce them as "a" plus two captures with two values each, grouped like this, "a" ("b" "c") ("d" "e") and some produce three captures with two values each, grouped like this: ("a" "b") ("c" "d") ("e" "f") and apparently only lpeg.Cf() distinguishes all these cases... I couldn't find anything else, besides lpeg.Cf(), that would _not_ coerce the three cases above into five or six separate captures. Are there other ways - besides lpeg.Cf() - to access the captures while they are still in this form, "a" ("b" "c") ("d" "e") or ("a" "b") ("c" "d") ("e" "f"), before the groups are unpacked? Thanks in advance... 
Eduardo Ochs http://anggtwu.net/luaforth.html ##### # # Back captures # 2023apr12 # ##### # «lpeg-back» (to ".lpeg-back") # (find-lpegmanual "#cap-b") # (find-lpegmanual "#cap-func") # http://lua-users.org/lists/lua-l/2023-04/msg00022.html Edrx 1 # http://lua-users.org/lists/lua-l/2023-04/msg00023.html Sean 2 # http://lua-users.org/lists/lua-l/2023-04/msg00024.html Andrew 3 # http://lua-users.org/lists/lua-l/2023-04/msg00025.html Edrx 4 * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end PP {a="b", 10, 20} f = function (a,b) return a..b,b..a end ((Cc"a" * Cc"b" ) ):ptmatch("ccc") ((Cc"a" * Cc"b" / f) ):ptmatch("ccc") ((Cc"a" * Cc"b" / f):Cg"c" ):ptmatch("ccc") ((Cc"a" * Cc"b" / f):Cg"c" * Cb"c" ):ptmatch("ccc") ((Cc"a" * Cc"b" / f):Cg"c" * (Cb"c" / 2) ):ptmatch("ccc") ((Cc"a" * Cc"b" / f):Cg"c" * (Cb"c" / 2):Cg"d"):ptmatch("ccc") Whoa!!! That's brilliant! I didn't know that a back capture could store several values! Many thanks! =) =) =) Cheers, Eduardo Ochs # https://mail.google.com/mail/u/0/#sent/KtbxLxGvZCkGlJlGFcrrLhVBBLMndzBllq Lpeg: a question about function captures and named captures Hi list, if anyone wants a short version of my question, here it is... when we use function captures in lpeg - i.e., this thing, http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-func the "f" in patt/f receives "numbered captures" and returns "numbered captures". Is there a way to write something similar that returns named captures? Now here is a clearer version of that question, with code. In the code below PP is my favorite pretty-printing function. 
It is defined in my init file, and it prints tables like this: > PP {a="b", 10, 20} {1=10, 2=20, "a"="b"} > There's even a way to configure it to make it print tables like this, > PP {a="b", 10, 20} {[1]=10, [2]=20, ["a"]="b"} > but I don't use it much. Anyway, here is the code: require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end PP {a="b", 10, 20} f = function (a,b) return a..b,b..a end (Cc"a" * Cc"b") :ptmatch("ccc") (Cc"a" * Cc"b" / f) :ptmatch("ccc") (Cc"a" * Cc"b" / f):Cg"c" :ptmatch("ccc") (Cc"a" * Cc"b" / f):Cg(3) :ptmatch("ccc") If I run it one line at a time in a REPL I get this: Lua 5.1.5 Copyright (C) 1994-2012 Lua.org, PUC-Rio > require "lpeg" > B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V > Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg > Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct > Carg,Cmt = lpeg.Carg,lpeg.Cmt > > lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end > > PP {a="b", 10, 20} {1=10, 2=20, "a"="b"} > > f = function (a,b) return a..b,b..a end > > (Cc"a" * Cc"b") :ptmatch("ccc") {1="a", 2="b"} > (Cc"a" * Cc"b" / f) :ptmatch("ccc") {1="ab", 2="ba"} > (Cc"a" * Cc"b" / f):Cg"c" :ptmatch("ccc") {"c"="ab"} > (Cc"a" * Cc"b" / f):Cg(3) :ptmatch("ccc") {3="ab"} > How do I modify the ".../f" above to make it put the first result of f into :Cg"c" and the second result into :Cg"d"? Thanks in advance! 
Eduardo Ochs http://anggtwu.net/luaforth.html * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.ptmatch = function (pat, str) PP(pat:Ct():match(str)) end PP {a="b", 10, 20} f = function (a,b) return a..b,b..a end (Cc"a" * Cc"b") :ptmatch("ccc") (Cc"a" * Cc"b" / f) :ptmatch("ccc") (Cc"a" * Cc"b" / f):Cg"c" :ptmatch("ccc") (Cc"a" * Cc"b" / f):Cg(3) :ptmatch("ccc") ##### # # lpeg.Carg(n) returns the n-th extra argument to :match # 2023nov15 # ##### # «lpeg-Carg» (to ".lpeg-Carg") # (find-es "lpeg" "globals") # (find-lpegmanual "#f-match") # (find-lpegmanual "#cap-arg") # (find-lpegmanual "#cap-arg" "Creates an argument capture") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat,subj,init,...) PP(pat:match(subj,init,...)) end (Carg(1)*Carg(2)) :pm("foo", nil, 20, 30) ##### # # re-quickref # 2021sep04 # ##### # «re-quickref» (to ".re-quickref") # file:///usr/share/doc/lua-lpeg-dev/lpeg.html # file:///usr/share/doc/lua-lpeg-dev/lpeg.html#cap-t lpeg.Ct # file:///usr/share/doc/lua-lpeg-dev/re.html # http://www.inf.puc-rio.br/~roberto/lpeg/ # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html # http://www.inf.puc-rio.br/~roberto/lpeg/re.html # (find-angg "LUA/Re.lua" "Re-tests") # (find-lpegremanual "") # (find-fline "/usr/share/lua/5.1/re.lua") Syntax Description ====== =========== ( p ) grouping 'string' literal string "string" literal string [class] character class . 
any character %name pattern defs[name] or a pre-defined pattern name non terminal <name> non terminal {} position capture { p } simple capture {: p :} anonymous group capture {:name: p :} named group capture {~ p ~} substitution capture {| p |} table capture =name back reference p ? optional match p * zero or more repetitions p + one or more repetitions p^num exactly n repetitions p^+num at least n repetitions p^-num at most n repetitions p -> 'string' string capture p -> "string" string capture p -> num numbered capture p -> name function/query/string capture equivalent to p / defs[name] p => name match-time capture equivalent to lpeg.Cmt(p, defs[name]) p ~> name fold capture equivalent to lpeg.Cf(p, defs[name]) & p and predicate ! p not predicate p1 p2 concatenation p1 / p2 ordered choice (name <- p)+ grammar ##### # # Re: a class for testing and learning lpeg.re # 2021sep27 # ##### # «Re» (to ".Re") # (find-angg "LUA/Re.lua") # (find-angg "LUA/Re.lua" "arit2-output") # Superseded by: # (to "lpegrex") # (find-angg "LUA/ELpeg1.lua") # (find-angg "LUA/Re2.lua") ##### # # Re-balanced # 2021oct23 # ##### # «Re-balanced» (to ".Re-balanced") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) re = require "re" gram = [=[ a <- [^()]+ / ("(" a* ")") b <- {~ "("->"" a* ")"->"" ~} ]=] gram = [=[ a <- [^{}]+ / ("{" a* "}") ac <- ("{" { a* } "}") act <- ac -> leantranslate actc <- {~ ""->"{" act ""->"}" ~} acc <- {~ ""->"{" ac ""->"}" ~} tac <- {~ "\tac" actc ac ~} lean <- {~ "\lean" actc ~} tex <- {~ ""->"$" a ""->"$" ~} all <- tac / lean / tex alle <- all !. ]=] defs = { leantranslate = function (str) return "."..str.."." 
end } rtg = Re { print = PP, grammar = gram, defs = defs } rtg:cc 'top <- all' [=[ foo ]=] rtg:cc 'top <- all' [=[ foo } ]=] rtg:cc 'top <- alle' [=[ foo } ]=] rtg:cc 'top <- alle' [=[ foo ]=] rtg:cc 'top <- alle' [=[\tac{a}{bc}]=] rtg:cc 'top <- alle' [=[\tac{a}{b} ]=] rtg:cc 'top <- alle' [=[\lean{abcd}]=] ##### # # A modified version of re.lua, in eoo style # 2023mar17 # ##### # «eoo-re» (to ".eoo-re") # Better version: (find-angg "LUA/Re2.lua") # (find-sh "locate /re.lua") /home/edrx/LATEX/dednat6/re.lua /home/edrx/dednat6/dednat6/re.lua /home/edrx/usrc/LuLPeg/src/re.lua /home/edrx/usrc/lpeg-1.0.2/re.lua # (find-fline "/usr/share/lua/5.1/re.lua") # (find-fline "/usr/share/lua/5.2/re.lua") # (find-fline "/usr/share/lua/5.3/re.lua") ##### # # The post that inspired PegDebug # 2023mar30 # ##### # «pegdebug0» (to ".pegdebug0") # http://lua-users.org/lists/lua-l/2009-10/msg00774.html for k, p in pairs(grammar) do local enter = lpeg.Cmt(lpeg.P(true), function(s, p, ...) print("ENTER", k) return p end); local leave = lpeg.Cmt(lpeg.P(true), function(s, p, ...) print("LEAVE", k) return p end) * (lpeg.P("k") - lpeg.P "k"); grammar[k] = lpeg.Cmt(enter * p + leave, function(s, p, ...) print("---", k, "---") print(p, s:sub(1, p-1)) return p end) end for k, p in pairs(grammar) do local fenter = function(s, p, ...) print("ENTER", k); return p end local fleave = function(s, p, ...) print("LEAVE", k); return p end local fclose = function(s, p, ...) 
print("---", k, "---") print(p, s:sub(1, p-1)) return p end local ptrue = lpeg.P(true) local phack = lpeg.P("k") - lpeg.P("k") local penter = ptrue:Cmt(fenter) local pleave = ptrue:Cmt(fleave) * phack grammar[k] = (penter*p+pleave):Cmt(fclose) end ##### # # pegdebug # 2014sep23 # ##### # «pegdebug» (to ".pegdebug") # https://github.com/pkulchenko/PegDebug # http://lua-users.org/lists/lua-l/2009-10/msg00774.html # http://lua-users.org/lists/lua-l/2014-09/msg00119.html # (find-git-links "https://github.com/pkulchenko/PegDebug" "pegdebug") # (code-c-d "pegdebug" "~/usrc/PegDebug/") # (find-pegdebugfile "") # (find-pegdebugfile "README.md") # (find-pegdebugfile "src/pegdebug.lua") # (find-pegdebugfile "src/pegdebug.lua" "grammar[k] = enter * p * eq + leave") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) Path.prepend("path", "~/usrc/PegDebug/src/?.lua") lpeg = require('lpeg') pegdebug = require('pegdebug') grammar = pegdebug.trace({ "List", NonNumber = lpeg.R('az'), Number = lpeg.R"09"^1 / tonumber, List = lpeg.V("NonNumber") + lpeg.V("Number") * ("," * lpeg.V("Number"))^0; }) print(lpeg.match(lpeg.P(grammar), "10,30,43")) PPPV(grammar) PPPV {"list"; a=11; b=12} ##### # # LpegDebug: my rewrite of pegdebug into a more REPL-friendly format # 2023apr01 # ##### # «LpegDebug» (to ".LpegDebug") # (find-angg "LUA/Gram2.lua" "LpegDebug") ##### # # lulpeg # 2022mar06 # ##### # «lulpeg» (to ".lulpeg") # https://github.com/pygy/LuLPeg # (find-git-links "https://github.com/pygy/LuLPeg" "lulpeg") # (code-c-d "lulpeg" "~/usrc/LuLPeg/") # (find-lulpegfile "") # (find-lulpegfile "README.md") # (find-lulpegfile "lulpeg.lua") # (find-lulpegfile "src/") # (find-lulpegfile "src/datastructures.lua") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) -- Path.prepend("path", "~/usrc/LuLPeg/src/?.lua") Path.prepend ("path", "~/usrc/LuLPeg/?.lua") lulpeg = require "lulpeg" re = lulpeg.re pattern = lulpeg.C(lulpeg.P"A" + "B") ^ 0 print(pattern:match"ABA") --> "A" "B" "A" # (find-fline 
"~/LOGS/2022mar05.fennel") ##### # # lpeglabel # 2022mar06 # ##### # «lpeglabel» (to ".lpeglabel") # https://github.com/sqmedeiros/lpeglabel # (find-git-links "https://github.com/sqmedeiros/lpeglabel" "lpeglabel") # (code-pdf-page "lpeglabel" "~/tmp/lpeglabel.pdf") # (code-pdf-text "lpeglabel" "~/tmp/lpeglabel.pdf") # (code-c-d "lpeglabel" "~/usrc/lpeglabel/") # (find-lpeglabelpage) # (find-lpeglabeltext) # (find-lpeglabelpage 2 "lpeglabel.T (l)") # (find-lpeglabeltext 2 "lpeglabel.T (l)") # (find-lpeglabelfile "") # (find-lpeglabelfile "README.md") # (find-lpeglabelfile "makefile") # (find-sh "locate lua.h | grep '.h$'") # (find-fline "/usr/include/lua5.1/" "lua.h") # (find-fline "/usr/include/lua5.2/" "lua.h") # (find-fline "/usr/include/lua5.3/" "lua.h") * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) cd ~/usrc/lpeglabel/ git clean -dfx git reset --hard make LUADIR=/usr/include/lua5.2 # make LUADIR=/usr/include/lua5.3 * (eepitch-lua52) * (eepitch-kill) * (eepitch-lua52) loadlpegrex() m = require'lpeglabel' function matchPrint(p, s) local r, lab, errpos = p:match(s) print("r: ", r, "lab: ", lab, "errpos: ", errpos) end p = m.P"a"^0 * m.P"b" + m.P"c" matchPrint(p, "abc") --> r: 3 lab: nil errpos: nil matchPrint(p, "c") --> r: 2 lab: nil errpos: nil matchPrint(p, "aac") --> r: nil lab: fail errpos: 3 matchPrint(p, "xxc") --> r: nil lab: fail errpos: 1 ##### # # lpegrex # 2022mar06 # ##### # «lpegrex» (to ".lpegrex") # (find-angg ".emacs" "lpegrex") # (find-es "lua5" "nelua") # (find-angg "LUA/lua50init.lua" "loadlpegrex") # (find-angg "LUA/LpegRex1.lua") # (find-angg "LUA/LpegRex2.lua") # (find-angg "LUA/Lisp3.lua") # (find-angggrep "grep --color=auto -nH --null -e lpegrex LUA/*.lua") # https://github.com/edubart/lpegrex # https://github.com/edubart/lpegrex/blob/main/parsers/lua.lua # (find-git-links "https://github.com/edubart/lpegrex" "lpegrex") # (find-fline "~/LOGS/2022mar05.fennel" "lpegrex") # (code-pdf-page "lpegrex" "~/tmp/lpegrex.pdf") # 
(code-pdf-text "lpegrex" "~/tmp/lpegrex.pdf") # (code-c-d "lpegrex" "~/usrc/lpegrex/") # (find-lpegrexpage) # (find-lpegrextext) # (find-lpegrexpage 2 "AST Nodes" "NodeName <== patt") # (find-lpegrextext 2 "AST Nodes" "NodeName <== patt") # (find-lpegrexpage 7 "Expected_name") # (find-lpegrextext 7 "Expected_name") # (find-lpegrexfile "") # (find-lpegrexfile "lpegrex.lua") # (find-lpegrexfile "lpegrex.lua" "function expect(") # (find-lpegrexfile "parsers/") # (find-lpegrexfile "parsers/csv.lua") # (find-lpegrexfile "parsers/lua.lua") # (find-lpegrexfile "parsers/lua.lua" "expror") # (find-lpegrexfile "tests/") # (find-lpegrexfile "tests/csv-test.lua") # (find-lpegrexfile "examples/lua-ast.lua") * (eepitch-lua52) * (eepitch-kill) * (eepitch-lua52) Path.prepend("path", "~/usrc/lpeglabel/?.lua") Path.prepend("cpath", "~/usrc/lpeglabel/?.so") Path.prepend("path", "~/usrc/lpegrex/?.lua") require "lpegrex" require "tests/csv-test" arg = {"/home/edrx/usrc/lpegrex/examples/lua-ast.lua"} require "examples/lua-ast" * (eepitch-lua52) * (eepitch-kill) * (eepitch-lua52) Path.prepend("path", "~/usrc/lpeglabel/?.lua") Path.prepend("cpath", "~/usrc/lpeglabel/?.so") Path.prepend("path", "~/usrc/lpegrex/?.lua") lpegrex = require "lpegrex" Grammar = [==[ top <-- plusexpr plusexpr <-| timesexpr ({"+"} timesexpr)* timesexpr <-| norp ({"*"} norp)* norp <-| num / ("(" plusexpr ")") num <-- {%d} ]==] patt = lpegrex.compile(Grammar) bigstr = "(1+2)*3" ast, errlabel, errpos = patt:match(bigstr) PPPV(ast, errlabel, errpos) ##### # # lpegrex-keywords # 2023mar30 # ##### # «lpegrex-keywords» (to ".lpegrex-keywords") # (find-lpegrexpage 2 "matching unique keywords" "`for`") # (find-lpegrextext 2 "matching unique keywords" "`for`") # (find-lpegrexpage 3 "Match keyword") # (find-lpegrextext 3 "Match keyword") # (find-lpegrexpage 6 "Matching keywords and tokens") # (find-lpegrextext 6 "Matching keywords and tokens") # (find-lpegrexpage 6 "Capturing identifier names") # (find-lpegrextext 6 
"Capturing identifier names") # (find-lpegrexfile "") # (find-lpegrexfile "README.md") # https://github.com/edubart/lpegrex # https://github.com/edubart/lpegrex#matching-keywords-and-tokens ##### # # lpegrex-json # 2023jan05 # ##### # «lpegrex-json» (to ".lpegrex-json") # (find-lpegrexfile "README.md" "parsing JSON into an AST") # (find-lpegrexfile "README.md" "Quick References") * (eepitch-lua52) * (eepitch-kill) * (eepitch-lua52) -- local lpegrex = require 'lpegrex' Path.prepend("path", "~/usrc/lpeglabel/?.lua") Path.prepend("cpath", "~/usrc/lpeglabel/?.so") Path.prepend("path", "~/usrc/lpegrex/?.lua") lpegrex = require 'lpegrex' patt = lpegrex.compile([[ Json <-- SKIP (Object / Array) (!.)^UnexpectedSyntax Object <== `{` (Member (`,` @Member)*)? @`}` Array <== `[` (Value (`,` @Value)*)? @`]` Member <== String `:` @Value Value <-- String / Number / Object / Array / Boolean / Null String <-- '"' {~ ('\' -> '' @ESCAPE / !'"' .)* ~} @'"' SKIP Number <-- {[+-]? (%d+ '.'? %d+? / '.' %d+) ([eE] [+-]? %d+)?} -> tonumber SKIP Boolean <-- `false` -> tofalse / `true` -> totrue Null <-- `null` -> tonil ESCAPE <-- [\/"] / ('b' $8 / 't' $9 / 'n' $10 / 'f' $12 / 'r' $13 / 'u' {%x^4} $16) -> tochar SKIP <-- %s* NAME_SUFFIX <-- [_%w]+ ]]) source = '[{"string":"some\\ntext", "boolean":true, "number":-1.5e+2, "null":null}]' source = '[{"string":"sometext", "boolean":"true", "number":-1.5e+2, "null":null}]' ast, errlabel, errpos = patt:match(source) if not ast then local lineno, colno, line = lpegrex.calcline(source, errpos) local colhelp = string.rep(' ', colno-1)..'^' error('syntax error: '..lineno..':'..colno..': '..errlabel.. 
'\n'..line..'\n'..colhelp) end -- `ast` should be a table with the JSON print('JSON parsed with success!') PP(ast) = SynTree.from(ast) -- {1={1={1="string", 2="some text", "endpos"=24, "pos"=3, "tag"="Member"}, -- 2={1="boolean", 2=<true>, "endpos"=40, "pos"=26, "tag"="Member"}, -- 3={1="number", 2=-150, "endpos"=58, "pos"=42, "tag"="Member"}, -- 4={1="null", "endpos"=71, "pos"=60, "tag"="Member"}, -- "endpos"=72, "pos"=2, "tag"="Object"}, -- "endpos"=73, "pos"=1, "tag"="Array"} ##### # # lpegrex: the "tag" field in __options # 2023jan19 # ##### # «lpegrex-tag» (to ".lpegrex-tag") # (find-lpegrexfile "README.md" "## Quick References") # (find-lpegrexfile "README.md" "## Quick References" "Capture tagged node") # (find-lpegrexfile "lpegrex.lua") # (find-lpegrexfile "lpegrex.lua" "TaggedNodeDefinition =") # (find-lpegrexfile "lpegrex.lua" "TaggedNodeDefinition =" "makenode") # (find-lpegrexfile "lpegrex.lua" "local function makenode(") # (find-lpegrexgrep "grep --color=auto -nH --null -e options * */*") # (find-lpegrexfile "tests/lpegrex-test.lua" "__options={tag=") * (eepitch-lua52) * (eepitch-kill) * (eepitch-lua52) Path.prepend("path", "~/usrc/lpeglabel/?.lua") Path.prepend("cpath", "~/usrc/lpeglabel/?.so") Path.prepend("path", "~/usrc/lpegrex/?.lua") lpegrex = require 'lpegrex' c = lpegrex.compile([[ chunk <-- num num:Number <== {%d+} %s* ]], {__options={ tag=function(name, node) node.mytag = name return node end} } ) PPV(c:match("1234")) ##### # # lpegrex-grammar # 2023jan21 # ##### # «lpegrex-grammar» (to ".lpegrex-grammar") # https://github.com/edubart/lpegrex/issues/4 ##### # # folding # 2022mar08 # ##### # «folding» (to ".folding") # http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html lpeg.Cf(patt, func) a folding of the captures from patt Creates a fold capture. If patt produces a list of captures C1 C2 ... 
Cn, this capture will produce the value func(...func(func(C1, C2), C3)..., Cn), that is, it will fold (or accumulate, or reduce) the captures from patt using function func. ##### # # lpeg-table-captures # 2022mar21 # ##### # «lpeg-table-captures» (to ".lpeg-table-captures") # file:///usr/share/doc/lua-lpeg-dev/lpeg.html * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" = mapconcat(id, sorted(keys(lpeg)), " ") B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt = Cc("AA") :match("abcd") PP((Cc("AA") ) :match("abcd")) PP((Cc("AA") * Cc("BB") ) :match("abcd")) PP((Cc("AA") * Cc("BB") ):Ct():match("abcd")) PP((Cc("AA") * Cc("BB"):Cg("CC")):Ct():match("abcd")) PP((Cc("AA") * Cc( 22 ):Cg( 33 )):Ct():match("abcd")) extrafields = P("") extrafields = Cc(22):Cg(33) * Cc(44):Cg(55) PP((Cc("AA") * extrafields):Ct():match("abcd")) extrafields = P("") PP((Cc("AA") * extrafields):Ct():match("abcd")) ##### # # Processing my chat with Tatiana (2018) # 2022mar22 # ##### # «tati-2018» (to ".tati-2018") # (find-fline "~/TH/2016-t.blogme") # (find-fline "~/TH/2016-t.blogme" 331) # (find-fline "~/TH/2016-t.blogme" 428) # (find-TH "2018-t") * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" require "re" bigstr = ee_readfile "~/TH/2016-t.blogme" bigstr2 = table.concat(splitlines(bigstr), "\n", 331, 428) = bigstr2 tdn = re.compile [=[ top <- {| "[" time ", " date "] " name ": " |} time <- { [0-9:]+ } date <- { [0-9/]+ } name <- { "Eduardo Ochs" / "Tatiana Pequeno" } ]=] PP(tdn:match(bigstr2)) patfalas = re.compile [=[ top <- {| tdnt * |} tdnt <- {| tdn text |} tdn <- ( "[" time ", " date "] " name ": " ) time <- { [0-9:]+ } date <- { [0-9/]+ } name <- { "Eduardo Ochs" / "Tatiana Pequeno" } text <- { ((!tdn) .)* } ]=] falas = patfalas:match(bigstr2) PPV(falas) loadblogme3() f = function (fala) local who = fala[3]:sub(1,1)..": " local who2 = 
BF(who) local text = fala[4]:gsub("\n$", "") local text2 = text:gsub("\n", "\n"..BR()) return who2..text2 end = f(falas[3]) = mapconcat(f, falas, "\n"..BR()) ##### # # railroad-diagrams # 2023apr01 # ##### # «railroad-diagrams» (to ".railroad-diagrams") # https://mail.google.com/mail/u/0/#search/lpegrex/FMfcgzGpGTDrQjqpxgTHJjHDpbdgcLSD # https://www.bottlecaps.de/rr/ui # https://www.bottlecaps.de/convert/ # https://www.bottlecaps.de/rex/ ##### # # My questions on how to draw capture diagrams # 2023aug13 # ##### # «capts-question» (to ".capts-question") # (code-etex-tla "lc" "2023lpegcaptures") # (find-LATEX "2023lpegcaptures.tex") # http://lua-users.org/lists/lua-l/2023-08/msg00041.html Hi list, I'm trying to learn how group captures in lpeg "really" work, and it seems that they store data in a certain data structure - I will refer to it as "Ltables", but this is obviously an improvised name - that is between tables and lists of values... In the examples below I will use my favorite pretty-printing function, "PP", whose output is like this: PP(2, "3", {4, 5, a=6, [{7,8}]=9, [{7,8}]=10}) --> 2 "3" {1=4, 2=5, "a"=6, {1=7, 2=8}=9, {1=7, 2=8}=10} If we run this in a REPL, require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str or "")) end (Cc("a","b") * Cc("c","d")) :pm() (Cc("a","b") * Cc("c","d"):Cg"e") :pm() (Cc("a","b") * Cc("c","d"):Cg"e") :Ct():pm() (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :Ct():pm() (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e"):Ct():pm() (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e") :pm() we get this output: > (Cc("a","b") * Cc("c","d")) :pm() "a" "b" "c" "d" > (Cc("a","b") * Cc("c","d"):Cg"e") :pm() "a" "b" > (Cc("a","b") * Cc("c","d"):Cg"e") :Ct():pm() {1="a", 2="b", "e"="c"} > (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :Ct():pm() {1="a", 2="b", 3="f", "e"="c"} > 
(Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e"):Ct():pm() {1="a", 2="b", 3="f", 4="c", 5="d", "e"="c"} > (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e") :pm() "a" "b" "f" "c" "d" > If we define a table like this, {20, 30, a=40, a=50, 60} then the second assignment to "a" will override the first one; lpeg.Cg does something similar to that... Let me use this notation for Ltables. This lpeg pattern Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" matches the empty string, and returns this Ltable: {."a" "b" e={."c" "d".} "f".} Ltables can be coerced both to tables, by lpeg.Ct, and to lists of values. The output of (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :pm() (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :Ct():pm() (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e"):Ct():pm() (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e") :pm() is: > (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :pm() "a" "b" "f" > (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :Ct():pm() {1="a", 2="b", 3="f", "e"="c"} > (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e"):Ct():pm() {1="a", 2="b", 3="f", 4="c", 5="d", "e"="c"} > (Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e") :pm() "a" "b" "f" "c" "d" > In my (current) way of thinking this lpeg pattern Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e" returns this Ltable, {."a" "b" ["e"]={."c" "d".} "f" ["e"]} where the ["e"]={."c" "d".} stores an Ltable in the entry with the key "e" in the current Ltable, and the ["e"] at the end reads the value stored in the key "e", coerces it to a list of values, and adds these values to the current Ltable... Questions: ========== What is the official name of this data structure? Is there a place in which it is described in more details than in the Lpeg manual? Where? The Lpeg manual only talks about "most recent group capture", and it says this: "Most recent means the last complete outermost group capture with the given name. A Complete capture means that the entire pattern corresponding to the capture has matched. 
An Outermost capture means that the capture is not inside another complete capture." here: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-g http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-b I hope I'm not the only person who finds that too terse... and also, is there anyone here - besides me - who have tried to draw diagrams to understand how the operations on captures work? My current diagrams are here: http://anggtwu.net/LATEX/2023lpegcaptures.pdf Thanks in advance!... Eduardo Ochs http://anggtwu.net/luaforth.html ##### # # capts-question-2 # 2023aug15 # ##### # «capts-question-2» (to ".capts-question-2") # http://lua-users.org/lists/lua-l/2023-08/msg00048.html Roberto: there is no such # http://lua-users.org/lists/lua-l/2023-08/msg00053.html Edrx: I would draw this ... as Hi Roberto, I would draw this > require "lpeg" > f = function (a,b) return string.char(string.byte(a) + string.byte(b)) end > = ((lpeg.C(1) * lpeg.C(1)) / f):match("#$") G > as this - switch to a monospaced font if needed: # $ \-/ \-/ "#" "$" \-----/ "G" and here is a low-tech animation of how the submatches happen and what they return: # $ # $ # $ # $ \-/ \-/ \-/ \-/ \-/ "#" "#" "$" "#" "$" \-----/ "G" Here is a case that I find very strange... no, actually a case that is simple to understand followed by one that I find very strange. Compare: > require "lpeg" > = ((lpeg.C(1):Cg"c" * lpeg.C(1):Cg"d") * lpeg.Cb"c"):match"ab" a > = (lpeg.C(1):Cg"c" * (lpeg.C(1):Cg"d" * lpeg.Cb"c")):match"ab" a > = (lpeg.C(1):Cg"c" * (lpeg.C(1):Cg"d" * lpeg.Cb"x")):match"ab" I draw them as this: a b a b \---/ \---/ \---/ \---/ \---/ \---/ "a" "b" ["c"] "a" "b" ["c"] \---/ \---/ \---/ \---/ c="a" d="b" c="a" d="b" \---------/ \---------/ c="a" d="b" d="b" ["c"] \---------------/ \---------/ c="a" d="b" ["c"] not found? 
\---------------/ c="a" d="b" "a" Each ["c"] means "fetch the value associated to the key "c" and append it to the current Ltable", and the lower underbrace in the first diagram shows the moment in which that fetch happens and the ["c"] is replaced by "a". The second diagram shows what I _expected_ that would happen in the second match; I expected that in this subpattern (lpeg.C(1):Cg"d" * lpeg.Cb"c") the lpeg.Cb"c" would look only at the "Cg"s that happen inside that subpattern, and I would get an error like this one... stdin:1: back reference 'c' not found ...but I was wrong. I _guess_ that what is happening in the second (...):match"ab" is this: a b \---/ \---/ \---/ "a" "b" ["c"] \---/ \---/ c="a" d="b" \---------/ d="b" ["c"] \---------------/ c="a" d="b" ["c"] \---------------/ c="a" d="b" "a" and the expansion ["c"] -> "a" is delayed as most as possible... Anyway, I hope that these diagrams would make enough sense to the people who can help me fix them, and who can help me fix my mental model... Thanks in advance =S, Eduardo Ochs http://anggtwu.net/luaforth.html (...) ##### # # Defining "subpatterns" by analogy # 2023aug20 # ##### # «capts-subpat-analogy» (to ".capts-subpat-analogy") # «associativity» (to ".associativity") # http://lua-users.org/lists/lua-l/2023-08/msg00055.html Edrx: analogy Hi Sean, > I don't see these as different, due to the fact that > > (a * b) * c = a * (b * c) try this: foo = function (str) return setmetatable({str}, foo_mt) end foo_mt = { __tostring = function (a) return a[1] end, __mul = function (a,b) return foo(format("(%s%s)", a[1], b[1])) end, } = (foo"a" * foo"b") * foo"c" --> "((ab)c)" = foo"a" * (foo"b" * foo"c") --> "(a(bc))" The associativity of the multiplication in Lua is not a "fact". > These are expressions ... what's a "subpattern"? Again, > > (a * b) * c = a * (b * c) Is that a honest question? Here's a way to define subpatterns by analogy. 
We all know how to build bigger expressions from basic expressions, and after learning how to do that we all learned what are subexpressions, but most of us never saw a formal definition of "subexpression". Similarly, we "all" know what are basic (lpeg) patterns, and we "all" know how to build bigger (lpeg) patterns from basic (lpeg) patterns. Most of us know what are sub-(lpeg)-patterns by analogy with subexpressions... > For me, they cloud the issue. To me, a capture captures the text of > a pattern, and possibly transforms it. For example: Makes sense, but most of the lpeg patterns that _I_ wrote in the last year or so transform the original text in ways that are very complex, and most of the captures that appear in the intermediate steps of _my_ transformations are tables, ASTs, or numbers... and that's why _right now_ _I_'m not interested in the basic cases in which lpeg captures simply capture text and transform text - my priority now is to understand some obscure features and corner cases... Cheers, Eduardo (...) # http://lua-users.org/lists/lua-l/2023-08/msg00058.html Edrx: Mainly # http://lua-users.org/lists/lua-l/2023-08/msg00059.html Roberto: I really Hi Roberto, Mainly "last complete outermost group capture" and "immediate evaluation of all its nested captures", in: lpeg.Cb (name) Creates a back capture. This pattern matches the empty string and produces the values produced by the most recent group capture named name (where name can be any Lua value). Most recent means the last complete outermost group capture with the given name. A Complete capture means that the entire pattern corresponding to the capture has matched. An Outermost capture means that the capture is not inside another complete capture. In the same way that LPeg does not specify when it evaluates captures, it does not specify whether it reuses values previously produced by the group or re-evaluates them. lpeg.Cmt(patt, function) Creates a match-time capture. 
Unlike all other captures, this one is evaluated immediately when a match occurs (even if it is part of a larger pattern that fails later). It forces the immediate evaluation of all its nested captures and then calls function. Cheers =/, E. ##### # # A quirk with (Cb"c":Cg"c"):pm() and (Cb"c":Cg"c"):Ct():pm() # 2023aug20 # ##### # «capts-cbcgct» (to ".capts-cbcgct") # http://lua-users.org/lists/lua-l/2023-08/msg00077.html Edrx # http://lua-users.org/lists/lua-l/2023-08/msg00091.html Roberto Hi Roberto, I gave a mini-presentation/mini-workshop about my diagrams to two friends yesterday and they liked it very much - so now there are at least two people besides me who think that they are useful, and that's more than enough... Anyway, in the code below the tests C1..C3 and D1..D4 are based in your examples. In the block D1..D4 only D4 yields an error, and that makes sense to me - but in the block C1..C3 the tests C1 and C3 yield errors but C2 does not. Is this behavior of C1..C3 something that is explained in the documentation? How? I admit that I'm still struggling with some terms, sorry... 
The code: require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str or "")) end pat1234 = Cc(10, 20) * Cc(30, 40):Cg"a" pat5678 = Cc(50, 60) * Cc(70, 80):Cg"a" (pat1234 ) :pm() -- A1 (pat1234 ):Ct():pm() -- A2 (pat1234 * pat5678 ):Ct():pm() -- A3 (pat1234 * Cb"a" * pat5678 * Cb"a"):Ct():pm() -- A4 (pat1234 * (Cb"a" * pat5678) * Cb"a"):Ct():pm() -- A5 (pat1234 ):Ct():pm() -- B1 (pat1234:Cg"b" ):Ct():pm() -- B2 (pat1234:Cg"b" * Cb"b"):Ct():pm() -- B3 (Cb"c" ) :pm() -- C1 (Cb"c":Cg"c") :pm() -- C2 (Cb"c":Cg"c"):Ct():pm() -- C3 Cc"a" :pm() -- D1 Cc"a":Cg"c" :pm() -- D2 ((Cc"a":Cg"c") / "x") :pm() -- D3 (((Cc"a":Cg"c") / "x") * Cb"c") :pm() -- D4 Its output in a REPL: > require "lpeg" > B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V > Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg > Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct > Carg,Cmt = lpeg.Carg,lpeg.Cmt > lpeg.pm = function (pat, str) PP(pat:match(str or "")) end > > pat1234 = Cc(10, 20) * Cc(30, 40):Cg"a" > pat5678 = Cc(50, 60) * Cc(70, 80):Cg"a" > > (pat1234 ) :pm() -- A1 10 20 > (pat1234 ):Ct():pm() -- A2 {1=10, 2=20, "a"=30} > (pat1234 * pat5678 ):Ct():pm() -- A3 {1=10, 2=20, 3=50, 4=60, "a"=70} > (pat1234 * Cb"a" * pat5678 * Cb"a"):Ct():pm() -- A4 {1=10, 2=20, 3=30, 4=40, 5=50, 6=60, 7=70, 8=80, "a"=70} > (pat1234 * (Cb"a" * pat5678) * Cb"a"):Ct():pm() -- A5 {1=10, 2=20, 3=30, 4=40, 5=50, 6=60, 7=70, 8=80, "a"=70} > > (pat1234 ):Ct():pm() -- B1 {1=10, 2=20, "a"=30} > (pat1234:Cg"b" ):Ct():pm() -- B2 {"b"=10} > (pat1234:Cg"b" * Cb"b"):Ct():pm() -- B3 {1=10, 2=20, "b"=10} > > (Cb"c" ) :pm() -- C1 stdin:1: back reference 'c' not found stack traceback: [C]: in function 'match' stdin:1: in function 'pm' stdin:1: in main chunk [C]: ? 
> (Cb"c":Cg"c") :pm() -- C2 1 > (Cb"c":Cg"c"):Ct():pm() -- C3 stdin:1: back reference 'c' not found stack traceback: [C]: in function 'match' stdin:1: in function 'pm' stdin:1: in main chunk [C]: ? > > Cc"a" :pm() -- D1 "a" > Cc"a":Cg"c" :pm() -- D2 1 > ((Cc"a":Cg"c") / "x") :pm() -- D3 "x" > (((Cc"a":Cg"c") / "x") * Cb"c") :pm() -- D4 stdin:1: back reference 'c' not found stack traceback: [C]: in function 'match' stdin:1: in function 'pm' stdin:1: in main chunk [C]: ? > Thanks in advance =/, Eduardo Ochs ##### # # "lpeg.C() drops named captures. Is there a workaround for that?" # 2023aug28 # ##### # «capts-lpeg.C» (to ".capts-lpeg.C") # https://mail.google.com/mail/u/0/#sent/QgrcJHrttkhkhHRmFrcMmDLZQmgDTlFdFRl # http://lua-users.org/lists/lua-l/2023-08/msg00129.html Edrx # http://lua-users.org/lists/lua-l/2023-08/msg00136.html Roberto # http://lua-users.org/lists/lua-l/2023-08/msg00137.html Roberto # http://lua-users.org/lists/lua-l/2023-09/msg00019.html Edrx # http://lua-users.org/lists/lua-l/2023-09/msg00023.html Roberto Hi list, this is related to the discussion in this thread: http://lua-users.org/lists/lua-l/2023-08/threads.html#00041 I was trying to write a lpeg pattern that would parse strings like "(%i3)" and "(%o42)" and return tables like these ones: {a="i", b="3", c="(%i3)"} {a="o", b="42", c="(%o42)"} The long story is that I'm rewriting my functions that parse the logs of Maxima sessions. These logs look this, but that's not relevant now: https://home.csulb.edu/~woollett/mbe1intro.pdf#page=9 Anyway, here's what I tried to do - the "PP" in lpeg.pm is my favorite pretty-printing function, defined in my init file... 
require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str or "")) end pa = S"io":Cg"a" pb = (R"09"^1):Cg"b" pabc = (P"(%" * pa * pb * P")"):C():Cg"c":Ct() pfake = (P"(%i42)" * Cc"i":Cg"a" * Cc"42":Cg"b" * Cc"(%i42)":Cg"c"):Ct() pabc :pm"(%i42)" --> {"c"="(%i42)"} pfake:pm"(%i42)" --> {"a"="i", "b"="42", "c"="(%i42)"} At first I thought that pabc:pm"(%i42)" would print a table with fields "a", "b", and "c", like the table printed by the "fake" pattern pfake in pfake:pm"(%i42)"... but that's not what happens. The explanation is here: lpeg.C (patt) Creates a simple capture, which captures the substring of the subject that matches patt. The captured value is a string. If patt has other captures, their values are returned after this one. Source: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html#cap-c The "their _values_ are returned after this one" means that only the captures that are "values" are returned; the "named" captures produced by Cg are dropped. This can also be seen in this other test: pe = Cc(20) * Cc(30):Cg"a" * Cc(40) * Cc(50):Cg"b" pe :pm"foo" --> 20 40 pe :Ct():pm"foo" --> {1=20, 2=40, "a"=30, "b"=50} pe:C() :pm"foo" --> "" 20 40 pe:C():Ct():pm"foo" --> {1="", 2=20, 3=40} So: I understand what is happening, and why it happens... but is there a workaround? Is it possible to replace the :C():Cg"c" in pabc = (P"(%" * pa * pb * P")"):C():Cg"c":Ct() by something else, that would put the substring matched by the P"(%" * pa * pb * P")" into the key "c" without dropping the named captures generated by pa and pb? Thanks in advance! 
Eduardo Ochs http://anggtwu.net/luaforth.html ##### # # An extension to Lua to let us refer to the table being constructed # 2023aug19 # ##### # «capts-current-table» (to ".capts-current-table") # (find-lua51manual "#2.5.7" "Table constructors") # (find-lpegmanual "#cap-g" "group capture") # (find-lpegmanual "#cap-b" "back capture") We know that this {10, 20, ["a"]=30, 40, a=50, 60} is roughly equivalent to this: local T = {} table.insert(T, 10) table.insert(T, 20) T["a"] = 30 table.insert(T, 40) T["a"] = 50 -- overwrites the previous T["a"] table.insert(T, 60) Let's imagine an extension of Lua in which this is valid, {10, 20, a=30, 40, a=50, 60, ["a"], ["a"]+2} \---/ \---/ 50 50 \-----/ 52 and corresponds to this: local T = {} table.insert(T, 10) table.insert(T, 20) T["a"] = 30 table.insert(T, 40) T["a"] = 50 -- overwrites the previous T["a"] table.insert(T, 60) table.insert(T, T["a"]) -- ["a"] becomes T["a"] table.insert(T, T["a"]+2) -- ["a"]+2 becomes T["a"]+2 i.e., each ["a"] references the value stored in the key "a" of the table that is being constructed. I will refer to the "table that is being constructed" as the "current table". _Apparently_ this operation that reads entries from the "current table" is exactly what we need to understand how _group captures_ and _back captures_ work in lpeg. 
Look at the two examples below, that end with ":pm()"s: * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str or "")) end (Cc"a" * Cc"b":Cg"c" * Cc"d" * Cb"c"):Ct():pm() -- \---/ \---/ \---/ \---/ -- "a" "b" "d" ["c"] -- \---------/ -- ["c"]="b" -- \----------------------------------/ -- "a" ["c"]="b" "d" ["c"] -- \----------------------------------------/ -- {"a", ["c"]="b", "d", ["c"] } -- \----------------------------------------/ -- {"a", ["c"]="b", "d", "b" } (Cc"a" * Cc("b","c"):Cg"d" * Cc"e" * Cb"d" * Cb"d"):Ct():pm() -- \---/ \---------/ \---/ \---/ \---/ -- "a" "b" "c" "e" ["d"] ["d"] -- \---------------/ -- ["d"]={."b" "c".} -- \-----------------------------------------------/ -- "a" ["d"]={."b" "c".} "e" ["d"] ["d"] -- \-----------------------------------------------------/ -- {"a", ["d"]={."b" "c".}, "e", ["d"], ["d"] } -- \-----------------------------------------------------/ -- {"a", ["d"]="b", "e", "b","c", "b","c" } The second example shows that Cc("b","c"):Cg"d" doesn't store a "value" in ["d"]... it stores a type of object that is a strange cross between a single value, a table, and a list of values. For more on "lists of values", see: (find-es "lua-intro" "intro:lists") I will call these new objects "Ltables", and I will write them like tables, but delimited with "{. .}" instead of just "{ }"; and sometimes I will omit their commas, and write just {."b" "c".} instead of: {."b", "c".} Ltables can be coerced to values, to lists of values, and to tables. The diagram above shows some of these coercions, but in a messy way... I will try to clean that up in a few days! 
##### # # Apresentação sobre capturas: preparação # 2023aug18 # ##### # «apr-capts-prep» (to ".apr-capts-prep") # (find-angg "LUA/Deps1.lua") * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) rm -Rv /tmp/capts/ mkdir /tmp/capts/ cd /tmp/capts/ A=(Capts1 Co1 ELpeg1 Globals1 PCall1 Show2 Subst1 Tree1) for i in $A; do cp -v ~/LUA/${i}.lua .; done cp -v ~/LUA/lua50init.lua edrxlib.lua * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) cd /tmp/capts/ laf set | grep LUA export LUA_INIT= export LUA_INIT=@/tmp/capts/edrxlib.lua lua5.1 require "lpeg" require "Capts1" os.exit() rm -fv /tmp/capts.zip cd /tmp/capts/ zip /tmp/capts.zip * # (find-fline "/tmp/capts/") # (find-cp-angg-links "capts.zip" "/tmp/" "tmp/") cd /tmp/ scp capts.zip $LINP/tmp/ * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) rm -Rfv /tmp/capts.zip cd /tmp/ wget http://anggtwu.net/tmp/capts.zip rm -Rv /tmp/capts/ mkdir /tmp/capts/ cd /tmp/capts/ unzip /tmp/capts.zip # (find-fline "/tmp/capts/") # http://anggtwu.net/tmp/capts.zip ##### # # Apresentação sobre capturas # 2023aug18 # ##### # «apr-capts» (to ".apr-capts") # (find-LATEX "2023lpegcaptures.tex") # https://chrisbarrett2.github.io/On%20the%20Simply-Typed%20Functional%20Machine%20Calculus.pdf * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) rm -Rfv /tmp/capts.zip cd /tmp/ wget http://anggtwu.net/tmp/capts.zip rm -Rv /tmp/capts/ mkdir /tmp/capts/ cd /tmp/capts/ unzip /tmp/capts.zip rm -Rfv capts.zip wget http://anggtwu.net/tmp/capts.zip rm -Rv capts/ mkdir capts/ cd capts/ unzip ../capts.zip * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) cd /tmp/capts/ export LUA_INIT=@/tmp/capts/edrxlib.lua lua5.3 PP(2, "3", {4, 5, a=6, [{7,8}]=9, [{7,8}]=10}) --> 2 "3" {1=4, 2=5, "a"=6, {1=7, 2=8}=9, {1=7, 2=8}=10} lpeg = require "lpeg" B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct Carg,Cmt = lpeg.Carg,lpeg.Cmt lpeg.pm = function (pat, str) PP(pat:match(str 
or "")) end

(Cc("a","b") * Cc("c","d")) :pm()
(Cc("a","b") * Cc("c","d"):Cg"e") :pm()
(Cc("a","b") * Cc("c","d"):Cg"e") :Ct():pm()
(Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f") :Ct():pm()
(Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e"):Ct():pm()
(Cc("a","b") * Cc("c","d"):Cg"e" * Cc"f" * Cb"e") :pm()



#####
#
# lpeg.Cobeying
# 2023sep03
#
#####

# «lpeg.Cobeying» (to ".lpeg.Cobeying")
# (find-angg "LUA/ELpeg1.lua" "lpeg.Cobeying")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "lpeg"
-- pat:Cobeying(f) matches pat and then calls f(o), where o is the
-- first capture produced by pat. If f(o) is true-ish the match is
-- kept, at the same position, with o as its capture; otherwise the
-- whole pattern fails.
lpeg.Cobeying = function (pat, f)
    return pat:Cmt(function(subj,pos,o)
        if f(o) then return true,o else return false end
      end)
  end
lpeg.pm = function (pat, str) PP(pat:match(str or "")) end
B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V
Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg
Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct
Carg,Cmt = lpeg.Carg,lpeg.Cmt

C"abc":pm("abcde")
C"abc":Cobeying(function(o) return o:match("^a") end):pm("abcde") --> "abc"
C"abc":Cobeying(function(o) return o:match("^b") end):pm("abcde") --> <nil>



#####
#
# lpeg.Cfromthere
# 2023sep09
#
#####

# «lpeg.Cfromthere» (to ".lpeg.Cfromthere")
# (find-angg "LUA/ELpeg1.lua" "lpeg.Cfromthere")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "lpeg"
B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V
Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg
Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct
Carg,Cmt = lpeg.Carg,lpeg.Cmt
lpeg.pm = function (pat, str) PP(pat:match(str or "")) end

-- pat:Cfromthere() expects the first capture of pat to be a position
-- ("there"). It keeps the current position and produces as its capture
-- the substring of the subject that goes from "there" to just before
-- the current position.
lpeg.Cfromthere = function (pat)
    return pat:Cmt(function(subj,pos,there)
        return pos,subj:sub(there,pos-1)
      end)
  end

c0 = Cp():Cg"c" -- stores this position in c
ab = C(1):Cg"a" * C(1):Cg"b" -- stores things in a and b
c1 = Cb"c":Cfromthere():Cg"c" -- replaces c by everything from there to here

ab:C():Cg"c" :Ct():pm "ABCD" --> bad: {"c"="AB"}
(c0 * ab * c1):Ct():pm "ABCD" --> good: {"a"="A", "b"="B", "c"="AB"}



#####
#
# lpeg.Ccall - a way to call precompiled (or: external) lpeg patterns
# 2023nov15
#
#####

# «lpeg.Ccall» (to ".lpeg.Ccall")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "lpeg"
B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V
Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg
Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct
Carg,Cmt = lpeg.Carg,lpeg.Cmt
lpeg.pm = function (lpat,subj,init,...) PP(lpat:match(subj,init,...)) end
string.pm = function (spat,subj,init,...) PP(subj:match(spat,init,...)) end

pat1 = C(C(1)*C(1))
pat1 :pm("abcd")

-- pat:Ccall() returns a pattern that "calls" pat: a match-time capture
-- on P(true) runs a separate, nested match of pat on the same subject,
-- starting at the current position.
--   * If the nested match fails, the returned pattern fails.
--   * Otherwise the returned pattern consumes what pat consumed and
--     produces the values returned by the nested match as captures.
-- `pack' and `myunpack' are not defined in this block; the code below
-- assumes that pack(...) stores the number of values in the field `n'
-- and that myunpack(t) returns t[1], ..., t[t.n] -- TODO confirm against
-- the init file.
lpeg.Ccall = function (pat)
    -- pat2 only depends on pat, so build it once here instead of at
    -- every match attempt. The trailing Cp() makes the nested match
    -- return its end position as its last value.
    local pat2 = pat*Cp()
    local f = function (subj,pos,...)
        local results = pack(pat2:match(subj,pos))
        -- A failed match returns a single nil.
        if results.n == 1 and results[1] == nil then return false end
        -- Index with results.n instead of #results: the captures of pat
        -- may include nils, and "#" is not reliable on a table with holes.
        local newpos = results[results.n]
        -- Drop the position added by Cp() before returning the captures.
        results.n = results.n - 1
        return newpos,myunpack(results)
      end
    return P(true):Cmt(f)
  end

(C(2) * pat1:Ccall() * C(2)) : pm("abcdefg")
(C(2) * pat1 * C(2)) : pm("abcdefg")
(C(2) * pat1:Ccall() * C(2)) : pm("ab")
(C(2) * pat1 * C(2)) : pm("ab")

-- Notes:
-- 1) This drops named group captures
-- 2) Do I need this? (Answer: no!!!)



#####
#
# Compare lpeg patterns and string patterns with ":pm"s
# 2023nov15
#
#####

# «compare-pm» (to ".compare-pm")
# (to "globals")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
require "lpeg"
B,C,P,R,S,V = lpeg.B,lpeg.C,lpeg.P,lpeg.R,lpeg.S,lpeg.V
Cb,Cc,Cf,Cg = lpeg.Cb,lpeg.Cc,lpeg.Cf,lpeg.Cg
Cp,Cs,Ct = lpeg.Cp,lpeg.Cs,lpeg.Ct
Carg,Cmt = lpeg.Carg,lpeg.Cmt
lpeg.pm = function (lpat,subj,init,...) PP(lpat:match(subj,init,...)) end
string.pm = function (spat,subj,init,...)
PP(subj:match(spat,init,...)) end ("((.)(.))") :pm("abcd") (C(C(1)*C(1))) :pm("abcd") ("({([io])([0-9]+)})(.*)") :pm("{i42} a+b;") ("({([io])([0-9]+)})(.*)") :pm("abcd") ((P"{" * C(S"io") * C(R"09"^1) * P"}"):C() * C(P(1)^0)) :pm("{i42} a+b;") ((P"{" * C(S"io") * C(R"09"^1) * P"}"):C() * C(P(1)^0)) :pm("abcd") ##### # # EmacsConf2023: my submission # 2023sep09 # ##### # «emacsconf2023» (to ".emacsconf2023") # https://mail.google.com/mail/u/0/#sent/KtbxLthtFKpjqWQxqhgbqRzJFClGMvdzcg Talk title: REPLs in strange places: Lua, LaTeX, LPeg, LPegRex, TikZ Format: pre-recorded video, estimated 40 mins long Introduction for you and your talk (<= 100 words): I am this person here: http://anggtwu.net/eepitch.html Preferred Q&A approach: IRC Public contact information: edrx on IRC [email protected] http://anggtwu.net/ Private emergency contact information: (+55)(21)98884-2389 (WhatsApp) @eduardooochs (Telegram) --snip--snip-- Talk description: Many years ago, when I started programming, my dream was to write games. I failed miserably in that, but I became fascinated by languages, and I discovered Forth - that was perfect for writing languages whose syntax was as simple as possible. Then I switched to GNU/Linux and I had a series of favorite languages; at some point I discovered Lua, that became not only my favorite language but also my favorite tool for implementing other languages. One of the main libraries of Lua is something called LPeg, that lets "people" implement complex parsers in just a few lines of code using PEGs - Parsing Expression Grammars. I've put the "people" in the last paragraph between quotes because for many years I wasn't included in the "people who can implement complex parsers with LPeg"... lots of things in LPeg didn't make sense to me, and I couldn't visualize how it worked. Something was missing - some diagrams, maybe? The main tool for drawing diagrams in LaTeX is something called TikZ, that is HUGE - its manual has over 1000 pages. 
TikZ includes lots of libraries and extensions, and each one of these libraries and extensions extends TikZ's core language with some extra constructs. I don't know anyone - except for a handful of experts - who knows what is the "core language" of TikZ, that lies, or that should lie, below all these extensions... all of my friends who use TikZ are just "users" of TikZ - they've learned some parts of TikZ by starting with examples, and by then modifying these examples mostly by trial and error. In particular, no one among my friends knows how styles in TikZ really work; styles are implemented using "keys", that are hard to inspect from a running TeX - see [1] - and I found the chapter on "key management" in the manual very hard to understand. It feels as if something is missing from it... some diagrams, maybe? In my day job I am a mathematician. I work in a federal university in Brazil, and besides teaching I do some research - mostly in areas in which the papers and theses have lots of diagrams, of many different kinds, and in which people use zillions of different programs to draw their diagrams. Every time that I see those diagrams I think "wow, I _need_ to learn how to draw diagrams like that!", but until a few months ago this seemed to be impossible, or very hard, or very painful... This presentation will be about a point in which all these ideas intersect. I am the author of an Emacs package called eev, that encourages using REPLs in a certain way; Lua can be used in several different styles, and if we use it in a certain way that most people hate - with lots of globals, and with an implementation of OO that makes everything inspectable and modifiable - then it becomes very REPL-friendly; there is an extension of LPeg called LPegRex ([2], [3]), that I found promising but hard to use, so I rewrote some parts of it to make them more REPL-friendly, and to make it print its ASTs in 2D ASCII art.
The core of my presentation will be about how I am using REPLs written in Lua to write grammars, parsers, and tools to generate many kinds of diagrams, and how I am using these diagrams to document both my own code and other people's programs - the parts of them in which some diagrams seem to be missing. My hope is that people will find these ideas easy to port to other languages besides Lua, to other tools for generating diagrams besides LaTeX - SVG, maybe? - and to other ways to use REPLs in Emacs besides eev. Some ideas in this presentation were inspired by the blog post [4]. [1] https://tex.stackexchange.com/questions/663740/alternative-to-edef-c-pgfkeys-a [2] https://github.com/edubart/lpegrex [3] https://github.com/edubart/lpegrex/blob/main/parsers/lua.lua [4] https://ianthehenry.com/posts/my-kind-of-repl/ --snip--snip-- The release in "Please include this speaker release in order to indicate your agreement with it:" By submitting this proposal, I agree that my presentation at EmacsConf 2023 is subject to the following terms and conditions: The EmacsConf organizers may capture audio and video (a "Recording") of my presentation and any associated materials, which may include slides, notes, transcripts, and prerecording(s) of my presentation that I provide to the EmacsConf organizers. I authorize the EmacsConf organizers to distribute, reproduce, publicly display, and prepare derivative works of the Recording and any derivative works of the Recording (the "Licensed Materials") under the terms of the Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license. I grant to the EmacsConf organizers permission to use my name, likeness, and biographic information in association with their use of the Licensed Materials under the above license. I represent that I have the authority to grant the above license to the EmacsConf organizers. 
If my presentation incorporates any material owned by third parties, I represent that the material is sublicensable to the EmacsConf organizers or that my use of them is fair use. ##### # # Building a Programming Language - course by Roberto Ierusalimschy # 2023dec09 # ##### # «bapl» (to ".bapl") # https://www.reddit.com/r/lua/comments/uilo65/building_a_programming_language_by_roberto/ # https://www.youtube.com/playlist?list=PLXPoQXrP0261ceYEnDd5LaZy2UM_4hLae videos of the first week # http://www.youtube.com/watch?v=NLcITMF4VSM Building a Programming Language: Week 1 (Part 1) - Introduction to LPeg # http://www.youtube.com/watch?v=fWoAvmpyEJk Building a Programming Language: Week 1 (Part 2) - Basic Constructions # http://www.youtube.com/watch?v=cLC3WWhL1aE Building a Programming Language: Week 1 (Part 3) - Concatenation, Repetition and Choices # http://www.youtube.com/watch?v=1_7ny3if7_4 Building a Programming Language: Week 1 (Part 4) - Captures # http://www.youtube.com/watch?v=E1d30lgzD7Y Building a Programming Language: Week 1 (Part 5) - Predicates # http://www.youtube.com/watch?v=G1Hid9Mtnn4 Building a Programming Language: Week 1 (Part 6) - Summations # http://www.youtube.com/watch?v=OpEjaC5oFOU Building a Programming Language: Week 1 (Part 7) - Arithmetic Expressions # http://www.youtube.com/watch?v=mQuRFoxAoyM Building a Programming Language: Week 1 (Part 8) - Parenthesized Expressions ##### # # My e-mail about replacing some "printfs" in Lpeg # 2024jan11 # ##### # «replace-printfs-1» (to ".replace-printfs-1") # (to "lpeg-debug-mode") # Help needed to replace some printfs in the lpeg source with something else # https://mail.google.com/mail/u/0/#sent/KtbxLwHLtGvDFzNMkqjlQxNlRsHMSSjNGq # https://groups.google.com/g/lua-l/c/7CkFNrlosW4 Hi list, I am the author of this thing, that adds a Lua interpreter to Emacs, https://github.com/edrx/emlua/#introduction but I was only able to write it because someone - nerditation - helped me with the part in C... 
https://github.com/edrx/emlua/blob/main/emlua.cpp Now I'm going to ask for help on another thing in C - this time to make the debugging functions of lpeg slightly more useful. 1. Introduction and demo ======================== This is what I use to compile lpeg with "-DLPEG_DEBUG" on Debian, and to test the methods :pcode() and :ptree(), that are only available when LPEG_DEBUG is defined: export S=$HOME/snarf mkdir -p $S/http/www.inf.puc-rio.br/~roberto/lpeg/ cd $S/http/www.inf.puc-rio.br/~roberto/lpeg/ wget -nc 'http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.1.0.tar.gz' rm -Rv ~/usrc/lpeg-1.1.0/ mkdir ~/usrc/lpeg-1.1.0/ tar -C ~/usrc/ -xvzf $S/http/www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.1.0.tar.gz cd ~/usrc/lpeg-1.1.0/ ls *.c *.h | tee .files.ch etags $(cat .files.ch) rm -fv *.o rm -fv *.so rm -fv om make LUADIR=/usr/include/lua5.1 COPT="-O2 -DLPEG_DEBUG" lua5.1 HOME = os.getenv("HOME") package.cpath = HOME.."/usrc/lpeg-1.1.0/?.so;" .. package.cpath require "lpeg" AlphaNumeric = lpeg.R("AZ", "az", "09") AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~") AnchorChar:pcode() AnchorChar:ptree() os.exit() When I run all that in a shell, the log of the last part is: Lua 5.1.5 Copyright (C) 1994-2012 Lua.org, PUC-Rio > HOME = os.getenv("HOME") > package.cpath = HOME.."/usrc/lpeg-1.1.0/?.so;" .. package.cpath > require "lpeg" > AlphaNumeric = lpeg.R("AZ", "az", "09") > AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~") > AnchorChar:pcode() [] 00: set (20-3) [(21)(23-25)(28-3b)(3d)(3f-5a)(5e-5f)(61-7e)] 04: end > AnchorChar:ptree() [] set(04-12) [(21)(23-25)(28-3b)(3d)(3f-5a)(5e-5f)(61-7e)] > os.exit() but pcode and ptree don't return strings - they _print_ strings... 2. Help needed ============== The file ptree.c in the lpeg source contains these lines: static struct luaL_Reg pattreg[] = { {"ptree", lp_printtree}, {"pcode", lp_printcode}, {"match", lp_match}, {"B", lp_behind}, ... 
} and lpprint.h contains this: #if defined(LPEG_DEBUG) void printpatt (Instruction *p, int n); void printtree (TTree *tree, int ident); void printktable (lua_State *L, int idx); void printcharset (const byte *st); void printcaplist (Capture *cap, Capture *limit); void printinst (const Instruction *op, const Instruction *p); #else #define printktable(L,idx) \ luaL_error(L, "function only implemented in debug mode") #define printtree(tree,i) \ luaL_error(L, "function only implemented in debug mode") #define printpatt(p,n) \ luaL_error(L, "function only implemented in debug mode") #endif The debugging functions - in lpprint.c - are implemented using "printf"s. Suppose that we replace all of the 32 occurrences of "printf" in lpprint.c by "mylpegprintf_C", and mylpegprintf_C is a function or macro that would do this (in pseudocode in Lua): mylpegprintf_C = function (fmt, ...) if _G.mylpegprintf then local str = sprintf(fmt, ...) -- calls sprintf in C _G.mylpegprintf(str) -- calls Lua else printf(fmt, ...) -- calls printf in C end end then it would be trivial to write variants of :pcode and :ptree that would return strings instead of just printing things... Would anyone like to help me with that? I can't offer fame or fortune - just credit and many, many thanks... Thanks in advance (hopefully), Eduardo Ochs http://anggtwu.net/eepitch.html http://anggtwu.net/emacsconf2023.html ##### # # My e-mail about replacing some "printfs" in Lpeg - Roberto's answer # 2024jan11 # ##### # «replace-printfs-2» (to ".replace-printfs-2") # (find-node "(libc)Variable Arguments Output") # (find-node "(libc)Variadic Functions") # (find-node "(libc)How Variadic") If I would do that kind of thing (I probably will, but not now), I would use a 'string buffer' from the auxiliary library to build the resulting string, all in C. I would change all calls to printf to something like this: printf(fmt, ...) --> myprintf(b, fmt, ...)
The function 'myprintf' would be something like this: static void myprintf (luaL_Buffer *B, const char *fmt, ...) { va_list argp; char *buff[250]; int len; va_start(argp, fmt); len = vsprintf(buff, fmt, argp); va_end(argp); luaL_addlstring(B, buff, len); } The main functions would be responsible for creating the buffers and finishing them to create the final string result. ##### # # long-strings # 2024jan28 # ##### # «long-strings» (to ".long-strings") # (find-lpegmanual "#ex" "Lua's long strings") Lua's long strings ================== A long string in Lua starts with the pattern [=*[ and ends at the first occurrence of ]=*] with exactly the same number of equal signs. If the opening brackets are followed by a newline, this newline is discarded (that is, it is not part of the string). To match a long string in Lua, the pattern must capture the first repetition of equal signs and then, whenever it finds a candidate for closing the string, check whether it has the same number of equal signs. equals = lpeg.P"="^0 open = "[" * lpeg.Cg(equals, "init") * "[" * lpeg.P"\n"^-1 close = "]" * lpeg.C(equals) * "]" closeeq = lpeg.Cmt(close * lpeg.Cb("init"), function (s, i, a, b) return a == b end) string = open * lpeg.C((lpeg.P(1) - closeeq)^0) * close / 1 The open pattern matches [=*[, capturing the repetitions of equal signs in a group named init; it also discharges an optional newline, if present. The close pattern matches ]=*], also capturing the repetitions of equal signs. The closeeq pattern first matches close; then it uses a back capture to recover the capture made by the previous open, which is named init; finally it uses a match-time capture to check whether both captures are equal. The string pattern starts with an open, then it goes as far as possible until matching closeeq, and then matches the final close. The final numbered capture simply discards the capture made by close. 
* (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) Path.addLUAtopath() require "ELpeg1" -- (find-angg "LUA/ELpeg1.lua") gr,V,VA,VE,PE = Gram.new() V.equals = P"="^0 V.optnl = P"\n"^-1 V.open = "[" * V.equals:Cg"init" * "[" * V.optnl V.close = "]" * V.equals:C() * "]" V.closeeq = (V.close * Cb"init"):Cmt(function (s,i,a,b) return a==b end) V.middle = (1 - V.closeeq)^0 V.string = (V.open * V.middle:C() * V.close) / 1 ##### # # marpa # 2024sep28 # ##### # «marpa» (to ".marpa") # https://ar5iv.labs.arxiv.org/html/1910.08129 Marpa, a practical general parser: the recognizer (Jeffrey Kegler) # https://arxiv.org/abs/1910.08129 Marpa, A practical general parser: the recognizer # https://jeffreykegler.github.io/Marpa-web-site/ # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/ # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/metapages/annotated.html # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2018/05/knuth_1965.html # https://jeffreykegler.github.io/personal/timeline_v3 *** # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2013/03/bnf_to_ast.html # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2015/03/peg.html # https://savage.net.au/Marpa.html # https://github.com/jeffreykegler/old_kollos/blob/master/about.md # https://lukasatkinson.de/2015/marpa-overview/ # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2011/11/marpa-and-the-ruby-slippers.html # https://loup-vaillant.fr/tutorials/earley-parsing/ # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2016/03/parus.html birds # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2012/dsl.html # https://jeffreykegler.github.io/Ocean-of-Awareness-blog/individual/2013/01/dsl_simpler2.html # https://marpa-guide.github.io/index.html https://lists.gnu.org/archive/html/emacs-devel/2021-09/msg02044.html Apparently it has been proven https://lists.csail.mit.edu/mailman/listinfo/peg 
https://lists.csail.mit.edu/pipermail/peg/ https://lists.csail.mit.edu/pipermail/peg/2019-August/000771.html https://www.reddit.com/r/ProgrammingLanguages/comments/a80stl/are_there_any_languages_that_use_a_peg_in_their/ https://github.com/aappleby/Matcheroni/ Matcheroni & Parseroni (for C++20) https://eyalkalderon.com/blog/nom-error-recovery/ Error recovery with parser combinators (using nom) https://github.com/ohmjs/ohm https://nextjournal.com/dubroy/ohm-parsing-made-easy https://github.com/ohmjs/ohm/tree/main/examples/ https://news.ycombinator.com/item?id=38082594 Ohm: A library and language for building parsers, interpreters, compilers, etc (github.com/ohmjs) https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ *** https://safinaskar.writeas.com/this-is-why-you-should-never-use-parser-combinators-and-peg https://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html https://dickgrune.com/Books/PTAPG_2nd_Edition/ # Local Variables: # coding: utf-8-unix # End: