lua/testes/pm.lua
Roberto Ierusalimschy 264659bd53 Optional 'init' argument to 'string.gmatch'
The function 'string.gmatch' now has an optional 'init' argument,
similar to 'string.find' and 'string.match'. Moreover, there was
some reorganization in the manipulation of indices in the string
library.
This commit also includes small janitorial work in the manual
and in comments in the interpreter loop.
2019-01-08 14:22:32 -02:00

392 lines
12 KiB
Lua
Raw Blame History

-- $Id: testes/pm.lua $
-- See Copyright Notice in file all.lua
print('testing pattern matching')
local function checkerror (msg, f, ...)
local s, err = pcall(f, ...)
assert(not s and string.find(err, msg))
end
function f(s, p)
local i,e = string.find(s, p)
if i then return string.sub(s, i, e) end
end
a,b = string.find('', '') -- empty patterns are tricky
assert(a == 1 and b == 0);
a,b = string.find('alo', '')
assert(a == 1 and b == 0)
a,b = string.find('a\0o a\0o a\0o', 'a', 1) -- first position
assert(a == 1 and b == 1)
a,b = string.find('a\0o a\0o a\0o', 'a\0o', 2) -- starts in the midle
assert(a == 5 and b == 7)
a,b = string.find('a\0o a\0o a\0o', 'a\0o', 9) -- starts in the midle
assert(a == 9 and b == 11)
a,b = string.find('a\0a\0a\0a\0\0ab', '\0ab', 2); -- finds at the end
assert(a == 9 and b == 11);
a,b = string.find('a\0a\0a\0a\0\0ab', 'b') -- last position
assert(a == 11 and b == 11)
assert(string.find('a\0a\0a\0a\0\0ab', 'b\0') == nil) -- check ending
assert(string.find('', '\0') == nil)
assert(string.find('alo123alo', '12') == 4)
assert(string.find('alo123alo', '^12') == nil)
assert(string.match("aaab", ".*b") == "aaab")
assert(string.match("aaa", ".*a") == "aaa")
assert(string.match("b", ".*b") == "b")
assert(string.match("aaab", ".+b") == "aaab")
assert(string.match("aaa", ".+a") == "aaa")
assert(not string.match("b", ".+b"))
assert(string.match("aaab", ".?b") == "ab")
assert(string.match("aaa", ".?a") == "aa")
assert(string.match("b", ".?b") == "b")
assert(f('aloALO', '%l*') == 'alo')
assert(f('aLo_ALO', '%a*') == 'aLo')
assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu")
assert(f('aaab', 'a*') == 'aaa');
assert(f('aaa', '^.*$') == 'aaa');
assert(f('aaa', 'b*') == '');
assert(f('aaa', 'ab*a') == 'aa')
assert(f('aba', 'ab*a') == 'aba')
assert(f('aaab', 'a+') == 'aaa')
assert(f('aaa', '^.+$') == 'aaa')
assert(f('aaa', 'b+') == nil)
assert(f('aaa', 'ab+a') == nil)
assert(f('aba', 'ab+a') == 'aba')
assert(f('a$a', '.$') == 'a')
assert(f('a$a', '.%$') == 'a$')
assert(f('a$a', '.$.') == 'a$a')
assert(f('a$a', '$$') == nil)
assert(f('a$b', 'a$') == nil)
assert(f('a$a', '$') == '')
assert(f('', 'b*') == '')
assert(f('aaa', 'bb*') == nil)
assert(f('aaab', 'a-') == '')
assert(f('aaa', '^.-$') == 'aaa')
assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
assert(f('alo xo', '.o$') == 'xo')
assert(f(' \n isto <20> assim', '%S%S*') == 'isto')
assert(f(' \n isto <20> assim', '%S*$') == 'assim')
assert(f(' \n isto <20> assim', '[a-z]*$') == 'assim')
assert(f('um caracter ? extra', '[^%sa-z]') == '?')
assert(f('', 'a?') == '')
assert(f('<EFBFBD>', '<EFBFBD>?') == '<EFBFBD>')
assert(f('<EFBFBD>bl', '<EFBFBD>?b?l?') == '<EFBFBD>bl')
assert(f(' <20>bl', '<EFBFBD>?b?l?') == '')
assert(f('aa', '^aa?a?a') == 'aa')
assert(f(']]]<5D>b', '[^]]') == '<EFBFBD>')
assert(f("0alo alo", "%x*") == "0a")
assert(f("alo alo", "%C+") == "alo alo")
print('+')
function f1(s, p)
p = string.gsub(p, "%%([0-9])", function (s)
return "%" .. (tonumber(s)+1)
end)
p = string.gsub(p, "^(^?)", "%1()", 1)
p = string.gsub(p, "($?)$", "()%1", 1)
local t = {string.match(s, p)}
return string.sub(s, t[1], t[#t] - 1)
end
assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o")
assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3')
assert(f1('=======', '^(=*)=%1$') == '=======')
assert(string.match('==========', '^([=]*)=%1$') == nil)
local function range (i, j)
if i <= j then
return i, range(i+1, j)
end
end
local abc = string.char(range(0, 127)) .. string.char(range(128, 255));
assert(string.len(abc) == 256)
function strset (p)
local res = {s=''}
string.gsub(abc, p, function (c) res.s = res.s .. c end)
return res.s
end;
assert(string.len(strset('[\200-\210]')) == 11)
assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz")
assert(strset('[a-z%d]') == strset('[%da-uu-z]'))
assert(strset('[a-]') == "-a")
assert(strset('[^%W]') == strset('[%w]'))
assert(strset('[]%%]') == '%]')
assert(strset('[a%-z]') == '-az')
assert(strset('[%^%[%-a%]%-b]') == '-[]^ab')
assert(strset('%Z') == strset('[\1-\255]'))
assert(strset('.') == strset('[\1-\255%z]'))
print('+');
assert(string.match("alo xyzK", "(%w+)K") == "xyz")
assert(string.match("254 K", "(%d*)K") == "")
assert(string.match("alo ", "(%w*)$") == "")
assert(string.match("alo ", "(%w+)$") == nil)
assert(string.find("(<28>lo)", "%(<28>") == 1)
local a, b, c, d, e = string.match("<EFBFBD>lo alo", "^(((.).).* (%w*))$")
assert(a == '<EFBFBD>lo alo' and b == '<EFBFBD>l' and c == '<EFBFBD>' and d == 'alo' and e == nil)
a, b, c, d = string.match('0123456789', '(.+(.?)())')
assert(a == '0123456789' and b == '' and c == 11 and d == nil)
print('+')
assert(string.gsub('<EFBFBD>lo <20>lo', '<EFBFBD>', 'x') == 'xlo xlo')
assert(string.gsub('alo <20>lo ', ' +$', '') == 'alo <20>lo') -- trim
assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim
assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
t = "ab<EFBFBD> d"
a, b = string.gsub(t, '(.)', '%1@')
assert('@'..a == string.gsub(t, '', '@') and b == 5)
a, b = string.gsub('ab<EFBFBD>d', '(.)', '%0@', 2)
assert(a == 'a@b@<40>d' and b == 2)
assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
"xyz=abc-abc=xyz")
assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
assert(string.gsub('<EFBFBD><EFBFBD><EFBFBD>', '$', '\0<EFBFBD><EFBFBD>') == '<EFBFBD><EFBFBD><EFBFBD>\0<EFBFBD><EFBFBD>')
assert(string.gsub('', '^', 'r') == 'r')
assert(string.gsub('', '$', 'r') == 'r')
print('+')
do -- new (5.3.3) semantics for empty matches
assert(string.gsub("a b cd", " *", "-") == "-a-b-c-d-")
local res = ""
local sub = "a \nbc\t\td"
local i = 1
for p, e in string.gmatch(sub, "()%s*()") do
res = res .. string.sub(sub, i, p - 1) .. "-"
i = e
end
assert(res == "-a-b-c-d-")
end
assert(string.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
"um (DOIS) tres (QUATRO)")
do
local function setglobal (n,v) rawset(_G, n, v) end
string.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", setglobal)
assert(_G.a=="roberto" and _G.roberto=="a")
end
function f(a,b) return string.gsub(a,'.',b) end
assert(string.gsub("trocar tudo em |teste|b| <20> |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
"trocar tudo em bbbbb <20> alalalalalal")
local function dostring (s) return load(s, "")() or "" end
assert(string.gsub("alo $a='x'$ novamente $return a$",
"$([^$]*)%$",
dostring) == "alo novamente x")
x = string.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
"$([^$]*)%$", dostring)
assert(x == ' assim vai para ALO')
t = {}
s = 'a alo jose joao'
r = string.gsub(s, '()(%w+)()', function (a,w,b)
assert(string.len(w) == b-a);
t[a] = b-a;
end)
assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4)
function isbalanced (s)
return string.find(string.gsub(s, "%b()", ""), "[()]") == nil
end
assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"))
assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"))
assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo')
local t = {"apple", "orange", "lime"; n=0}
assert(string.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
== "apple and orange and lime")
t = {n=0}
string.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3)
t = {n=0}
assert(string.gsub("first second word", "%w+",
function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word")
assert(t[1] == "first" and t[2] == "second" and t[3] == undef)
checkerror("invalid replacement value %(a table%)",
string.gsub, "alo", ".", {a = {}})
checkerror("invalid capture index %%2", string.gsub, "alo", ".", "%2")
checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a")
checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a")
checkerror("invalid use of '%%'", string.gsub, "alo", ".", "%x")
if not _soft then
print("big strings")
local a = string.rep('a', 300000)
assert(string.find(a, '^a*.?$'))
assert(not string.find(a, '^a*.?b$'))
assert(string.find(a, '^a-.?$'))
-- bug in 5.1.2
a = string.rep('a', 10000) .. string.rep('b', 10000)
assert(not pcall(string.gsub, a, 'b'))
end
-- recursive nest of gsubs
function rev (s)
return string.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end)
end
local x = "abcdef"
assert(rev(rev(x)) == x)
-- gsub with tables
assert(string.gsub("alo alo", ".", {}) == "alo alo")
assert(string.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo")
assert(string.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo")
assert(string.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo")
assert(string.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo")
t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
assert(string.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI")
-- tests for gmatch
local a = 0
for i in string.gmatch('abcde', '()') do assert(i == a+1); a=i end
assert(a==6)
t = {n=0}
for w in string.gmatch("first second word", "%w+") do
t.n=t.n+1; t[t.n] = w
end
assert(t[1] == "first" and t[2] == "second" and t[3] == "word")
t = {3, 6, 9}
for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do
assert(i == table.remove(t, 1))
end
assert(#t == 0)
t = {}
for i,j in string.gmatch("13 14 10 = 11, 15= 16, 22=23", "(%d+)%s*=%s*(%d+)") do
t[tonumber(i)] = tonumber(j)
end
a = 0
for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end
assert(a == 3)
do -- init parameter in gmatch
local s = 0
for k in string.gmatch("10 20 30", "%d+", 3) do
s = s + tonumber(k)
end
assert(s == 50)
s = 0
for k in string.gmatch("11 21 31", "%d+", -4) do
s = s + tonumber(k)
end
assert(s == 32)
-- there is an empty string at the end of the subject
s = 0
for k in string.gmatch("11 21 31", "%w*", 9) do
s = s + 1
end
assert(s == 1)
-- there are no empty strings after the end of the subject
s = 0
for k in string.gmatch("11 21 31", "%w*", 10) do
s = s + 1
end
assert(s == 0)
end
-- tests for `%f' (`frontiers')
assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x")
assert(string.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[")
assert(string.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3")
assert(string.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.")
assert(string.gsub("function", "%f[\1-\255]%w", ".") == ".unction")
assert(string.gsub("function", "%f[^\1-\255]", ".") == "function.")
assert(string.find("a", "%f[a]") == 1)
assert(string.find("a", "%f[^%z]") == 1)
assert(string.find("a", "%f[^%l]") == 2)
assert(string.find("aba", "%f[a%z]") == 3)
assert(string.find("aba", "%f[%z]") == 4)
assert(not string.find("aba", "%f[%l%z]"))
assert(not string.find("aba", "%f[^%l%z]"))
local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]")
assert(i == 2 and e == 5)
local k = string.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])")
assert(k == 'alo ')
local a = {1, 5, 9, 14, 17,}
for k in string.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do
assert(table.remove(a, 1) == k)
end
assert(#a == 0)
-- malformed patterns
local function malform (p, m)
m = m or "malformed"
local r, msg = pcall(string.find, "a", p)
assert(not r and string.find(msg, m))
end
malform("(.", "unfinished capture")
malform(".)", "invalid pattern capture")
malform("[a")
malform("[]")
malform("[^]")
malform("[a%]")
malform("[a%")
malform("%b")
malform("%ba")
malform("%")
malform("%f", "missing")
-- \0 in patterns
assert(string.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2")
assert(string.match("ab\0\1\2c", "[\0-\0]+") == "\0")
assert(string.find("b$a", "$\0?") == 2)
assert(string.find("abc\0efg", "%\0") == 4)
assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1")
assert(string.match("abc\0\0\0", "%\0+") == "\0\0\0")
assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
-- magic char after \0
assert(string.find("abc\0\0","\0.") == 4)
assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
print('OK')