-- Module:UtilsString

-- `p` collects the functions and tables this module returns;
-- `h` holds internal helpers that are not part of the returned table.
local p, h = {}, {}

-- Returns true when `str` is nil or the empty string, false otherwise.
function p.isEmpty(str)
	if str == nil then
		return true
	end
	return str == ""
end

-- Logical negation of `p.isEmpty`: true when `str` is neither nil nor "".
function p.notEmpty(str)
	local empty = p.isEmpty(str)
	return not empty
end

-- Returns true when `str` is nil, or when nothing is left of it after
-- passing it through `mw.text.trim`.
function p.isBlank(str)
	if str == nil then
		return true
	end
	return mw.text.trim(str) == ""
end

-- Passes `str` through unchanged unless it is falsy or the empty string,
-- in which case nothing is returned (so the caller sees nil).
function p.nilIfEmpty(str)
	if not str or str == "" then
		return
	end
	return str
end

-- Returns true if `str` begins with the literal text `pattern`.
function p.startsWith(str, pattern)
	return h.startsWith(str, pattern, true)
end

-- Returns true if `str` begins with a match of the Lua pattern `pattern`.
function p.startsWithRegex(str, pattern)
	return h.startsWith(str, pattern, false)
end

-- Curried form of `p.startsWith`: fixes `pattern`, returns a predicate on strings.
function p._startsWith(pattern)
	local function predicate(subject)
		return h.startsWith(subject, pattern, true)
	end
	return predicate
end

-- Curried form of `p.startsWithRegex`: fixes `pattern`, returns a predicate on strings.
function p._startsWithRegex(pattern)
	local function predicate(subject)
		return h.startsWith(subject, pattern, false)
	end
	return predicate
end

-- Shared implementation. `plain` is forwarded to `string.find` to switch
-- between literal and pattern matching. The string starts with the pattern
-- exactly when the first match found begins at index 1.
function h.startsWith(str, pattern, plain)
	local firstIndex = str:find(pattern, 1, plain)
	return firstIndex == 1
end

-- Returns true if `str` ends with the literal text `pattern`.
function p.endsWith(str, pattern)
	return h.endsWith(str, pattern, true)
end

-- Returns true if some match of the Lua pattern `pattern` ends at the end of `str`.
function p.endsWithRegex(str, pattern)
	return h.endsWith(str, pattern, false)
end

-- Curried form of `p.endsWith`: fixes `pattern`, returns a predicate on strings.
function p._endsWith(pattern)
	return function(str)
		return h.endsWith(str, pattern, true)
	end
end

-- Curried form of `p.endsWithRegex`: fixes `pattern`, returns a predicate on strings.
function p._endsWithRegex(pattern)
	return function(str)
		return h.endsWith(str, pattern, false)
	end
end

-- Shared implementation of the endsWith family.
--   str     : the string to inspect
--   pattern : literal text (plain = true) or a Lua pattern (plain = false)
--   plain   : selects literal vs. pattern matching
-- Returns a boolean.
--
-- The suffix is checked directly instead of walking forward from match to
-- match: resuming a search after the end of the previous match skips
-- overlapping occurrences (e.g. "banana" / "ana") and can repeat forever
-- when a later match is empty (e.g. "ab" / "a*").
function h.endsWith(str, pattern, plain)
	if type(pattern) == "number" then
		-- string.find accepts numbers as patterns; keep accepting them here.
		pattern = tostring(pattern)
	end

	if plain then
		local suffixLength = #pattern
		if suffixLength == 0 then
			-- Every string ends with the empty string; also avoids sub(-0),
			-- which would return the whole string.
			return true
		end
		return suffixLength <= #str and str:sub(-suffixLength) == pattern
	end

	-- Pattern mode: let the pattern matcher anchor the match at the end.
	-- A trailing "$" is already an anchor only when it is preceded by an even
	-- number of "%" characters; with an odd number it is an escaped literal "$".
	-- Known limitation: a pattern ending in a "%b" item whose closing
	-- delimiter is "$" is treated as already anchored.
	local escapes = pattern:match("(%%*)%$$")
	local alreadyAnchored = escapes ~= nil and #escapes % 2 == 0
	if not alreadyAnchored then
		pattern = pattern .. "$"
	end
	return str:find(pattern) ~= nil
end

-- Curried form of `p.split`: fixes the separator arguments and returns a
-- function that splits the string it is given.
function p._split(pattern, plain)
	return function(str)
		return p.split(str, pattern, plain)
	end
end

-- Original source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
--
-- Splits `text` on `pattern` and returns the pieces as an array.
--   text    : the string to split
--   pattern : separator; a Lua pattern unless `plain` is true. When omitted,
--             h.gsplit falls back to '%s*,%s*'. The empty string splits the
--             text into individual characters.
--   plain   : forwarded to string.find to request a literal separator
function p.split(text, pattern, plain)
	local ret = {}
	-- Edge case: gsplit with pattern "" breaks Unicode characters which are composed of multiple bytes.
	-- Instead we simply iterate over the characters, which is effectively what splitting with "" does.
	-- If we had Lua 5.3 we could probably do something with the utf8 library.
	-- Instead we use a snippet from http://lua-users.org/wiki/LuaUnicode
	if pattern == "" then
		for m in string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)") do
			ret[#ret+1] = m
		end
	else
		for m in h.gsplit( text, pattern, plain ) do
			ret[#ret+1] = m
		end
	end
	return ret
end

-- Iterator factory behind `p.split`: each call of the returned function
-- yields the next piece of `text`, and nil once the text is exhausted.
function h.gsplit(text, pattern, plain)
	if not pattern then pattern = '%s*,%s*' end
	-- s: index where the next piece starts (nil once finished); l: byte length of text.
	local s, l = 1, text:len()
	return function()
		if s then
			local e, n = text:find( pattern, s, plain )
			local ret
			if not e then
				-- No further separator: the remainder is the last piece.
				ret = text:sub( s )
				s = nil
			elseif n < e then
				-- Empty separator!
				ret = text:sub( s, e )
				if e < l then
					s = e + 1
				else
					s = nil
				end
			else
				ret = e > s and text:sub( s, e - 1 ) or ''
				s = n + 1
			end
			return ret
		end
	end, nil, nil
end

-- Returns the result of `string.sub(str, s, e)`, obtained through `p._sub`.
function p.sub(str, s, e)
	local slice = p._sub(s, e)
	return slice(str)
end

-- Curried form of `p.sub`: fixes the indices `s` and `e` and returns a
-- function that applies `string.sub` with them to the string it is given.
function p._sub(s, e)
	local function slice(str)
		return string.sub(str, s, e)
	end
	return slice
end

-- Formats the first byte of `ch` as "%" followed by two uppercase hex digits
-- (for example " " becomes "%20").
local function char_to_hex(ch)
	return string.format("%%%02X", string.byte(ch))
end

-- Source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua

-- Strips leading and trailing characters belonging to `charset` from `s`.
-- Delegates to the curried `p._trim`.
function p.trim(s, charset)
	local trimmer = p._trim(charset)
	return trimmer(s)
end

-- Curried form of `p.trim`. `charset` is spliced verbatim between square
-- brackets in a Lua pattern, so it is not escaped here; when omitted it
-- defaults to tab, CR, LF, form feed and space.
function p._trim(charset)
	charset = charset or '\t\r\n\f '
	return function(s)
		-- gsub also returns a match count; assigning to one local keeps only the string.
		local trimmed = s:gsub( '^[' .. charset .. ']*(.-)[' .. charset .. ']*$', '%1' )
		return trimmed
	end
end

-- By http://lua-users.org/wiki/RiciLake

-- Replaces each `${name}` placeholder in `formatStr` with `tab[name]`.
-- Placeholders whose lookup is nil or false are left in the output as written.
function p.interpolate(formatStr, tab)
	local function lookup(placeholder)
		-- Drop the leading "${" and the trailing "}" to get the key.
		local key = placeholder:sub(3, -2)
		return tab[key] or placeholder
	end
	-- gsub also returns a substitution count; keep only the string.
	local result = formatStr:gsub('($%b{})', lookup)
	return result
end

-- Argument schemas, keyed by name and then by parameter name.
-- NOTE(review): presumably consumed by documentation/validation tooling
-- outside this file — confirm. The `format` entry has no matching function
-- in the visible part of this module.
p.Schemas = {
	split = {
		str = {
			type = "string",
			required = true,
		},
		pattern = {
			type = "string",
			default = mw.dumpObject("%s*,%s*"),
		},
		plain = {
			type = "boolean",
		},
	},
	sub = {
		str = {
			type = "string",
			required = true,
		},
		startIndex = {
			type = "number",
			required = true,
		},
		endIndex = {
			type = "number",
			default = "#str",
		},
	},
	trim = {
		pattern = {
			type = "string",
		},
		str = {
			type = "string",
			required = true,
		},
	},
	format = {
		formatStr = {
			type = "string",
			required = true,
		},
		["..."] = {
			type = "array",
			items = {
				type = "string",
			},
			required = true,
		},
	},
	interpolate = {
		formatStr = {
			type = "string",
			required = true,
		},
		args = {
			type = "map",
			required = true,
			keys = { type = "string" },
			values = { type = "string" },
		},
	},
}

-- Per-function documentation metadata: parameter names (`params`, and
-- `_params` for the curried variants), a description of the return value,
-- and example `cases` giving `args` and the expected result.
-- NOTE(review): the `startsWithRegex` entry below appears to be cut off
-- mid-field ("returnperformant alternative to ...") and runs straight into
-- the fields of a different entry, so the table literal does not parse as
-- written. Several description strings also contain empty gaps where inline
-- text seems to have been dropped. Restore this table from the upstream
-- module before relying on it — TODO confirm.
p.Documentation = { isEmpty = { params = {"str"}, returns = ' if and only if the value is   or  ', cases = { {				args = {nil}, expect = true, },			{				args = {""}, expect = true, },			{				args = {" "}, expect = false, },		},	},	isBlank = { params = {"str"}, returns = " if and only if   is nil, blank, or whitespace.", cases = { {				args = {" "}, expect = true, },			{				args = {"\n\n\n"}, expect = true, },			{				args = {nil}, expect = true, },			{				args = {"foo"}, expect = false, },		},	},	notEmpty = { params = {"str"}, returns = " if and only if   is neither nil nor an empty string.", cases = { {				args = {" "}, expect = true, },			{				args = {""}, expect = false, },			{				args = {nil}, expect = false, },		}	},	nilIfEmpty = { params = {"str"}, returns = " if value is nil or empty string, otherwise returns the given value.", cases = { outputOnly = true, {				args = {""}, expect = nil, },			{				args = {nil}, expect = nil, },			{				args = {" "}, expect = " ", },		},	},	startsWith = { params = {"str", "pattern"}, _params = {{"pattern"}, {"str"}}, returns = " if   starts with , else  .", cases = { {				args = {"Fooloo Limpah", "Foo"}, expect = true, },			{				args = {"Fooloo Limpah", "foo"}, expect = false, },			{				args = {"Fooloo Limpah", ""}, expect = true, },			{				args = {"foo", ""},				expect = true,			},		},	},	startsWithRegex = {		params = {"str", "pattern"},		_params = {{"pattern"}, {"str"}},		returnperformant alternative to .",		params = {"str", "pattern"},		_params = {{"pattern"}, {"str"}},		returns = "The trimmed string.",		cases = {			outputOnly = true,			{				args = {" foo"},				expect = "foo",			},			{				args = {":Category:Link", ":"},				expect = "Category:Link",			},		},	},	split = {		desc = "A performant alternative to  .",		params = {"str", "pattern", "plain"},		_params = {{"pattern", "plain"}, {"str"}},		returns = "A   of the split strings.",		cases = {			{				args = {" foo,    bar,baz "},				expect = {" foo", "bar", "baz "},			},			{		
		args = {"foo bar baz", " "},				expect = {"foo", "bar", "baz"},			},			{				desc = "Limited support for Unicode strings",				args = {"アイウエオ", ""},				expect = {"ア","イ","ウ","エ","オ"},			},		},	},	sub = {		desc = "Equivalent to  .",		params = {"str", "startIndex", "endIndex"},		_params = {{"startIndex", "endIndex"}, {"str"}},		returns = "Function returning a substring of   from   to   (inclusive).",		cases = {			outputOnly = true,			{				args = {"Fooloo Limpah", 8},				expect = "Limpah",			},			{				args = {"Fooloo Limpah", 1, 6},				expect = "Fooloo",			},			{				args = {"Fooloo Limpah", 20},				expect = "",			},			{				args = {"Fooloo Limpah", -20},				expect = "Fooloo Limpah",			},			{				args = {"Fooloo Limpah", 8, 20},				expect = "Limpah", 			},		},	},	interpolate = {		desc = "Approximation of string interpolation",		params = {"formatStr", "args"},		returns = "The formatted string.",		cases = {			outputOnly = true,			{				args = {"${wiki} is a ${franchise} encyclopedia that anyone can edit.", {					wiki = "Zelda Wiki",					franchise = "Zelda",				}},				expect = "Zelda Wiki is a Zelda encyclopedia that anyone can edit."			}		}	}, }

return p