Mòdulu:dati Unicode
Aspettu
La ducumintazzioni di stu mòdulu si pò criari nta Mòdulu:dati Unicode/doc
local m_tabbella = require("Mòdulu:tabbella")
local m_params = require("Mòdulu:paràmitri")
local p = {}
function p.getBlocks()
return mw.loadData("Mòdulu:dati Unicode/data/blocks")
end
function p.getScripts()
return mw.loadData("Mòdulu:dati Unicode/data/scripts")
end
function p.getScriptRanges()
return require("Mòdulu:dati Unicode/data/script ranges")
end
function p.getNumberSystems()
return mw.loadData("Mòdulu:dati Unicode/data/numbers")
end
function p.getBlock(lowerCodepoint)
return p.getBlocks()[lowerCodepoint]
end
function p.getBlockForChar(char)
local len = mw.ustring.len(char)
if len ~= 1 then
error(mw.ustring.format('Sulu nu caràttiri privistu, %d dati ("%s")', len, char))
end
local code = mw.ustring.codepoint(char)
for _, block in pairs(p.getBlocks()) do
if block.lower <= code and code <= block.upper then
return block
end
end
return nil
end
function p.getScript(code)
return p.getScripts()[code]
end
function p.getScriptForChar(char)
local len = mw.ustring.len(char)
if len ~= 1 then
error(mw.ustring.format("Sulu nu caràttiri privistu, %d dati", len))
end
local code = mw.ustring.codepoint(char)
local scripts = p.getScripts()
for range, script_code in pairs(p.getScriptRanges()) do
if range[1] <= code and code <= range[2] then
return scripts[script_code]
end
end
return scripts["Unknown"]
end
function p.getScriptForText(text)
local inheritedFound = false
local commonFound = false
local res
for _, script in pairs(p.getScriptsForText(text)) do
local name = script.code
if not commonFound and name == "Common" then
commonFound = true
elseif not inheritedFound and name == "Inherited" then
inheritedFound = true
elseif name ~= "Common" and name ~= "Inherited" then
if res == nil or res.code == "Unknown" then
res = script
elseif res and script.code ~= "Unknown" and script.code ~= res.code then
return p.getScript("Unknown")
end
end
end
if res == nil then
if inheritedFound then
return p.getScript("Inherited")
elseif commonFound then
return p.getScript("Common")
end
end
return res
end
function p.getScriptsForText(text, getRanges)
local res = {}
local scriptsRanges = {}
local i = 1
while i <= mw.ustring.len(text) do
local c = mw.ustring.sub(text, i, i)
local skip = false
if c == "<" then
local j = mw.ustring.find(text, ">", i, true)
if j ~= nil then
table.insert(scriptsRanges, { script = nil, from = i, to = j })
i = j
skip = true
end
end
if not skip then
local script = p.getScriptForChar(c)
local name = script.code
if not res[name] then
res[name] = script
table.insert(scriptsRanges, { script = name, from = i, to = i })
else
local lastRange = scriptsRanges[#scriptsRanges]
if lastRange.script == name and lastRange.to + 1 == i then
lastRange.to = i
else
table.insert(scriptsRanges, { script = name, from = i, to = i })
end
end
end
i = i + 1
end
if getRanges then
return res, scriptsRanges
end
return res
end
function p.textHasScript(text, scriptCode)
local script = p.getScript(scriptCode)
return script ~= nil and p.getScriptsForText(text)[script.code] ~= nil
end
function p.shouldItalicize(text)
local name = p.getScriptForText(text).code
return name == "Latin" or name == "Common" or name == "Inherited"
end
local directionToCss = {
["lr"] = "horizontal-tb",
["rl"] = "horizontal-tb",
["tb"] = "vertical-lr",
["i"] = "inherit",
["m"] = "inherit",
}
local directionToDir = {
["lr"] = "ltr",
["rl"] = "rtl",
["tb"] = "ltr",
}
function p.setWritingDirection(text)
local dirsToIgnore = { lr = true, rl = true }
local res = ""
local scripts, intervals = p.getScriptsForText(text, true)
local prevScriptName
local inSpan = false
for i, interval in ipairs(intervals) do
local substr = mw.ustring.sub(text, interval.from, interval.to)
if interval.script then
local script = scripts[interval.script]
local scriptName = script.name
local scriptDir = script.direction or "i"
local nextScript = intervals[i + 1] and scripts[intervals[i + 1].script] or nil
local nextScriptDir = nextScript and (nextScript.direction or "i") or nil
if inSpan and scriptDir ~= "i" and scriptDir ~= "m" and scriptName ~= prevScriptName then
res = res .. "</span>"
inSpan = false
end
if dirsToIgnore[scriptDir] or prevScriptName == scriptName
or ((scriptDir == "i" or scriptDir == "m") and ((i == 1 and (dirsToIgnore[nextScriptDir] or nextScriptDir == "i" or nextScriptDir == "m")) or i > 1 or not nextScript)) then
res = res .. substr
else
local dir
local cssDir
if scriptDir ~= "i" and scriptDir ~= "m" then
dir = directionToDir[scriptDir]
cssDir = scriptDir
prevScriptName = scriptName
elseif nextScriptDir then
dir = directionToDir[nextScriptDir]
cssDir = nextScriptDir
prevScriptName = nextScript.name
end
if dir then
local dirAttr = dir and ('dir="' .. dir .. '"') or ""
local writingMode = directionToCss[cssDir]
res = res .. mw.ustring.format('<span %s style="writing-mode:%s">', dirAttr, writingMode) .. substr
inSpan = true
end
end
else
if inSpan then
res = res .. "</span>"
inSpan = false
end
res = res .. substr
prevScriptName = nil
end
end
if inSpan then
res = res .. "</span>"
end
return res
end
function p.convertNumber(n, system)
if n < 0 then
error("Le nùmmiru s'havi a èssiri nu nteru pusitivu o nuḍḍu!")
end
local s = p.getNumberSystems()[system]
if not s then
error("Sistema numèricu nu vàliddu: " .. tostring(system))
end
if (s.min_value and n < s.min_value) or (s.max_value and n > s.max_value) then
error("Valuri nun vàlidda: " .. tostring(n))
end
if s.positional then
local base = s.base
local offset = s.zero_offset
local digit = function(i)
return mw.ustring.char(i + offset)
end
if n == 0 then
return digit(0)
end
local res = ""
local i = n
while i > 0 do
res = digit(i % base) .. res
i = math.floor(i / base)
end
return res
elseif system == "rumanu" then
local symbols = {
"I", "V",
"X", "L",
"C", "D",
"M", "ↁ",
"ↂ", "ↇ",
"ↈ",
}
local exp = 0;
local res = "";
local i = n
while i > 0 do
local d = i % 10;
local unit1 = symbols[exp * 2 + 1];
local unit5 = symbols[(exp + 1) * 2];
local unit10 = symbols[(exp + 1) * 2 + 1];
local str = "";
if d ~= 0 then
if d <= 3 then
for _ = 0, d - 1 do
str = str .. unit1
end
elseif d == 4 then
str = str .. unit1 .. unit5
elseif d == 5 then
str = str .. unit5
elseif 5 < d and d < 9 then
str = str .. unit5
for _ = 0, d - 6 do
str = str .. unit1
end
else
str = str .. unit1 .. unit10
end
end
res = str .. res;
i = math.floor(i / 10);
exp = exp + 1;
end
return res;
elseif system == "grecu" then
local symbols = {
"α", "β", "γ", "δ", "ε", "ϛ", "ζ", "η", "θ",
"ι", "κ", "λ", "μ", "ν", "ξ", "ο", "π", "ϟ",
"ρ", "σ", "τ", "υ", "φ", "χ", "ψ", "ω", "ϡ",
}
local exp = 1;
local res = "";
local i = n
while i > 0 do
local d = i % 10
if d > 0 then
if exp == 10 or exp == 10000 then
d = d + 9
elseif exp == 100 or exp == 100000 then
d = d + 18
end
res = symbols[d] .. res
if exp >= 1000 then
res = "͵" .. res
end
end
exp = exp * 10
i = math.floor(i / 10)
end
if n % 1000 ~= 0 then
res = res .. "ʹ"
end
return res
elseif system == "sinugrammi" then
local symbols = { "〇", "一", "二", "三", "四", "五", "六", "七", "八", "九" }
if n == 0 then
return symbols[1]
end
local res = "";
local i = n
while i > 0 do
res = symbols[(i % 10) + 1] .. res
i = math.floor(i / 10)
end
return res
end
end
function p.blockReference(frame)
local args = m_params.process(frame.args, {
[1] = { type = m_params.INT },
})
local blockCode = args[1]
local block
if blockCode then
block = p.getBlock(blockCode)
else
block = p.getBlockForChar(mw.title.getCurrentTitle().text)
end
if block then
return mw.ustring.format("Unicode, Inc., ''[%s %s]'', The Unicode Standard, version %s, %d",
block.url, block.name.en, block.version, block.year)
end
error(mw.ustring.format('Blocco Unicode "%s" nun vàliddu', blockCode or ""))
end
function p.writingDirection(frame)
local args = m_params.process(frame.args, {
[1] = { required = true, allow_empty = true },
})
if args[1] then
return p.setWritingDirection(args[1])
end
return ""
end
function p.codepoint(frame)
local args = m_params.process(frame.args, {
[1] = {
required = true,
checker = function(value)
return mw.ustring.len(value) == 1
end,
},
["hexa"] = { type = m_params.BOOLEAN, default = false },
})
local char = args[1]
local isHex = args["hexa"]
local code = mw.ustring.codepoint(char)
if isHex then
return string.format("%04X", code)
end
return code
end
function p.character(frame)
local args = m_params.process(frame.args, {
[1] = { required = true, type = m_params.INT },
})
local code = tonumber(args[1])
if code ~= nil then
local success, char = pcall(mw.ustring.char, code)
if success then
return char
end
end
error("Puntu di còdici nun bonu")
end
function p.number(frame)
local args = m_params.process(frame.args, {
[1] = { required = true, type = m_params.INT, checker = function(i)
return tonumber(i) >= 0
end },
["script"] = { enum = m_tabbella.keysToList(p.getNumberSystems()), default = "latin" }
})
return p.convertNumber(args[1], args["script"])
end
return p