Lompat ke isi

Modul:aksara

Ḍâri Wikikamus

Dokumentasi untuk modul ini dapat dibuat di Modul:aksara/doc

-- Module table; every public function and data table is attached to it and
-- it is returned at the end of the file.
local M = {}

--- Test whether a code point lies inside any of a list of ranges.
-- @param cp      numeric code point to test
-- @param ranges  sequence of {first, last} pairs; both bounds are inclusive
-- @return true if at least one pair contains cp, false otherwise
local function in_ranges(cp, ranges)
	for idx = 1, #ranges do
		local lo, hi = ranges[idx][1], ranges[idx][2]
		if lo <= cp and cp <= hi then
			return true
		end
	end
	return false
end

-- Code point ranges of combining marks. They are skipped during detection
-- because they attach to a base character and carry no script of their own.
local IGNORE_COMBINING = {
	{0x0300, 0x036F}, -- Combining Diacritical Marks
	{0x1AB0, 0x1AFF}, -- Combining Diacritical Marks Extended
	{0x1DC0, 0x1DFF}, -- Combining Diacritical Marks Supplement
	{0x20D0, 0x20FF}, -- Combining Diacritical Marks for Symbols
	{0xFE20, 0xFE2F}, -- Combining Half Marks
}

--- Tell whether a code point should be skipped during script detection.
-- @param cp  numeric code point
-- @return true when cp falls in one of the IGNORE_COMBINING ranges
local function is_ignorable(cp)
	local skip = in_ranges(cp, IGNORE_COMBINING)
	return skip
end

-- Script data, keyed by four-letter script code. Each entry holds:
--   name   : English display name of the script
--   class  : CSS class returned by M.getClass (identical to the code here)
--   ranges : list of inclusive {first, last} code point ranges; M.detect
--            matches characters against these
-- Every range must cover only code points that belong to that script.
-- M.detect tries scripts in alphabetical code order, so a range wrongly listed
-- under one script can shadow or be shadowed by the correct one. In
-- particular U+18B0-18FF is Unified Canadian Aboriginal Syllabics Extended
-- (not Mongolian) and U+1730-173F is part of Hanunoo (not Tagalog), so
-- neither is listed under Mong/Tglg.
M.scripts = {
	Arab = { name = "Arabic", class = "Arab", ranges = { {0x0600,0x06FF},{0x0750,0x077F},{0x08A0,0x08FF},{0xFB50,0xFDFF},{0xFE70,0xFEFF},{0x1EE00,0x1EEFF} } },
	Armn = { name = "Armenian", class = "Armn", ranges = { {0x0530,0x058F},{0xFB13,0xFB17} } },
	Bali = { name = "Balinese", class = "Bali", ranges = { {0x1B00,0x1B7F} } },
	Batk = { name = "Batak", class = "Batk", ranges = { {0x1BC0,0x1BFF} } },
	Beng = { name = "Bengali", class = "Beng", ranges = { {0x0980,0x09FF} } },
	Bopo = { name = "Bopomofo", class = "Bopo", ranges = { {0x3100,0x312F},{0x31A0,0x31BF} } },
	Brai = { name = "Braille", class = "Brai", ranges = { {0x2800,0x28FF} } },
	Bugi = { name = "Buginese", class = "Bugi", ranges = { {0x1A00,0x1A1F} } },
	Buhd = { name = "Buhid", class = "Buhd", ranges = { {0x1740,0x175F} } },
	Cham = { name = "Cham", class = "Cham", ranges = { {0xAA00,0xAA5F} } },
	-- Copt sorts before Grek, so the Coptic letters inside the Greek block
	-- (U+03E2-03EF) are matched here first.
	Copt = { name = "Coptic", class = "Copt", ranges = { {0x2C80,0x2CFF},{0x03E2,0x03EF} } },
	Cyrl = { name = "Cyrillic", class = "Cyrl", ranges = { {0x0400,0x04FF},{0x0500,0x052F},{0x2DE0,0x2DFF},{0xA640,0xA69F},{0x1C80,0x1C8F} } },
	Deva = { name = "Devanagari", class = "Deva", ranges = { {0x0900,0x097F},{0xA8E0,0xA8FF} } },
	Dsrt = { name = "Deseret", class = "Dsrt", ranges = { {0x10400,0x1044F} } },
	Egyp = { name = "Egyptian Hieroglyphs", class = "Egyp", ranges = { {0x13000,0x1342F} } },
	Ethi = { name = "Ethiopic", class = "Ethi", ranges = { {0x1200,0x137F},{0x1380,0x139F},{0x2D80,0x2DDF},{0xAB00,0xAB2F} } },
	Geor = { name = "Georgian", class = "Geor", ranges = { {0x10A0,0x10FF},{0x2D00,0x2D2F},{0x1C90,0x1CBF} } },
	Glag = { name = "Glagolitic", class = "Glag", ranges = { {0x2C00,0x2C5F},{0x1E000,0x1E02F} } },
	Goth = { name = "Gothic", class = "Goth", ranges = { {0x10330,0x1034F} } },
	Grek = { name = "Greek", class = "Grek", ranges = { {0x0370,0x03FF},{0x1F00,0x1FFF} } },
	Gujr = { name = "Gujarati", class = "Gujr", ranges = { {0x0A80,0x0AFF} } },
	Guru = { name = "Gurmukhi", class = "Guru", ranges = { {0x0A00,0x0A7F} } },
	Hang = { name = "Hangul", class = "Hang", ranges = { {0x1100,0x11FF},{0x3130,0x318F},{0xAC00,0xD7AF},{0xA960,0xA97F},{0xD7B0,0xD7FF} } },
	Hani = { name = "Han", class = "Hani", ranges = { {0x4E00,0x9FFF},{0x3400,0x4DBF} } },
	Hano = { name = "Hanunoo", class = "Hano", ranges = { {0x1720,0x173F} } },
	Hebr = { name = "Hebrew", class = "Hebr", ranges = { {0x0590,0x05FF},{0xFB1D,0xFB4F} } },
	Hira = { name = "Hiragana", class = "Hira", ranges = { {0x3040,0x309F} } },
	Ital = { name = "Old Italic", class = "Ital", ranges = { {0x10300,0x1032F} } },
	Java = { name = "Javanese", class = "Java", ranges = { {0xA980,0xA9DF} } },
	Kana = { name = "Katakana", class = "Kana", ranges = { {0x30A0,0x30FF},{0x31F0,0x31FF},{0xFF66,0xFF9D} } },
	Khmr = { name = "Khmer", class = "Khmr", ranges = { {0x1780,0x17FF},{0x19E0,0x19FF} } },
	Knda = { name = "Kannada", class = "Knda", ranges = { {0x0C80,0x0CFF} } },
	Lana = { name = "Tai Tham (Lanna)", class = "Lana", ranges = { {0x1A20,0x1AAF} } },
	Laoo = { name = "Lao", class = "Laoo", ranges = { {0x0E80,0x0EFF} } },
	Latn = { name = "Latin", class = "Latn", ranges = { {0x0041,0x005A},{0x0061,0x007A},{0x00C0,0x00FF},{0x0100,0x017F},{0x0180,0x024F},{0x0250,0x02AF},{0x02B0,0x02FF},{0x1D00,0x1D7F},{0x1D80,0x1DBF},{0x1E00,0x1EFF},{0x2C60,0x2C7F},{0xA720,0xA7FF},{0xAB30,0xAB6F} } },
	Limb = { name = "Limbu", class = "Limb", ranges = { {0x1900,0x194F} } },
	Linb = { name = "Linear B", class = "Linb", ranges = { {0x10000,0x1007F},{0x10080,0x100FF} } },
	Maka = { name = "Makasar", class = "Maka", ranges = { {0x11EE0,0x11EFF} } },
	Mlym = { name = "Malayalam", class = "Mlym", ranges = { {0x0D00,0x0D7F} } },
	Mong = { name = "Mongolian", class = "Mong", ranges = { {0x1800,0x18AF},{0x11660,0x1167F} } },
	Mymr = { name = "Myanmar", class = "Mymr", ranges = { {0x1000,0x109F},{0xAA60,0xAA7F},{0xA9E0,0xA9FF} } },
	Nkoo = { name = "N’Ko", class = "Nkoo", ranges = { {0x07C0,0x07FF} } },
	Ogam = { name = "Ogham", class = "Ogam", ranges = { {0x1680,0x169F} } },
	Orya = { name = "Oriya", class = "Orya", ranges = { {0x0B00,0x0B7F} } },
	Phnx = { name = "Phoenician", class = "Phnx", ranges = { {0x10900,0x1091F} } },
	Rjng = { name = "Rejang", class = "Rjng", ranges = { {0xA930,0xA95F} } },
	Runr = { name = "Runic", class = "Runr", ranges = { {0x16A0,0x16FF} } },
	Sinh = { name = "Sinhala", class = "Sinh", ranges = { {0x0D80,0x0DFF} } },
	Sund = { name = "Sundanese", class = "Sund", ranges = { {0x1B80,0x1BBF},{0x1CC0,0x1CCF} } },
	Syrc = { name = "Syriac", class = "Syrc", ranges = { {0x0700,0x074F},{0x0860,0x086F} } },
	Tagb = { name = "Tagbanwa", class = "Tagb", ranges = { {0x1760,0x177F} } },
	Tale = { name = "Tai Le", class = "Tale", ranges = { {0x1950,0x197F} } },
	Talu = { name = "New Tai Lue", class = "Talu", ranges = { {0x1980,0x19DF} } },
	Taml = { name = "Tamil", class = "Taml", ranges = { {0x0B80,0x0BFF} } },
	Tavt = { name = "Tai Viet", class = "Tavt", ranges = { {0xAA80,0xAADF} } },
	Telu = { name = "Telugu", class = "Telu", ranges = { {0x0C00,0x0C7F} } },
	Thaa = { name = "Thaana", class = "Thaa", ranges = { {0x0780,0x07BF} } },
	Thai = { name = "Thai", class = "Thai", ranges = { {0x0E00,0x0E7F} } },
	Tfng = { name = "Tifinagh", class = "Tfng", ranges = { {0x2D30,0x2D7F} } },
	Tglg = { name = "Tagalog", class = "Tglg", ranges = { {0x1700,0x171F} } },
	Tibt = { name = "Tibetan", class = "Tibt", ranges = { {0x0F00,0x0FFF} } },
	Ugar = { name = "Ugaritic", class = "Ugar", ranges = { {0x10380,0x1039F} } },
	Vaii = { name = "Vai", class = "Vaii", ranges = { {0xA500,0xA63F} } },
	Xpeo = { name = "Old Persian", class = "Xpeo", ranges = { {0x103A0,0x103DF} } },
	Xsux = { name = "Cuneiform", class = "Xsux", ranges = { {0x12000,0x123FF},{0x12400,0x1247F},{0x12480,0x1254F} } },
}

-- Script codes in alphabetical order. pairs() yields keys in no defined
-- order, so the list is sorted to give M.detect a deterministic sequence.
M.order = {}
for code in pairs(M.scripts) do
	M.order[#M.order + 1] = code
end
table.sort(M.order)

--- Detect the script of a piece of text.
-- Scans the text code point by code point and returns the script of the
-- first character that falls in any range of M.scripts. Combining marks are
-- skipped, and characters matching no script (spaces, digits, punctuation,
-- ...) are passed over. When ranges of two scripts overlap, the script whose
-- code sorts first in M.order wins.
-- @param text  string to inspect; nil/false and "" are accepted
-- @return script code (a key of M.scripts); "Latn" when text is empty or no
--         character matches any script
function M.detect(text)
	if not text or text == "" then return "Latn" end
	-- Iterate lazily with gcodepoint instead of collecting
	-- mw.ustring.codepoint(text, 1, -1) into a table: returning every code
	-- point as a separate value can exceed Lua's limit on the number of
	-- results for long input, and detection usually stops after a few
	-- characters anyway.
	for cp in mw.ustring.gcodepoint(text) do
		if not is_ignorable(cp) then
			for j = 1, #M.order do
				local code = M.order[j]
				if in_ranges(cp, M.scripts[code].ranges) then
					return code
				end
			end
		end
	end
	return "Latn"
end

--- Look up the CSS class registered for a script code.
-- @param code  script code (key of M.scripts); may be nil
-- @return the entry's `class` field, or nil when code is missing or unknown
function M.getClass(code)
	local entry = code and M.scripts[code]
	if not entry then
		return nil
	end
	return entry.class
end

--- Wrap text in a <span> carrying the script's CSS class.
-- The text is returned unchanged when either argument is missing, when the
-- script is Latin, or when the script code has no registered class.
-- @param text  text to wrap
-- @param sc    script code, e.g. as returned by M.detect
-- @return the wrapped markup, or `text` itself when no wrapping applies
function M.wrap(text, sc)
	-- Nothing to wrap, or Latin (which is never wrapped)
	if not text or not sc or sc == "Latn" then
		return text
	end

	local cssClass = M.getClass(sc)
	if not cssClass then
		-- Unknown script code: leave the text as it is
		return text
	end

	return string.format('<span class="%s">%s</span>', cssClass, text)
end

--- Template entry point: detect the script of the `text` argument and wrap
-- it in a class-bearing <span> when it is non-Latin.
-- @param frame  frame object; the `text` argument is read from frame.args
-- @return the (possibly wrapped) text, or "" when `text` is absent or empty
function M.detectWrap(frame)
	local text = frame.args["text"]
	if text and text ~= "" then
		return M.wrap(text, M.detect(text))
	end
	return ""
end

-- Export the module table.
return M