Mòdul:ca-pron/AFI
A continuació es mostra la documentació transclosa de la subpàgina /ús. [salta a la caixa de codi]
Mòdul que genera la pronúncia en català. La sortida és formatada amb el Mòdul:ca-pron.
local p = {}
-- Dependencies
local listToSet = require("Module:TableTools").listToSet
local track = require('Module:utilitats').track
-- Shortcuts for the Unicode-aware string functions of mw.ustring
local str = {}
str.find = mw.ustring.find
str.match = mw.ustring.match
str.gsub = mw.ustring.gsub
str.sub = mw.ustring.sub
str.lower = mw.ustring.lower
str.len = mw.ustring.len
-- Module-level state read by several functions below; it is not assigned in this part of the file
local pagename
-- Lects generated, each one with a variant "2"
local lects = {"cen", "cen2", "bal", "bal2", "noc", "noc2", "val", "val2"}
-- Additional lects accepted as optional input:
-- Girona "ceng" as variant of "cen", supposedly with respelling
-- Northern "sep" and Algherese "alg", supposedly with IPA
local input_lects = {"cen", "cen2", "ceng", "bal", "bal2", "sep", "alg", "noc", "noc2", "val", "val2"}
-- Generated lects of each dialect group
-- NOTE(review): "ori"/"occ" presumably stand for Eastern (oriental) and Western (occidental) - confirm
local groups_lects = {
ori = {"cen", "cen2", "bal", "bal2"},
occ = {"noc", "noc2", "val", "val2"}
}
-- Reverse lookup: dialect group of every accepted input lect
local lects_group = {
["cen"] = "ori", ["cen2"] = "ori", ["ceng"] = "ori",
["bal"] = "ori", ["bal2"] = "ori", ["sep"] = "ori", ["alg"] = "ori",
["noc"] = "occ", ["noc2"] = "occ", ["val"] = "occ", ["val2"] = "occ"
}
-- Vowel letters recognised in the input; spliced into pattern character classes
local vowels = "aàāeèéëêēiíïoòóôōuúü"
-- Module-level state; not assigned in this part of the file
local ret_error
-- Strip the diacritics from vowels, leaving any other composed character untouched.
-- Parameters:
--   text: string to simplify
-- Returns the text with a, e, i, o, u freed from their accents, in NFC form.
local function plainText(text)
	-- combining acute, grave, circumflex, diaeresis and macron
	local combining_marks = mw.ustring.char(0x0301, 0x0300, 0x0302, 0x0308, 0x0304)
	local accented_vowel = "([aeiou])[" .. combining_marks .. "]"
	-- work on the decomposed form so that every accent is a separate code point
	local stripped = str.gsub(mw.ustring.toNFD(text), accented_vowel, "%1")
	-- recompose whatever was not a vowel + accent pair (ç, l·l...)
	return mw.ustring.toNFC(stripped)
end
-- Respell a word that starts with an unassimilated prefix, so that the boundary
-- between prefix and base is readable by the later steps:
--   * r/s + vowel after the prefix is doubled (rr, ss)
--   * i/u after the prefix gets a diaeresis (ï, ü) to mark the hiatus
--   * s is respelled z in enfons-, endins-, trans- before the listed letters
--   * initial inex- becomes inhex-
-- Parameters:
--   word: the word to respell (string); prefixes are only matched at its start
-- Returns the respelled word, or the word unchanged when it starts with one of
-- the false prefixes listed in `no_prefix`.
local function fixPrefixes(word)
	-- TODO: review if it should be done by respelling "aero-", etc.
	-- Prefixes that take both treatments: doubling of r/s and diaeresis on i/u
	local prefix = {
		"aéro", "ànte", "ànti", "àuto", -- ambiguous a-, ab-
		"bío",
		-- ambiguous co-, contra-
		"dia", "dodeca",
		"[eé]ntre", "equi", "est[eë]reo", -- ambiguous e-(radic)
		"f[oó]to",
		"g[aà]stro", "gr[eé]co",
		"hendeca", "hepta", "hexa", "h[oô]mo",
		"[ií]nfra", "[ií]ntra",
		"m[aà]cro", "m[ií]cro", "mono", "m[oô]rfo", "m[uú]lti",
		"n[eé]o",
		"octo", "orto",
		"p[eë]nta", "p[oô]li", "pol[ií]tico", "pr[oô]to", "ps[eë]udo", "psico", -- ambiguous pre-(s), pro-
		"qu[aà]si", "qu[ií]mio",
		"r[aà]dio", -- ambiguous re-
		"s[eë]mi", "sóbre", "s[uú]pra",
		"t[eë]rmo", "tetra", "tri", -- ambiguous tele-(r)
		-- uni- accepts plain or accented u, like every other optional accent in this list
		"[uú]ltra", "[uú]n[ií]",
		"v[ií]ce"
	}
	-- Prefixes that only double a following r
	local prefix_r = {"b[ií]li", "[eë]xtra", "pr[eé]"}
	-- Prefixes that only double a following s
	local prefix_s = {"bi", "c[eé]ntro", "deca", "d[ií]no", "[eë]co",
		"[eë]xtra", "hetero", "p[aà]ra", "p[oô]st", "pré", "s[oó]ta", "t[eë]le"
	}
	-- Prefixes that only mark a hiatus before i
	local prefix_i = {"pr[eé]", "pr[ií]mo", "pro", "t[eë]le"}
	-- Word beginnings that look like a prefix but are not
	local no_prefix = {"bisa", "bisell", "bis[óò]",
		"contrari", "contrau", "diari", "equise", "heterosi",
		"monoi", "parasa", "parasit", "preix", "psicosi"
	}

	-- False prefixes: leave the word completely untouched
	for _, pr in ipairs(no_prefix) do
		if str.find(word, "^" .. pr) then
			return word
		end
	end

	-- Double r in prefix + r + vowel
	for _, pr in ipairs(prefix_r) do
		word = str.gsub(word, "^(" .. pr .. ")r([" .. vowels .. "])", "%1rr%2")
	end
	word = str.gsub(word, "^eradic", "erradic")

	-- Double s in prefix + s + vowel
	for _, pr in ipairs(prefix_s) do
		word = str.gsub(word, "^(" .. pr .. ")s([" .. vowels .. "])", "%1ss%2")
	end

	-- Hiatus in prefix + i
	for _, pr in ipairs(prefix_i) do
		word = str.gsub(word, "^(" .. pr .. ")i(.)", "%1ï%2")
	end
	if str.find(word, "^rei[^aegx].") and not str.find(word, "^rein[ae]") then
		word = str.gsub(word, "^rei", "reï") -- prefix re-, as in ca-general.sil
	end

	-- Both prefix + r/s or i/u
	for _, pr in ipairs(prefix) do
		word = str.gsub(word, "^(" .. pr .. ")([rs])([" .. vowels .. "])", "%1%2%2%3")
		word = str.gsub(word, "^(" .. pr .. ")i(.)", "%1ï%2")
		word = str.gsub(word, "^(" .. pr .. ")u(.)", "%1ü%2")
	end

	-- Voiced s in prefix roots -fons-, -dins-, -trans-
	word = str.gsub(word, "^enfons([" .. vowels .. "])", "enfonz%1")
	word = str.gsub(word, "^endins([aàeèéiíoòóuú])", "endinz%1")
	-- not anchored: trans-/tràns- is respelled wherever it appears in the word
	word = str.gsub(word, "tr([aà])ns([" .. vowels .. "bdghlmv])", "tr%1nz%2")

	-- in + ex > ineks/inegz
	word = str.gsub(word, "^inex", "inhex")
	return word
end
-- Put back the hiatus mark on word endings where the orthography omits the
-- diaeresis, so that later steps do not read those vowel pairs as diphthongs.
-- Parameters:
--   word: the word to respell (string)
-- Returns the respelled word.
local function restoreDiaereses(word)
	-- Each rule is {pattern, replacement}; rules are applied in this order
	local ending_rules = {
		{"([iu])um(s?)$", "%1üm%2"}, -- Latinisms (-ius is ambiguous but rare)
		{"([aeiou])isme(s?)$", "%1ísme%2"}, -- suffix -isme
		{"([aeiou])ist([ae]s?)$", "%1íst%2"}, -- suffix -ista
		{"([aeou])ir$", "%1ír"}, -- verbs -ir
		{"([aeou])int$", "%1ínt"}, -- present participle
		-- future; u only counts when it is not part of gu/qu
		{"([aeo])ir([éà])$", "%1ïr%2"},
		{"([^gq]u)ir([éà])$", "%1ïr%2"},
		{"([aeo])iràs$", "%1ïràs"},
		{"([^gq]u)iràs$", "%1ïràs"},
		{"([aeo])ir(e[mu])$", "%1ïr%2"},
		{"([^gq]u)ir(e[mu])$", "%1ïr%2"},
		{"([aeo])iran$", "%1ïran"},
		{"([^gq]u)iran$", "%1ïran"},
		-- conditional
		{"([aeo])iria$", "%1ïria"},
		{"([^gq]u)iria$", "%1ïria"},
		{"([aeo])ir(ie[sn])$", "%1ïr%2"},
		{"([^gq]u)ir(ie[sn])$", "%1ïr%2"},
	}
	for _, rule in ipairs(ending_rules) do
		word = str.gsub(word, rule[1], rule[2])
	end
	return word
end
-- Resolve the letter y: the digraph ny becomes the hint ñ, and a remaining y
-- becomes the vowel i unless it stands between vowels or at an edge next to one.
-- Parameters:
--   word: the word to respell (string)
-- Returns the respelled word; any y left in it is to be read as a consonant.
local function fixY(word)
	local not_vowel = "[^" .. vowels .. "]"
	-- syllable separators do not count as a preceding consonant
	local not_vowel_nor_separator = "[^" .. vowels .. "·%-%.]"

	-- hint for the palatal nasal
	word = str.gsub(word, "ny", "ñ")
	-- y followed by something that is not a vowel is a vowel itself
	word = str.gsub(word, "y(" .. not_vowel .. ")", "i%1")
	-- y preceded by a consonant is a vowel too
	word = str.gsub(word, "(" .. not_vowel_nor_separator .. ")y", "%1i")
	return word
end
-- Apply the orthographic respellings to one word before it is split into syllables.
-- The steps depend on each other and must keep this order.
-- Parameters:
--   word: the word to respell (string)
--   lect: lect code; only "cen" keeps the initial clusters ps-, pt- and gn-
-- Returns the respelled word.
local function wordRespellings(word, lect)
-- an r or s directly after a hyphen is doubled
word = str.gsub(word, "%-([rs]?)", "-%1%1")
word = str.gsub(word, "s%-c([eé])", "-c%1") -- dos-cents... never geminated
word = str.gsub(word, "rç$", "rrs") -- silent r only in plurals -rs
word = fixPrefixes(word) -- internal pause after a prefix
word = restoreDiaereses(word) -- no diaeresis saving
word = fixY(word) -- ny > ñ else y > i vowel or consonant
word = str.gsub(word, "^sc?h([" .. vowels .."])", "xx%1") -- hint xx (scheelita, shigel·la)
word = str.gsub(word, "^s([^" .. vowels .."w])", "es%1") -- epenthetic vowel (spin) GEIC 3.3.3.1
word = str.gsub(word, "ch([" .. vowels .."])", "tx%1") -- ch before a vowel is respelled tx
word = str.gsub(word, "([^" .. vowels .. "h])h", "%1") -- h dropped after a consonant: anhel, adherir...
-- initial consonant clusters GEIC 2.3.2, GNV 1.3.2.7.1.f
word = str.gsub(word, "^[mp]n", "n")
word = str.gsub(word, "^bd", "d")
if lect ~= "cen" then -- formal double consonant only in Central (GDLC)
word = str.gsub(word, "^p([st])", "%1") -- psicòleg, ptolemaic
word = str.gsub(word, "^gn", "n") -- gnom
end
return word
end
-- Split a run of adjacent vowel letters into syllable nuclei.
-- An i or u followed by another vowel becomes the onset of that vowel, and a
-- final i or u is attached as the coda of the preceding syllable.
-- Parameters:
--   vowels: non-empty string made only of vowel letters
-- Returns a sequence of tables {onset = ..., vowel = ..., coda = ...}.
local function splitVowels(vowels)
	-- the first letter is always the nucleus of the first syllable
	local result = {{onset = "", vowel = str.sub(vowels, 1, 1), coda = ""}}
	local rest = str.sub(vowels, 2)
	while rest ~= "" do
		-- optional glide, then one nucleus, then whatever is left for the next round
		local glide, nucleus, tail = str.match(rest, "^([iu]?)(.)(.-)$")
		result[#result + 1] = {onset = glide, vowel = nucleus, coda = ""}
		rest = tail
	end
	-- a trailing i/u closes a falling diphthong instead of forming its own syllable
	local last = result[#result]
	if #result >= 2 and (last.vowel == "i" or last.vowel == "u") then
		result[#result - 1].coda = last.vowel
		result[#result] = nil
	end
	return result
end
-- Split the word into syllables and locate the stress.
-- Parameters:
--   remainder: the respelled word (string); it is consumed from left to right
--   lect: lect code; only "bal2" is treated specially here
-- Returns a sequence of syllable tables with the fields onset, vowel, coda, text
-- and, on stressed syllables, stressed = true. The same table also carries the
-- field `stress`, the index of the syllable bearing the primary stress.
-- NOTE(review): a word without any vowel leaves `syllables` empty, so the coda
-- assignment below indexes a nil entry and raises an error - confirm callers
-- never pass such input.
local function splitSyllables(remainder, lect)
-- Onsets allowed at the start of a non-initial syllable
local valid_onsets = listToSet({
"b", "bl", "br",
"c", "cl", "cr", "ç",
"d", "dr",
"f", "fl", "fr",
"g", "gl", "gr", "gu", "gü",
"h", "hh", -- hint for aspirated h
"i",
"j",
"k", "kl", "kr",
"l", "ll",
"m",
"n", "ny", "ñ",
"p", "pl", "pr",
"qu", "qü",
"r", "rr",
"s", "ss",
"t", "tg", "tj", "tr", "tx", "tz",
"u",
"v", "vl",
"w",
"x",
"y",
"z",
})
local syllables = {}
-- First pass: alternate between a run of non-vowels and a run of vowels
while remainder ~= "" do
local consonants, vowels
consonants, remainder = str.match(remainder, "^([^aàāeèéêëēiíïoòóôōuúü]*)(.-)$")
vowels, remainder = str.match(remainder, "^([aàāeèéêëēiíïoòóôōuúü]*)(.-)$")
if vowels == "" then
-- trailing consonants: append them to the coda of the last syllable
syllables[#syllables].coda = syllables[#syllables].coda .. consonants
else
local onset = consonants
local first_vowel = str.sub(vowels, 1, 1)
-- u/ü after g or q, and a word-initial i (optionally after h) followed by
-- another vowel, belong to the onset and not to the nucleus
if (str.find(onset, "[gq]$") and (first_vowel == "ü" or (first_vowel == "u" and vowels ~= "u")))
or ((onset == "" or onset == "h") and #syllables == 0 and first_vowel == "i" and vowels ~= "i" )
then
onset = onset .. str.sub(vowels, 1, 1)
vowels = str.sub(vowels, 2)
end
-- the vowel run may hold several nuclei; the consonants go to the first one
local vsyllables = splitVowels(vowels)
vsyllables[1].onset = onset .. vsyllables[1].onset
for _, s in ipairs(vsyllables) do
table.insert(syllables, s)
end
end
end
-- Shift over consonants from the onset to the preceding coda,
-- until the syllable onset is valid
for i = 2, #syllables do
local current = syllables[i]
local previous = syllables[i-1]
while not (current.onset == "" or valid_onsets[current.onset]) do
local letter = str.sub(current.onset, 1, 1)
current.onset = str.sub(current.onset, 2)
if not str.find(letter, "[·%-%.]") then -- not a syllable separator
previous.coda = previous.coda .. letter
else
-- a separator is dropped and what follows it is kept as the onset
break
end
end
end
-- Detect stress
for i, syll in ipairs(syllables) do
if str.find(syll.vowel, "^[àèéêëíòóôú]$") then
syllables.stress = i -- primary stress: the last one stressed
syll.stressed = true
end
end
-- No written accent: a monosyllable is stressed; otherwise the penultimate
-- syllable is stressed when the word ends in a vowel, in vowel + s, or in
-- -en/-in/-ïn, and the last syllable in every other case
if not syllables.stress then
local count = #syllables
if count == 1 then
syllables.stress = 1
else
local final = syllables[count]
if final.coda == "" or final.coda == "s" or (final.coda == "n" and (final.vowel == "e" or final.vowel == "i" or final.vowel == "ï")) then
syllables.stress = count - 1
else
syllables.stress = count
end
end
syllables[syllables.stress].stressed = true
end
-- Change stress in infinitive + -se, in Balearic
-- informal in Mallorca and Menorca, not in Eivissa
if lect == "bal2"
and str.sub(pagename, -4) == 'r-se'
then
--and i == syllables.stress -- -ar-se, also entémer-se...
-- the stress moves to the last syllable, whose vowel becomes ə
syllables[syllables.stress].stressed = nil
syllables[#syllables].stressed = true
syllables[#syllables].vowel = "ə"
syllables.stress = #syllables
end
-- text of syllable from components
for i, syll in ipairs(syllables) do
syllables[i].text = syll.onset .. syll.vowel .. syll.coda
end
return syllables
end
-- Guess the quality of a stressed mid vowel (e, è, o) from the letters after it.
-- Only the stressed syllable and, when the stress is penultimate, the last
-- syllable are inspected. The first matching rule wins, so the order of the
-- branches matters. Most branches on è and o also record a tracking key.
-- Parameters:
--   syllables: sequence of syllable tables with a `stress` index
-- Returns the vowel letter to use for the stressed syllable: one of è, é, ê, ò, ó,
-- or the original vowel when no rule applies. `syllables` is not modified.
-- NOTE(review): ê looks like a respelling hint that is resolved per lect
-- elsewhere - confirm.
local function midVowelFixes(syllables)
-- consonants after the stressed vowel, up to the next vowel
local post_consonants = syllables[syllables.stress].coda
-- vowel of the last syllable, only filled in for penultimate stress
local post_vowel = ""
-- everything that follows the stressed vowel
local post_letters = post_consonants
if syllables.stress == #syllables - 1 then
post_consonants = post_consonants .. syllables[#syllables].onset
post_vowel = syllables[#syllables].vowel
post_letters = post_consonants .. post_vowel .. syllables[#syllables].coda
end
-- most common cases, other ones are ambiguous
local stressed_vowel = syllables[syllables.stress].vowel
if stressed_vowel == "e" then
if post_vowel == "i" or post_vowel == "u" then
return "è"
elseif str.find(post_consonants, "^l") and not str.find(post_consonants, "^lls?$") and syllables.stress == #syllables then
-- final stressed syllable closed by l + anything, but not by ll/lls
return "è"
elseif post_consonants == "l" or post_consonants == "ls" or post_consonants == "l·l" then
return "è"
elseif post_letters == "nt" or post_letters == "nts" then
return "é"
elseif str.find(post_letters, "^r[ae]?s?$") then
return "é"
elseif str.find(post_consonants, "^r[dfjlnrstxyz]") then -- except bilabial and velar
return "è"
elseif post_letters == "sos" or post_letters == "sa" or post_letters == "ses" then -- inflection of -ès
return "ê"
elseif str.find(post_letters, "^t[ae]?s?$") then
track("ca-pron/et-final")
return "ê"
end
elseif stressed_vowel == "è" then
if syllables.stress == #syllables and (post_letters == "s" or post_letters == "") then -- -ès, -è
track("ca-pron/è-final")
return "ê"
end
elseif stressed_vowel == "o" then
if post_vowel == "i" or post_vowel == "u" then
track("ca-pron/oi-ou")
return "ò"
elseif str.sub(post_letters, 1, 1) == "i" and str.sub(post_letters, 1, 2) ~= "ix" then -- diphthong oi except oix
track("ca-pron/oi-diftong")
return "ò"
elseif post_letters == "ma" or post_letters == "mes" then
track("ca-pron/oma")
return "ó"
elseif str.find(post_letters, "^r[ft]s?$") then
track("ca-pron/orC")
return "ò"
elseif str.find(post_letters, "^r[ae]?s?$") then
track("ca-pron/or-final")
return "ó"
elseif post_letters == "rme" or post_letters == "rmes" then
track("ca-pron/orme")
return "ó"
end
end
-- no rule matched: keep the vowel as written
return stressed_vowel
end
-- Build the respelled word back from its syllables, writing the accent on the
-- stressed vowel and restoring the l·l and hyphen of the page name.
-- Parameters:
--   syllables: sequence of syllable tables with a `stress` index; the vowel of
--              the stressed syllable is overwritten in place
-- Returns the respelled word as a string.
local function reconstituteWordFromSyllables(syllables)
	local tonic = syllables[syllables.stress]
	local tonic_vowel = tonic.vowel
	if str.find(tonic_vowel, "[eèo]") then
		-- mid vowels need the context to decide between open and close
		tonic.vowel = midVowelFixes(syllables)
	else
		local accented = {["a"] = "à", ["i"] = "í", ["u"] = "ú"}
		tonic.vowel = str.gsub(tonic_vowel, "[aiu]", accented)
	end

	local pieces = {}
	for idx, part in ipairs(syllables) do
		pieces[idx] = part.onset .. part.vowel .. part.coda
	end
	local word = table.concat(pieces)

	-- restore geminated lost in splitSyllables()
	if str.find(pagename, "l·l") then
		word = str.gsub(word, "ll", "l·l")
	end
	-- put the hyphen back after the part of the page name that precedes it;
	-- that part is deliberately used as a pattern, exactly as written
	if str.find(pagename, "-") then
		local first = str.match(pagename, "(.+)-")
		word = str.gsub(word, first, first .. "-")
	end
	return word
end
-- Turn spelled consonants into phonemic symbols with the replacements that do
-- not depend on the neighbouring sounds.
-- Parameters:
--   cons: a consonant string (string)
-- Returns the string rewritten with IPA symbols.
local function replaceContextFree(cons)
	-- Apply a list of {pattern, replacement} pairs in order
	local function apply(rules)
		for _, rule in ipairs(rules) do
			cons = str.gsub(cons, rule[1], rule[2])
		end
	end

	apply({
		{"r", "ɾ"},
		{"ɾɾ", "r"}, -- double r is the trill
		{"ss", "s"},
		{"ll", "ʎ"},
		{"ñ", "ɲ"}, -- hint ny > ñ
	})

	-- h is silent unless written with the hint hh
	if str.find(cons, "hh") then
		cons = str.gsub(cons, "hh", "h") -- hint hh > /h/
	else
		cons = str.gsub(cons, "h", "")
	end

	apply({
		-- digraphs first, before their letters are replaced one by one
		{"[dt]j", "d͡ʒ"},
		{"tx", "t͡ʃ"},
		{"[dt]z", "d͡z"},
		{"ç", "s"},
		{"[cq]", "k"},
		{"l", "ɫ"},
		{"g", "ɡ"},
		{"j", "ʒ"},
		{"x", "ʃ"},
		{"[iy]", "j"}, -- must be after j > ʒ and fixY
		{"[uü]", "w"},
	})
	return cons
end
-- Drop the second of two adjacent unstressed vowels at the junction of a
-- compound or prefixed word, merging the two syllables into one.
-- It only acts on words with at least two stressed syllables, and only on the
-- unstressed syllables lying between the first two of them.
-- Parameters:
--   syllables: sequence of syllable tables with a `stress` index; modified in place
--   lect: lect code; not read by this function
-- Returns the same `syllables` table.
local function elisionContinguousVowels(syllables, lect)
-- Eastern contiguous vowels lose the last one in some contexts
-- GIEC 3.4.i: with the final and the initial vowel of the two components of a compound:
---- ə+ə ([ae]+[ae]) elision, but ea > /eə/ with exceptions
---- other identical vowels ii, oo, ou
---- i + e in a closed syllable: anti/poli(e)sportiu, anti(e)stàtic, multi(e)stàndard
-- Find the first two stressed syllables up to the primary stress
local first_stress = 0
local second_stress = 0
for i = 1, syllables.stress do
if syllables[i].stressed then
if first_stress == 0 then
first_stress = i
elseif second_stress == 0 then
second_stress = i
end
end
end
if first_stress > 0 and second_stress > 0 then -- compound or prefixed word with secondary stress
-- not the case of adverbs with -ment (but sóbreabundàntmént), avoiding possible errors
local sec_str_syl = syllables[second_stress].onset .. syllables[second_stress].vowel .. syllables[second_stress].coda
if sec_str_syl == "ment" then
return syllables
end
-- it doesn't work in àntialcoholísme, a single case with -isme to be respelled
-- elision ea only after some prefixes
-- doing it before reduction /eə/
local prefix_vowel = {"centrea", "entrea", "obrea", "sobrea", "telea", "vicea"}
local prefix_ea_match = false
for _, pr in ipairs(prefix_vowel) do
-- the page name must start with the prefix
if str.find(pagename, pr) == 1 then
prefix_ea_match = true
end
end
local i = first_stress + 1
while i < second_stress - 1 do -- unstressed between two accents
local current = syllables[i]
local posterior = syllables[i + 1]
-- equals a two-vowel string only when no consonant stands between the vowels
local vowel_pair = current.vowel .. current.coda .. posterior.onset .. posterior.vowel
if str.find(vowel_pair, "[ae]e")
or vowel_pair == "aa"
or (vowel_pair == "ea" and prefix_ea_match)
or (vowel_pair == "ie" and posterior.coda == "s")
or vowel_pair == "ii"
or (vowel_pair == "oe" and posterior.coda == "s")
or str.find(vowel_pair, "o[ou]")
then
-- the first vowel survives and inherits the coda of the removed syllable
syllables[i].coda = syllables[i + 1].coda -- re-syllabification
table.remove(syllables, i + 1)
syllables.stress = syllables.stress - 1
-- NOTE(review): second_stress is not decremented after the removal, so the
-- loop bound still refers to the old position - confirm this is intended
end
i = i + 1
end
end
return syllables
end
-- Reduce unstressed a and e to ə, except in a few vowel-contact contexts.
-- Parameters:
--   syllables: sequence of syllable tables with a `stress` index; vowels are
--              rewritten in place
--   lect: lect code; only "bal" has a rule of its own here
-- Returns the same `syllables` table.
local function reductionUnstressAE(syllables, lect) -- Eastern: <ae> /ə/ GIEC 3.3.1.2.b
for i = 1, #syllables do
local current = syllables[i]
if current.stressed == nil and str.find(current.vowel, "[ae]") then
-- empty placeholders stand in for the missing neighbour at either end of the word
local previous = syllables[i - 1] or {onset = "", vowel = "", coda = ""}
local posterior = syllables[i + 1] or {onset = "", vowel = "", coda = ""}
-- these equal a two-vowel string only when no consonant separates the vowels
local pre_vowel_pair = previous.vowel .. previous.coda .. current.onset .. current.vowel
local post_vowel_pair = current.vowel .. current.coda .. posterior.onset .. posterior.vowel
local reduction = true
if pre_vowel_pair == "əe" and not previous.stressed then
-- the previous vowel was already reduced in an earlier iteration
reduction = false -- aerificar, aeròbic...
elseif str.find(post_vowel_pair, "e[aɔ]") then
reduction = false -- crear, teòric, but not lleó
elseif i < syllables.stress -1 and post_vowel_pair == "ee" then
posterior.vowel = "ə" -- both reduced, avoiding əe in next loop
elseif i > syllables.stress and post_vowel_pair == "ee" then
reduction = false -- àrees, as singular àrea
elseif lect == "bal"
and i == #syllables and previous.stressed
and str.find(previous.vowel .. current.vowel .. current.coda, "aeks?$")
then
-- UIB: endings -à+C+ec in Majorcan, i.e. mànec
reduction = false
end
if reduction then
current.vowel = str.gsub(current.vowel, "[ae]", "ə")
end
end
end
return syllables
end
-- Reduce unstressed o to u in every lect except "bal", unless the o is
-- followed by w, either in its own coda or as the whole onset of the next syllable.
--
-- Background kept from the original notes:
--   * Central, Menorca, Eivissa: unstressed o > /u/.
--   * Balearic has /u/ in a list of 9 words (UIB): done by respelling.
--   * Some Majorcan and Western varieties reduce in certain contexts
--     (GIEC 3.3.4.1.b): before a stressed syllable with i (conill, cosí, coixí,
--     tossir), usually after a palato-alveolar (Joan, Josep, joventut).
--     Nothing is done for them: vowel harmony is not usual in Palma, and
--     "bal2" stands for Menorca and Eivissa.
--   * Valencian (GNV 1.2.2.3.2.e): standard in Josep, Joan; locally acceptable
--     before stressed i or a labial consonant (collir, tossir, cobert, obert).
--     Nothing is done either, it is not systematic, nor is it in Northwestern;
--     it seems to come from a first stage of reduction in Old Catalan and is
--     done by respelling according to the DCVB.
--
-- Parameters:
--   syllables: sequence of syllable tables; vowels are rewritten in place
--   lect: lect code
-- Returns the same `syllables` table.
local function reductionUnstressO(syllables, lect)
	if lect == "bal" then
		return syllables
	end
	for idx = 1, #syllables do
		local syl = syllables[idx]
		if syl.vowel == "o" and not syl.stressed then
			local following = syllables[idx + 1] or {onset = "", vowel = "", coda = ""}
			-- some exceptions ow: derivatives of brou, jou, nou, ou; done by respelling
			if syl.coda ~= "w" and syl.coda .. following.onset ~= "w" then
				syl.vowel = "u"
			end
		end
	end
	return syllables
end
-- Do context-sensitive phonological changes
local function postprocessGeneral(syllables, lect, pos)
local lect_12 = lect:sub(1, 3)
for i = 1, #syllables do
local current = syllables[i]
local previous = syllables[i - 1] or {onset = "", vowel = "", coda = ""}
-- Stressed -ig, final or in compound: mig, reig, roig, puig, migdiada
local ending = current.vowel .. current.coda
if current.stressed
and ((i == #syllables and str.find(ending, "^iɡs?$"))
or (i == #syllables and str.find(ending, "jɡs?$"))
or (current.onset == "m" and str.find(ending, "^iɡs?$"))
or (current.onset == "r" and str.find(ending, "^[eo]jɡs?$"))
or (current.onset == "p" and str.find(ending, "^ujɡs?$"))
)
then
if lect == "cen2" then
-- iodització final central V+igs: raigs > rajts, fet més endavant
current.coda = str.gsub(current.coda, "(j?)ɡ(s?)", "%1t͡ʃ%2")
else
current.coda = str.gsub(current.coda, "j?ɡ(s?)", "t͡ʃ%1")
end
end
-- Coda consonant losses
if i < #syllables then
current.coda = str.gsub(current.coda, "([ɫn])[td]$", "%1")
current.coda = str.gsub(current.coda, "m[pb]$", "m")
current.coda = str.gsub(current.coda, "s[td]$", "s") -- istme, postgrau
else
current.coda = str.gsub(current.coda, "([ɫn])[td]s", "%1s") -- malalts, accents
current.coda = str.gsub(current.coda, "m[pb]s", "ms") -- camps, tombs
current.coda = str.gsub(current.coda, "mpt(s?)", "mt%1") -- exempt, irredempt
if lect_12 == "cen" or lect_12 == "noc" or lect == "val2" or lect == "bal2" then
current.coda = str.gsub(current.coda, "([ɫn])[td]", "%1") -- malalt, cald, -ment, fecund
if lect_12 == "cen" or lect_12 == "noc" or lect == "val2" then
current.coda = str.gsub(current.coda, "m[pb]", "m") -- camp, tomb
end
end
end
-- Betacism v > /b/ in onsets, not in codas (ovni, hafni)
-- doing it before assimilations and spirants
if lect_12 == "cen" or lect_12 == "noc" or lect == "val2" then
current.onset = str.gsub(current.onset, "v", "b")
end
-- Reduction geminated l·l > /l/
-- doing it before gemination tl > /ll/ and /ʎʎ/ > /ll/
if (lect == "cen2" or lect == "noc2" or lect == "val2")
and current.onset == "ɫ"
then
previous.coda = str.gsub(previous.coda, "ɫ", "")
end
---- Consonant assimilations
-- t + lateral/nasal/plosive assimilation, not tm
if str.find(previous.coda, "t$") and str.find(current.onset, "^ɫ") then
-- tl > /ll/ except cultisms dl /dl/, nor atl /adl/ in Valencian
-- doing it before devoicing d /t/
if lect_12 == "val" then
if not (previous.onset == "" and previous.vowel == "a") then
-- not atles, triatló... but batle, guatla...
previous.coda = lect == "val" and "ɫ" or "" -- /l/ in val2
end
else
previous.coda = "ɫ"
end
elseif str.find(previous.coda, "t$") and str.find(current.onset, "^ʎ") then
if lect_12 == "cen" or lect_12 == "noc" then
-- Central and Northwestern tʎ > /ʎʎ/
previous.coda = str.gsub(previous.coda, "t$", "ʎ")
else
-- Balearic and Valencian: tʎ > /ll/ (inherited) or /ʎ/ (borrowed)
local previous_syl = previous.onset .. previous.vowel .. previous.coda
if str.find(previous_syl, "[bpw]at$") -- batlle, espatlla, guatlla
or str.find(pagename, "[mv]etll") -- ametlla, -vetlla
or (str.find(previous_syl, "tit$") -- titlla
and not str.find(pagename, "estitll")) -- exc. estitll-
or str.find(previous_syl, "m[oɔ]t$") -- motlle
or (str.find(previous_syl, "r[oɔu]t$") -- -rotlle
and lect_12 == "bal")
or (str.find(previous_syl, "ut$") -- butlla, rutlla
and not str.find(pagename, "butll[eo][tf]")) -- exc. butlleta, butlletí, -butllofa
then
previous.coda = "ɫ"
current.onset = "ɫ"
else
previous.coda = ""
end
end
elseif str.find(previous.coda, "t$") and str.find(current.onset, "^n") then
-- tn > /nn/ except cultisms
if not str.find(previous.vowel, "[eɛêë]") then -- except ètnic, etno-...
previous.coda = str.gsub(previous.coda, "t$", "n") -- vietnamita, pitnegre to be fixed
end
elseif previous.coda:find("[dt]$") then -- GIEC 4.4.2.2 plosive assimilations
if current.onset == "k" and lect_12 == "bal" then
-- d/t + k > /kk/ DCVB, not in GDLV, DNV
previous.coda = str.gsub(previous.coda, "[dt]$", "k") -- batcoll, sud-coreà
elseif current.onset == "b"
and lect_12 ~= "val" and previous.stressed == nil
then
-- t + b > /bb/ DCVB & GDLV, not in DNV, not ràtbúf
previous.coda = str.gsub(previous.coda, "t$", "b") -- futbol
elseif current.onset == "v" and lect_12 == "bal" then
-- d/t + v > /bv/ DCVB, not in GDLV, DNV
previous.coda = str.gsub(previous.coda, "[dt]$", "b") -- adverbi, gatvaire
elseif current.onset == "p" and lect_12 == "bal" then
-- t + p > /pp/ DCVB, not in GDLV, DNV
previous.coda = str.gsub(previous.coda, "t$", "p") -- totpoderós
end
end
-- nasal assimilations
if str.find(current.onset, "^[mbp]") then -- nasal + labial > labialized
if str.find(previous.coda, "n$") then
previous.coda = str.gsub(previous.coda, "n$", "m")
elseif str.find(previous.coda, "[bp]$") -- labial + nasal > labialized
and str.find(current.onset, "^m")
and (lect_12 == "cen" or lect_12 == "bal" or lect_12 == "noc")
then
previous.coda = str.gsub(previous.coda, "[bp]$", "m")
end
elseif str.find(current.onset, "^[fv]") then
previous.coda = str.gsub(previous.coda, "[mn]$", "ɱ")
elseif str.find(current.onset, "^[ɡk]") then -- n + velar > velarized
previous.coda = str.gsub(previous.coda, "n$", "ŋ")
elseif str.find(previous.coda, "[ɡk]$") -- velar + n > velarized
and str.find(current.onset, "^n")
and (lect_12 == "cen" or lect_12 == "bal" or lect_12 == "noc")
then
previous.coda = str.gsub(previous.coda, "[ɡk]$", "ŋ")
elseif str.find(current.onset, "^[ʒʎʃɲ]") -- n + palatal > palatalized
or str.find(current.onset, "^t͡ʃ")
or str.find(current.onset, "^d͡ʒ")
then
previous.coda = str.gsub(previous.coda, "n$", "ɲ")
end
if i < #syllables then
current.coda = str.gsub(current.coda, "n[kɡ]", "ŋ") -- sangglaçar, ping-pong, francmaçó...
else
current.coda = str.gsub(current.coda, "n[kɡ]", "ŋk")
end
if i == 1 then
current.onset = str.gsub(current.onset, "ɡn", "ŋn") -- gnom
end
current.coda = str.gsub(current.coda, "[mn]([fv])", "ɱ%1")
current.coda = str.gsub(current.coda, "n([ʃʒ])", "ɲ%1")
current.coda = str.gsub(current.coda, "n(t͡ʃ)", "ɲ%1")
current.coda = str.gsub(current.coda, "n(d͡ʒ)", "ɲ%1")
-- l + palatal > palatalized
-- doing it before fortition of palatal fricatives
if str.find(current.onset, "^[ʎʃɲ]")
or str.find(current.onset, "^t͡ʃ")
or str.find(current.onset, "^d͡ʒ")
then
previous.coda = str.gsub(previous.coda, "ɫ$", "ʎ")
elseif current.onset == "ʒ" and lect_12 == "cen" then
if not (i == 2 and previous.text == "maɫ") -- except prefix mal-
-- endings -àlgia/es, -àlgica/es, -àlgic/s are ambiguous, to be fixed by respelling
and not (i > 2 and i == #syllables - 1 and str.find(previous.text, "aɫ$") and current.text == "ʒi" and str.find(syllables[i + 1].text, "^k?[ae]")) -- except cultisms -àlgia/es, -àlgica/es
and not (i > 2 and i == #syllables and str.find(previous.text, "aɫ$") and str.find(current.text, "ʒiks?")) -- except cultisms -àlgic/s
then
previous.coda = str.gsub(previous.coda, "ɫ$", "ʎ")
end
end
if (lect_12 == "bal" or lect_12 == "val")
and previous.coda == "ʎ" and current.onset == "ʎ"
then
-- no palatal gemination in Balearic and Valencian: Bell-lloc
previous.coda = ""
end
current.coda = str.gsub(current.coda, "ɫʃ", "ʎʃ")
current.coda = str.gsub(current.coda, "ɫ(t͡ʃ)", "ʎ%1")
if lect_12 == "cen" then
current.coda = str.gsub(current.coda, "ɫʒ", "ʎʒ")
end
-- Double sound of letter x > ks/gz (on cultisms, ambiguous in onsets)
-- doing it before fortition ʃ > t͡ʃ
if current.vowel ~= "i" then
current.coda = str.gsub(current.coda, "^ʃs?$", "ks")
end
if previous.coda == "kz" then
previous.coda = "ɡz" -- voicing the group
end
if (previous.onset == "" and (previous.vowel == "e" or previous.vowel == "ɛ")
or str.find(pagename, "hex"))
and ((previous.coda == "" and current.onset == "ʃ") or (previous.coda == "ks" and current.onset == ""))
then
-- ex + (h) vowel > egz
previous.coda = "ɡ"
current.onset = "z"
end
-- Fortition of palatal fricatives
if previous.coda == "ɡ" and current.onset == "ʒ" then
current.onset = "d͡ʒ"
previous.coda = ""
end
if lects_group[lect] == "occ" then
if current.onset == "ʃ"
and previous.coda ~= "j" and (previous.vowel .. previous.coda) ~= "i"
then
-- not hint xx > -ʃʃ, nor <ix>: xiuxiuejar, para-xocs
current.onset = "t͡ʃ"
end
if lect_12 == "noc" then
if current.onset == "ʒ" and (i == 1 or previous.coda ~= '') then
current.onset = "d͡ʒ"
end
elseif lect_12 == "val" then
current.onset = str.gsub(current.onset, "^ʒ", "d͡ʒ")
current.coda = str.gsub(current.coda, "^ʒ", "d͡ʒ")
if previous.vowel == "i" and previous.coda == "" and current.onset == "d͡z" then
current.onset = "z"
end
end
elseif lect == "bal" then -- fortition in -ejar/-ajar
-- GIEC 4.2.3.1: tendència a l’africació del sufix ‑ej(ar)
-- (passejar, festejar, glopejar, plantejar) i en algun altre mot (roja).
-- UIB: s'admet fricativa o africada:
-- assajar, festejar, manejar, rajar, passejar
-- llegir, regir, corregir
-- assajos, batejos, Magí, roja/os, rajola, boja/os, truja i algun altre.
-- FIXME: move to bal2
-- TODO: try only a list of verbs and its forms, others by respelling
-- TODO: currently it does not include all verb forms
if i > 1
and current.vowel == "a" and i == syllables.stress
and str.find(previous.vowel, "[ae]") and previous.coda == ""
then
current.onset = str.gsub(current.onset, "^ʒ", "d͡ʒ")
end
end
current.onset = str.gsub(current.onset, "ʃʃ", "ʃ") -- remove hint
current.coda = str.gsub(current.coda, "ʃʃ", "ʃ")
-- Assimilation s + sibilant
-- GIEC 4.4.3.4:
---- majoria de parlars, elisió: descentrar /s/, desxifrar /ʃ/, desgelar /ʒ/
---- mallorquí i menorquí, africació i geminació: /tts/, /ttʃ/, /ddʒ/
-- UIB-totdret, recomanable allargament, no africació: /ss/, /ʃʃ/, /ʒʒ/
-- done after reduction s-c and exs, and fortition t͡ʃ d͡ʒ
-- done before voicing s > z
if previous.coda:find("s$") then
if current.onset == "s" then
if previous.coda == "s" then
-- elision /ss/ intervocalic except in Balearic
if lect_12 ~= "bal" then
previous.coda = ""
end
else
previous.coda = previous.coda:gsub("s$", "")
end
elseif current.onset == "ʃ" or current.onset == "ʒ" then
if lect_12 == "cen" then
previous.coda = previous.coda:gsub("s$", "")
elseif lect_12 == "bal" then
previous.coda = str.gsub(previous.coda, "s$", current.onset)
end
end
end
-- Voicing or devoicing
local voiced = listToSet({"b", "d", "ɡ", "m", "n", "ɲ", "ɫ", "ʎ", "r", "ɾ", "v", "z", "ʒ"})
local voiceless = listToSet({"p", "t", "k", "f", "s", "ʃ", ""})
local devoicing = {["b"]="p", ["d"]="t", ["ɡ"]="k"}
local voicing = {["p"]="b", ["t"]="d", ["k"]="ɡ", ["f"]="v", ["s"]="z", ["ʃ"]="ʒ"}
local current_initial = str.sub(current.onset, 1, 1)
local previous_final = str.sub(previous.coda, -1)
if voiced[current_initial] and voicing[previous_final] then
previous.coda = str.gsub(previous.coda, previous_final .. "$", voicing[previous_final])
previous.coda = str.gsub(previous.coda, "t͡ʒ", "d͡ʒ")
elseif current_initial == "" and previous_final == "ʃ" then
-- voicing sibilants before a vowel GIEC 4.4.1.2: baix-alemany, Puig-agut
local onset_new = str.gsub(previous.coda, previous_final .. "$", voicing[previous_final])
onset_new = str.gsub(onset_new, "t͡ʒ", "d͡ʒ")
current.onset = str.gsub(onset_new, "j", "")
previous.coda = str.gsub(onset_new, "[^j]", "")
elseif voiceless[current_initial] and devoicing[previous_final] then
previous.coda = str.gsub(previous.coda, previous_final .. "$", devoicing[previous_final])
end
previous.coda = str.gsub(previous.coda, "[bd]s", {["bs"] = "ps", ["ds"] = "ts"})
-- Final devoicing
if i == #syllables then
current.coda = str.gsub(current.coda, "d͡ʒ", "t͡ʃ")
current.coda = str.gsub(current.coda, "d͡z", "t͡s")
current.coda = str.gsub(current.coda, "b", "p")
current.coda = str.gsub(current.coda, "d", "t")
current.coda = str.gsub(current.coda, "ɡ", "k")
current.coda = str.gsub(current.coda, "ʒ", "ʃ")
current.coda = str.gsub(current.coda, "v", "f")
current.coda = str.gsub(current.coda, "z", "s")
end
-- affricate + affricate/fricative (migjorn, puigcerdanenc): GIEC 4.4.3.4.b
if lect ~= "val"
and ((previous.coda == "d͡ʒ" and str.find(current.onset, "ʒ"))
or (previous.coda == "t͡ʃ" and current.onset == "s"))
then
current.onset = previous.coda
previous.coda = ""
end
-- Allophones of r
-- in replaceContextFree(), we converted single r to ɾ and double rr to r
-- doing it before spirant lenitions r+bdg
if i == 1 then
current.onset = str.gsub(current.onset, "^ɾ", "r")
elseif str.find(previous.coda, "[ɫnz]$") then
current.onset = str.gsub(current.onset, "^ɾ", "r")
end
-- GIEC 4.2.6.c: bategant mallorquí, eivissenc, nord-occcidental meridional, valencià
if i < #syllables -- final coda done afterwards depending of hint
and (lect_12 == "cen" or lect == "bal2" or lect == "noc")
then
current.coda = str.gsub(current.coda, "ɾ", "r")
end
-- GNV 1.3.2.1.1.c: reduction of first consonant on ads-, obs-, subs- + C
-- previously devoiced d /t/, b /p/, and reduced sc /ss/ > /s/ (obscé)
if lect == "val2" then
if previous.vowel == "a" then
previous.coda = str.gsub(previous.coda, "ts$", "s")
elseif str.find(previous.vowel, "[ou]") then
previous.coda = str.gsub(previous.coda, "ps$", "s")
end
end
-- Gemination bl, gl: GIEC 4.3.6
local cons_cluster = previous.coda .. current.onset
if lect == "cen2" or lect == "noc2" or lect_12 == "val" then
-- remove respellings bbl/ggl
if cons_cluster == "bbɫ" or cons_cluster == "ɡɡɫ" then
previous.coda = ""
end
elseif str.find(cons_cluster, "^[bɡ]ɫ") and previous.stressed then
previous.coda = str.sub(cons_cluster, 1, 1) -- geminated
end
-- remove respellings +bl/+gl
if str.find(cons_cluster, "%+[bɡ]ɫ") then
previous.coda = ""
end
-- No gemination dd
-- doing it before spirant lenition
if lect_12 == "val" and cons_cluster == "dd" then
previous.coda = ""
end
-- Spirant lenition
-- doing it before voicing assimilation s > z
-- TODO: review with GIEC 4.4.3.3:
-- en obertura
-- després de vocal, semivocal, ròtica, lateral (exc. ð)
-- fricativa labiodental
-- fricativa alveolar i palatoalveolar
-- africada alveolar i palatoalveolar
-- ...
if i > 1
and str.find(current.onset, "^[bdɡ]")
and not str.find(current.onset, "^d͡")
and not (str.find(current.onset, "^b") and (lect_12 == "bal" or lect == "val")) -- no β in v/b distintion
and not (str.find(previous.coda, "[ɫʎ]$") and str.find(current.onset, "^d")) -- except lateral + d
and not str.find(previous.coda, "[pbtdkɡmɱnɲŋ]$")
and not (str.find(previous.coda, "[rz]$") and lect_12 == "cen")
--and (previous.stressed == nil or current.stressed == nil) -- ?
then
current.onset = str.gsub(current.onset, "[bdɡ]", {["b"] = "β", ["d"] = "ð", ["ɡ"] = "ɣ"})
end
-- iodització final central V+igs: raigs > rajts
-- doing it before removing j and after final devoicing
if lect == "cen2" and i == #syllables then
current.coda = str.gsub(current.coda, "jt͡ʃs$", "jt͡s")
end
-- Remove j before palatal obstruents
if lects_group[lect] == "ori" then
current.coda = str.gsub(current.coda, "j([ʃʒ])", "%1")
current.coda = str.gsub(current.coda, "j(t͡ʃ)", "%1")
current.coda = str.gsub(current.coda, "j(d͡ʒ)", "%1")
if str.find(current.onset, "^[ʃʒ]")
or str.find(current.onset, "^t͡ʃ")
or str.find(current.onset, "^d͡ʒ")
then
previous.coda = str.gsub(previous.coda, "j$", "")
end
elseif lect == "val2" then
if str.find(current.onset, "^ʃ") then
previous.coda = str.gsub(previous.coda, "j$", "")
end
end
-- Ascending diphthong /uj/ > /wi/
-- doing it before reduction o
if lect == "val2"
and current.vowel == "u" and str.find(current.coda, "^j")
and i < #syllables and syllables[i + 1].onset ~= "ʃ"
then
current.onset = current.onset .. "w"
current.vowel = "i"
current.coda = str.gsub(current.coda, "^j", "")
end
-- Vowel assimilation of final a
-- TODO: review, standard v. acceptable
if lect == "val2"
and i == #syllables
and current.vowel == "a" and current.coda == ""
and (previous.vowel == "ɛ" or previous.vowel == "ɔ")
then
current.vowel = previous.vowel
end
-- resyllabling some prefix+V
if i == 2 and current.onset == "" then
if previous.coda == "p" then -- sub-
current.onset = "p"
previous.coda = ""
if lect == "val2" then
current.onset = "β"
end
elseif str.find(previous.coda, "r?t$") then -- nord-, sud-
current.onset = "t"
previous.coda = str.gsub(previous.coda, "t$", "")
end
end
end -- by syllables
-- Elision and reduction of unstressed vowels a,e,o
if lects_group[lect] == "ori" then
syllables = elisionContinguousVowels(syllables, lect)
syllables = reductionUnstressAE(syllables, lect)
syllables = reductionUnstressO(syllables, lect)
end
-- Final consonant losses
local final_coda = syllables[#syllables].coda
final_coda = str.gsub(final_coda, "j(t͡ʃ)s?$", "%1")
final_coda = str.gsub(final_coda, "([ʃs])s", "%1") -- homophone plurals -xs, -çs
if lect_12 == "cen" or lect_12 == "noc" or lect == "val2" or (lect == "val" and syllables.stress < #syllables) then
final_coda = str.gsub(final_coda, "ŋk(s?)$", "ŋ%1")
end
if lect == "val2" then
final_coda = str.gsub(final_coda, "ʎs$", "ʎʃ")
end
-- final r, GIEC 4.3.4
-- elision in verbs -ar, -er, -ir, also in val2 GNV 1.3.2.6
-- + adj or nouns -ar, -er, -or stressed not monosyllables, not in val nor val2
-- covers most cases, but plenty of exceptions to be fixed
if pos == "inf" and lect ~= "val" then
final_coda = str.gsub(final_coda, "^ɾ$", "")
elseif #syllables > 1 and syllables.stress == #syllables and lect_12 ~= "val" then
final_coda = str.gsub(final_coda, "^ɾ(s?)$", "%1") -- no loss with hint rr
end
final_coda = str.gsub(final_coda, "([rɾ])+?", "ɾ") -- remove hint
if lect_12 == "cen" or lect_12 == "noc" or lect_12 == "bal" then
final_coda = str.gsub(final_coda, "([rɾ])ts$", "%1s")
end
-- allophones of r in coda: GIEC 4.2.6.c /r/ in Central, Menorca and most Northwestern
if lect_12 == "cen" or lect == "bal2" or lect == "noc" then
final_coda = str.gsub(final_coda, "ɾ", "r")
else
final_coda = str.gsub(final_coda, "r", "ɾ")
end
syllables[#syllables].coda = final_coda
return syllables
end
-- Apply one substitution spec "from:to" to `term` and return the new term.
-- The spec is split at its last colon. Recognised forms of `from`:
--   "<stress>"  `to` is a mid vowel hint written onto the stressed syllable
--   "<prefix>"  `to` is an accented prefix followed by one marker character
--               (the "+" appended to prefix respellings by parseRespelling)
--   ""          `to` is an accented fragment, optionally ending in "-",
--               or a whole replacement term
--   otherwise   `from` is a pattern replaced once by `to`
-- Failures are reported through the module-level `ret_error`; the (possibly
-- unchanged) term is still returned.
local function applySubstitutionSpec(respelling, term)
    local from, to = str.match(respelling, "^(.*):(.*)$")
    local nsub
    if from == "<stress>" then
        local mid_vowel_hint = to
        local syllables = splitSyllables(term)
        local stressed_vowel = syllables[syllables.stress].vowel
        -- the hint must be of the same vowel family (e or o) as the stressed vowel
        if (str.find(mid_vowel_hint, "[èéêë]") and str.find(stressed_vowel, "[eèé]"))
            or (str.find(mid_vowel_hint, "[òóô]") and str.find(stressed_vowel, "[oòó]"))
        then
            syllables[syllables.stress].vowel = mid_vowel_hint
        else
            ret_error = "no s'ha pogut substituir '" .. mid_vowel_hint
                .. "' en la vocal '" .. syllables[syllables.stress].vowel .. "'."
        end
        term = reconstituteWordFromSyllables(syllables)
    elseif from == "<prefix>" then
        to = str.sub(to, 1, -2) -- drop the trailing marker character
        local prefix = plainText(to)
        -- str.sub works on code points, so the prefix must be measured with
        -- str.len; the byte length (#prefix) overshoots as soon as the plain
        -- prefix still holds a non-ASCII character.
        local prefix_len = str.len(prefix)
        local remainder = str.sub(term, prefix_len + 1)
        local syllables = splitSyllables(remainder) -- includes stress
        remainder = reconstituteWordFromSyllables(syllables)
        term, nsub = str.gsub(str.sub(term, 1, prefix_len), prefix, to, 1)
        term = term .. remainder
    elseif from == "" then
        local plain_to = plainText(to)
        if to:sub(-1) == "-" then
            -- TODO: add secondary accent
            if str.find(term, "-") then
                -- the term already has a hyphen: substitute without the marker
                from = str.sub(plain_to, 1, -2)
                to = str.sub(to, 1, -2)
            else
                -- keep the "-" of `to` so that it is inserted into the term
                from = plainText(str.sub(to, 1, -2))
            end
            term, nsub = str.gsub(term, from, to, 1)
        elseif to ~= plain_to then
            -- accented fragment: replace its unaccented form once
            term, nsub = str.gsub(term, plain_to, to, 1)
        else
            -- nothing to locate: `to` replaces the whole term
            term = to
        end
    else
        term, nsub = str.gsub(term, from, to, 1)
    end
    -- nsub stays nil in the branches that do not substitute by pattern
    if nsub == 0 then
        ret_error = "no s'ha trobat '" .. from .. "' a '" .. term .. "'"
    end
    return term
end
-- Normalise a respelling: trim it, collapse runs of whitespace into a single
-- space and, when its first letter equals the first letter of the
-- module-level `pagename` ignoring case, lower-case that first letter.
local function canonRespelling(text)
    local cleaned = str.gsub(mw.text.trim(text), "%s+", " ")
    local initial = str.lower(str.sub(cleaned, 1, 1))
    if initial ~= str.lower(str.sub(pagename, 1, 1)) then
        -- different initial letter: leave the capitalisation untouched
        return cleaned
    end
    return initial .. str.sub(cleaned, 2)
end
-- Normalise a page name: lower-case it, then strip, in this order, the
-- characters ¡ ¿ ! ? ', every ", " sequence, one leading hyphen and one
-- trailing hyphen.
local function canonPagename(text)
    local cleanup_patterns = {"[¡¿!?']", ", ", "^%-", "%-$"}
    local result = str.lower(text)
    for _, pattern in ipairs(cleanup_patterns) do
        result = str.gsub(result, pattern, "")
    end
    return result
end
-- Interpret one respelling or hint against `term`.
--   respelling   "-" (omit), "/.../" (raw IPA), "+" (use the term as is),
--                a short hint (mid vowel, "ks", "xx", "rr", "-r", "bl"/"gl"
--                forms, "yll", "prefix+", "fragment-", accented fragment),
--                an explicit "from:to" substitution, or a full respelling
--   term         the word the hint applies to
--   input_param  optional "<lect>-<param>" string describing where the input
--                came from; some hints are skipped for general ("all") input
-- Returns a table holding one of `omitted`, `raw` or `term`, plus `hint`
-- when the respelling was a mid vowel hint.
local function parseRespelling(respelling, term, input_param)
    term = canonRespelling(term)
    if respelling == "-" then
        return {omitted = true}
    elseif respelling:match("^/(.+)/$") then
        return {raw = respelling}
    elseif respelling == "+" then
        return {term = term}
    end
    local hint
    if not str.find(respelling, ":") then
        -- every branch below either returns or rewrites the respelling
        -- into a "from:to" substitution spec
        local mid_vowel_hint = str.match(respelling, "^[éèêëóòô]$")
        if mid_vowel_hint then
            respelling = "<stress>:" .. mid_vowel_hint
            hint = mid_vowel_hint
        elseif respelling == "ks" then
            respelling = "x:ks"
        elseif respelling == "xx" then -- xeix <x> /ʃ/ not /t͡ʃ/
            term = str.gsub(term, "x(.)", "xx%1") -- initial or interior
            term = str.gsub(term, "ixx", "ix") -- not needed
            term = str.gsub(term, "([^-])xx", "%1-xx") -- force to onset
            return {term = term}
        elseif respelling == "rr" then
            respelling = "r(s?)$:rr%1"
        elseif respelling == "-r" then
            if input_param == "val-all" or input_param == "val2-all" then
                -- do not apply general hint to Valencian
                return {term = term}
            else
                respelling = "r(s?)$:%1"
            end
        elseif respelling:find("^[bg]+l$") or respelling:find("%+[bg]l") then
            respelling = str.sub(respelling, -2) .. ":" .. respelling
        elseif respelling == "yll" then
            -- input_param is optional: a missing value is treated like a
            -- lect-specific input instead of raising an error on nil
            if input_param and input_param:sub(-3) == "all" then
                -- GIEC 4.2.5, traditional in Balearic but not formal
                if input_param == "bal2-all" then
                    respelling = "(.)ll:%1y" -- not initial: llenegall
                else
                    return {term = term}
                end
            else
                respelling = "(.)ll:%1y"
            end
        elseif respelling:sub(-1) == "+" then
            respelling = "<prefix>:" .. respelling
        elseif respelling:sub(-1) == "-" then
            respelling = ":" .. respelling
        elseif str.find(respelling, "[àāéèêëēíóòôōú]")
            and str.len(respelling) < str.len(term) / 2
        then
            -- short accented fragment, not a whole respelling
            respelling = ":" .. respelling
        else
            -- anything else is a full respelling
            return {term = canonRespelling(respelling)}
        end
    end
    -- at this point the respelling always contains ":", whether given
    -- explicitly or built from a hint above
    return {term = applySubstitutionSpec(respelling, term), hint = hint}
end
-- Convert orthographic syllables into syllables with IPA onset, vowel and coda.
--   syllables  array of {onset, vowel, coda, stressed} plus field `stress`
--   lect       lect code, selects the mapping of ê, ë, ô
--   midv       mid vowel hint; when not nil the stressed vowel is kept as is
-- Returns a new array (with `stress` and a `text` field per syllable), or an
-- empty table when `ret_error` is set while fixing the stressed mid vowel.
local function preprocessWord(syllables, lect, midv)
    -- Fix mid vowel, unless it comes from a respelling hint
    if syllables.stress and midv == nil then
        local stressed_vowel = syllables[syllables.stress].vowel
        if str.find(stressed_vowel, "[eèo]") then
            syllables[syllables.stress].vowel = midVowelFixes(syllables)
            if str.find(syllables[syllables.stress].vowel, "[eo]") then
                ret_error = "vocal tònica '" .. stressed_vowel .. "' ambigua."
            end
        end
        if ret_error then
            return {}
        end
    end

    -- Vowel letters with the same value in every lect
    local common_vowels = {
        ["a"] = "a", ["à"] = "a",
        ["e"] = "e", ["è"] = "ɛ", ["é"] = "e",
        ["i"] = "i", ["í"] = "i", ["ï"] = "i",
        ["o"] = "o", ["ò"] = "ɔ", ["ó"] = "o",
        ["u"] = "u", ["ú"] = "u", ["ü"] = "u",
    }
    -- Lect-dependent values of ê, ë, ô; the table does not change per syllable
    local lect_vowels
    if lect == "bal" then
        lect_vowels = {["ê"] = "ə", ["ë"] = "ɛ", ["ô"] = "ɔ"}
    elseif lects_group[lect] == "ori" then
        lect_vowels = {["ê"] = "ɛ", ["ë"] = "ɛ", ["ô"] = "ɔ"}
    elseif lects_group[lect] == "occ" then
        lect_vowels = {["ê"] = "e", ["ë"] = "e", ["ô"] = "o"}
    end

    -- Work on a copy so that the orthographic syllables stay available
    local result = {stress = syllables.stress}
    for idx, src in ipairs(syllables) do
        result[idx] = {onset = src.onset, vowel = src.vowel, coda = src.coda, stressed = src.stressed}
    end

    -- Replace letters with IPA equivalents
    for i, syll in ipairs(result) do
        -- Voicing of s when the previous syllable has no coda or a coda i/u
        if i > 1 and syll.onset == "s" then
            local prev_coda = syllables[i - 1].coda
            if prev_coda == "" or prev_coda == "i" or prev_coda == "u" then
                syll.onset = "z"
            end
        end
        -- Soft c/g and silent u of qu/gu before e, i
        if str.find(syll.vowel, "^[eèéêëēií]$") then
            syll.onset = str.gsub(syll.onset, "tg$", "d͡ʒ")
            syll.onset = str.gsub(syll.onset, "[cg]$", {["c"] = "s", ["g"] = "ʒ"})
            syll.onset = str.gsub(syll.onset, "[qg]u$", {["qu"] = "k", ["gu"] = "ɡ"})
        end
        syll.onset = replaceContextFree(syll.onset)
        syll.coda = replaceContextFree(syll.coda)
        -- lect-dependent vowels first, then the common ones
        if lect_vowels then
            syll.vowel = str.gsub(syll.vowel, ".", lect_vowels)
        end
        syll.vowel = str.gsub(syll.vowel, ".", common_vowels)
        syll.text = syll.onset .. syll.vowel .. syll.coda
    end
    return result
end
-- Generate the syllables in IPA of a word for one lect.
-- Pipeline: wordRespellings > splitSyllables > preprocessWord > postprocessGeneral.
-- Returns an empty table as soon as `ret_error` is set after preprocessing.
local function toIPA(word, lect, pos, midv)
    local respelled = wordRespellings(word, lect)
    local syllables = preprocessWord(splitSyllables(respelled, lect), lect, midv)
    if ret_error then
        return {}
    end
    -- TODO: process with text instead of table
    -- combine syllables
    --local combined = {}
    --for i, syll in ipairs(syllables) do
    --    -- TODO: add primary and secondary accent
    --    table.insert(combined, syll.onset .. syll.vowel .. syll.coda)
    --end
    --local text = table.concat(combined, "·") -- · syllables delimiter
    --text = "#" .. text .. "#" -- # word delimiter
    --return postprocessGeneral(text, lect)
    return postprocessGeneral(syllables, lect, pos)
end
-- Fill in the field `phonetic` of every parsed respelling, in place:
-- raw input is copied verbatim, anything else goes through toIPA.
-- Omitted lects are left alone, they are handled by the caller.
local function generatePhonetic(parsed_respellings)
    for lect, spec in pairs(parsed_respellings) do
        if not spec.omitted then
            if spec.raw then
                spec.phonetic = spec.raw
            else
                spec.phonetic = toIPA(spec.term, lect, spec.pos, spec.hint)
            end
        end
    end
end
-- Format syllables as a string "/.../" with "." between syllables and the
-- stress marks ˈ (primary, syllable number `syllables.stress`) and ˌ (other
-- syllables flagged `stressed`). A "." before a stress mark or before another
-- "." is dropped.
-- Note: the table is modified in place, each syllable is replaced by its text.
local function joinSyllables(syllables)
    local plain_vowel = {["ā"] = "a", ["ē"] = "e", ["ō"] = "o"}
    for i, syll in ipairs(syllables) do
        syll.vowel = str.gsub(syll.vowel, "[āēō]", plain_vowel)
        local mark = ""
        if i == syllables.stress then -- primary stress
            mark = "ˈ"
        elseif syll.stressed then -- secondary stress
            mark = "ˌ"
        end
        syllables[i] = mark .. syll.onset .. syll.vowel .. syll.coda
    end
    local joined = table.concat(syllables, ".")
    joined = str.gsub(joined, "%.([ˈˌ.])", "%1")
    return "/" .. joined .. "/"
end
-- Main entry point
-- input: 1 table of arguments with respelling or hint, general or by lect,
-- as done by Module:ca-pron from the frame of Template:ca-pron;
-- if void then uses parameter "pagename"
-- 2 page name, by default the subpage text of the current title
-- 3 pos, passed on to the IPA generation of every lect
-- output: table of pairs lect=pronunciation generated and formatted for geolects or variants;
-- on error, an empty table followed by the error message
function p.show(pron, page_name, pos)
    -- ret_error is module-level state: clear it, otherwise an error raised by
    -- a previous call would make every later call fail too
    ret_error = nil
    pagename = canonPagename(page_name or mw.title.getCurrentTitle().subpageText)
    local inputs = {}
    -- if 1= specified, do all lects (2= for the variants "2", if given)
    if pron[1] then
        for _, lect in ipairs(lects) do
            if str.sub(lect, -1) == "2" and pron[2] then
                inputs[lect] = {input = pron[2], param = "all"}
            else
                inputs[lect] = {input = pron[1], param = "all"}
            end
        end
    end
    -- then do dialect groups (ori=, occ=, ori2=, occ2=)
    for group, group_lects in pairs(groups_lects) do
        for _, lect in ipairs(group_lects) do
            if str.sub(lect, -1) == "2" and pron[group .. "2"] then
                inputs[lect] = {input = pron[group .. "2"], param = "group"}
            elseif pron[group] then
                inputs[lect] = {input = pron[group], param = "group"}
            end
        end
    end
    -- then do individual lect settings; a variant "2" without its own
    -- parameter takes the one of its base lect
    for _, lect in ipairs(input_lects) do
        if pron[lect] then
            inputs[lect] = {input = pron[lect], param = "lect"}
        elseif str.sub(lect, -1) == "2" and pron[str.sub(lect, 1, -2)] then
            inputs[lect] = {input = pron[str.sub(lect, 1, -2)], param = "lect"}
        end
    end
    -- if no inputs given, set all lects based on current pagename
    if not next(inputs) then
        for _, lect in ipairs(lects) do
            inputs[lect] = {input = pagename, param = "pagename"}
        end
    end
    -- Parse the arguments
    local parsed_respellings = {}
    for lect, inputspec in pairs(inputs) do
        local input_split_on_comma = mw.text.split(inputspec.input, " ?, ?")
        -- do mid_vowel_hint first
        for _, input_part in ipairs(input_split_on_comma) do
            if str.match(input_part, "^[éèêëóòô]$") then
                parsed_respellings[lect] = parseRespelling(input_part, pagename)
            end
        end
        -- then the other parts, each one applied on the result of the previous
        for _, input_part in ipairs(input_split_on_comma) do
            if str.match(input_part, "^[éèêëóòô]$") == nil then
                local previous = parsed_respellings[lect]
                local respelled = previous and previous.term or pagename
                local input_param = lect .. "-" .. inputspec.param
                local parsed = parseRespelling(input_part, respelled, input_param)
                -- keep the mid vowel hint: the new table would drop it, and
                -- "è, rr" would then not be handled like "è" alone
                if previous and previous.hint then
                    parsed.hint = parsed.hint or previous.hint
                end
                parsed_respellings[lect] = parsed
            end
        end
        parsed_respellings[lect].pos = pos
    end
    if ret_error then
        return {}, ret_error
    end
    -- Convert each canonicalized respelling to IPA
    generatePhonetic(parsed_respellings)
    if ret_error then
        return {}, ret_error
    end
    -- Concatenate formatted results
    local ret = {}
    for lect, pronun_spec in pairs(parsed_respellings) do
        if not pronun_spec.omitted then
            ret[lect] = pronun_spec.raw or joinSyllables(pronun_spec.phonetic)
        end
    end
    return ret
end
-- on debug console, call p.show with table of arguments and pagename:
-- =mw.logObject(p.show({"ó", bal="-"}, "prova"))
return p