Jump to content

Module:ConvertNumeric

ពីវិគីភីឌា

Documentation for this module may be created at Module:ConvertNumeric/doc

-- Module for converting between different representations of numbers. See talk page for user documentation.
-- For unit tests see: [[Module:ConvertNumeric/testcases]]
-- When editing, preview with: Module_talk:ConvertNumeric/testcases
-- First edit [[Module:ConvertNumeric/sandbox]] and preview with Module_talk:ConvertNumeric/sandbox/testcases

local ones_position = {
    [0] = 'zero',
    [1] = 'one',
    [2] = 'two',
    [3] = 'three',
    [4] = 'four',
    [5] = 'five',
    [6] = 'six',
    [7] = 'seven',
    [8] = 'eight',
    [9] = 'nine',
    [10] = 'ten',
    [11] = 'eleven',
    [12] = 'twelve',
    [13] = 'thirteen',
    [14] = 'fourteen',
    [15] = 'fifteen',
    [16] = 'sixteen',
    [17] = 'seventeen',
    [18] = 'eighteen',
    [19] = 'nineteen'
}

local ones_position_ord = {
    [0] = 'zeroth',
    [1] = 'first',
    [2] = 'second',
    [3] = 'third',
    [4] = 'fourth',
    [5] = 'fifth',
    [6] = 'sixth',
    [7] = 'seventh',
    [8] = 'eighth',
    [9] = 'ninth',
    [10] = 'tenth',
    [11] = 'eleventh',
    [12] = 'twelfth',
    [13] = 'thirteenth',
    [14] = 'fourteenth',
    [15] = 'fifteenth',
    [16] = 'sixteenth',
    [17] = 'seventeenth',
    [18] = 'eighteenth',
    [19] = 'nineteenth'
}

local ones_position_plural = {
    [0] = 'zeros',
    [1] = 'ones',
    [2] = 'twos',
    [3] = 'threes',
    [4] = 'fours',
    [5] = 'fives',
    [6] = 'sixes',
    [7] = 'sevens',
    [8] = 'eights',
    [9] = 'nines',
    [10] = 'tens',
    [11] = 'elevens',
    [12] = 'twelves',
    [13] = 'thirteens',
    [14] = 'fourteens',
    [15] = 'fifteens',
    [16] = 'sixteens',
    [17] = 'seventeens',
    [18] = 'eighteens',
    [19] = 'nineteens'
}

local tens_position = {
    [2] = 'twenty',
    [3] = 'thirty',
    [4] = 'forty',
    [5] = 'fifty',
    [6] = 'sixty',
    [7] = 'seventy',
    [8] = 'eighty',
    [9] = 'ninety'
}

local tens_position_ord = {
    [2] = 'twentieth',
    [3] = 'thirtieth',
    [4] = 'fortieth',
    [5] = 'fiftieth',
    [6] = 'sixtieth',
    [7] = 'seventieth',
    [8] = 'eightieth',
    [9] = 'ninetieth'
}

local tens_position_plural = {
    [2] = 'twenties',
    [3] = 'thirties',
    [4] = 'forties',
    [5] = 'fifties',
    [6] = 'sixties',
    [7] = 'seventies',
    [8] = 'eighties',
    [9] = 'nineties'
}

local groups = {
    [1] = 'thousand',
    [2] = 'million',
    [3] = 'billion',
    [4] = 'trillion',
    [5] = 'quadrillion',
    [6] = 'quintillion',
    [7] = 'sextillion',
    [8] = 'septillion',
    [9] = 'octillion',
    [10] = 'nonillion',
    [11] = 'decillion',
    [12] = 'undecillion',
    [13] = 'duodecillion',
    [14] = 'tredecillion',
    [15] = 'quattuordecillion',
    [16] = 'quindecillion',
    [17] = 'sexdecillion',
    [18] = 'septendecillion',
    [19] = 'octodecillion',
    [20] = 'novemdecillion',
    [21] = 'vigintillion',
    [22] = 'unvigintillion',
    [23] = 'duovigintillion',
    [24] = 'tresvigintillion',
    [25] = 'quattuorvigintillion',
    [26] = 'quinquavigintillion',
    [27] = 'sesvigintillion',
    [28] = 'septemvigintillion',
    [29] = 'octovigintillion',
    [30] = 'novemvigintillion',
    [31] = 'trigintillion',
    [32] = 'untrigintillion',
    [33] = 'duotrigintillion',
    [34] = 'trestrigintillion',
    [35] = 'quattuortrigintillion',
    [36] = 'quinquatrigintillion',
    [37] = 'sestrigintillion',
    [38] = 'septentrigintillion',
    [39] = 'octotrigintillion',
    [40] = 'noventrigintillion',
    [41] = 'quadragintillion',
    [51] = 'quinquagintillion',
    [61] = 'sexagintillion',
    [71] = 'septuagintillion',
    [81] = 'octogintillion',
    [91] = 'nonagintillion',
    [101] = 'centillion',
    [102] = 'uncentillion',
    [103] = 'duocentillion',
    [104] = 'trescentillion',
    [111] = 'decicentillion',
    [112] = 'undecicentillion',
    [121] = 'viginticentillion',
    [122] = 'unviginticentillion',
    [131] = 'trigintacentillion',
    [141] = 'quadragintacentillion',
    [151] = 'quinquagintacentillion',
    [161] = 'sexagintacentillion',
    [171] = 'septuagintacentillion',
    [181] = 'octogintacentillion',
    [191] = 'nonagintacentillion',
    [201] = 'ducentillion',
    [301] = 'trecentillion',
    [401] = 'quadringentillion',
    [501] = 'quingentillion',
    [601] = 'sescentillion',
    [701] = 'septingentillion',
    [801] = 'octingentillion',
    [901] = 'nongentillion',
    [1001] = 'millinillion',
}

local roman_numerals = {
    I = 1,
    V = 5,
    X = 10,
    L = 50,
    C = 100,
    D = 500,
    M = 1000
}

-- Converts a given valid roman numeral (and some invalid roman numerals) to a number. Returns -1, errorstring on error
local function roman_to_numeral(roman)
    if type(roman) ~= "string" then return -1, "roman numeral not a string" end
    local rev = roman:reverse()
    local raising = true
    local last = 0
    local result = 0
    for i = 1, #rev do
        local c = rev:sub(i, i)
        local next = roman_numerals[c]
        if next == nil then return -1, "roman numeral contains illegal character " .. c end
        if next > last then
            result = result + next
            raising = true
        elseif next < last then
            result = result - next
            raising = false
        elseif raising then
            result = result + next
        else
            result = result - next
        end
        last = next
    end
    return result
end

-- Converts a given integer between 0 and 100 to English text (e.g. 47 -> forty-seven)
local function numeral_to_english_less_100(num, ordinal, plural, zero)
    local terminal_ones, terminal_tens
    if ordinal then
        terminal_ones = ones_position_ord
        terminal_tens = tens_position_ord
    elseif plural then
        terminal_ones = ones_position_plural
        terminal_tens = tens_position_plural
    else
        terminal_ones = ones_position
        terminal_tens = tens_position
    end

    if num == 0 and zero ~= nil then
        return zero
    elseif num < 20 then
        return terminal_ones[num]
    elseif num % 10 == 0 then
        return terminal_tens[num / 10]
    else
        return tens_position[math.floor(num / 10)] .. '-' .. terminal_ones[num % 10]
    end
end

local function standard_suffix(ordinal, plural)
    if ordinal then return 'th' end
    if plural then return 's' end
    return ''
end

-- Converts a given integer (in string form) between 0 and 1000 to English text (e.g. 47 -> forty-seven)
local function numeral_to_english_less_1000(num, use_and, ordinal, plural, zero)
    num = tonumber(num)
    if num < 100 then
        return numeral_to_english_less_100(num, ordinal, plural, zero)
    elseif num % 100 == 0 then
        return ones_position[num/100] .. ' hundred' .. standard_suffix(ordinal, plural)
    else
        return ones_position[math.floor(num/100)] .. ' hundred ' .. (use_and and 'and ' or '') .. numeral_to_english_less_100(num % 100, ordinal, plural, zero)
    end
end

-- Converts a number expressed as a string in scientific notation to a string in standard decimal notation
-- e.g. 1.23E5 -> 123000, 1.23E-5 = .0000123. Conversion is exact, no rounding is performed.
local function scientific_notation_to_decimal(num)
    local exponent, subs = num:gsub("^%-?%d*%.?%d*%-?[Ee]([+%-]?%d+)$", "%1")
    if subs == 0 then return num end  -- Input not in scientific notation, just return unmodified
    exponent = tonumber(exponent)

    local negative = num:find("^%-")
    local _, decimal_pos = num:find("%.")
    -- Mantissa will consist of all decimal digits with no decimal point
    local mantissa = num:gsub("^%-?(%d*)%.?(%d*)%-?[Ee][+%-]?%d+$", "%1%2")
    if negative and decimal_pos then decimal_pos = decimal_pos - 1 end
    if not decimal_pos then decimal_pos = #mantissa + 1 end
    local prev_len = #num

    -- Remove leading zeros unless decimal point is in first position
    while decimal_pos > 1 and mantissa:sub(1,1) == '0' do
        mantissa = mantissa:sub(2)
        decimal_pos = decimal_pos - 1
    end
    -- Shift decimal point right for exponent > 0
    while exponent > 0 do
        decimal_pos = decimal_pos + 1
        exponent = exponent - 1
        if decimal_pos > #mantissa + 1 then mantissa = mantissa .. '0' end
        -- Remove leading zeros unless decimal point is in first position
        while decimal_pos > 1 and mantissa:sub(1,1) == '0' do
            mantissa = mantissa:sub(2)
            decimal_pos = decimal_pos - 1
        end
    end
    -- Shift decimal point left for exponent < 0
    while exponent < 0 do
        if decimal_pos == 1 then
            mantissa = '0' .. mantissa
        else
            decimal_pos = decimal_pos - 1
        end
        exponent = exponent + 1
    end

    -- Insert decimal point in correct position and return
    return (negative and '-' or '') .. mantissa:sub(1, decimal_pos - 1) .. '.' .. mantissa:sub(decimal_pos)
end

-- Rounds a number to the nearest integer (NOT USED)
local function round_num(x)
    if x%1 >= 0.5 then
        return math.ceil(x)
    else
        return math.floor(x)
    end
end

-- Rounds a number to the nearest two-word number (round = up, down, or "on" for round to nearest)
-- Numbers with two digits before the decimal will be rounded to an integer as specified by round.
-- Larger numbers will be rounded to a number with only one nonzero digit in front and all other digits zero.
-- Negative sign is preserved and does not count towards word limit.
local function round_for_english(num, round)
    -- If an integer with at most two digits, just return
    if num:find("^%-?%d?%d%.?$") then return num end

    local negative = num:find("^%-")
    if negative then
        -- We're rounding magnitude so flip it
        if round == 'up' then round = 'down' elseif round == 'down' then round = 'up' end
    end

    -- If at most two digits before decimal, round to integer and return
    local _, _, small_int, trailing_digits, round_digit = num:find("^%-?(%d?%d?)%.((%d)%d*)$")
    if small_int then
        local small_int_len = #small_int
        if small_int == '' then small_int = '0' end
        if (round == 'up' and trailing_digits:find('[1-9]')) or (round == 'on' and tonumber(round_digit) >= 5) then
            small_int = tostring(tonumber(small_int) + 1)
        end
        return (negative and '-' or '') .. small_int
    end

    -- When rounding up, any number with > 1 nonzero digit will round up (e.g. 1000000.001 rounds up to 2000000)
    local nonzero_digits = 0
    for digit in num:gfind("[1-9]") do
        nonzero_digits = nonzero_digits + 1
    end

    num = num:gsub("%.%d*$", "") -- Remove decimal part
    -- Second digit used to determine which way to round lead digit
    local _, _, lead_digit, round_digit, round_digit_2, rest = num:find("^%-?(%d)(%d)(%d)(%d*)$")
    if tonumber(lead_digit .. round_digit) < 20 and (1 + #rest) % 3 == 0 then
        -- In English numbers < 20 are one word so put 2 digits in lead and round based on 3rd
        lead_digit = lead_digit .. round_digit
        round_digit = round_digit_2
    else
        rest = round_digit_2 .. rest
    end

    if (round == 'up' and nonzero_digits > 1) or (round == 'on' and tonumber(round_digit) >= 5) then
        lead_digit = tostring(tonumber(lead_digit) + 1)
    end
    -- All digits but lead digit will turn to zero
    rest = rest:gsub("%d", "0")
    return (negative and '-' or '') .. lead_digit .. '0' .. rest
end

local denominators = {
    [2] = { 'half', plural = 'halves' },
    [3] = { 'third' },
    [4] = { 'quarter', us = 'fourth' },
    [5] = { 'fifth' },
    [6] = { 'sixth' },
    [8] = { 'eigth' },
    [16] = { 'sixteenth' },
}

-- Return status, fraction where:
-- status is a string:
--     "finished" if there is a fraction with no whole number;
--     "ok" if fraction is empty or valid;
--     "unsupported" if bad fraction;
-- fraction is a string giving (numerator / denominator) as English text, or is "".
-- Only unsigned fractions with a very limited range of values are supported,
-- except that if whole is empty, the numerator can use "-" to indicate negative.
-- whole (string or nil): nil or "" if no number before the fraction
-- numerator (string or nil): numerator, if any (default = 1 if a denominator is given)
-- denominator (string or nil): denominator, if any
-- sp_us (boolean): true if sp=us
-- negative_word (string): word to use for negative sign, if whole is empty
local function fraction_to_english(whole, numerator, denominator, sp_us, negative_word)
    if numerator or denominator then
        local finished = (whole == nil or whole == '')
        local sign = ''
        if numerator then
            if finished and numerator:sub(1, 1) == '-' then
                numerator = numerator:sub(2)
                sign = negative_word .. ' '
            end
        else
            numerator = '1'
        end
        if not numerator:match('^%d+$') or not denominator or not denominator:match('^%d+$') then
            return 'unsupported', ''
        end
        numerator = tonumber(numerator)
        denominator = tonumber(denominator)
        local dendata = denominators[denominator]
        if not (dendata and 1 <= numerator and numerator <= 99) then
            return 'unsupported', ''
        end
        local numstr, denstr
        if numerator == 1 then
            if finished then
                numstr = 'one'
            else
                numstr = 'a'
            end
            denstr = sp_us and dendata.us or dendata[1]
        else
            numstr = numeral_to_english_less_100(numerator)
            denstr = dendata.plural
            if not denstr then
                denstr = (sp_us and dendata.us or dendata[1]) .. 's'
            end
        end
        if finished then
            return 'finished', sign .. numstr .. ' ' .. denstr
        end
        return 'ok', ' and ' .. numstr .. ' ' .. denstr
    end
    return 'ok', ''
end

-- Takes a decimal number and converts it to English text.
-- Return nil if a fraction cannot be converted (only some numbers are supported for fractions).
-- num (string or nil): the number to convert.
--      Can be an arbitrarily large decimal, such as "-123456789123456789.345", and
--      can use scientific notation (e.g. "1.23E5").
--      May fail for very large numbers not listed in "groups" such as "1E4000".
--      num is nil if there is no whole number before a fraction.
-- numerator (string or nil): numerator of fraction (nil if no fraction)
-- denominator (string or nil): denominator of fraction (nil if no fraction)
-- capitalize (boolean): whether to capitalize the result (e.g. 'One' instead of 'one')
-- use_and (boolean): whether to use the word 'and' between tens/ones place and higher places
-- hyphenate (boolean): whether to hyphenate all words in the result, useful for use as an adjective
-- ordinal (boolean): whether to produce an ordinal (e.g. 'first' instead of 'one')
-- plural (boolean): whether to pluralize the resulting number
-- links: nil: do not add any links; 'on': link "billion" and larger to Orders of magnitude article;
--        any other text: list of numbers to link (e.g. "billion,quadrillion")
-- negative_word: word to use for negative sign (typically 'negative' or 'minus'; nil to use default)
-- round: nil or '': no rounding; 'on': round to nearest two-word number; 'up'/'down': round up/down to two-word number
-- zero: word to use for value '0' (nil to use default)
local function _numeral_to_english(num, numerator, denominator, capitalize, use_and, hyphenate, ordinal, plural, links, negative_word, round, zero)
    if not negative_word then
        if use_and then
            -- TODO Should 'minus' be used when do not have sp=us?
            --      If so, need to update testcases, and need to fix "minus zero".
            -- negative_word = 'minus'
            negative_word = 'negative'
        else
            negative_word = 'negative'
        end
    end
    local status, fraction_text = fraction_to_english(num, numerator, denominator, not use_and, negative_word)
    if status == 'unsupported' then
        return nil
    end
    if status == 'finished' then
        -- Input is a fraction with no whole number.
        -- Hack to avoid executing stuff that depends on num being a number.
        local s = fraction_text
        if hyphenate then s = s:gsub("%s", "-") end
        if capitalize then s = s:gsub("^%l", string.upper) end
        return s
    end
    num = scientific_notation_to_decimal(num)
    if round and round ~= '' then
        if round ~= 'on' and round ~= 'up' and round ~= 'down' then
            return 'Invalid rounding mode'
        end
        num = round_for_english(num, round)
    end

    -- Separate into negative sign, num (digits before decimal), decimal_places (digits after decimal)
    local MINUS = '−'  -- Unicode U+2212 MINUS SIGN (may be in values from [[Module:Convert]])
    if num:sub(1, #MINUS) == MINUS then
        num = '-' .. num:sub(#MINUS + 1)  -- replace MINUS with '-'
    elseif num:sub(1, 1) == '+' then
        num = num:sub(2)  -- ignore any '+'
    end
    local negative = num:find("^%-")
    local decimal_places, subs = num:gsub("^%-?%d*%.(%d+)$", "%1")
    if subs == 0 then decimal_places = nil end
    num, subs = num:gsub("^%-?(%d*)%.?%d*$", "%1")
    if num == '' and decimal_places then num = '0' end
    if subs == 0 or num == '' then return 'Invalid decimal numeral' end

    -- For each group of 3 digits except the last one, print with appropriate group name (e.g. million)
    local s = ''
    while #num > 3 do
        if s ~= '' then s = s .. ' ' end
        local group_num = math.floor((#num - 1) / 3)
        local group = groups[group_num]
        local group_digits = #num - group_num*3
        s = s .. numeral_to_english_less_1000(num:sub(1, group_digits), false, false, false, zero) .. ' '
        if links and (((links == 'on' and group_num >= 3) or links:find(group)) and group_num <= 13) then
            s = s .. '[[Orders_of_magnitude_(numbers)#10' .. group_num*3 .. '|' .. group .. ']]'
        else
            s = s .. group
        end
        num = num:sub(1 + group_digits)
        num = num:gsub("^0*", "")   -- Trim leading zeros
    end

    -- Handle final three digits of integer part
    if s ~= '' and num ~= '' then
        if #num <= 2 and use_and then
            s = s .. ' and '
        else
            s = s .. ' '
        end
    end
    if s == '' or num ~= '' then
        s = s .. numeral_to_english_less_1000(num, use_and, ordinal, plural, zero)
    elseif ordinal or plural then
        -- Round numbers like "one million" take standard suffixes for ordinal/plural
        s = s .. standard_suffix(ordinal, plural)
    end

    -- For decimal places (if any) output "point" followed by spelling out digit by digit
    if decimal_places then
        s = s .. ' point'
        for i = 1, #decimal_places do
            s = s .. ' ' .. ones_position[tonumber(decimal_places:sub(i,i))]
        end
    end

    s = s:gsub("^%s*(.-)%s*$", "%1")   -- Trim whitespace
    if ordinal and plural then s = s .. 's' end  -- s suffix works for all ordinals
    if negative and s ~= zero then s = negative_word .. ' ' .. s end
    s = s:gsub("negative zero", "zero")
    s = s .. fraction_text
    if hyphenate then s = s:gsub("%s", "-") end
    if capitalize then s = s:gsub("^%l", string.upper) end
    return s
end

local p = { spell_number = _numeral_to_english }  -- can be called from another module

function p.numeral_to_english(frame)
    local args = frame.args
    local num = args[1]
    num = num:gsub("^%s*(.-)%s*$", "%1")   -- Trim whitespace
    num = num:gsub(",", "")   -- Remove commas
    if num ~= '' then  -- a fraction may have an empty whole number
        if not num:find("^%-?%d*%.?%d*%-?[Ee]?[+%-]?%d*$") then
            -- Input not in a valid format, try to pass it through #expr to see
            -- if that produces a number (e.g. "3 + 5" will become "8").
            num = frame:preprocess('{{#expr: ' .. num .. '}}')
        end
    end

    -- Pass args from frame to helper function
    local numerator = args['numerator']
    local denominator = args['denominator']
    local case = args['case']
    local sp = args['sp']
    local adj = args['adj']
    local ord = args['ord']
    local pl = args['pl']
    local lk = args['lk']
    local negative = args['negative']
    local round = args['round']
    local zero = args['zero']
    return _numeral_to_english(num, numerator, denominator,
        case == 'U' or case == 'u', sp ~= 'us', adj == 'on',
        ord == 'on', pl == 'on', lk, negative, round, zero) or ''
end

---- recursive function for p.decToHex
local function decToHexDigit(dec)
   local dig = {"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F"}
   local div = math.floor(dec/16)
   local mod = dec-(16*div)
   if div >= 1 then return decToHexDigit(div)..dig[mod+1] else return dig[mod+1] end
end -- I think this is supposed to be done with a tail call but first I want something that works at all
---- finds all the decimal numbers in the input text and hexes each of them
function p.decToHex(frame)
   local args=frame.args
   local parent=frame.getParent(frame)
   local pargs={}
   if parent then pargs=parent.args end
   local text=args[1] or pargs[1] or ""
   local minlength=args.minlength or pargs.minlength or 1
   minlength=tonumber(minlength)
   local prowl=mw.ustring.gmatch(text,"(.-)(%d+)")
   local output=""
   repeat
      local chaff,dec=prowl()
      if not(dec) then break end
      local hex=decToHexDigit(dec)
      while (mw.ustring.len(hex)<minlength) do hex="0"..hex end
      output=output..chaff..hex
   until false
   local chaff=mw.ustring.match(text,"(%D+)$") or ""
   return output..chaff
end

return p