-- Credit: https://gist.github.com/phi-gamma/2622252

-- Global (deliberately not local) namespace table; an already existing
-- `packagedata` is reused, otherwise a fresh table is created.
packagedata		= packagedata or { } -- namespace proposal for packages

-- Both files are located through kpathsea and executed for their side
-- effects. `characters.data` and `table.tohash` are read further down;
-- presumably they are defined by these two files -- verify.
dofile(kpse.find_file"char-def.lua")      -- unicode tables
dofile(kpse.find_file"lualibs-table.lua") -- old Context table code

-- Local aliases for library tables.
local utf  = unicode.utf8
local node = node
local type = type

-- Local aliases for library functions.
local lower, utfchar, utfvalues = string.lower, utf.char, string.utfvalues
local tableconcat, iowrite      = table.concat, io.write
local stringformat, texprint    = string.format, tex.print

local traverse_nodes  = node.traverse
-- Per-codepoint records; this file reads their `category` and `lccode` fields.
local chardata        = characters.data

-- Numeric node type ids, looked up once when the file is loaded.
local glyph_code      = node.id"glyph"
local disc_code       = node.id"disc"
local kern_code       = node.id"kern"
-- Kern subtype that mark_words treats as not interrupting a word.
local kerning_code    = 0 -- from font

-- * LaTeX counters interface

-- Reuse an existing global `latex` table if there is one.
local latex = latex or {}

-- `latex.count.NAME` reads the TeX count register `c@NAME`; the lookup is
-- forwarded to `tex.count` through the __index metamethod, so nothing is
-- cached in the proxy table itself.
-- Credit: https://tex.stackexchange.com/a/330403/238079
latex.count = setmetatable({}, {
  __index = function (_, name)
    return tex.count['c@' .. name]
  end,
})

--- Read a LaTeX counter.
-- @param counter counter name without the `c@` prefix
-- @return the result of `tex.getcount` for the register `c@<counter>`
latex.getcount = function (counter)
   local register = 'c@' .. counter
   return tex.getcount(register)
end

--- Assign a value to a LaTeX counter.
-- NOTE(review): `tex.setcount` is called without a "global" prefix, so the
-- assignment presumably obeys TeX grouping, whereas LaTeX itself changes
-- counters globally -- confirm this is intended.
-- @param counter counter name without the `c@` prefix
-- @param value   new value for the register `c@<counter>`
-- @return the result of `tex.setcount`
latex.setcount = function (counter, value)
   local register = 'c@' .. counter
   return tex.setcount(register, value)
end

--- Add one to a LaTeX counter.
-- The current value is read through `latex.count` and written back with
-- `latex.setcount`.
-- @param counter counter name without the `c@` prefix
-- @return the result of `latex.setcount`
latex.inccount = function (counter)
   local bumped = latex.count[counter] + 1
   return latex.setcount(counter, bumped)
end

-- * Global wordcount state

-- This table holds the counters we are counting into. Keys are names
-- of LaTeX counters, values are true (we are counting into it) or
-- false (we aren't). It is filled by enable_counter/disable_counter and
-- read by insert_word.
local counters = {}

-- Minimum number of characters required to count as a word; changed
-- through set_threshold.
local threshold = 1

--- Start counting words into a LaTeX counter.
-- Defined as a global; it is also part of the table returned by this file.
-- @param counter counter name without the `c@` prefix
enable_counter = function (counter)
   counters[counter] = true
end

--- Stop counting words into a LaTeX counter.
-- The entry is kept in the table with the value false rather than removed.
-- Defined as a global; it is also part of the table returned by this file.
-- @param counter counter name without the `c@` prefix
disable_counter = function (counter)
   counters[counter] = false
end

--- Set the minimum number of characters a word needs in order to be counted.
-- Numeric strings are converted, so a value passed over from TeX as text
-- works too. Anything else that is not a number is rejected here, instead
-- of failing later when the threshold is compared against a word length.
-- @param n new threshold (number or numeric string); nil or false leaves
--          the current threshold untouched
function set_threshold (n)
   if not n then
      return
   end
   local value = tonumber(n)
   if not value then
      -- Level 2 reports the error at the caller's position.
      error("set_threshold: expected a number, got " .. tostring(n), 2)
   end
   threshold = value
end

-- * Counting

-- Set of character categories that make a glyph part of a word; mark_words
-- looks up `chardata[code].category` in it.
local is_letter = table.tohash {
  "ll", -- lowercase letter
  "lm", -- modifier letter
  "lo", -- other letter
  "lt", -- titlecase letter
  "lu", -- uppercase letter
}

-- Lua 5.1 provides `unpack` as a global, Lua 5.2 and later provide
-- `table.unpack` (the global may be absent there). Resolve whichever exists
-- so that multi-codepoint lowercase mappings do not call a nil value.
local unpack = table.unpack or unpack

local charcache = { } --- memo without metatable

--- Return the lowercase form of a character as a UTF-8 string.
-- The `lccode` entry of `chardata[code]` is used when present; it may be a
-- single codepoint or a table of codepoints. Without such an entry a
-- numeric `code` is converted to its own UTF-8 string and any other value
-- is passed through unchanged. Results are memoised in `charcache`.
-- @param code codepoint (number), or another value to pass through
-- @return the lowercase string, or nil when `code` is nil or false
local lcchar = function(code)
  if code then
    local cached = charcache[code]
    if cached then return cached end
    local c = chardata[code]
    c = c and c.lccode
    if c then --utfstring
      if type(c) == "table" then
        c = utfchar(unpack(c))
      else
        c = utfchar(c)
      end
    elseif type(code) == "number" then
      c = utfchar(code)
    else
      c = code
    end
    charcache[code] = c
    return c
  end
end

--- Lowercase a string one character at a time.
-- Every codepoint yielded by `utfvalues` is mapped through `lcchar` and the
-- results are joined back into one string.
-- @param str input string
-- @return the lowercased string
local function lowerchar (str)
  local pieces = { }
  for codepoint in utfvalues(str) do
    pieces[#pieces + 1] = lcchar(codepoint) -- could be inlined here as well ..
  end
  return tableconcat(pieces)
end

--- Walk a node list, collect words and report each one to `whenfound`.
-- A word is built from consecutive glyph nodes:
--   * a glyph with `components` contributes the characters of all its
--     components (no category check is made for them);
--   * any other glyph contributes its character if its `chardata` category
--     is listed in `is_letter`, otherwise it ends the current word;
--   * a disc node is remembered as belonging to the word once at least one
--     node has been collected, and does not end the word;
--   * a kern whose subtype equals `kerning_code` is skipped;
--   * every other node ends the current word.
-- @param head      first node of the list
-- @param whenfound function called with each word (a string); if it
--                  returns a function, that function is called with every
--                  node collected for the word
-- @return head, unchanged
-- @return true if `whenfound` returned a marker for at least one word,
--         nil otherwise
local function mark_words (head, whenfound)
  local marked -- stays nil until some word yields a marker
  local letters, nletters = { }, 0 -- characters of the word being built
  local members, nmembers = { }, 0 -- nodes belonging to that word

  local function remember (item)
    nmembers = nmembers + 1
    members[nmembers] = item
  end

  local function append (code)
    nletters = nletters + 1
    letters[nletters] = utfchar(code)
  end

  -- Report the pending word, if any, and start over. With no pending
  -- characters nothing happens and the collected nodes are kept.
  local function flush ()
    if nletters == 0 then
      return
    end
    local marker = whenfound(tableconcat(letters, "", 1, nletters))
    if marker then
      marked = true
      for i = 1, nmembers do
        marker(members[i])
      end
    end
    nmembers, nletters = 0, 0
  end

  local item = head
  while item do
    local id = item.id
    if id == glyph_code then
      local parts = item.components
      if parts then
        remember(item)
        for part in traverse_nodes(parts) do
          append(part.char)
        end
      else
        local code = item.char
        local info = chardata[code]
        if info and is_letter[info.category] then
          remember(item)
          append(code)
        else
          flush()
        end
      end
    elseif id == disc_code then -- take the replace
      if nmembers > 0 then
        remember(item)
      end
    elseif not (id == kern_code and item.subtype == kerning_code) then
      flush()
    end
    item = item.next
  end
  flush() -- the list may end in the middle of a word
  return head, marked
end

--- Count one word: increment every enabled LaTeX counter if the word has
-- at least `threshold` characters.
-- Passed to mark_words as its `whenfound` handler; it returns nothing, so
-- no nodes are marked.
-- @param str the word as a UTF-8 encoded string
local function insert_word (str)
   -- Measure the word in characters, not bytes: `#str` would count a
   -- multi-byte letter such as "é" more than once. Every UTF-8 character
   -- has exactly one byte outside the continuation range 0x80-0xBF, so
   -- counting those bytes gives the number of characters.
   local _, length = string.gsub(str, "[^\128-\191]", "")
   if length >= threshold then
      -- For each enabled counter...
      for counter, enabled in pairs(counters) do
         -- ...increment the counter.
         if enabled then latex.inccount(counter) end
      end
   end
end

--- Node list handler: count the words found in the list starting at `head`.
-- @param head first node of the list
-- @return the two values returned by mark_words (the head and its flag)
local function callback (head)
  local list, done = mark_words(head, insert_word)
  return list, done
end

-- * Export module

-- Public interface handed to whoever loads this file.
return {
   callback        = callback,
   set_threshold   = set_threshold,
   enable_counter  = enable_counter,
   disable_counter = disable_counter,
   latex           = latex,
}
