From e8d6613ac89280c9d81788d1b8ee060804b0fb24 Mon Sep 17 00:00:00 2001
From: PhatPhuckDave
Date: Mon, 3 Nov 2025 16:32:14 +0100
Subject: [PATCH] Add ParseCSV as lua function

---
 processor/processor.go | 143 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 121 insertions(+), 22 deletions(-)

diff --git a/processor/processor.go b/processor/processor.go
index 622a217..f869bf1 100644
--- a/processor/processor.go
+++ b/processor/processor.go
@@ -189,24 +189,122 @@ end
 
 ---@param table table
 ---@param depth number?
-function DumpTable(table, depth)
+function dump(table, depth)
 	if depth == nil then
 		depth = 0
 	end
 	if (depth > 200) then
-		print("Error: Depth > 200 in dumpTable()")
+		print("Error: Depth > 200 in dump()")
 		return
 	end
 	for k, v in pairs(table) do
 		if (type(v) == "table") then
 			print(string.rep(" ", depth) .. k .. ":")
-			DumpTable(v, depth + 1)
+			dump(v, depth + 1)
 		else
 			print(string.rep(" ", depth) .. k .. ": ", v)
 		end
 	end
 end
 
+--- Parses CSV text into rows and fields using a minimal RFC 4180 state machine.
+--- Declared as a global (like dump and num) so that user scripts can call it.
+--- Requirements/assumptions:
+--- - Input is a single string containing the entire CSV content.
+--- - Field separators are commas (,).
+--- - Newlines between rows may be "\n" or "\r\n". "\r\n" is treated as one line break.
+--- - Fields may be quoted with double quotes (").
+--- - Inside quoted fields, doubled quotes ("") represent a literal quote character.
+--- - No backslash escaping is supported (not part of RFC 4180).
+--- - Newlines inside quoted fields are preserved as part of the field.
+--- - Leading/trailing spaces are preserved; no trimming is performed.
+--- - Empty fields and empty rows are preserved.
+--- - The final row is emitted even without a trailing newline, including a row that is only a quoted empty field ("").
+---
+--- Returns:
+--- - A table (array) of rows; each row is a table (array) of string fields.
+function parseCSV(csv)
+	local rows = {}
+	local fields = {}
+	local field = {}
+
+	local STATE_DEFAULT = 1
+	local STATE_IN_QUOTES = 2
+	local STATE_QUOTE_IN_QUOTES = 3
+	local state = STATE_DEFAULT
+
+	local i = 1
+	local len = #csv
+
+	while i <= len do
+		local c = csv:sub(i, i)
+
+		if state == STATE_DEFAULT then
+			if c == '"' then
+				state = STATE_IN_QUOTES
+				i = i + 1
+			elseif c == ',' then
+				table.insert(fields, table.concat(field))
+				field = {}
+				i = i + 1
+			elseif c == '\r' or c == '\n' then
+				table.insert(fields, table.concat(field))
+				field = {}
+				table.insert(rows, fields)
+				fields = {}
+				if c == '\r' and i < len and csv:sub(i + 1, i + 1) == '\n' then
+					i = i + 2
+				else
+					i = i + 1
+				end
+			else
+				table.insert(field, c)
+				i = i + 1
+			end
+		elseif state == STATE_IN_QUOTES then
+			if c == '"' then
+				state = STATE_QUOTE_IN_QUOTES
+				i = i + 1
+			else
+				table.insert(field, c)
+				i = i + 1
+			end
+		else -- STATE_QUOTE_IN_QUOTES
+			if c == '"' then
+				table.insert(field, '"')
+				state = STATE_IN_QUOTES
+				i = i + 1
+			elseif c == ',' then
+				table.insert(fields, table.concat(field))
+				field = {}
+				state = STATE_DEFAULT
+				i = i + 1
+			elseif c == '\r' or c == '\n' then
+				table.insert(fields, table.concat(field))
+				field = {}
+				table.insert(rows, fields)
+				fields = {}
+				state = STATE_DEFAULT
+				if c == '\r' and i < len and csv:sub(i + 1, i + 1) == '\n' then
+					i = i + 2
+				else
+					i = i + 1
+				end
+			else
+				state = STATE_DEFAULT
+				-- Don't increment i, reprocess character in DEFAULT state
+			end
+		end
+	end
+
+	if state ~= STATE_DEFAULT or #field > 0 or #fields > 0 then -- a quote-opened field counts as started even when it is empty
+		table.insert(fields, table.concat(field))
+		table.insert(rows, fields)
+	end
+
+	return rows
+end
+
 -- String to number conversion helper
 function num(str)
 	return tonumber(str) or 0
@@ -519,25 +617,26 @@ func GetLuaFunctionsHelp() string {
 	return `Lua Functions Available in Global Environment:
 
 MATH FUNCTIONS:
-min(a, b)      - Returns the minimum of two numbers
-max(a, b)      - Returns the maximum of two numbers
-round(x, n)    - Rounds x to n decimal places (default 0)
-floor(x)       - Returns the floor of x
-ceil(x)        - Returns the ceiling of x
+min(a, b)               - Returns the minimum of two numbers
+max(a, b)               - Returns the maximum of two numbers
+round(x, n)             - Rounds x to n decimal places (default 0)
+floor(x)                - Returns the floor of x
+ceil(x)                 - Returns the ceiling of x
 
 STRING FUNCTIONS:
-upper(s)       - Converts string to uppercase
-lower(s)       - Converts string to lowercase
-format(s, ...) - Formats string using Lua string.format
-trim(s)        - Removes leading/trailing whitespace
+upper(s)                - Converts string to uppercase
+lower(s)                - Converts string to lowercase
+format(s, ...)          - Formats string using Lua string.format
+trim(s)                 - Removes leading/trailing whitespace
 strsplit(inputstr, sep) - Splits string by separator (default: whitespace)
-num(str)       - Converts string to number (returns 0 if invalid)
-str(num)       - Converts number to string
-is_number(str) - Returns true if string is numeric
+parseCSV(csv)           - Parses CSV text into rows of fields
+num(str)                - Converts string to number (returns 0 if invalid)
+str(num)                - Converts number to string
+is_number(str)          - Returns true if string is numeric
 
 TABLE FUNCTIONS:
-DumpTable(table, depth) - Prints table structure recursively
-isArray(t)     - Returns true if table is a sequential array
+dump(table, depth)      - Prints table structure recursively
+isArray(t)              - Returns true if table is a sequential array
 
 HTTP FUNCTIONS:
 fetch(url, options) - Makes HTTP request, returns response table
@@ -552,12 +651,12 @@ UTILITY FUNCTIONS:
 print(...) - Prints arguments to Go logger
 
 EXAMPLES:
-round(3.14159, 2) -> 3.14
+round(3.14159, 2)      -> 3.14
 strsplit("a,b,c", ",") -> {"a", "b", "c"}
-upper("hello")    -> "HELLO"
-min(5, 3)         -> 3
-num("123")        -> 123
-is_number("abc")  -> false
+upper("hello")         -> "HELLO"
+min(5, 3)              -> 3
+num("123")             -> 123
+is_number("abc")       -> false
 fetch("https://api.example.com/data")
 re("(\\w+)@(\\w+)", "user@domain.com") -> {"user@domain.com", "user", "domain.com"}`
 }