Add an options parameter to csv parser and comment support

This commit is contained in:
2025-11-15 15:48:07 +01:00
parent efc602e0ba
commit ce28b948d0
2 changed files with 193 additions and 15 deletions

View File

@@ -48,11 +48,16 @@ function dump(table, depth)
end
end
--- @class ParserOptions
--- @field delimiter string? The field delimiter (default: ",").
--- @field hasHeaders boolean? If true, first non-comment row is treated as headers (default: false).
--- @field hasComments boolean? If true, rows whose first field starts with '#' (ignoring leading whitespace) are skipped as comments (default: false).
--- Parses CSV text into rows and fields using a minimal RFC 4180 state machine.
---
--- Requirements/assumptions:
--- - Input is a single string containing the entire CSV content.
--- - Field separators are specified by delimiter parameter (default: comma).
--- - Field separators are specified by delimiter option (default: comma).
--- - Newlines between rows may be "\n" or "\r\n". "\r\n" is treated as one line break.
--- - Fields may be quoted with double quotes (").
--- - Inside quoted fields, doubled quotes ("") represent a literal quote character.
@@ -61,14 +66,17 @@ end
--- - Leading/trailing spaces are preserved; no trimming is performed.
--- - Empty fields and empty rows are preserved.
--- - The final row is emitted even if the text does not end with a newline.
--- - When hasComments is true, a row whose first parsed field begins with '#' (ignoring leading whitespace) is treated as a comment and skipped.
---
--- @param csv string The CSV text to parse.
--- @param delimiter string? The field delimiter (default: ",").
--- @param hasHeaders boolean? If true, first row is treated as headers and rows can be accessed by header name (default: false).
--- @param options ParserOptions? Options for the parser
--- @return table A table (array) of rows; each row is a table with numeric indices and optionally header-named keys.
function fromCSV(csv, delimiter, hasHeaders)
if delimiter == nil then delimiter = "," end
if hasHeaders == nil then hasHeaders = false end
function fromCSV(csv, options)
if options == nil then options = {} end
local delimiter = options.delimiter or ","
local hasHeaders = options.hasHeaders or false
local hasComments = options.hasComments or false
local allRows = {}
local fields = {}
local field = {}
@@ -95,7 +103,13 @@ function fromCSV(csv, delimiter, hasHeaders)
elseif c == "\r" or c == "\n" then
table.insert(fields, table.concat(field))
field = {}
table.insert(allRows, fields)
local shouldAdd = true
if hasComments and #fields > 0 then
local firstField = fields[1]
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
end
if shouldAdd then table.insert(allRows, fields) end
fields = {}
if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then
i = i + 2
@@ -127,7 +141,13 @@ function fromCSV(csv, delimiter, hasHeaders)
elseif c == "\r" or c == "\n" then
table.insert(fields, table.concat(field))
field = {}
table.insert(allRows, fields)
local shouldAdd = true
if hasComments and #fields > 0 then
local firstField = fields[1]
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
end
if shouldAdd then table.insert(allRows, fields) end
fields = {}
state = STATE_DEFAULT
if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then
@@ -144,7 +164,13 @@ function fromCSV(csv, delimiter, hasHeaders)
if #field > 0 or #fields > 0 then
table.insert(fields, table.concat(field))
table.insert(allRows, fields)
local shouldAdd = true
if hasComments and #fields > 0 then
local firstField = fields[1]
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
end
if shouldAdd then table.insert(allRows, fields) end
end
if hasHeaders and #allRows > 0 then