Add an options parameter to csv parser and comment support

This commit is contained in:
2025-11-15 15:48:07 +01:00
parent efc602e0ba
commit ce28b948d0
2 changed files with 193 additions and 15 deletions

View File

@@ -27,7 +27,7 @@ end)
-- Test fromCSV with headers
test("fromCSV with headers", function()
local csv = "foo,bar,baz\n1,2,3\n4,5,6"
local rows = fromCSV(csv, ",", true)
local rows = fromCSV(csv, { hasHeaders = true })
assert(#rows == 2, "Should have 2 data rows")
assert(rows[1][1] == "1", "First row first field should be '1'")
assert(rows[1].foo == "1", "First row foo should be '1'")
@@ -38,7 +38,7 @@ end)
-- Test fromCSV with custom delimiter
test("fromCSV with tab delimiter", function()
local csv = "a\tb\tc\n1\t2\t3"
local rows = fromCSV(csv, "\t")
local rows = fromCSV(csv, { delimiter = "\t" })
assert(#rows == 2, "Should have 2 rows")
assert(rows[1][1] == "a", "First row first field should be 'a'")
assert(rows[2][2] == "2", "Second row second field should be '2'")
@@ -85,12 +85,155 @@ end)
-- Test round trip with headers
test("fromCSV toCSV round trip with headers", function()
local original = "foo,bar,baz\n1,2,3\n4,5,6"
local rows = fromCSV(original, ",", true)
local rows = fromCSV(original, { hasHeaders = true })
local csv = toCSV(rows)
local expected = "1,2,3\n4,5,6"
assert(csv == expected, "Round trip with headers should preserve data rows")
end)
-- With hasComments enabled, '#' lines are expected to be dropped; since
-- hasHeaders is not set, the header line comes back as an ordinary first row.
test("fromCSV with comments", function()
    local input = "# This is a comment\nfoo,bar,baz\n1,2,3\n# Another comment\n4,5,6"
    local opts = { hasComments = true }
    local parsed = fromCSV(input, opts)
    assert(#parsed == 3, "Should have 3 rows (comments filtered, header + 2 data rows)")
    -- Row count is verified above, so the three rows can be named safely.
    local header, firstData, secondData = parsed[1], parsed[2], parsed[3]
    assert(header[1] == "foo", "First row should be header row")
    assert(firstData[1] == "1", "Second row first field should be '1'")
    assert(secondData[1] == "4", "Third row first field should be '4'")
end)
-- Combines hasHeaders and hasComments: the leading '#' line must not be taken
-- as the header row, and a comment between data rows must be dropped.
test("fromCSV with comments and headers", function()
    local input = "#mercenary_profiles\nId,Name,Value\n1,Test,100\n# End of data\n2,Test2,200"
    local opts = { hasHeaders = true, hasComments = true }
    local parsed = fromCSV(input, opts)
    assert(#parsed == 2, "Should have 2 data rows")
    local firstData = parsed[1]
    assert(firstData.Id == "1", "First row Id should be '1'")
    assert(firstData.Name == "Test", "First row Name should be 'Test'")
    assert(firstData.Value == "100", "First row Value should be '100'")
    local secondData = parsed[2]
    assert(secondData.Id == "2", "Second row Id should be '2'")
end)
-- With hasComments explicitly false, a '#' line is expected to be kept as data.
test("fromCSV without comments", function()
    local input = "# This should not be filtered\nfoo,bar\n1,2"
    local opts = { hasComments = false }
    local parsed = fromCSV(input, opts)
    assert(#parsed == 3, "Should have 3 rows (including comment)")
    local keptLine = parsed[1]
    assert(keptLine[1] == "# This should not be filtered", "Comment line should be preserved")
end)
-- A single comment on the very first line should be removed, leaving the
-- header line as row 1.
test("fromCSV comment at start", function()
    local input = "# Header comment\nId,Name\n1,Test"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 2, "Should have 2 rows (comment filtered)")
    local header = parsed[1]
    assert(header[1] == "Id", "First row should be header")
end)
-- Leading spaces before '#' should not prevent the line from being recognised
-- as a comment.
test("fromCSV comment with whitespace", function()
    local input = " # Comment with spaces\nId,Name\n1,Test"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 2, "Should have 2 rows (comment with spaces filtered)")
    local header = parsed[1]
    assert(header[1] == "Id", "First row should be header")
end)
-- Same as the leading-space case, but the '#' is preceded by a tab character.
test("fromCSV comment with tabs", function()
    local input = "\t# Comment with tab\nId,Name\n1,Test"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 2, "Should have 2 rows (comment with tab filtered)")
    local header = parsed[1]
    assert(header[1] == "Id", "First row should be header")
end)
-- Several comment lines in a row should all be skipped, not just the first.
test("fromCSV multiple consecutive comments", function()
    local input = "# First comment\n# Second comment\n# Third comment\nId,Name\n1,Test"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 2, "Should have 2 rows (all comments filtered)")
    local header = parsed[1]
    assert(header[1] == "Id", "First row should be header")
end)
-- A comment placed between two data rows should be removed without disturbing
-- the rows on either side of it.
test("fromCSV comment in middle", function()
    local input = "Id,Name\n1,Test\n# Middle comment\n2,Test2"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 3, "Should have 3 rows (comment filtered)")
    local header, firstData, secondData = parsed[1], parsed[2], parsed[3]
    assert(header[1] == "Id", "First row should be header")
    assert(firstData[1] == "1", "Second row should be first data")
    assert(secondData[1] == "2", "Third row should be second data")
end)
-- A trailing comment with no final newline exercises the end-of-input path:
-- it should still be filtered out.
test("fromCSV comment at end", function()
    local input = "Id,Name\n1,Test\n# End comment"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 2, "Should have 2 rows (end comment filtered)")
    local header, firstData = parsed[1], parsed[2]
    assert(header[1] == "Id", "First row should be header")
    assert(firstData[1] == "1", "Second row should be data")
end)
-- A line consisting of a bare '#' (no comment text) should count as a comment.
test("fromCSV empty comment", function()
    local input = "#\nId,Name\n1,Test"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 2, "Should have 2 rows (empty comment filtered)")
    local header = parsed[1]
    assert(header[1] == "Id", "First row should be header")
end)
-- A leading '#' line followed by a header row: with both options on, rows
-- should be addressable by header name and the comment must not become the header.
test("fromCSV comment with headers enabled", function()
    local input = "#mercenary_profiles\nId,Name,Value\n1,Test,100\n2,Test2,200"
    local opts = { hasHeaders = true, hasComments = true }
    local parsed = fromCSV(input, opts)
    assert(#parsed == 2, "Should have 2 data rows")
    local firstData, secondData = parsed[1], parsed[2]
    assert(firstData.Id == "1", "First row Id should be '1'")
    assert(firstData.Name == "Test", "First row Name should be 'Test'")
    assert(secondData.Id == "2", "Second row Id should be '2'")
end)
-- Comment filtering should work the same when the delimiter is a tab.
test("fromCSV comment with tab delimiter", function()
    local input = "# Comment\nId\tName\n1\tTest"
    local opts = { delimiter = "\t", hasComments = true }
    local parsed = fromCSV(input, opts)
    assert(#parsed == 2, "Should have 2 rows")
    local header, firstData = parsed[1], parsed[2]
    assert(header[1] == "Id", "First row should be header")
    assert(firstData[1] == "1", "Second row first field should be '1'")
end)
-- All three options together: tab delimiter, header row, and comment skipping.
test("fromCSV comment with headers and TSV", function()
    local input = "#mercenary_profiles\nId\tName\tValue\n1\tTest\t100"
    local opts = { delimiter = "\t", hasHeaders = true, hasComments = true }
    local parsed = fromCSV(input, opts)
    assert(#parsed == 1, "Should have 1 data row")
    local record = parsed[1]
    assert(record.Id == "1", "Row Id should be '1'")
    assert(record.Name == "Test", "Row Name should be 'Test'")
    assert(record.Value == "100", "Row Value should be '100'")
end)
-- A '#' at the start of a non-first field is data, not a comment marker, so
-- the row containing it must be kept.
test("fromCSV data field starting with hash", function()
    local input = "Id,Name\n1,#NotAComment\n2,Test"
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 3, "Should have 3 rows (data with # not filtered)")
    local header, firstData = parsed[1], parsed[2]
    assert(header[1] == "Id", "First row should be header")
    assert(firstData[2] == "#NotAComment", "Second row should have #NotAComment as data")
end)
-- A quoted non-first field whose content begins with '#' is expected to
-- survive comment filtering with its value intact.
test("fromCSV quoted field with hash", function()
    local input = 'Id,Name\n1,"#NotAComment"\n2,Test'
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 3, "Should have 3 rows (quoted # not filtered)")
    local firstData = parsed[2]
    assert(firstData[2] == "#NotAComment", "Quoted field with # should be preserved")
end)
-- A comment line directly after a row that ended in a quoted field should
-- still be detected and dropped.
test("fromCSV comment after quoted field", function()
    local input = 'Id,Name\n1,"Test"\n# This is a comment\n2,Test2'
    local parsed = fromCSV(input, { hasComments = true })
    assert(#parsed == 3, "Should have 3 rows (comment filtered)")
    local firstData, secondData = parsed[2], parsed[3]
    assert(firstData[2] == "Test", "Quoted field should be preserved")
    assert(secondData[1] == "2", "Third row should be second data row")
end)
-- Math function tests
test("min function", function()
assert(min(5, 3) == 3, "min(5, 3) should be 3")
@@ -203,9 +346,18 @@ test("isArray function", function()
assert(isArray({}) == true, "isArray should return true for empty array")
assert(isArray({ a = 1, b = 2 }) == false, "isArray should return false for map")
assert(isArray({ 1, 2, [4] = 4 }) == false, "isArray should return false for sparse array")
assert(isArray({ [1] = 1, [2] = 2, [3] = 3 }) == true, "isArray should return true for 1-indexed array")
assert(isArray({ [0] = 1, [1] = 2 }) == false, "isArray should return false for 0-indexed array")
assert(isArray({ [1] = 1, [2] = 2, [4] = 4 }) == false, "isArray should return false for non-sequential array")
assert(
isArray({ [1] = 1, [2] = 2, [3] = 3 }) == true,
"isArray should return true for 1-indexed array"
)
assert(
isArray({ [0] = 1, [1] = 2 }) == false,
"isArray should return false for 0-indexed array"
)
assert(
isArray({ [1] = 1, [2] = 2, [4] = 4 }) == false,
"isArray should return false for non-sequential array"
)
assert(isArray("not a table") == false, "isArray should return false for non-table")
assert(isArray(123) == false, "isArray should return false for number")
end)

View File

@@ -48,11 +48,16 @@ function dump(table, depth)
end
end
--- @class ParserOptions
--- @field delimiter string? The field delimiter (default: ",").
--- @field hasHeaders boolean? If true, first non-comment row is treated as headers (default: false).
--- @field hasComments boolean? If true, rows whose first field starts with '#' (after trimming surrounding whitespace) are skipped (default: false).
--- Parses CSV text into rows and fields using a minimal RFC 4180 state machine.
---
--- Requirements/assumptions:
--- - Input is a single string containing the entire CSV content.
--- - Field separators are specified by delimiter parameter (default: comma).
--- - Field separators are specified by delimiter option (default: comma).
--- - Newlines between rows may be "\n" or "\r\n". "\r\n" is treated as one line break.
--- - Fields may be quoted with double quotes (").
--- - Inside quoted fields, doubled quotes ("") represent a literal quote character.
@@ -61,14 +66,17 @@ end
--- - Leading/trailing spaces are preserved; no trimming is performed.
--- - Empty fields and empty rows are preserved.
--- - The final row is emitted even if the text does not end with a newline.
--- - Lines starting with '#' (after optional leading whitespace) are treated as comments and skipped if hasComments is true.
---
--- @param csv string The CSV text to parse.
--- @param delimiter string? The field delimiter (default: ",").
--- @param hasHeaders boolean? If true, first row is treated as headers and rows can be accessed by header name (default: false).
--- @param options ParserOptions? Parser options; when nil, an empty table is used and every option takes its default.
--- @return table A table (array) of rows; each row is a table with numeric indices and optionally header-named keys.
function fromCSV(csv, delimiter, hasHeaders)
if delimiter == nil then delimiter = "," end
if hasHeaders == nil then hasHeaders = false end
function fromCSV(csv, options)
if options == nil then options = {} end
local delimiter = options.delimiter or ","
local hasHeaders = options.hasHeaders or false
local hasComments = options.hasComments or false
local allRows = {}
local fields = {}
local field = {}
@@ -95,7 +103,13 @@ function fromCSV(csv, delimiter, hasHeaders)
elseif c == "\r" or c == "\n" then
table.insert(fields, table.concat(field))
field = {}
table.insert(allRows, fields)
local shouldAdd = true
if hasComments and #fields > 0 then
local firstField = fields[1]
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
end
if shouldAdd then table.insert(allRows, fields) end
fields = {}
if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then
i = i + 2
@@ -127,7 +141,13 @@ function fromCSV(csv, delimiter, hasHeaders)
elseif c == "\r" or c == "\n" then
table.insert(fields, table.concat(field))
field = {}
table.insert(allRows, fields)
local shouldAdd = true
if hasComments and #fields > 0 then
local firstField = fields[1]
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
end
if shouldAdd then table.insert(allRows, fields) end
fields = {}
state = STATE_DEFAULT
if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then
@@ -144,7 +164,13 @@ function fromCSV(csv, delimiter, hasHeaders)
if #field > 0 or #fields > 0 then
table.insert(fields, table.concat(field))
table.insert(allRows, fields)
local shouldAdd = true
if hasComments and #fields > 0 then
local firstField = fields[1]
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
end
if shouldAdd then table.insert(allRows, fields) end
end
if hasHeaders and #allRows > 0 then