Add an options parameter to csv parser and comment support
This commit is contained in:
@@ -27,7 +27,7 @@ end)
|
||||
-- Test fromCSV with headers
|
||||
test("fromCSV with headers", function()
|
||||
local csv = "foo,bar,baz\n1,2,3\n4,5,6"
|
||||
local rows = fromCSV(csv, ",", true)
|
||||
local rows = fromCSV(csv, { hasHeaders = true })
|
||||
assert(#rows == 2, "Should have 2 data rows")
|
||||
assert(rows[1][1] == "1", "First row first field should be '1'")
|
||||
assert(rows[1].foo == "1", "First row foo should be '1'")
|
||||
@@ -38,7 +38,7 @@ end)
|
||||
-- Test fromCSV with custom delimiter
|
||||
test("fromCSV with tab delimiter", function()
|
||||
local csv = "a\tb\tc\n1\t2\t3"
|
||||
local rows = fromCSV(csv, "\t")
|
||||
local rows = fromCSV(csv, { delimiter = "\t" })
|
||||
assert(#rows == 2, "Should have 2 rows")
|
||||
assert(rows[1][1] == "a", "First row first field should be 'a'")
|
||||
assert(rows[2][2] == "2", "Second row second field should be '2'")
|
||||
@@ -85,12 +85,155 @@ end)
|
||||
-- Test round trip with headers
|
||||
test("fromCSV toCSV round trip with headers", function()
|
||||
local original = "foo,bar,baz\n1,2,3\n4,5,6"
|
||||
local rows = fromCSV(original, ",", true)
|
||||
local rows = fromCSV(original, { hasHeaders = true })
|
||||
local csv = toCSV(rows)
|
||||
local expected = "1,2,3\n4,5,6"
|
||||
assert(csv == expected, "Round trip with headers should preserve data rows")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comments
|
||||
test("fromCSV with comments", function()
|
||||
local csv = "# This is a comment\nfoo,bar,baz\n1,2,3\n# Another comment\n4,5,6"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 3, "Should have 3 rows (comments filtered, header + 2 data rows)")
|
||||
assert(rows[1][1] == "foo", "First row should be header row")
|
||||
assert(rows[2][1] == "1", "Second row first field should be '1'")
|
||||
assert(rows[3][1] == "4", "Third row first field should be '4'")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comments and headers
|
||||
test("fromCSV with comments and headers", function()
|
||||
local csv = "#mercenary_profiles\nId,Name,Value\n1,Test,100\n# End of data\n2,Test2,200"
|
||||
local rows = fromCSV(csv, { hasHeaders = true, hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 data rows")
|
||||
assert(rows[1].Id == "1", "First row Id should be '1'")
|
||||
assert(rows[1].Name == "Test", "First row Name should be 'Test'")
|
||||
assert(rows[1].Value == "100", "First row Value should be '100'")
|
||||
assert(rows[2].Id == "2", "Second row Id should be '2'")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comments disabled
|
||||
test("fromCSV without comments", function()
|
||||
local csv = "# This should not be filtered\nfoo,bar\n1,2"
|
||||
local rows = fromCSV(csv, { hasComments = false })
|
||||
assert(#rows == 3, "Should have 3 rows (including comment)")
|
||||
assert(rows[1][1] == "# This should not be filtered", "Comment line should be preserved")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment at start
|
||||
test("fromCSV comment at start", function()
|
||||
local csv = "# Header comment\nId,Name\n1,Test"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows (comment filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment with leading whitespace
|
||||
test("fromCSV comment with whitespace", function()
|
||||
local csv = " # Comment with spaces\nId,Name\n1,Test"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows (comment with spaces filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment with tabs
|
||||
test("fromCSV comment with tabs", function()
|
||||
local csv = "\t# Comment with tab\nId,Name\n1,Test"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows (comment with tab filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with multiple consecutive comments
|
||||
test("fromCSV multiple consecutive comments", function()
|
||||
local csv = "# First comment\n# Second comment\n# Third comment\nId,Name\n1,Test"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows (all comments filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment in middle of data
|
||||
test("fromCSV comment in middle", function()
|
||||
local csv = "Id,Name\n1,Test\n# Middle comment\n2,Test2"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 3, "Should have 3 rows (comment filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
assert(rows[2][1] == "1", "Second row should be first data")
|
||||
assert(rows[3][1] == "2", "Third row should be second data")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment at end
|
||||
test("fromCSV comment at end", function()
|
||||
local csv = "Id,Name\n1,Test\n# End comment"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows (end comment filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
assert(rows[2][1] == "1", "Second row should be data")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with empty comment line
|
||||
test("fromCSV empty comment", function()
|
||||
local csv = "#\nId,Name\n1,Test"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows (empty comment filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment and headers
|
||||
test("fromCSV comment with headers enabled", function()
|
||||
local csv = "#mercenary_profiles\nId,Name,Value\n1,Test,100\n2,Test2,200"
|
||||
local rows = fromCSV(csv, { hasHeaders = true, hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 data rows")
|
||||
assert(rows[1].Id == "1", "First row Id should be '1'")
|
||||
assert(rows[1].Name == "Test", "First row Name should be 'Test'")
|
||||
assert(rows[2].Id == "2", "Second row Id should be '2'")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment and TSV delimiter
|
||||
test("fromCSV comment with tab delimiter", function()
|
||||
local csv = "# Comment\nId\tName\n1\tTest"
|
||||
local rows = fromCSV(csv, { delimiter = "\t", hasComments = true })
|
||||
assert(#rows == 2, "Should have 2 rows")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
assert(rows[2][1] == "1", "Second row first field should be '1'")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment and headers and TSV
|
||||
test("fromCSV comment with headers and TSV", function()
|
||||
local csv = "#mercenary_profiles\nId\tName\tValue\n1\tTest\t100"
|
||||
local rows = fromCSV(csv, { delimiter = "\t", hasHeaders = true, hasComments = true })
|
||||
assert(#rows == 1, "Should have 1 data row")
|
||||
assert(rows[1].Id == "1", "Row Id should be '1'")
|
||||
assert(rows[1].Name == "Test", "Row Name should be 'Test'")
|
||||
assert(rows[1].Value == "100", "Row Value should be '100'")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with data field starting with # (not a comment)
|
||||
test("fromCSV data field starting with hash", function()
|
||||
local csv = "Id,Name\n1,#NotAComment\n2,Test"
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 3, "Should have 3 rows (data with # not filtered)")
|
||||
assert(rows[1][1] == "Id", "First row should be header")
|
||||
assert(rows[2][2] == "#NotAComment", "Second row should have #NotAComment as data")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with quoted field starting with #
|
||||
test("fromCSV quoted field with hash", function()
|
||||
local csv = 'Id,Name\n1,"#NotAComment"\n2,Test'
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 3, "Should have 3 rows (quoted # not filtered)")
|
||||
assert(rows[2][2] == "#NotAComment", "Quoted field with # should be preserved")
|
||||
end)
|
||||
|
||||
-- Test fromCSV with comment after quoted field
|
||||
test("fromCSV comment after quoted field", function()
|
||||
local csv = 'Id,Name\n1,"Test"\n# This is a comment\n2,Test2'
|
||||
local rows = fromCSV(csv, { hasComments = true })
|
||||
assert(#rows == 3, "Should have 3 rows (comment filtered)")
|
||||
assert(rows[2][2] == "Test", "Quoted field should be preserved")
|
||||
assert(rows[3][1] == "2", "Third row should be second data row")
|
||||
end)
|
||||
|
||||
-- Math function tests
|
||||
test("min function", function()
|
||||
assert(min(5, 3) == 3, "min(5, 3) should be 3")
|
||||
@@ -203,9 +346,18 @@ test("isArray function", function()
|
||||
assert(isArray({}) == true, "isArray should return true for empty array")
|
||||
assert(isArray({ a = 1, b = 2 }) == false, "isArray should return false for map")
|
||||
assert(isArray({ 1, 2, [4] = 4 }) == false, "isArray should return false for sparse array")
|
||||
assert(isArray({ [1] = 1, [2] = 2, [3] = 3 }) == true, "isArray should return true for 1-indexed array")
|
||||
assert(isArray({ [0] = 1, [1] = 2 }) == false, "isArray should return false for 0-indexed array")
|
||||
assert(isArray({ [1] = 1, [2] = 2, [4] = 4 }) == false, "isArray should return false for non-sequential array")
|
||||
assert(
|
||||
isArray({ [1] = 1, [2] = 2, [3] = 3 }) == true,
|
||||
"isArray should return true for 1-indexed array"
|
||||
)
|
||||
assert(
|
||||
isArray({ [0] = 1, [1] = 2 }) == false,
|
||||
"isArray should return false for 0-indexed array"
|
||||
)
|
||||
assert(
|
||||
isArray({ [1] = 1, [2] = 2, [4] = 4 }) == false,
|
||||
"isArray should return false for non-sequential array"
|
||||
)
|
||||
assert(isArray("not a table") == false, "isArray should return false for non-table")
|
||||
assert(isArray(123) == false, "isArray should return false for number")
|
||||
end)
|
||||
|
||||
@@ -48,11 +48,16 @@ function dump(table, depth)
|
||||
end
|
||||
end
|
||||
|
||||
--- @class ParserOptions
|
||||
--- @field delimiter string? The field delimiter (default: ",").
|
||||
--- @field hasHeaders boolean? If true, first non-comment row is treated as headers (default: false).
|
||||
--- @field hasComments boolean? If true, lines starting with '#' are skipped (default: false).
|
||||
|
||||
--- Parses CSV text into rows and fields using a minimal RFC 4180 state machine.
|
||||
---
|
||||
--- Requirements/assumptions:
|
||||
--- - Input is a single string containing the entire CSV content.
|
||||
--- - Field separators are specified by delimiter parameter (default: comma).
|
||||
--- - Field separators are specified by delimiter option (default: comma).
|
||||
--- - Newlines between rows may be "\n" or "\r\n". "\r\n" is treated as one line break.
|
||||
--- - Fields may be quoted with double quotes (").
|
||||
--- - Inside quoted fields, doubled quotes ("") represent a literal quote character.
|
||||
@@ -61,14 +66,17 @@ end
|
||||
--- - Leading/trailing spaces are preserved; no trimming is performed.
|
||||
--- - Empty fields and empty rows are preserved.
|
||||
--- - The final row is emitted even if the text does not end with a newline.
|
||||
--- - Lines starting with '#' (after optional leading whitespace) are treated as comments and skipped if hasComments is true.
|
||||
---
|
||||
--- @param csv string The CSV text to parse.
|
||||
--- @param delimiter string? The field delimiter (default: ",").
|
||||
--- @param hasHeaders boolean? If true, first row is treated as headers and rows can be accessed by header name (default: false).
|
||||
--- @param options ParserOptions? Options for the parser
|
||||
--- @return table A table (array) of rows; each row is a table with numeric indices and optionally header-named keys.
|
||||
function fromCSV(csv, delimiter, hasHeaders)
|
||||
if delimiter == nil then delimiter = "," end
|
||||
if hasHeaders == nil then hasHeaders = false end
|
||||
function fromCSV(csv, options)
|
||||
if options == nil then options = {} end
|
||||
local delimiter = options.delimiter or ","
|
||||
local hasHeaders = options.hasHeaders or false
|
||||
local hasComments = options.hasComments or false
|
||||
|
||||
local allRows = {}
|
||||
local fields = {}
|
||||
local field = {}
|
||||
@@ -95,7 +103,13 @@ function fromCSV(csv, delimiter, hasHeaders)
|
||||
elseif c == "\r" or c == "\n" then
|
||||
table.insert(fields, table.concat(field))
|
||||
field = {}
|
||||
table.insert(allRows, fields)
|
||||
local shouldAdd = true
|
||||
if hasComments and #fields > 0 then
|
||||
local firstField = fields[1]
|
||||
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
|
||||
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
|
||||
end
|
||||
if shouldAdd then table.insert(allRows, fields) end
|
||||
fields = {}
|
||||
if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then
|
||||
i = i + 2
|
||||
@@ -127,7 +141,13 @@ function fromCSV(csv, delimiter, hasHeaders)
|
||||
elseif c == "\r" or c == "\n" then
|
||||
table.insert(fields, table.concat(field))
|
||||
field = {}
|
||||
table.insert(allRows, fields)
|
||||
local shouldAdd = true
|
||||
if hasComments and #fields > 0 then
|
||||
local firstField = fields[1]
|
||||
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
|
||||
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
|
||||
end
|
||||
if shouldAdd then table.insert(allRows, fields) end
|
||||
fields = {}
|
||||
state = STATE_DEFAULT
|
||||
if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then
|
||||
@@ -144,7 +164,13 @@ function fromCSV(csv, delimiter, hasHeaders)
|
||||
|
||||
if #field > 0 or #fields > 0 then
|
||||
table.insert(fields, table.concat(field))
|
||||
table.insert(allRows, fields)
|
||||
local shouldAdd = true
|
||||
if hasComments and #fields > 0 then
|
||||
local firstField = fields[1]
|
||||
local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1")
|
||||
if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end
|
||||
end
|
||||
if shouldAdd then table.insert(allRows, fields) end
|
||||
end
|
||||
|
||||
if hasHeaders and #allRows > 0 then
|
||||
|
||||
Reference in New Issue
Block a user