From ce28b948d02c7c16c16eceb2cca4276ccbb3d136 Mon Sep 17 00:00:00 2001 From: PhatPhuckDave Date: Sat, 15 Nov 2025 15:48:07 +0100 Subject: [PATCH] Add an options parameter to csv parser and comment support --- processor/luahelper-test.lua | 164 +++++++++++++++++++++++++++++++++-- processor/luahelper.lua | 44 ++++++++-- 2 files changed, 193 insertions(+), 15 deletions(-) diff --git a/processor/luahelper-test.lua b/processor/luahelper-test.lua index 377f543..81a5e4f 100644 --- a/processor/luahelper-test.lua +++ b/processor/luahelper-test.lua @@ -27,7 +27,7 @@ end) -- Test fromCSV with headers test("fromCSV with headers", function() local csv = "foo,bar,baz\n1,2,3\n4,5,6" - local rows = fromCSV(csv, ",", true) + local rows = fromCSV(csv, { hasHeaders = true }) assert(#rows == 2, "Should have 2 data rows") assert(rows[1][1] == "1", "First row first field should be '1'") assert(rows[1].foo == "1", "First row foo should be '1'") @@ -38,7 +38,7 @@ end) -- Test fromCSV with custom delimiter test("fromCSV with tab delimiter", function() local csv = "a\tb\tc\n1\t2\t3" - local rows = fromCSV(csv, "\t") + local rows = fromCSV(csv, { delimiter = "\t" }) assert(#rows == 2, "Should have 2 rows") assert(rows[1][1] == "a", "First row first field should be 'a'") assert(rows[2][2] == "2", "Second row second field should be '2'") @@ -85,12 +85,155 @@ end) -- Test round trip with headers test("fromCSV toCSV round trip with headers", function() local original = "foo,bar,baz\n1,2,3\n4,5,6" - local rows = fromCSV(original, ",", true) + local rows = fromCSV(original, { hasHeaders = true }) local csv = toCSV(rows) local expected = "1,2,3\n4,5,6" assert(csv == expected, "Round trip with headers should preserve data rows") end) +-- Test fromCSV with comments +test("fromCSV with comments", function() + local csv = "# This is a comment\nfoo,bar,baz\n1,2,3\n# Another comment\n4,5,6" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 3, "Should have 3 rows (comments filtered, header + 2 data rows)") + assert(rows[1][1] == "foo", "First row should be header row") + assert(rows[2][1] == "1", "Second row first field should be '1'") + assert(rows[3][1] == "4", "Third row first field should be '4'") +end) + +-- Test fromCSV with comments and headers +test("fromCSV with comments and headers", function() + local csv = "#mercenary_profiles\nId,Name,Value\n1,Test,100\n# End of data\n2,Test2,200" + local rows = fromCSV(csv, { hasHeaders = true, hasComments = true }) + assert(#rows == 2, "Should have 2 data rows") + assert(rows[1].Id == "1", "First row Id should be '1'") + assert(rows[1].Name == "Test", "First row Name should be 'Test'") + assert(rows[1].Value == "100", "First row Value should be '100'") + assert(rows[2].Id == "2", "Second row Id should be '2'") +end) + +-- Test fromCSV with comments disabled +test("fromCSV without comments", function() + local csv = "# This should not be filtered\nfoo,bar\n1,2" + local rows = fromCSV(csv, { hasComments = false }) + assert(#rows == 3, "Should have 3 rows (including comment)") + assert(rows[1][1] == "# This should not be filtered", "Comment line should be preserved") +end) + +-- Test fromCSV with comment at start +test("fromCSV comment at start", function() + local csv = "# Header comment\nId,Name\n1,Test" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 2, "Should have 2 rows (comment filtered)") + assert(rows[1][1] == "Id", "First row should be header") +end) + +-- Test fromCSV with comment with leading whitespace +test("fromCSV comment with whitespace", function() + local csv = " # Comment with spaces\nId,Name\n1,Test" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 2, "Should have 2 rows (comment with spaces filtered)") + assert(rows[1][1] == "Id", "First row should be header") +end) + +-- Test fromCSV with comment with tabs +test("fromCSV comment with tabs", function() + local csv = "\t# Comment with tab\nId,Name\n1,Test" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 2, "Should have 2 rows (comment with tab filtered)") + assert(rows[1][1] == "Id", "First row should be header") +end) + +-- Test fromCSV with multiple consecutive comments +test("fromCSV multiple consecutive comments", function() + local csv = "# First comment\n# Second comment\n# Third comment\nId,Name\n1,Test" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 2, "Should have 2 rows (all comments filtered)") + assert(rows[1][1] == "Id", "First row should be header") +end) + +-- Test fromCSV with comment in middle of data +test("fromCSV comment in middle", function() + local csv = "Id,Name\n1,Test\n# Middle comment\n2,Test2" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 3, "Should have 3 rows (comment filtered)") + assert(rows[1][1] == "Id", "First row should be header") + assert(rows[2][1] == "1", "Second row should be first data") + assert(rows[3][1] == "2", "Third row should be second data") +end) + +-- Test fromCSV with comment at end +test("fromCSV comment at end", function() + local csv = "Id,Name\n1,Test\n# End comment" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 2, "Should have 2 rows (end comment filtered)") + assert(rows[1][1] == "Id", "First row should be header") + assert(rows[2][1] == "1", "Second row should be data") +end) + +-- Test fromCSV with empty comment line +test("fromCSV empty comment", function() + local csv = "#\nId,Name\n1,Test" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 2, "Should have 2 rows (empty comment filtered)") + assert(rows[1][1] == "Id", "First row should be header") +end) + +-- Test fromCSV with comment and headers +test("fromCSV comment with headers enabled", function() + local csv = "#mercenary_profiles\nId,Name,Value\n1,Test,100\n2,Test2,200" + local rows = fromCSV(csv, { hasHeaders = true, hasComments = true }) + assert(#rows == 2, "Should have 2 data rows") + assert(rows[1].Id == "1", "First row Id should be '1'") + assert(rows[1].Name == "Test", "First row Name should be 'Test'") + assert(rows[2].Id == "2", "Second row Id should be '2'") +end) + +-- Test fromCSV with comment and TSV delimiter +test("fromCSV comment with tab delimiter", function() + local csv = "# Comment\nId\tName\n1\tTest" + local rows = fromCSV(csv, { delimiter = "\t", hasComments = true }) + assert(#rows == 2, "Should have 2 rows") + assert(rows[1][1] == "Id", "First row should be header") + assert(rows[2][1] == "1", "Second row first field should be '1'") +end) + +-- Test fromCSV with comment and headers and TSV +test("fromCSV comment with headers and TSV", function() + local csv = "#mercenary_profiles\nId\tName\tValue\n1\tTest\t100" + local rows = fromCSV(csv, { delimiter = "\t", hasHeaders = true, hasComments = true }) + assert(#rows == 1, "Should have 1 data row") + assert(rows[1].Id == "1", "Row Id should be '1'") + assert(rows[1].Name == "Test", "Row Name should be 'Test'") + assert(rows[1].Value == "100", "Row Value should be '100'") +end) + +-- Test fromCSV with data field starting with # (not a comment) +test("fromCSV data field starting with hash", function() + local csv = "Id,Name\n1,#NotAComment\n2,Test" + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 3, "Should have 3 rows (data with # not filtered)") + assert(rows[1][1] == "Id", "First row should be header") + assert(rows[2][2] == "#NotAComment", "Second row should have #NotAComment as data") +end) + +-- Test fromCSV with quoted field starting with # +test("fromCSV quoted field with hash", function() + local csv = 'Id,Name\n1,"#NotAComment"\n2,Test' + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 3, "Should have 3 rows (quoted # not filtered)") + assert(rows[2][2] == "#NotAComment", "Quoted field with # should be preserved") +end) + +-- Test fromCSV with comment after quoted field +test("fromCSV comment after quoted field", function() + local csv = 'Id,Name\n1,"Test"\n# This is a comment\n2,Test2' + local rows = fromCSV(csv, { hasComments = true }) + assert(#rows == 3, "Should have 3 rows (comment filtered)") + assert(rows[2][2] == "Test", "Quoted field should be preserved") + assert(rows[3][1] == "2", "Third row should be second data row") +end) + -- Math function tests test("min function", function() assert(min(5, 3) == 3, "min(5, 3) should be 3") @@ -203,9 +346,18 @@ test("isArray function", function() assert(isArray({}) == true, "isArray should return true for empty array") assert(isArray({ a = 1, b = 2 }) == false, "isArray should return false for map") assert(isArray({ 1, 2, [4] = 4 }) == false, "isArray should return false for sparse array") - assert(isArray({ [1] = 1, [2] = 2, [3] = 3 }) == true, "isArray should return true for 1-indexed array") - assert(isArray({ [0] = 1, [1] = 2 }) == false, "isArray should return false for 0-indexed array") - assert(isArray({ [1] = 1, [2] = 2, [4] = 4 }) == false, "isArray should return false for non-sequential array") + assert( + isArray({ [1] = 1, [2] = 2, [3] = 3 }) == true, + "isArray should return true for 1-indexed array" + ) + assert( + isArray({ [0] = 1, [1] = 2 }) == false, + "isArray should return false for 0-indexed array" + ) + assert( + isArray({ [1] = 1, [2] = 2, [4] = 4 }) == false, + "isArray should return false for non-sequential array" + ) assert(isArray("not a table") == false, "isArray should return false for non-table") assert(isArray(123) == false, "isArray should return false for number") end) diff --git a/processor/luahelper.lua b/processor/luahelper.lua index eab83bd..485666b 100644 --- a/processor/luahelper.lua +++ b/processor/luahelper.lua @@ -48,11 +48,16 @@ function dump(table, depth) end end +--- @class ParserOptions +--- @field delimiter string? The field delimiter (default: ","). +--- @field hasHeaders boolean? If true, first non-comment row is treated as headers (default: false). +--- @field hasComments boolean? If true, lines starting with '#' are skipped (default: false). + --- Parses CSV text into rows and fields using a minimal RFC 4180 state machine. --- --- Requirements/assumptions: --- - Input is a single string containing the entire CSV content. ---- - Field separators are specified by delimiter parameter (default: comma). +--- - Field separators are specified by delimiter option (default: comma). --- - Newlines between rows may be "\n" or "\r\n". "\r\n" is treated as one line break. --- - Fields may be quoted with double quotes ("). --- - Inside quoted fields, doubled quotes ("") represent a literal quote character. @@ -61,14 +66,17 @@ end --- - Leading/trailing spaces are preserved; no trimming is performed. --- - Empty fields and empty rows are preserved. --- - The final row is emitted even if the text does not end with a newline. +--- - Lines starting with '#' (after optional leading whitespace) are treated as comments and skipped if hasComments is true. --- --- @param csv string The CSV text to parse. ---- @param delimiter string? The field delimiter (default: ","). ---- @param hasHeaders boolean? If true, first row is treated as headers and rows can be accessed by header name (default: false). +--- @param options ParserOptions? Options for the parser --- @return table A table (array) of rows; each row is a table with numeric indices and optionally header-named keys. -function fromCSV(csv, delimiter, hasHeaders) - if delimiter == nil then delimiter = "," end - if hasHeaders == nil then hasHeaders = false end +function fromCSV(csv, options) + if options == nil then options = {} end + local delimiter = options.delimiter or "," + local hasHeaders = options.hasHeaders or false + local hasComments = options.hasComments or false + local allRows = {} local fields = {} local field = {} @@ -95,7 +103,13 @@ function fromCSV(csv, delimiter, hasHeaders) elseif c == "\r" or c == "\n" then table.insert(fields, table.concat(field)) field = {} - table.insert(allRows, fields) + local shouldAdd = true + if hasComments and #fields > 0 then + local firstField = fields[1] + local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1") + if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end + end + if shouldAdd then table.insert(allRows, fields) end fields = {} if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then i = i + 2 @@ -127,7 +141,13 @@ function fromCSV(csv, delimiter, hasHeaders) elseif c == "\r" or c == "\n" then table.insert(fields, table.concat(field)) field = {} - table.insert(allRows, fields) + local shouldAdd = true + if hasComments and #fields > 0 then + local firstField = fields[1] + local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1") + if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end + end + if shouldAdd then table.insert(allRows, fields) end fields = {} state = STATE_DEFAULT if c == "\r" and i < len and csv:sub(i + 1, i + 1) == "\n" then @@ -144,7 +164,13 @@ function fromCSV(csv, delimiter, hasHeaders) if #field > 0 or #fields > 0 then table.insert(fields, table.concat(field)) - table.insert(allRows, fields) + local shouldAdd = true + if hasComments and #fields > 0 then + local firstField = fields[1] + local trimmed = string.gsub(firstField, "^%s*(.-)%s*$", "%1") + if string.sub(trimmed, 1, 1) == "#" then shouldAdd = false end + end + if shouldAdd then table.insert(allRows, fields) end end if hasHeaders and #allRows > 0 then