Add named capture group tests

This commit is contained in:
2025-03-26 12:13:18 +01:00
parent b63b4d1352
commit a8c2257f20
2 changed files with 797 additions and 0 deletions

View File

@@ -145,6 +145,19 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr
continue
}
namedCaptures := make(map[string]string)
groupNames := compiledPattern.SubexpNames()
for i, name := range groupNames {
if i == 0 {
continue
}
if i < len(match) {
namedCaptures[name] = string(match[i])
}
}
log.Printf("Named captures: %v", namedCaptures)
captures := make([]string, 0, len(groups)/2)
for j := 0; j < len(groups); j += 2 {
captures = append(captures, content[groups[j]:groups[j+1]])

View File

@@ -547,3 +547,787 @@ func TestEdgeCases(t *testing.T) {
})
}
}
// TestNamedCaptureGroups expects a single named group (amount) to be usable
// from the Lua expression and its doubled value to appear in the output.
func TestNamedCaptureGroups(t *testing.T) {
	const (
		input = `<config>
<item>
<value>100</value>
</item>
</config>`
		want = `<config>
<item>
<value>200</value>
</item>
</config>`
		pattern = `(?s)<value>(?<amount>\d+)</value>`
		script  = "amount = amount * 2"
	)

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestMultipleNamedCaptureGroups expects three named groups from one match
// (prodName, prodPrice, prodQty) to be rewritten independently by the script.
func TestMultipleNamedCaptureGroups(t *testing.T) {
	const input = `<product>
<name>Widget</name>
<price>15.99</price>
<quantity>10</quantity>
</product>`
	const want = `<product>
<name>WIDGET</name>
<price>23.99</price>
<quantity>15</quantity>
</product>`
	const pattern = `(?s)<name>(?<prodName>[^<]+)</name>.*?<price>(?<prodPrice>\d+\.\d+)</price>.*?<quantity>(?<prodQty>\d+)</quantity>`
	const script = `prodName = string.upper(prodName)
prodPrice = round(prodPrice + 8, 2)
prodQty = prodQty + 5`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestMixedIndexedAndNamedCaptures combines an unnamed group, addressed in the
// script as v1, with a named group (dataField) in the same pattern.
func TestMixedIndexedAndNamedCaptures(t *testing.T) {
	const input = `<entry>
<id>12345</id>
<data>value</data>
</entry>`
	const want = `<entry>
<id>24690</id>
<data>VALUE</data>
</entry>`
	const pattern = `(?s)<id>(\d+)</id>.*?<data>(?<dataField>[^<]+)</data>`
	const script = `v1 = v1 * 2
dataField = string.upper(dataField)`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestComplexNestedNamedCaptures captures two values from nested XML elements
// and expects the script to build fullName from both while age stays as is.
func TestComplexNestedNamedCaptures(t *testing.T) {
	const input = `<person>
<details>
<name>John Smith</name>
<age>32</age>
</details>
<contact>
<email>john@example.com</email>
</contact>
</person>`
	const want = `<person>
<details>
<name>JOHN SMITH (32)</name>
<age>32</age>
</details>
<contact>
<email>john@example.com</email>
</contact>
</person>`
	const pattern = `(?s)<details>.*?<name>(?<fullName>[^<]+)</name>.*?<age>(?<age>\d+)</age>`
	const script = `fullName = string.upper(fullName) .. " (" .. age .. ")"`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestNamedCaptureWithVariableReadback scales two named captures (hp, mp) by
// 1.5 and expects the new values to be read back into the output.
func TestNamedCaptureWithVariableReadback(t *testing.T) {
	const input = `<stats>
<health>100</health>
<mana>200</mana>
</stats>`
	const want = `<stats>
<health>150</health>
<mana>300</mana>
</stats>`
	const pattern = `(?s)<health>(?<hp>\d+)</health>.*?<mana>(?<mp>\d+)</mana>`
	const script = `hp = hp * 1.5
mp = mp * 1.5`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestNamedCaptureWithSpecialCharsInName uses a group name containing an
// underscore and a digit (val_1) and expects it to work like any other name.
func TestNamedCaptureWithSpecialCharsInName(t *testing.T) {
	const (
		input   = `<data value="42" min="10" max="100" />`
		want    = `<data value="84" min="10" max="100" />`
		pattern = `<data value="(?<val_1>\d+)"`
		script  = `val_1 = val_1 * 2`
	)

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestEmptyNamedCapture matches a named group against an empty attribute value
// and expects the script to substitute "default" for it.
func TestEmptyNamedCapture(t *testing.T) {
	const (
		input   = `<tag attr="" />`
		want    = `<tag attr="default" />`
		pattern = `attr="(?<value>.*?)"`
		script  = `value = value == "" and "default" or value`
	)

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestMultipleNamedCapturesInSameLine doubles four named captures (x, y, w, h)
// that all sit on one line of input.
func TestMultipleNamedCapturesInSameLine(t *testing.T) {
	const input = `<rect x="10" y="20" width="100" height="50" />`
	const want = `<rect x="20" y="40" width="200" height="100" />`
	const pattern = `x="(?<x>\d+)" y="(?<y>\d+)" width="(?<w>\d+)" height="(?<h>\d+)"`
	const script = `x = x * 2
y = y * 2
w = w * 2
h = h * 2`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestConditionalNamedCapture doubles count only when the status capture of
// the same match is "active"; the test expects both items to be counted as
// matches and as modifications.
func TestConditionalNamedCapture(t *testing.T) {
	const input = `
<item status="active" count="5" />
<item status="inactive" count="10" />
`
	const want = `
<item status="active" count="10" />
<item status="inactive" count="10" />
`
	const pattern = `<item status="(?<status>[^"]+)" count="(?<count>\d+)"`
	const script = `count = status == "active" and count * 2 or count`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 2 {
		t.Errorf("Expected 2 matches, got %d", matchCount)
	}
	if modCount != 2 {
		t.Errorf("Expected 2 modifications, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestLuaFunctionsOnNamedCaptures runs a multi-line Lua script that branches
// on one named capture (role) to decide how to rewrite another (name): for
// role "user" each word is capitalized, otherwise the name is uppercased.
//
// Expected and actual output are compared after passing both through
// normalizeWhitespace (defined elsewhere in this package).
func TestLuaFunctionsOnNamedCaptures(t *testing.T) {
	content := `
<user name="john doe" role="user" />
<user name="jane smith" role="admin" />
`
	expected := `
<user name="John Doe" role="user" />
<user name="JANE SMITH" role="admin" />
`

	// One gsub over "(%w)(%w*)" already visits every word, so it is enough to
	// capitalize "john doe" into "John Doe". The script used to chain a second
	// gsub whose replacement argument was `" %1%2":gsub(...)`; Lua does not
	// accept a method call directly on a string literal, so the chunk could
	// not be compiled at all.
	script := `-- Capitalize first letters for regular users
if role == "user" then
name = name:gsub("(%w)(%w*)", function(first, rest) return first:upper()..rest end)
else
-- Uppercase for admins
name = string.upper(name)
end`

	p := &RegexProcessor{}
	result, mods, matches, err := p.ProcessContent(
		content,
		`<user name="(?<name>[^"]+)" role="(?<role>[^"]+)"`,
		script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}
	if matches != 2 {
		t.Errorf("Expected 2 matches, got %d", matches)
	}
	if mods != 2 {
		t.Errorf("Expected 2 modifications, got %d", mods)
	}

	// Compare the normalized forms; report the raw strings on failure so the
	// actual output is visible.
	normalizedResult := normalizeWhitespace(result)
	normalizedExpected := normalizeWhitespace(expected)
	if normalizedResult != normalizedExpected {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
	}
}
// TestNamedCaptureWithMath multiplies two named captures (price, qty) and
// sets the Lua variable replacement to a rebuilt <item> element that carries
// the product as a new total attribute.
func TestNamedCaptureWithMath(t *testing.T) {
	content := `
<item price="19.99" quantity="3" />
`
	expected := `
<item price="19.99" quantity="3" total="59.97" />
`

	// The pattern consumes the closing " />" as well. The replacement string
	// ends with its own " />", so a pattern that stopped after the quantity
	// attribute would leave the original " />" behind and the output would
	// end in " /> />".
	// NOTE(review): assumes `replacement` substitutes the whole match — confirm
	// against ProcessContent.
	pattern := `<item price="(?<price>\d+\.\d+)" quantity="(?<qty>\d+)" />`

	p := &RegexProcessor{}
	result, mods, matches, err := p.ProcessContent(
		content,
		pattern,
		`-- Calculate and add total
replacement = string.format('<item price="%s" quantity="%s" total="%.2f" />',
price, qty, price * qty)`)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}
	if matches != 1 {
		t.Errorf("Expected 1 match, got %d", matches)
	}
	if mods != 1 {
		t.Errorf("Expected 1 modification, got %d", mods)
	}
	if result != expected {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
	}
}
// TestNamedCaptureWithGlobals converts a temperature between Celsius and
// Fahrenheit, rewriting both the unit and the value captures in one script.
func TestNamedCaptureWithGlobals(t *testing.T) {
	const input = `<temp unit="C">25</temp>`
	const want = `<temp unit="F">77</temp>`
	const pattern = `<temp unit="(?<unit>[CF]?)">(?<value>\d+)</temp>`
	const script = `if unit == "C" then
value = value * 9/5 + 32
unit = "F"
elseif unit == "F" then
value = (value - 32) * 5/9
unit = "C"
end`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestMixedDynamicAndNamedCaptures reads four named captures (r, g, b,
// colorName), derives a hex color from the three components, and sets the Lua
// variable replacement to a rebuilt <color> element with an extra hex
// attribute. Two elements are present, so two matches are expected.
func TestMixedDynamicAndNamedCaptures(t *testing.T) {
	content := `
<color rgb="255,0,0" name="red" />
<color rgb="0,255,0" name="green" />
`
	expected := `
<color rgb="255,0,0" name="RED" hex="#FF0000" />
<color rgb="0,255,0" name="GREEN" hex="#00FF00" />
`

	// The pattern consumes the closing " />" as well. The script replaces the
	// entire match with a string that ends in its own " />", so a pattern
	// that stopped after the name attribute would leave the original " />"
	// behind and every element would end in " /> />".
	pattern := `<color rgb="(?<r>\d+),(?<g>\d+),(?<b>\d+)" name="(?<colorName>[^"]+)" />`

	p := &RegexProcessor{}
	result, mods, matches, err := p.ProcessContent(
		content,
		pattern,
		`-- Uppercase the name
colorName = string.upper(colorName)
-- Create hex color
local hex = string.format("#%02X%02X%02X", tonumber(r), tonumber(g), tonumber(b))
-- Replace the entire match
replacement = string.format('<color rgb="%s,%s,%s" name="%s" hex="%s" />',
r, g, b, colorName, hex)`)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}
	if matches != 2 {
		t.Errorf("Expected 2 matches, got %d", matches)
	}
	if mods != 2 {
		t.Errorf("Expected 2 modifications, got %d", mods)
	}
	if result != expected {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
	}
}
// TestNamedCapturesWithMultipleReferences reads the same named capture
// (content) twice in the script, once for its uppercase form and once for its
// length, and rebuilds the element through the replacement variable.
func TestNamedCapturesWithMultipleReferences(t *testing.T) {
	const input = `<text>Hello world</text>`
	const want = `<text format="uppercase" length="11">HELLO WORLD</text>`
	const pattern = `<text>(?<content>[^<]+)</text>`
	const script = `local uppercaseContent = string.upper(content)
local contentLength = string.len(content)
replacement = string.format('<text format="uppercase" length="%d">%s</text>',
contentLength, uppercaseContent)`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestNamedCaptureWithJsonData captures an embedded JSON object as one named
// group (json) and expects the script's Lua string edits on it to be written
// back.
func TestNamedCaptureWithJsonData(t *testing.T) {
	const input = `<data>{"name":"John","age":30}</data>`
	const want = `<data>{"name":"JOHN","age":30}</data>`
	const pattern = `<data>(?<json>\{.*?\})</data>`
	const script = `-- Parse JSON (simplified, assumes valid JSON)
local name = json:match('"name":"([^"]+)"')
local upperName = string.upper(name)
json = json:gsub('"name":"([^"]+)"', '"name":"' .. upperName .. '"')`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestNamedCaptureInXML captures currency, price and stock from a product
// record; the script raises the price when the currency is USD and lowers the
// stock by five.
func TestNamedCaptureInXML(t *testing.T) {
	const input = `
<product>
<sku>ABC-123</sku>
<price currency="USD">19.99</price>
<stock>25</stock>
</product>
`
	const want = `
<product>
<sku>ABC-123</sku>
<price currency="USD">23.99</price>
<stock>20</stock>
</product>
`
	const pattern = `(?s)<price currency="(?<currency>[^"]+)">(?<price>\d+\.\d+)</price>.*?<stock>(?<stock>\d+)</stock>`
	const script = `-- Add 20% to price if USD
if currency == "USD" then
price = round(price * 1.20, 2)
end
-- Reduce stock by 5
stock = stock - 5`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestNestedNamedCaptureGroups nests one named group (innerValue) inside
// another (outerContent) and modifies only the inner one.
func TestNestedNamedCaptureGroups(t *testing.T) {
	const input = `<nested>
<outer>
<inner>100</inner>
</outer>
</nested>`
	const want = `<nested>
<outer>
<inner>200</inner>
</outer>
</nested>`
	const pattern = `(?s)<outer>(?<outerContent>.*?<inner>(?<innerValue>\d+)</inner>.*?)</outer>`
	const script = `innerValue = innerValue * 2`

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}
// TestComprehensiveNamedCaptures runs one script over three <product> records
// and uses six named captures (sku, status, product_name, currency, price,
// qty) to rebuild each opening section through the replacement variable.
//
// In-stock products get an uppercase name and a currency-dependent price;
// out-of-stock products are re-emitted unchanged apart from the added
// discounted="false" attribute. Expected and actual output are compared after
// passing both through normalizeWhitespace (defined elsewhere in this
// package).
func TestComprehensiveNamedCaptures(t *testing.T) {
	content := `
<products>
<product sku="AB-123" status="in-stock">
<name>Widget A</name>
<price currency="USD">19.99</price>
<quantity>15</quantity>
</product>
<product sku="CD-456" status="out-of-stock">
<name>Widget B</name>
<price currency="EUR">29.99</price>
<quantity>0</quantity>
</product>
<product sku="EF-789" status="in-stock">
<name>Widget C</name>
<price currency="GBP">39.99</price>
<quantity>5</quantity>
</product>
</products>
`
	// Prices follow the script below, assuming round(x, 2) rounds to two
	// decimals (the helper is not defined in this file):
	//   USD: round(19.99 * 0.8, 2)     = 15.99
	//   GBP: round(39.99 * 0.8, 2) + 8 = 31.99 + 8 = 39.99
	// The GBP expectation previously read 31.99, which left out the shipping
	// cost that the script adds after the discount.
	expected := `
<products>
<product sku="AB-123" status="in-stock" discounted="true">
<name>WIDGET A</name>
<price currency="USD">15.99</price>
<quantity>15</quantity>
</product>
<product sku="CD-456" status="out-of-stock" discounted="false">
<name>Widget B</name>
<price currency="EUR">29.99</price>
<quantity>0</quantity>
</product>
<product sku="EF-789" status="in-stock" discounted="true">
<name>WIDGET C</name>
<price currency="GBP">39.99</price>
<quantity>5</quantity>
</product>
</products>
`
	p := &RegexProcessor{}
	result, mods, matches, err := p.ProcessContent(
		content,
		`(?s)<product sku="(?<sku>[^"]+)" status="(?<status>[^"]+)"[^>]*>\s*<name>(?<product_name>[^<]+)</name>\s*<price currency="(?<currency>[^"]+)">(?<price>\d+\.\d+)</price>\s*<quantity>(?<qty>\d+)</quantity>`,
		`-- Only process in-stock items
if status == "in-stock" then
-- Transform name to uppercase
product_name = string.upper(product_name)
-- Apply discount based on currency
local discounted = true
if currency == "USD" then
price = round(price * 0.8, 2) -- 20% discount for USD
elseif currency == "GBP" then
price = round(price * 0.8, 2) -- 20% discount for GBP
price = price + 8 -- Add shipping cost for GBP
else
discounted = false
end
-- Add discounted attribute
replacement = string.format('<product sku="%s" status="%s" discounted="%s">\n\t\t\t<name>%s</name>\n\t\t\t<price currency="%s">%.2f</price>\n\t\t\t<quantity>%s</quantity>',
sku, status, tostring(discounted), product_name, currency, price, qty)
else
-- Add discounted attribute for out-of-stock items (always false)
replacement = string.format('<product sku="%s" status="%s" discounted="false">\n\t\t\t<name>%s</name>\n\t\t\t<price currency="%s">%s</price>\n\t\t\t<quantity>%s</quantity>',
sku, status, product_name, currency, price, qty)
end`)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}
	if matches != 3 {
		t.Errorf("Expected 3 matches, got %d", matches)
	}
	if mods != 3 {
		t.Errorf("Expected 3 modifications, got %d", mods)
	}

	// The replacement embeds its own "\n\t\t\t" separators, so compare the
	// normalized forms rather than the raw strings.
	normalizedResult := normalizeWhitespace(result)
	normalizedExpected := normalizeWhitespace(expected)
	if normalizedResult != normalizedExpected {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
	}
}
// TestVariousNamedCaptureFormats covers a pattern whose last named group
// (status) is optional. Every entry gets an "ID-" prefix; the value is doubled
// unless the status is "inactive"; an "active" status is uppercased; and the
// status attribute is only emitted for entries that had one.
//
// Expected and actual output are compared after passing both through
// normalizeWhitespace (defined elsewhere in this package).
func TestVariousNamedCaptureFormats(t *testing.T) {
	content := `
<data>
<entry id="1" value="100" />
<entry id="2" value="200" status="active" />
<entry id="3" value="300" status="inactive" />
</data>
`
	expected := `
<data>
<entry id="ID-1" value="200" />
<entry id="ID-2" value="400" status="ACTIVE" />
<entry id="ID-3" value="300" status="inactive" />
</data>
`

	// The pattern consumes the closing " />" as well. Both replacement
	// formats end with their own " />", so a pattern that stopped before it
	// would leave the original " />" behind and each entry would end in
	// " /> />".
	pattern := `<entry id="(?<id_num>\d+)" value="(?<val>\d+)"(?: status="(?<status>[^"]*)")? />`

	// hasStatus treats both nil and "" as "no status attribute": an empty
	// string is truthy in Lua, so a bare `if status then` would emit
	// status="" for an entry whose optional group did not participate.
	// NOTE(review): presumably unmatched groups reach Lua as "" — confirm
	// against ProcessContent.
	script := `-- Prefix the ID with "ID-"
id_num = "ID-" .. id_num
-- The optional group may be nil or empty when the attribute is missing
local hasStatus = status ~= nil and status ~= ""
-- Double the value except for inactive status
if not hasStatus or status ~= "inactive" then
val = val * 2
end
-- Convert status to uppercase if present and active
if hasStatus and status == "active" then
status = string.upper(status)
end
-- Build the replacement based on whether status exists
if hasStatus then
replacement = string.format('<entry id="%s" value="%s" status="%s" />', id_num, val, status)
else
replacement = string.format('<entry id="%s" value="%s" />', id_num, val)
end`

	p := &RegexProcessor{}
	result, mods, matches, err := p.ProcessContent(content, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}
	if matches != 3 {
		t.Errorf("Expected 3 matches, got %d", matches)
	}
	if mods != 3 {
		t.Errorf("Expected 3 modifications, got %d", mods)
	}
	normalizedResult := normalizeWhitespace(result)
	normalizedExpected := normalizeWhitespace(expected)
	if normalizedResult != normalizedExpected {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
	}
}
// TestSimpleNamedCapture is the minimal case: one named group (product_name)
// in a single-line input, uppercased by the script.
func TestSimpleNamedCapture(t *testing.T) {
	const (
		input   = `<product name="Widget" price="19.99"/>`
		want    = `<product name="WIDGET" price="19.99"/>`
		pattern = `name="(?<product_name>[^"]+)"`
		script  = `product_name = string.upper(product_name)`
	)

	processor := new(RegexProcessor)
	got, modCount, matchCount, err := processor.ProcessContent(input, pattern, script)
	if err != nil {
		t.Fatalf("Error processing content: %v", err)
	}

	if matchCount != 1 {
		t.Errorf("Expected 1 match, got %d", matchCount)
	}
	if modCount != 1 {
		t.Errorf("Expected 1 modification, got %d", modCount)
	}
	if got != want {
		t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", want, got)
	}
}