diff --git a/processor/regex.go b/processor/regex.go index efa27ec..40cf861 100644 --- a/processor/regex.go +++ b/processor/regex.go @@ -145,6 +145,19 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr continue } + namedCaptures := make(map[string]string) + groupNames := compiledPattern.SubexpNames() + + for i, name := range groupNames { + if i == 0 { + continue + } + if i < len(match) { + namedCaptures[name] = string(match[i]) + } + } + log.Printf("Named captures: %v", namedCaptures) + captures := make([]string, 0, len(groups)/2) for j := 0; j < len(groups); j += 2 { captures = append(captures, content[groups[j]:groups[j+1]]) diff --git a/processor/regex_test.go b/processor/regex_test.go index c0f4ceb..53f7444 100644 --- a/processor/regex_test.go +++ b/processor/regex_test.go @@ -547,3 +547,787 @@ func TestEdgeCases(t *testing.T) { }) } } + +func TestNamedCaptureGroups(t *testing.T) { + content := ` + + 100 + +` + + expected := ` + + 200 + +` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent(content, `(?s)(?\d+)`, "amount = amount * 2") + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestMultipleNamedCaptureGroups(t *testing.T) { + content := ` + Widget + 15.99 + 10 +` + + expected := ` + WIDGET + 23.99 + 15 +` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)(?[^<]+).*?(?\d+\.\d+).*?(?\d+)`, + `prodName = string.upper(prodName) + prodPrice = round(prodPrice + 8, 2) + prodQty = prodQty + 5`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestMixedIndexedAndNamedCaptures(t *testing.T) { + content := ` + 12345 + value +` + + expected := ` + 24690 + VALUE +` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)(\d+).*?(?[^<]+)`, + `v1 = v1 * 2 + dataField = string.upper(dataField)`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestComplexNestedNamedCaptures(t *testing.T) { + content := ` +
+ John Smith + 32 +
+ + john@example.com + +
` + + expected := ` +
+ JOHN SMITH (32) + 32 +
+ + john@example.com + +
` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)
.*?(?[^<]+).*?(?\d+)`, + `fullName = string.upper(fullName) .. " (" .. age .. ")"`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNamedCaptureWithVariableReadback(t *testing.T) { + content := ` + 100 + 200 +` + + expected := ` + 150 + 300 +` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)(?\d+).*?(?\d+)`, + `hp = hp * 1.5 + mp = mp * 1.5`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNamedCaptureWithSpecialCharsInName(t *testing.T) { + content := `` + + expected := `` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `` + + expected := `` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `attr="(?.*?)"`, + `value = value == "" and "default" or value`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestMultipleNamedCapturesInSameLine(t *testing.T) { + content := `` + + expected := `` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `x="(?\d+)" y="(?\d+)" width="(?\d+)" height="(?\d+)"`, + `x = x * 2 + y = y * 2 + w = w * 2 + h = h * 2`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestConditionalNamedCapture(t *testing.T) { + content := ` + + + ` + + expected := ` + + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + ` + + ` + + expected := ` + + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + ` + ` + + expected := ` + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `', + price, qty, price * qty)`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNamedCaptureWithGlobals(t *testing.T) { + content := `25` + + expected := `77` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?\d+)`, + `if unit == "C" then + value = value * 9/5 + 32 + unit = "F" + elseif unit == "F" then + value = (value - 32) * 5/9 + unit = "C" + end`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestMixedDynamicAndNamedCaptures(t *testing.T) { + content := ` + + + ` + + expected := ` + + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `', + r, g, b, colorName, hex)`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 2 { + t.Errorf("Expected 2 matches, got %d", matches) + } + + if mods != 2 { + t.Errorf("Expected 2 modifications, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNamedCapturesWithMultipleReferences(t *testing.T) { + content := `Hello world` + + expected := `HELLO WORLD` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?[^<]+)`, + `local uppercaseContent = string.upper(content) + local contentLength = string.len(content) + replacement = string.format('%s', + contentLength, uppercaseContent)`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNamedCaptureWithJsonData(t *testing.T) { + content := `{"name":"John","age":30}` + + expected := `{"name":"JOHN","age":30}` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?\{.*?\})`, + `-- Parse JSON (simplified, assumes valid JSON) + local name = json:match('"name":"([^"]+)"') + local upperName = string.upper(name) + json = json:gsub('"name":"([^"]+)"', '"name":"' .. upperName .. '"')`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNamedCaptureInXML(t *testing.T) { + content := ` + + ABC-123 + 19.99 + 25 + + ` + + expected := ` + + ABC-123 + 23.99 + 20 + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)(?\d+\.\d+).*?(?\d+)`, + `-- Add 20% to price if USD + if currency == "USD" then + price = round(price * 1.20, 2) + end + + -- Reduce stock by 5 + stock = stock - 5`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestNestedNamedCaptureGroups(t *testing.T) { + content := ` + + 100 + +` + + expected := ` + + 200 + +` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)(?.*?(?\d+).*?)`, + `innerValue = innerValue * 2`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestComprehensiveNamedCaptures(t *testing.T) { + content := ` + + + Widget A + 19.99 + 15 + + + Widget B + 29.99 + 0 + + + Widget C + 39.99 + 5 + + + ` + + expected := ` + + + WIDGET A + 15.99 + 15 + + + Widget B + 29.99 + 0 + + + WIDGET C + 31.99 + 5 + + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `(?s)]*>\s*(?[^<]+)\s*(?\d+\.\d+)\s*(?\d+)`, + `-- Only process in-stock items + if status == "in-stock" then + -- Transform name to uppercase + product_name = string.upper(product_name) + + -- Apply discount based on currency + local discounted = true + if currency == "USD" then + price = round(price * 0.8, 2) -- 20% discount for USD + elseif currency == "GBP" then + price = round(price * 0.8, 2) -- 20% discount for GBP + price = price + 8 -- Add shipping cost for GBP + else + discounted = false + end + + -- Add discounted attribute + replacement = string.format('\n\t\t\t%s\n\t\t\t%.2f\n\t\t\t%s', + sku, status, tostring(discounted), product_name, currency, price, qty) + else + -- Add discounted attribute for out-of-stock items (always false) + replacement = string.format('\n\t\t\t%s\n\t\t\t%s\n\t\t\t%s', + sku, status, product_name, currency, price, qty) + end`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 3 { + t.Errorf("Expected 3 matches, got %d", matches) + } + + if mods != 3 { + t.Errorf("Expected 3 modifications, got %d", mods) + } + + // Normalize whitespace for comparison + normalizedResult := normalizeWhitespace(result) + normalizedExpected := normalizeWhitespace(expected) + + if normalizedResult != normalizedExpected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestVariousNamedCaptureFormats(t *testing.T) { + content := ` + + + + + + ` + + expected := ` + + + + + + ` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `', id_num, val, status) + else + replacement = string.format('', id_num, val) + end`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 3 { + t.Errorf("Expected 3 matches, got %d", matches) + } + + if mods != 3 { + t.Errorf("Expected 3 modifications, got %d", mods) + } + + normalizedResult := normalizeWhitespace(result) + normalizedExpected := normalizeWhitespace(expected) + + if normalizedResult != normalizedExpected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +} + +func TestSimpleNamedCapture(t *testing.T) { + content := `` + + expected := `` + + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent( + content, + `name="(?[^"]+)"`, + `product_name = string.upper(product_name)`) + + if err != nil { + t.Fatalf("Error processing content: %v", err) + } + + if matches != 1 { + t.Errorf("Expected 1 match, got %d", matches) + } + + if mods != 1 { + t.Errorf("Expected 1 modification, got %d", mods) + } + + if result != expected { + t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) + } +}