From 583b2169dc155aeb6ba5cddbdad76fcfd604ed7e Mon Sep 17 00:00:00 2001 From: PhatPhuckDave Date: Mon, 24 Mar 2025 21:29:03 +0100 Subject: [PATCH] Rework regex processor to be more betterer --- processor/regex.go | 88 ++++++++++++++++++++++++----------------- processor/regex_test.go | 24 ++++++----- 2 files changed, 66 insertions(+), 46 deletions(-) diff --git a/processor/regex.go b/processor/regex.go index 7a131e9..74d66b9 100644 --- a/processor/regex.go +++ b/processor/regex.go @@ -50,13 +50,13 @@ func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error { } // Set variables for each capture group, starting from v1/s1 for the first capture - for i := 1; i < len(captures); i++ { + for i := 0; i < len(captures); i++ { // Set string version (always available as s1, s2, etc.) - L.SetGlobal(fmt.Sprintf("s%d", i), lua.LString(captures[i])) + L.SetGlobal(fmt.Sprintf("s%d", i+1), lua.LString(captures[i])) // Try to convert to number and set v1, v2, etc. if val, err := strconv.ParseFloat(captures[i], 64); err == nil { - L.SetGlobal(fmt.Sprintf("v%d", i), lua.LNumber(val)) + L.SetGlobal(fmt.Sprintf("v%d", i+1), lua.LNumber(val)) } } @@ -111,70 +111,86 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr luaExpr = BuildLuaScript(luaExpr) fmt.Printf("Changing Lua expression from: %s to: %s\n", previous, luaExpr) - L := lua.NewState() + L, err := NewLuaState() + if err != nil { + return "", 0, 0, fmt.Errorf("error creating Lua state: %v", err) + } defer L.Close() // Initialize Lua environment modificationCount := 0 - matchCount := 0 - - // Load math library - L.Push(L.GetGlobal("require")) - L.Push(lua.LString("math")) - if err := L.PCall(1, 1, nil); err != nil { - return content, 0, 0, fmt.Errorf("error loading Lua math library: %v", err) - } - - // Initialize helper functions - if err := InitLuaHelpers(L); err != nil { - return content, 0, 0, err - } // Process all regex matches - result := compiledPattern.ReplaceAllStringFunc(content, func(match string) string { - matchCount++ - captures := compiledPattern.FindStringSubmatch(match) - if len(captures) <= 1 { - // No capture groups, return unchanged + result := content + indices := compiledPattern.FindAllStringSubmatchIndex(content, -1) + // We walk backwards because we're replacing something with something else that might be longer + // And in the case it is longer than the original all indicces past that change will be fucked up + // By going backwards we fuck up all the indices to the end of the file that we don't care about + // Because there either aren't any (last match) or they're already modified (subsequent matches) + for i := len(indices) - 1; i >= 0; i-- { + matchIndices := indices[i] + // Why we're doing this whole song and dance of indices is to properly handle empty matches + // Plus it's a little cleaner to surgically replace our matches + // If we were to use string.replace and encountered an empty match there'd be nothing to replace + // But using indices an empty match would have its starting and ending indices be the same + // So when we're cutting open the array we say 0:7 + modified + 7:end + // As if concatenating in the middle of the array + // Plus it supports lookarounds + match := content[matchIndices[0]:matchIndices[1]] + + groups := matchIndices[2:] + if len(groups) <= 0 { fmt.Println("No capture groups for lua to chew on") - return match + continue + } + if len(groups)%2 == 1 { + fmt.Println("Odd number of indices of groups, what the fuck?") + continue + } + + captures := make([]string, 0, len(groups)/2) + for j := 0; j < len(groups); j += 2 { + captures = append(captures, content[groups[j]:groups[j+1]]) } if err := p.ToLua(L, captures); err != nil { fmt.Println("Error setting Lua variables:", err) - return match + continue } - // Execute the user's Lua code if err := L.DoString(luaExpr); err != nil { - fmt.Println("Error executing Lua code:", err) - return match // Return unchanged on error + fmt.Printf("Error executing Lua code %s for group %s: %v", luaExpr, captures, err) + continue } // Get modifications from Lua modResult, err := p.FromLua(L) if err != nil { fmt.Println("Error getting modifications:", err) - return match + continue } // Apply modifications to the matched text modsMap, ok := modResult.(map[int]string) if !ok || len(modsMap) == 0 { fmt.Println("No modifications to apply") - return match // No changes + continue } // Apply the modifications to the original match - result := match - for i, newVal := range modsMap { - oldVal := captures[i+1] - result = strings.Replace(result, oldVal, newVal, 1) + replacement := match + for i := len(modsMap) - 1; i >= 0; i-- { + newVal := modsMap[i] + // Indices of the group are relative to content + // To relate them to match we have to subtract the match start index + groupStart := groups[i*2] - matchIndices[0] + groupEnd := groups[i*2+1] - matchIndices[0] + replacement = replacement[:groupStart] + newVal + replacement[groupEnd:] } modificationCount++ - return result - }) + result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:] + } - return result, modificationCount, matchCount, nil + return result, modificationCount, len(indices), nil } diff --git a/processor/regex_test.go b/processor/regex_test.go index 425be8e..a0d3f6b 100644 --- a/processor/regex_test.go +++ b/processor/regex_test.go @@ -181,7 +181,7 @@ func TestArrayNotation(t *testing.T) { } } -func TestMultipleMatches(t *testing.T) { +func TestMultipleNumericMatches(t *testing.T) { content := ` 50 100 @@ -212,19 +212,21 @@ func TestMultipleMatches(t *testing.T) { if result != expected { t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) } +} - // Test string operations - content = ` +func TestMultipleStringMatches(t *testing.T) { + content := ` John Mary ` - expected = ` + expected := ` John_modified Mary_modified ` - result, mods, matches, err = p.ProcessContent(content, `([A-Za-z]+)`, `s1 = s1 .. "_modified"`) + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent(content, `([A-Za-z]+)`, `s1 = s1 .. "_modified"`) if err != nil { t.Fatalf("Error processing content: %v", err) @@ -243,7 +245,7 @@ func TestMultipleMatches(t *testing.T) { } } -func TestStringOperations(t *testing.T) { +func TestStringUpperCase(t *testing.T) { content := ` John Mary @@ -273,19 +275,21 @@ func TestStringOperations(t *testing.T) { if result != expected { t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result) } +} - // Test string concatenation - content = ` +func TestStringConcatenation(t *testing.T) { + content := ` Apple Banana ` - expected = ` + expected := ` Apple_fruit Banana_fruit ` - result, mods, matches, err = p.ProcessContent(content, `([A-Za-z]+)`, `s1 = s1 .. "_fruit"`) + p := &RegexProcessor{} + result, mods, matches, err := p.ProcessContent(content, `([A-Za-z]+)`, `s1 = s1 .. "_fruit"`) if err != nil { t.Fatalf("Error processing content: %v", err)