Rework regex processor to be more betterer
This commit is contained in:
@@ -50,13 +50,13 @@ func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Set variables for each capture group, starting from v1/s1 for the first capture
|
// Set variables for each capture group, starting from v1/s1 for the first capture
|
||||||
for i := 1; i < len(captures); i++ {
|
for i := 0; i < len(captures); i++ {
|
||||||
// Set string version (always available as s1, s2, etc.)
|
// Set string version (always available as s1, s2, etc.)
|
||||||
L.SetGlobal(fmt.Sprintf("s%d", i), lua.LString(captures[i]))
|
L.SetGlobal(fmt.Sprintf("s%d", i+1), lua.LString(captures[i]))
|
||||||
|
|
||||||
// Try to convert to number and set v1, v2, etc.
|
// Try to convert to number and set v1, v2, etc.
|
||||||
if val, err := strconv.ParseFloat(captures[i], 64); err == nil {
|
if val, err := strconv.ParseFloat(captures[i], 64); err == nil {
|
||||||
L.SetGlobal(fmt.Sprintf("v%d", i), lua.LNumber(val))
|
L.SetGlobal(fmt.Sprintf("v%d", i+1), lua.LNumber(val))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -111,70 +111,86 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr
|
|||||||
luaExpr = BuildLuaScript(luaExpr)
|
luaExpr = BuildLuaScript(luaExpr)
|
||||||
fmt.Printf("Changing Lua expression from: %s to: %s\n", previous, luaExpr)
|
fmt.Printf("Changing Lua expression from: %s to: %s\n", previous, luaExpr)
|
||||||
|
|
||||||
L := lua.NewState()
|
L, err := NewLuaState()
|
||||||
|
if err != nil {
|
||||||
|
return "", 0, 0, fmt.Errorf("error creating Lua state: %v", err)
|
||||||
|
}
|
||||||
defer L.Close()
|
defer L.Close()
|
||||||
|
|
||||||
// Initialize Lua environment
|
// Initialize Lua environment
|
||||||
modificationCount := 0
|
modificationCount := 0
|
||||||
matchCount := 0
|
|
||||||
|
|
||||||
// Load math library
|
|
||||||
L.Push(L.GetGlobal("require"))
|
|
||||||
L.Push(lua.LString("math"))
|
|
||||||
if err := L.PCall(1, 1, nil); err != nil {
|
|
||||||
return content, 0, 0, fmt.Errorf("error loading Lua math library: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize helper functions
|
|
||||||
if err := InitLuaHelpers(L); err != nil {
|
|
||||||
return content, 0, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process all regex matches
|
// Process all regex matches
|
||||||
result := compiledPattern.ReplaceAllStringFunc(content, func(match string) string {
|
result := content
|
||||||
matchCount++
|
indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
|
||||||
captures := compiledPattern.FindStringSubmatch(match)
|
// We walk backwards because we're replacing something with something else that might be longer
|
||||||
if len(captures) <= 1 {
|
// And in the case it is longer than the original all indices past that change will be fucked up
|
||||||
// No capture groups, return unchanged
|
// By going backwards we fuck up all the indices to the end of the file that we don't care about
|
||||||
|
// Because there either aren't any (last match) or they're already modified (subsequent matches)
|
||||||
|
for i := len(indices) - 1; i >= 0; i-- {
|
||||||
|
matchIndices := indices[i]
|
||||||
|
// Why we're doing this whole song and dance of indices is to properly handle empty matches
|
||||||
|
// Plus it's a little cleaner to surgically replace our matches
|
||||||
|
// If we were to use string.replace and encountered an empty match there'd be nothing to replace
|
||||||
|
// But using indices an empty match would have its starting and ending indices be the same
|
||||||
|
// So when we're cutting open the array we say 0:7 + modified + 7:end
|
||||||
|
// As if concatenating in the middle of the array
|
||||||
|
// Plus it supports lookarounds
|
||||||
|
match := content[matchIndices[0]:matchIndices[1]]
|
||||||
|
|
||||||
|
groups := matchIndices[2:]
|
||||||
|
if len(groups) <= 0 {
|
||||||
fmt.Println("No capture groups for lua to chew on")
|
fmt.Println("No capture groups for lua to chew on")
|
||||||
return match
|
continue
|
||||||
|
}
|
||||||
|
if len(groups)%2 == 1 {
|
||||||
|
fmt.Println("Odd number of indices of groups, what the fuck?")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
captures := make([]string, 0, len(groups)/2)
|
||||||
|
for j := 0; j < len(groups); j += 2 {
|
||||||
|
captures = append(captures, content[groups[j]:groups[j+1]])
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := p.ToLua(L, captures); err != nil {
|
if err := p.ToLua(L, captures); err != nil {
|
||||||
fmt.Println("Error setting Lua variables:", err)
|
fmt.Println("Error setting Lua variables:", err)
|
||||||
return match
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute the user's Lua code
|
|
||||||
if err := L.DoString(luaExpr); err != nil {
|
if err := L.DoString(luaExpr); err != nil {
|
||||||
fmt.Println("Error executing Lua code:", err)
|
fmt.Printf("Error executing Lua code %s for group %s: %v", luaExpr, captures, err)
|
||||||
return match // Return unchanged on error
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get modifications from Lua
|
// Get modifications from Lua
|
||||||
modResult, err := p.FromLua(L)
|
modResult, err := p.FromLua(L)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("Error getting modifications:", err)
|
fmt.Println("Error getting modifications:", err)
|
||||||
return match
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply modifications to the matched text
|
// Apply modifications to the matched text
|
||||||
modsMap, ok := modResult.(map[int]string)
|
modsMap, ok := modResult.(map[int]string)
|
||||||
if !ok || len(modsMap) == 0 {
|
if !ok || len(modsMap) == 0 {
|
||||||
fmt.Println("No modifications to apply")
|
fmt.Println("No modifications to apply")
|
||||||
return match // No changes
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply the modifications to the original match
|
// Apply the modifications to the original match
|
||||||
result := match
|
replacement := match
|
||||||
for i, newVal := range modsMap {
|
for i := len(modsMap) - 1; i >= 0; i-- {
|
||||||
oldVal := captures[i+1]
|
newVal := modsMap[i]
|
||||||
result = strings.Replace(result, oldVal, newVal, 1)
|
// Indices of the group are relative to content
|
||||||
|
// To relate them to match we have to subtract the match start index
|
||||||
|
groupStart := groups[i*2] - matchIndices[0]
|
||||||
|
groupEnd := groups[i*2+1] - matchIndices[0]
|
||||||
|
replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
|
||||||
}
|
}
|
||||||
|
|
||||||
modificationCount++
|
modificationCount++
|
||||||
return result
|
result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:]
|
||||||
})
|
}
|
||||||
|
|
||||||
return result, modificationCount, matchCount, nil
|
return result, modificationCount, len(indices), nil
|
||||||
}
|
}
|
||||||
|
@@ -181,7 +181,7 @@ func TestArrayNotation(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMultipleMatches(t *testing.T) {
|
func TestMultipleNumericMatches(t *testing.T) {
|
||||||
content := `<data>
|
content := `<data>
|
||||||
<entry>50</entry>
|
<entry>50</entry>
|
||||||
<entry>100</entry>
|
<entry>100</entry>
|
||||||
@@ -212,19 +212,21 @@ func TestMultipleMatches(t *testing.T) {
|
|||||||
if result != expected {
|
if result != expected {
|
||||||
t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
|
t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Test string operations
|
func TestMultipleStringMatches(t *testing.T) {
|
||||||
content = `<data>
|
content := `<data>
|
||||||
<name>John</name>
|
<name>John</name>
|
||||||
<name>Mary</name>
|
<name>Mary</name>
|
||||||
</data>`
|
</data>`
|
||||||
|
|
||||||
expected = `<data>
|
expected := `<data>
|
||||||
<name>John_modified</name>
|
<name>John_modified</name>
|
||||||
<name>Mary_modified</name>
|
<name>Mary_modified</name>
|
||||||
</data>`
|
</data>`
|
||||||
|
|
||||||
result, mods, matches, err = p.ProcessContent(content, `<name>([A-Za-z]+)</name>`, `s1 = s1 .. "_modified"`)
|
p := &RegexProcessor{}
|
||||||
|
result, mods, matches, err := p.ProcessContent(content, `<name>([A-Za-z]+)</name>`, `s1 = s1 .. "_modified"`)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error processing content: %v", err)
|
t.Fatalf("Error processing content: %v", err)
|
||||||
@@ -243,7 +245,7 @@ func TestMultipleMatches(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestStringOperations(t *testing.T) {
|
func TestStringUpperCase(t *testing.T) {
|
||||||
content := `<users>
|
content := `<users>
|
||||||
<user>John</user>
|
<user>John</user>
|
||||||
<user>Mary</user>
|
<user>Mary</user>
|
||||||
@@ -273,19 +275,21 @@ func TestStringOperations(t *testing.T) {
|
|||||||
if result != expected {
|
if result != expected {
|
||||||
t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
|
t.Errorf("Expected content to be:\n%s\n\nGot:\n%s", expected, result)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Test string concatenation
|
func TestStringConcatenation(t *testing.T) {
|
||||||
content = `<products>
|
content := `<products>
|
||||||
<product>Apple</product>
|
<product>Apple</product>
|
||||||
<product>Banana</product>
|
<product>Banana</product>
|
||||||
</products>`
|
</products>`
|
||||||
|
|
||||||
expected = `<products>
|
expected := `<products>
|
||||||
<product>Apple_fruit</product>
|
<product>Apple_fruit</product>
|
||||||
<product>Banana_fruit</product>
|
<product>Banana_fruit</product>
|
||||||
</products>`
|
</products>`
|
||||||
|
|
||||||
result, mods, matches, err = p.ProcessContent(content, `<product>([A-Za-z]+)</product>`, `s1 = s1 .. "_fruit"`)
|
p := &RegexProcessor{}
|
||||||
|
result, mods, matches, err := p.ProcessContent(content, `<product>([A-Za-z]+)</product>`, `s1 = s1 .. "_fruit"`)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error processing content: %v", err)
|
t.Fatalf("Error processing content: %v", err)
|
||||||
|
Reference in New Issue
Block a user