283 lines
9.5 KiB
Go
283 lines
9.5 KiB
Go
package processor
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
lua "github.com/yuin/gopher-lua"
|
|
)
|
|
|
|
// RegexProcessor implements the Processor interface using regex patterns.
// It has no fields; its methods pass regex captures into a Lua state and read
// the resulting values back out.
type RegexProcessor struct{}
|
|
|
|
// ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings)
|
|
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
|
|
captures, ok := data.([]string)
|
|
if !ok {
|
|
return fmt.Errorf("expected []string for captures, got %T", data)
|
|
}
|
|
|
|
// Set variables for each capture group, starting from v1/s1 for the first capture
|
|
for i := 0; i < len(captures); i++ {
|
|
// Set string version (always available as s1, s2, etc.)
|
|
L.SetGlobal(fmt.Sprintf("s%d", i+1), lua.LString(captures[i]))
|
|
|
|
// Try to convert to number and set v1, v2, etc.
|
|
if val, err := strconv.ParseFloat(captures[i], 64); err == nil {
|
|
L.SetGlobal(fmt.Sprintf("v%d", i+1), lua.LNumber(val))
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// FromLua implements the Processor interface for RegexProcessor
|
|
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
|
|
// Get the modified values after Lua execution
|
|
modifications := make(map[int]string)
|
|
|
|
// Check for modifications to v1-v12 and s1-s12
|
|
for i := 0; i < 12; i++ {
|
|
// Check both v and s variables to see if any were modified
|
|
vVarName := fmt.Sprintf("v%d", i+1)
|
|
sVarName := fmt.Sprintf("s%d", i+1)
|
|
|
|
vLuaVal := L.GetGlobal(vVarName)
|
|
sLuaVal := L.GetGlobal(sVarName)
|
|
|
|
// If our value is a number then it's very likely we want it to be a number
|
|
// And not a string
|
|
// If we do want it to be a string we will cast it into a string in lua
|
|
// wait that wouldn't work... Casting v to a string would not load it here
|
|
if vLuaVal.Type() == lua.LTNumber {
|
|
modifications[i] = vLuaVal.String()
|
|
continue
|
|
}
|
|
if sLuaVal.Type() == lua.LTString {
|
|
modifications[i] = sLuaVal.String()
|
|
continue
|
|
}
|
|
|
|
}
|
|
|
|
return modifications, nil
|
|
}
|
|
|
|
// NamedCapture records one named capture group of a single regex match.
type NamedCapture struct {
	// Name is the group's name in the pattern; Value is the text it captured.
	Name, Value string
	// Range holds the start and end offsets of the captured text.
	Range [2]int
}
|
|
|
|
// ProcessContent applies regex replacement with Lua processing
|
|
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
|
|
// Handle special pattern modifications
|
|
if !strings.HasPrefix(pattern, "(?s)") {
|
|
pattern = "(?s)" + pattern
|
|
log.Printf("Pattern modified to include (?s): %s", pattern)
|
|
}
|
|
|
|
// The order of these replaces is important
|
|
// This one handles !num-s inside of named capture groups
|
|
// If it were not here our !num in a named capture group would
|
|
// Expand to another capture group in the capture group
|
|
// We really only want one (our named) capture group
|
|
namedGroupNum := regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)
|
|
pattern = namedGroupNum.ReplaceAllStringFunc(pattern, func(match string) string {
|
|
parts := namedGroupNum.FindStringSubmatch(match)
|
|
if len(parts) != 3 {
|
|
return match
|
|
}
|
|
replacement := `\d*\.?\d+`
|
|
return parts[1] + replacement
|
|
})
|
|
pattern = strings.ReplaceAll(pattern, "!num", `"?(\d*\.?\d+)"?`)
|
|
pattern = strings.ReplaceAll(pattern, "!any", `.*?`)
|
|
repPattern := regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)
|
|
// !rep(pattern, count) repeats the pattern n times
|
|
// Inserting !any between each repetition
|
|
pattern = repPattern.ReplaceAllStringFunc(pattern, func(match string) string {
|
|
parts := repPattern.FindStringSubmatch(match)
|
|
if len(parts) != 3 {
|
|
return match
|
|
}
|
|
repeatedPattern := parts[1]
|
|
count := parts[2]
|
|
repetitions, _ := strconv.Atoi(count)
|
|
return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern
|
|
})
|
|
|
|
compiledPattern, err := regexp.Compile(pattern)
|
|
if err != nil {
|
|
log.Printf("Error compiling pattern: %v", err)
|
|
return "", 0, 0, fmt.Errorf("error compiling pattern: %v", err)
|
|
}
|
|
log.Printf("Compiled pattern successfully: %s", pattern)
|
|
|
|
previous := luaExpr
|
|
luaExpr = BuildLuaScript(luaExpr)
|
|
log.Printf("Changing Lua expression from: %s to: %s", previous, luaExpr)
|
|
|
|
// Initialize Lua environment
|
|
modificationCount := 0
|
|
|
|
// Process all regex matches
|
|
result := content
|
|
indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
|
|
log.Printf("Found %d matches in the content", len(indices))
|
|
|
|
// We walk backwards because we're replacing something with something else that might be longer
|
|
// And in the case it is longer than the original all indicces past that change will be fucked up
|
|
// By going backwards we fuck up all the indices to the end of the file that we don't care about
|
|
// Because there either aren't any (last match) or they're already modified (subsequent matches)
|
|
for i := len(indices) - 1; i >= 0; i-- {
|
|
L, err := NewLuaState()
|
|
if err != nil {
|
|
log.Printf("Error creating Lua state: %v", err)
|
|
return "", 0, 0, fmt.Errorf("error creating Lua state: %v", err)
|
|
}
|
|
// Hmm... Maybe we don't want to defer this..
|
|
// Maybe we want to close them every iteration
|
|
// We'll leave it as is for now
|
|
defer L.Close()
|
|
log.Printf("Lua state created successfully")
|
|
|
|
matchIndices := indices[i]
|
|
log.Printf("Processing match indices: %v", matchIndices)
|
|
|
|
// Why we're doing this whole song and dance of indices is to properly handle empty matches
|
|
// Plus it's a little cleaner to surgically replace our matches
|
|
// If we were to use string.replace and encountered an empty match there'd be nothing to replace
|
|
// But using indices an empty match would have its starting and ending indices be the same
|
|
// So when we're cutting open the array we say 0:7 + modified + 7:end
|
|
// As if concatenating in the middle of the array
|
|
// Plus it supports lookarounds
|
|
match := content[matchIndices[0]:matchIndices[1]]
|
|
log.Printf("Matched content: %s", match)
|
|
|
|
groups := matchIndices[2:]
|
|
if len(groups) <= 0 {
|
|
log.Println("No capture groups for lua to chew on")
|
|
continue
|
|
}
|
|
if len(groups)%2 == 1 {
|
|
log.Println("Odd number of indices of groups, what the fuck?")
|
|
continue
|
|
}
|
|
for _, index := range groups {
|
|
if index == -1 {
|
|
// return "", 0, 0, fmt.Errorf("negative indices encountered: %v. This indicates that there was an issue with the match indices, possibly due to an empty match or an unexpected pattern. Please check the regex pattern and input content.", matchIndices)
|
|
log.Printf("Negative indices encountered: %v. This indicates that there was an issue with the match indices, possibly due to an empty match or an unexpected pattern. This is not an error but it's possibly not what you want.", matchIndices)
|
|
continue
|
|
}
|
|
}
|
|
|
|
captures := make([]string, 0, len(groups)/2)
|
|
for j := 0; j < len(groups); j += 2 {
|
|
if groups[j] == -1 || groups[j+1] == -1 {
|
|
continue
|
|
}
|
|
captures = append(captures, content[groups[j]:groups[j+1]])
|
|
}
|
|
log.Printf("Captured groups: %v", captures)
|
|
|
|
// We have to use array to preserve order
|
|
// Very important for the reconstruction step
|
|
// Because we must overwrite the values in reverse order
|
|
// See comments a few dozen lines above for more details
|
|
namedCaptures := make([]NamedCapture, 0, len(groups)/2)
|
|
groupNames := compiledPattern.SubexpNames()[1:]
|
|
for i, name := range groupNames {
|
|
if name == "" {
|
|
continue
|
|
}
|
|
if groups[i*2] == -1 || groups[i*2+1] == -1 {
|
|
continue
|
|
}
|
|
namedCaptures = append(namedCaptures, NamedCapture{
|
|
Name: name,
|
|
Value: captures[i],
|
|
Range: [2]int{groups[i*2], groups[i*2+1]},
|
|
})
|
|
}
|
|
|
|
log.Printf("Named captures: %v", namedCaptures)
|
|
|
|
if err := p.ToLua(L, captures); err != nil {
|
|
log.Printf("Error setting Lua variables: %v", err)
|
|
continue
|
|
}
|
|
log.Println("Lua variables set successfully")
|
|
|
|
for _, capture := range namedCaptures {
|
|
if capture.Name == "" {
|
|
continue
|
|
}
|
|
if val, err := strconv.ParseFloat(capture.Value, 64); err == nil {
|
|
L.SetGlobal(capture.Name, lua.LNumber(val))
|
|
} else {
|
|
L.SetGlobal(capture.Name, lua.LString(capture.Value))
|
|
}
|
|
}
|
|
|
|
if err := L.DoString(luaExpr); err != nil {
|
|
log.Printf("Error executing Lua code %s for group %s: %v", luaExpr, captures, err)
|
|
continue
|
|
}
|
|
log.Println("Lua code executed successfully")
|
|
|
|
// Get modifications from Lua
|
|
modResult, err := p.FromLua(L)
|
|
if err != nil {
|
|
log.Printf("Error getting modifications: %v", err)
|
|
continue
|
|
}
|
|
|
|
// Apply modifications to the matched text
|
|
modsMap, ok := modResult.(map[int]string)
|
|
if !ok || len(modsMap) == 0 {
|
|
log.Println("No modifications to apply")
|
|
continue
|
|
}
|
|
|
|
replacement := ""
|
|
replacementVar := L.GetGlobal("replacement")
|
|
if replacementVar.Type() != lua.LTNil {
|
|
replacement = replacementVar.String()
|
|
}
|
|
if replacement == "" {
|
|
// Apply the modifications to the original match
|
|
replacement = match
|
|
for i := len(modsMap) - 1; i >= 0; i-- {
|
|
newVal := modsMap[i]
|
|
log.Printf("Applying modification: %s", newVal)
|
|
// Indices of the group are relative to content
|
|
// To relate them to match we have to subtract the match start index
|
|
groupStart := groups[i*2] - matchIndices[0]
|
|
groupEnd := groups[i*2+1] - matchIndices[0]
|
|
replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
|
|
}
|
|
|
|
for i := len(namedCaptures) - 1; i >= 0; i-- {
|
|
capture := namedCaptures[i]
|
|
if capture.Name == "" {
|
|
continue
|
|
}
|
|
groupStart := capture.Range[0] - matchIndices[0]
|
|
groupEnd := capture.Range[1] - matchIndices[0]
|
|
luaValue := L.GetGlobal(capture.Name).String()
|
|
replacement = replacement[:groupStart] + luaValue + replacement[groupEnd:]
|
|
}
|
|
}
|
|
modificationCount++
|
|
result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:]
|
|
log.Printf("Modification count updated: %d", modificationCount)
|
|
}
|
|
|
|
log.Printf("Process completed with %d modifications", modificationCount)
|
|
return result, modificationCount, len(indices), nil
|
|
}
|