286 lines
9.2 KiB
Go
286 lines
9.2 KiB
Go
package processor
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
|
|
lua "github.com/yuin/gopher-lua"
|
|
)
|
|
|
|
// RegexProcessor is the regex-driven implementation of the Processor
// interface. It carries no state of its own; everything it needs is passed to
// its methods.
type RegexProcessor struct {
}
|
|
|
|
// ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings)
|
|
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
|
|
captureGroups, ok := data.([]*CaptureGroup)
|
|
if !ok {
|
|
return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data)
|
|
}
|
|
|
|
groupindex := 0
|
|
for _, capture := range captureGroups {
|
|
if capture.Name == "" {
|
|
// We don't want to change the name of the capture group
|
|
// Even if it's empty
|
|
tempName := fmt.Sprintf("%d", groupindex+1)
|
|
groupindex++
|
|
|
|
L.SetGlobal("s"+tempName, lua.LString(capture.Value))
|
|
|
|
val, err := strconv.ParseFloat(capture.Value, 64)
|
|
if err == nil {
|
|
L.SetGlobal("v"+tempName, lua.LNumber(val))
|
|
}
|
|
} else {
|
|
val, err := strconv.ParseFloat(capture.Value, 64)
|
|
if err == nil {
|
|
L.SetGlobal(capture.Name, lua.LNumber(val))
|
|
} else {
|
|
L.SetGlobal(capture.Name, lua.LString(capture.Value))
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
|
|
// Stub to satisfy interface
|
|
return nil, nil
|
|
}
|
|
|
|
// FromLua implements the Processor interface for RegexProcessor
|
|
func (p *RegexProcessor) FromLuaCustom(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) {
|
|
captureIndex := 0
|
|
for _, capture := range captureGroups {
|
|
if capture.Name == "" {
|
|
capture.Name = fmt.Sprintf("%d", captureIndex+1)
|
|
|
|
vVarName := fmt.Sprintf("v%s", capture.Name)
|
|
sVarName := fmt.Sprintf("s%s", capture.Name)
|
|
captureIndex++
|
|
|
|
vLuaVal := L.GetGlobal(vVarName)
|
|
sLuaVal := L.GetGlobal(sVarName)
|
|
|
|
if sLuaVal.Type() == lua.LTString {
|
|
capture.Updated = sLuaVal.String()
|
|
}
|
|
// Numbers have priority
|
|
if vLuaVal.Type() == lua.LTNumber {
|
|
capture.Updated = vLuaVal.String()
|
|
}
|
|
} else {
|
|
// Easy shit
|
|
capture.Updated = L.GetGlobal(capture.Name).String()
|
|
}
|
|
}
|
|
|
|
return captureGroups, nil
|
|
}
|
|
|
|
// CaptureGroup describes one regex capture group of a single match and the
// value Lua produced for it.
type CaptureGroup struct {
	// Name is the group's name in the pattern; it is empty for unnamed groups
	// until FromLuaCustom replaces it with the group's positional number.
	Name string
	// Value is the text the group captured in the original content.
	Value string
	// Updated is the text read back from Lua that replaces Value.
	Updated string
	// Range holds the group's start and end offsets within the content.
	Range [2]int
}
|
|
// ReplaceCommand is a single splice operation: the text in [From, To) is
// replaced by With.
type ReplaceCommand struct {
	// From is the offset at which the replaced span starts.
	From int
	// To is the offset just past the end of the replaced span.
	To int
	// With is the text inserted in place of the span.
	With string
}
|
|
|
|
// ProcessContent applies regex replacement with Lua processing
|
|
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
|
|
pattern = ResolveRegexPlaceholders(pattern)
|
|
compiledPattern, err := regexp.Compile(pattern)
|
|
if err != nil {
|
|
log.Printf("Error compiling pattern: %v", err)
|
|
return "", 0, 0, fmt.Errorf("error compiling pattern: %v", err)
|
|
}
|
|
log.Printf("Compiled pattern successfully: %s", pattern)
|
|
|
|
previous := luaExpr
|
|
luaExpr = BuildLuaScript(luaExpr)
|
|
log.Printf("Changing Lua expression from: %s to: %s", previous, luaExpr)
|
|
|
|
// Initialize Lua environment
|
|
modificationCount := 0
|
|
|
|
// Process all regex matches
|
|
result := content
|
|
indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
|
|
log.Printf("Found %d matches in the content", len(indices))
|
|
|
|
// We walk backwards because we're replacing something with something else that might be longer
|
|
// And in the case it is longer than the original all indicces past that change will be fucked up
|
|
// By going backwards we fuck up all the indices to the end of the file that we don't care about
|
|
// Because there either aren't any (last match) or they're already modified (subsequent matches)
|
|
for i := len(indices) - 1; i >= 0; i-- {
|
|
L, err := NewLuaState()
|
|
if err != nil {
|
|
log.Printf("Error creating Lua state: %v", err)
|
|
return "", 0, 0, fmt.Errorf("error creating Lua state: %v", err)
|
|
}
|
|
// Hmm... Maybe we don't want to defer this..
|
|
// Maybe we want to close them every iteration
|
|
// We'll leave it as is for now
|
|
defer L.Close()
|
|
log.Printf("Lua state created successfully")
|
|
|
|
matchIndices := indices[i]
|
|
log.Printf("Processing match indices: %v", matchIndices)
|
|
|
|
// Why we're doing this whole song and dance of indices is to properly handle empty matches
|
|
// Plus it's a little cleaner to surgically replace our matches
|
|
// If we were to use string.replace and encountered an empty match there'd be nothing to replace
|
|
// But using indices an empty match would have its starting and ending indices be the same
|
|
// So when we're cutting open the array we say 0:7 + modified + 7:end
|
|
// As if concatenating in the middle of the array
|
|
// Plus it supports lookarounds
|
|
match := content[matchIndices[0]:matchIndices[1]]
|
|
log.Printf("Matched content: %s", match)
|
|
|
|
groups := matchIndices[2:]
|
|
if len(groups) <= 0 {
|
|
log.Println("No capture groups for lua to chew on")
|
|
continue
|
|
}
|
|
if len(groups)%2 == 1 {
|
|
log.Println("Odd number of indices of groups, what the fuck?")
|
|
continue
|
|
}
|
|
for _, index := range groups {
|
|
if index == -1 {
|
|
// return "", 0, 0, fmt.Errorf("negative indices encountered: %v. This indicates that there was an issue with the match indices, possibly due to an empty match or an unexpected pattern. Please check the regex pattern and input content.", matchIndices)
|
|
log.Printf("Negative indices encountered: %v. This indicates that there was an issue with the match indices, possibly due to an empty match or an unexpected pattern. This is not an error but it's possibly not what you want.", matchIndices)
|
|
continue
|
|
}
|
|
}
|
|
|
|
// We have to use array to preserve order
|
|
// Very important for the reconstruction step
|
|
// Because we must overwrite the values in reverse order
|
|
// See comments a few dozen lines above for more details
|
|
captureGroups := make([]*CaptureGroup, 0, len(groups)/2)
|
|
groupNames := compiledPattern.SubexpNames()[1:]
|
|
for i, name := range groupNames {
|
|
// if name == "" {
|
|
// continue
|
|
// }
|
|
start := groups[i*2]
|
|
end := groups[i*2+1]
|
|
if start == -1 || end == -1 {
|
|
continue
|
|
}
|
|
|
|
captureGroups = append(captureGroups, &CaptureGroup{
|
|
Name: name,
|
|
Value: content[start:end],
|
|
Range: [2]int{start, end},
|
|
})
|
|
}
|
|
|
|
log.Printf("Capture groups: %v", captureGroups)
|
|
|
|
if err := p.ToLua(L, captureGroups); err != nil {
|
|
log.Printf("Error setting Lua variables: %v", err)
|
|
continue
|
|
}
|
|
log.Println("Lua variables set successfully")
|
|
|
|
if err := L.DoString(luaExpr); err != nil {
|
|
log.Printf("Error executing Lua code %s for groups %+v: %v", luaExpr, captureGroups, err)
|
|
continue
|
|
}
|
|
log.Println("Lua code executed successfully")
|
|
|
|
// Get modifications from Lua
|
|
captureGroups, err = p.FromLuaCustom(L, captureGroups)
|
|
if err != nil {
|
|
log.Printf("Error getting modifications: %v", err)
|
|
continue
|
|
}
|
|
|
|
replacement := ""
|
|
replacementVar := L.GetGlobal("replacement")
|
|
if replacementVar.Type() != lua.LTNil {
|
|
replacement = replacementVar.String()
|
|
}
|
|
if replacement == "" {
|
|
commands := make([]ReplaceCommand, 0, len(captureGroups))
|
|
// Apply the modifications to the original match
|
|
replacement = match
|
|
for _, capture := range captureGroups {
|
|
log.Printf("Applying modification: %s", capture.Updated)
|
|
// Indices of the group are relative to content
|
|
// To relate them to match we have to subtract the match start index
|
|
// replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
|
|
commands = append(commands, ReplaceCommand{
|
|
From: capture.Range[0] - matchIndices[0],
|
|
To: capture.Range[1] - matchIndices[0],
|
|
With: capture.Updated,
|
|
})
|
|
}
|
|
|
|
sort.Slice(commands, func(i, j int) bool {
|
|
return commands[i].From > commands[j].From
|
|
})
|
|
|
|
for _, command := range commands {
|
|
replacement = replacement[:command.From] + command.With + replacement[command.To:]
|
|
}
|
|
}
|
|
modificationCount++
|
|
result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:]
|
|
log.Printf("Modification count updated: %d", modificationCount)
|
|
}
|
|
|
|
log.Printf("Process completed with %d modifications", modificationCount)
|
|
return result, modificationCount, len(indices), nil
|
|
}
|
|
|
|
// Placeholder patterns used by ResolveRegexPlaceholders, compiled once at
// package load instead of on every call.
var (
	// namedGroupNumPattern matches the opener of a named group immediately
	// followed by !num, e.g. `?<price>!num`.
	namedGroupNumPattern = regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)
	// repPlaceholderPattern matches `!rep(pattern, count)`.
	repPlaceholderPattern = regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)
)

// ResolveRegexPlaceholders expands the shorthand placeholders in pattern into
// regular-expression syntax and returns the result.
//
// The steps run in this order, and the order matters:
//
//  1. "(?s)" is prepended unless the pattern already starts with it.
//  2. !num directly after a named-group opener becomes a bare number
//     expression. This runs before step 3 so that the named group does not
//     end up containing a second, nested capture group.
//  3. Every remaining !num becomes an optionally quoted number in its own
//     capture group.
//  4. !any becomes a lazy ".*?".
//  5. !rep(pattern, count) becomes pattern repeated count times, joined by
//     ".*?". A count of 0 expands to nothing; a count too large to fit in an
//     int is logged and the placeholder is left unchanged.
func ResolveRegexPlaceholders(pattern string) string {
	if !strings.HasPrefix(pattern, "(?s)") {
		pattern = "(?s)" + pattern
		log.Printf("Pattern modified to include (?s): %s", pattern)
	}

	// ${1} is the named-group opener; only the !num after it is rewritten.
	pattern = namedGroupNumPattern.ReplaceAllString(pattern, `${1}-?\d*\.?\d+`)
	pattern = strings.ReplaceAll(pattern, "!num", `"?(-?\d*\.?\d+)"?`)
	pattern = strings.ReplaceAll(pattern, "!any", `.*?`)

	pattern = repPlaceholderPattern.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := repPlaceholderPattern.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		repeated := parts[1]

		// The regex guarantees digits only, so Atoi can fail only on overflow.
		repetitions, err := strconv.Atoi(parts[2])
		if err != nil {
			log.Printf("Invalid repetition count in %s: %v", match, err)
			return match
		}
		// strings.Repeat panics on a negative count, which count 0 would cause.
		if repetitions <= 0 {
			return ""
		}
		return strings.Repeat(repeated+".*?", repetitions-1) + repeated
	})

	return pattern
}
|