Files
BigChef/processor/regex.go
2025-03-26 22:26:02 +01:00

288 lines
9.3 KiB
Go

package processor
import (
"fmt"
"log"
"regexp"
"sort"
"strconv"
"strings"
lua "github.com/yuin/gopher-lua"
)
// RegexProcessor implements the Processor interface using regex patterns.
// It has no fields; all inputs are passed to its methods as arguments.
type RegexProcessor struct{}
// ToLua publishes regex capture groups to the Lua state as global variables.
//
// data must be a []*CaptureGroup; any other type yields an error.
//
// Unnamed groups are numbered from 1 in slice order: each one is exposed as
// the string global sN and, when its value parses as a float, also as the
// number global vN. Named groups are exposed under their own name, as a
// number when the value parses as a float and as a string otherwise.
// The capture groups themselves are not modified.
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
	groups, isGroups := data.([]*CaptureGroup)
	if !isGroups {
		return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data)
	}

	unnamedSeen := 0
	for _, group := range groups {
		number, parseErr := strconv.ParseFloat(group.Value, 64)
		isNumeric := parseErr == nil

		if group.Name != "" {
			// Named group: a single global whose Lua type follows the value.
			if isNumeric {
				L.SetGlobal(group.Name, lua.LNumber(number))
			} else {
				L.SetGlobal(group.Name, lua.LString(group.Value))
			}
			continue
		}

		// Unnamed group: the positional suffix is local only, the group's
		// Name is deliberately left empty.
		unnamedSeen++
		suffix := strconv.Itoa(unnamedSeen)
		L.SetGlobal("s"+suffix, lua.LString(group.Value))
		if isNumeric {
			L.SetGlobal("v"+suffix, lua.LNumber(number))
		}
	}
	return nil
}
// FromLua is a placeholder that exists only so RegexProcessor satisfies the
// Processor interface. It ignores the Lua state and always returns (nil, nil);
// reading values back from Lua is done by FromLuaCustom.
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
// Stub to satisfy interface
return nil, nil
}
// FromLuaCustom reads the Lua globals back into captureGroups after a script
// has run, storing the result in each group's Updated field.
//
// Unnamed groups are numbered from 1 in slice order, mirroring ToLua. For the
// N-th unnamed group the string global sN is used if it holds a string, and
// the number global vN overrides it if it holds a number. If neither holds
// the expected type, Updated is left untouched.
// Named groups take the string form of the global with the same name.
//
// The Name field is never modified, so calling this more than once on the
// same slice always reads the same globals. The input slice is returned; the
// error is always nil.
func (p *RegexProcessor) FromLuaCustom(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) {
	unnamedSeen := 0
	for _, capture := range captureGroups {
		if capture.Name != "" {
			capture.Updated = L.GetGlobal(capture.Name).String()
			continue
		}

		// Keep the positional suffix local instead of writing it into
		// capture.Name: an unnamed group must stay unnamed, otherwise a later
		// pass would treat it as a group literally named "1".
		unnamedSeen++
		suffix := strconv.Itoa(unnamedSeen)

		if strVal := L.GetGlobal("s" + suffix); strVal.Type() == lua.LTString {
			capture.Updated = strVal.String()
		}
		// Numbers have priority over the string form.
		if numVal := L.GetGlobal("v" + suffix); numVal.Type() == lua.LTNumber {
			capture.Updated = numVal.String()
		}
	}
	return captureGroups, nil
}
// CaptureGroup describes one regex capture group of a single match and the
// value that should replace it.
type CaptureGroup struct {
// Name is the subexpression name from the pattern; empty for unnamed groups.
Name string
// Value is the text the group matched in the original content.
Value string
// Updated is the text read back from Lua that replaces Value.
Updated string
// Range holds the start and end offsets of the group within the content.
Range [2]int
}
// ReplaceCommand is a single splice operation: the text between From and To
// is replaced by With. ProcessContent builds these with offsets relative to
// the start of the match.
type ReplaceCommand struct {
// From is the start offset of the span to replace.
From int
// To is the end offset (exclusive) of the span to replace.
To int
// With is the replacement text.
With string
}
// ProcessContent applies regex replacement with Lua processing.
//
// pattern is first expanded with ResolveRegexPlaceholders and compiled;
// luaExpr is expanded with BuildLuaScript. Every match of the pattern in
// content is then handed to a fresh Lua state: capture groups are exported
// with ToLua, the script runs, and the results are read back with
// FromLuaCustom. If the script sets a non-empty global "replacement", it
// replaces the whole match; otherwise each capture group is replaced by its
// updated value.
//
// Returns the rewritten content, the number of matches that were modified,
// the total number of matches found, and an error if the pattern does not
// compile or a Lua state cannot be created. Matches without capture groups,
// or whose Lua processing fails, are logged and left unchanged.
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
	pattern = ResolveRegexPlaceholders(pattern)
	compiledPattern, err := regexp.Compile(pattern)
	if err != nil {
		log.Printf("Error compiling pattern: %v", err)
		return "", 0, 0, fmt.Errorf("error compiling pattern: %w", err)
	}
	log.Printf("Compiled pattern successfully: %s", pattern)

	previous := luaExpr
	luaExpr = BuildLuaScript(luaExpr)
	log.Printf("Changing Lua expression from: %s to: %s", previous, luaExpr)

	// Index 0 of SubexpNames is the whole match, which is not a capture group.
	groupNames := compiledPattern.SubexpNames()[1:]
	indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
	log.Printf("Found %d matches in the content", len(indices))

	result := content
	modificationCount := 0
	// Walk the matches backwards: a replacement may be longer or shorter than
	// the text it replaces, which shifts every offset after it. Going from the
	// last match to the first only ever shifts offsets that were already
	// handled, so the remaining indices stay valid.
	for i := len(indices) - 1; i >= 0; i-- {
		matchIndices := indices[i]
		replacement, modified, err := p.processMatch(content, matchIndices, groupNames, luaExpr)
		if err != nil {
			return "", 0, 0, err
		}
		if !modified {
			continue
		}
		modificationCount++
		result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:]
		log.Printf("Modification count updated: %d", modificationCount)
	}

	log.Printf("Process completed with %d modifications", modificationCount)
	return result, modificationCount, len(indices), nil
}

// processMatch runs luaExpr against a single match and returns the text that
// should replace it. The boolean is false when the match must be left as is
// (no capture groups, or a Lua failure that was logged). A non-nil error is
// returned only when the Lua state cannot be created.
//
// Each call owns its Lua state and closes it on return, so states do not
// accumulate across the matches of a large input.
func (p *RegexProcessor) processMatch(content string, matchIndices []int, groupNames []string, luaExpr string) (string, bool, error) {
	L, err := NewLuaState()
	if err != nil {
		log.Printf("Error creating Lua state: %v", err)
		return "", false, fmt.Errorf("error creating Lua state: %w", err)
	}
	defer L.Close()
	log.Printf("Lua state created successfully")
	log.Printf("Processing match indices: %v", matchIndices)

	// Working with offsets instead of string replacement handles empty
	// matches (start == end) and lets us splice the replacement in exactly
	// where the match was found.
	match := content[matchIndices[0]:matchIndices[1]]
	log.Printf("Matched content: %s", match)

	groups := matchIndices[2:]
	if len(groups) <= 0 {
		log.Println("No capture groups for lua to chew on")
		return "", false, nil
	}
	if len(groups)%2 == 1 {
		log.Println("Odd number of indices of groups, what the fuck?")
		return "", false, nil
	}
	// A group that did not take part in the match is reported as -1.
	// That is only worth a warning; such groups are skipped below.
	for _, index := range groups {
		if index == -1 {
			log.Printf("Negative indices encountered: %v. This indicates that there was an issue with the match indices, possibly due to an empty match or an unexpected pattern. This is not an error but it's possibly not what you want.", matchIndices)
		}
	}

	// A slice (not a map) keeps the groups in pattern order, which both the
	// positional Lua variable names and the reconstruction step rely on.
	captureGroups := make([]*CaptureGroup, 0, len(groups)/2)
	for idx, name := range groupNames {
		start, end := groups[idx*2], groups[idx*2+1]
		if start == -1 || end == -1 {
			continue
		}
		captureGroups = append(captureGroups, &CaptureGroup{
			Name:  name,
			Value: content[start:end],
			Range: [2]int{start, end},
		})
	}
	for _, capture := range captureGroups {
		log.Printf("Capture group: %+v", *capture)
	}

	if err := p.ToLua(L, captureGroups); err != nil {
		log.Printf("Error setting Lua variables: %v", err)
		return "", false, nil
	}
	log.Println("Lua variables set successfully")

	if err := L.DoString(luaExpr); err != nil {
		log.Printf("Error executing Lua code %s for groups %+v: %v", luaExpr, captureGroups, err)
		return "", false, nil
	}
	log.Println("Lua code executed successfully")

	// Get modifications from Lua
	captureGroups, err = p.FromLuaCustom(L, captureGroups)
	if err != nil {
		log.Printf("Error getting modifications: %v", err)
		return "", false, nil
	}

	// A non-empty "replacement" global replaces the whole match.
	replacement := ""
	if replacementVar := L.GetGlobal("replacement"); replacementVar.Type() != lua.LTNil {
		replacement = replacementVar.String()
	}
	if replacement != "" {
		return replacement, true, nil
	}

	// Otherwise rebuild the match from the updated capture groups. Group
	// ranges are relative to content, so subtract the match start to make
	// them relative to the match itself.
	commands := make([]ReplaceCommand, 0, len(captureGroups))
	for _, capture := range captureGroups {
		log.Printf("Applying modification: %s", capture.Updated)
		commands = append(commands, ReplaceCommand{
			From: capture.Range[0] - matchIndices[0],
			To:   capture.Range[1] - matchIndices[0],
			With: capture.Updated,
		})
	}
	// Apply right-to-left so earlier offsets are not shifted by later edits.
	sort.Slice(commands, func(a, b int) bool {
		return commands[a].From > commands[b].From
	})
	replacement = match
	for _, command := range commands {
		replacement = replacement[:command.From] + command.With + replacement[command.To:]
	}
	return replacement, true, nil
}
// namedGroupNumPlaceholder matches the opener of a named capture group that is
// immediately followed by !num, e.g. `?<price>!num`.
var namedGroupNumPlaceholder = regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)

// repPlaceholder matches !rep(pattern, count).
var repPlaceholder = regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)

// ResolveRegexPlaceholders expands the shorthand placeholders supported in
// patterns into plain regex syntax and returns the resulting pattern.
//
//   - "(?s)" is prepended unless the pattern already starts with it.
//   - !num directly inside a named group opener (`?<name>!num`) becomes a bare
//     number expression, so the named group stays the only capture group.
//   - Any other !num becomes an optionally quoted number with its own
//     capture group.
//   - !any becomes a lazy `.*?`.
//   - !rep(pattern, count) repeats pattern count times with `.*?` between the
//     repetitions. A count of 0 expands to nothing; a count that does not fit
//     in an int leaves the placeholder untouched.
//
// The order of these steps matters: the named-group form of !num must be
// handled before the generic one, otherwise it would expand to a second
// capture group nested inside the named group.
func ResolveRegexPlaceholders(pattern string) string {
	if !strings.HasPrefix(pattern, "(?s)") {
		pattern = "(?s)" + pattern
		log.Printf("Pattern modified to include (?s): %s", pattern)
	}

	// ${1} is the named group opener; only the !num after it is replaced.
	pattern = namedGroupNumPlaceholder.ReplaceAllString(pattern, `${1}-?\d*\.?\d+`)
	pattern = strings.ReplaceAll(pattern, "!num", `"?(-?\d*\.?\d+)"?`)
	pattern = strings.ReplaceAll(pattern, "!any", `.*?`)

	pattern = repPlaceholder.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := repPlaceholder.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		repetitions, err := strconv.Atoi(parts[2])
		if err != nil {
			// Count is out of range for int; leave the placeholder as written.
			return match
		}
		if repetitions <= 0 {
			// Zero repetitions: strings.Repeat would panic on a count of -1.
			return ""
		}
		repeated := parts[1]
		return strings.Repeat(repeated+".*?", repetitions-1) + repeated
	})
	return pattern
}