Files
BigChef/processor/regex.go

356 lines
12 KiB
Go

package processor
import (
"fmt"
"regexp"
"strconv"
"strings"
lua "github.com/yuin/gopher-lua"
"modify/logger"
)
// RegexProcessor implements the Processor interface using regex patterns
type RegexProcessor struct{}
// ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings)
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
captureGroups, ok := data.([]*CaptureGroup)
if !ok {
return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data)
}
groupindex := 0
for _, capture := range captureGroups {
if capture.Name == "" {
// We don't want to change the name of the capture group
// Even if it's empty
tempName := fmt.Sprintf("%d", groupindex+1)
groupindex++
L.SetGlobal("s"+tempName, lua.LString(capture.Value))
val, err := strconv.ParseFloat(capture.Value, 64)
if err == nil {
L.SetGlobal("v"+tempName, lua.LNumber(val))
}
} else {
val, err := strconv.ParseFloat(capture.Value, 64)
if err == nil {
L.SetGlobal(capture.Name, lua.LNumber(val))
} else {
L.SetGlobal(capture.Name, lua.LString(capture.Value))
}
}
}
return nil
}
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
// Stub to satisfy interface
return nil, nil
}
// FromLua implements the Processor interface for RegexProcessor
func (p *RegexProcessor) FromLuaCustom(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) {
captureIndex := 0
for _, capture := range captureGroups {
if capture.Name == "" {
capture.Name = fmt.Sprintf("%d", captureIndex+1)
vVarName := fmt.Sprintf("v%s", capture.Name)
sVarName := fmt.Sprintf("s%s", capture.Name)
captureIndex++
vLuaVal := L.GetGlobal(vVarName)
sLuaVal := L.GetGlobal(sVarName)
if sLuaVal.Type() == lua.LTString {
capture.Updated = sLuaVal.String()
}
// Numbers have priority
if vLuaVal.Type() == lua.LTNumber {
capture.Updated = vLuaVal.String()
}
} else {
// Easy shit
capture.Updated = L.GetGlobal(capture.Name).String()
}
}
return captureGroups, nil
}
type CaptureGroup struct {
Name string
Value string
Updated string
Range [2]int
}
// ProcessContent applies regex replacement with Lua processing
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
pattern = ResolveRegexPlaceholders(pattern)
logger.Debug("Compiling regex pattern: %s", pattern)
compiledPattern, err := regexp.Compile(pattern)
if err != nil {
logger.Error("Error compiling pattern: %v", err)
return "", 0, 0, fmt.Errorf("error compiling pattern: %v", err)
}
logger.Debug("Compiled pattern successfully: %s", pattern)
previous := luaExpr
luaExpr = BuildLuaScript(luaExpr)
logger.Debug("Transformed Lua expression: %q → %q", previous, luaExpr)
// Initialize Lua environment
modificationCount := 0
// Process all regex matches
result := content
indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
logger.Debug("Found %d matches in content of length %d", len(indices), len(content))
// We walk backwards because we're replacing something with something else that might be longer
// And in the case it is longer than the original all indicces past that change will be fucked up
// By going backwards we fuck up all the indices to the end of the file that we don't care about
// Because there either aren't any (last match) or they're already modified (subsequent matches)
for i := len(indices) - 1; i >= 0; i-- {
logger.Debug("Processing match %d of %d", i+1, len(indices))
L, err := NewLuaState()
if err != nil {
logger.Error("Error creating Lua state: %v", err)
return "", 0, 0, fmt.Errorf("error creating Lua state: %v", err)
}
// Hmm... Maybe we don't want to defer this..
// Maybe we want to close them every iteration
// We'll leave it as is for now
defer L.Close()
logger.Trace("Lua state created successfully for match %d", i+1)
matchIndices := indices[i]
logger.Trace("Match indices: %v (match position %d-%d)", matchIndices, matchIndices[0], matchIndices[1])
// Why we're doing this whole song and dance of indices is to properly handle empty matches
// Plus it's a little cleaner to surgically replace our matches
// If we were to use string.replace and encountered an empty match there'd be nothing to replace
// But using indices an empty match would have its starting and ending indices be the same
// So when we're cutting open the array we say 0:7 + modified + 7:end
// As if concatenating in the middle of the array
// Plus it supports lookarounds
match := content[matchIndices[0]:matchIndices[1]]
matchPreview := match
if len(match) > 50 {
matchPreview = match[:47] + "..."
}
logger.Trace("Matched content: %q (length: %d)", matchPreview, len(match))
groups := matchIndices[2:]
if len(groups) <= 0 {
logger.Warning("No capture groups found for match %q and regex %q", matchPreview, pattern)
continue
}
if len(groups)%2 == 1 {
logger.Warning("Invalid number of group indices (%d), should be even: %v", len(groups), groups)
continue
}
// Count how many valid groups we have
validGroups := 0
for j := 0; j < len(groups); j += 2 {
if groups[j] != -1 && groups[j+1] != -1 {
validGroups++
}
}
logger.Debug("Found %d valid capture groups in match", validGroups)
for _, index := range groups {
if index == -1 {
logger.Warning("Negative index encountered in match indices %v. This may indicate an issue with the regex pattern or an empty/optional capture group.", matchIndices)
continue
}
}
// We have to use array to preserve order
// Very important for the reconstruction step
// Because we must overwrite the values in reverse order
// See comments a few dozen lines above for more details
captureGroups := make([]*CaptureGroup, 0, len(groups)/2)
groupNames := compiledPattern.SubexpNames()[1:]
for i, name := range groupNames {
start := groups[i*2]
end := groups[i*2+1]
if start == -1 || end == -1 {
continue
}
value := content[start:end]
captureGroups = append(captureGroups, &CaptureGroup{
Name: name,
Value: value,
Range: [2]int{start, end},
})
// Include name info in log if available
if name != "" {
logger.Trace("Capture group '%s': %q (pos %d-%d)", name, value, start, end)
} else {
logger.Trace("Capture group #%d: %q (pos %d-%d)", i+1, value, start, end)
}
}
captureGroups = deduplicateGroups(captureGroups)
if err := p.ToLua(L, captureGroups); err != nil {
logger.Error("Failed to set Lua variables: %v", err)
continue
}
logger.Trace("Set %d capture groups as Lua variables", len(captureGroups))
if err := L.DoString(luaExpr); err != nil {
logger.Error("Lua script execution failed: %v\nScript: %s\nCapture Groups: %+v",
err, luaExpr, captureGroups)
continue
}
logger.Trace("Lua script executed successfully")
// Get modifications from Lua
captureGroups, err = p.FromLuaCustom(L, captureGroups)
if err != nil {
logger.Error("Failed to retrieve modifications from Lua: %v", err)
continue
}
logger.Trace("Retrieved updated values from Lua")
replacement := ""
replacementVar := L.GetGlobal("replacement")
if replacementVar.Type() != lua.LTNil {
replacement = replacementVar.String()
logger.Debug("Using global replacement: %q", replacement)
}
// Check if modification flag is set
modifiedVal := L.GetGlobal("modified")
if modifiedVal.Type() != lua.LTBool || !lua.LVAsBool(modifiedVal) {
logger.Debug("Skipping match - no modifications made by Lua script")
continue
}
// if replacement == "" {
// commands := make([]ReplaceCommand, 0, len(captureGroups))
// // Apply the modifications to the original match
// replacement = match
//
// // Count groups that were actually modified
// modifiedGroups := 0
// for _, capture := range captureGroups {
// if capture.Value != capture.Updated {
// modifiedGroups++
// }
// }
// logger.Debug("%d of %d capture groups were modified", modifiedGroups, len(captureGroups))
//
// for _, capture := range captureGroups {
// if capture.Value == capture.Updated {
// logger.Trace("Capture group unchanged: %s", capture.Value)
// continue
// }
//
// // Log what changed with context
// logger.Debug("Modifying group %s: %q → %q",
// capture.Name, capture.Value, capture.Updated)
//
// // Indices of the group are relative to content
// // To relate them to match we have to subtract the match start index
// // replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
// commands = append(commands, ReplaceCommand{
// From: capture.Range[0] - matchIndices[0],
// To: capture.Range[1] - matchIndices[0],
// With: capture.Updated,
// })
// }
// }
// Preview the replacement for logging
replacementPreview := replacement
if len(replacement) > 50 {
replacementPreview = replacement[:47] + "..."
}
logger.Debug("Replacing match %q with %q", matchPreview, replacementPreview)
modificationCount++
result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:]
logger.Debug("Match #%d processed, running modification count: %d", i+1, modificationCount)
}
logger.Info("Regex processing complete: %d modifications from %d matches", modificationCount, len(indices))
return result, modificationCount, len(indices), nil
}
func deduplicateGroups(captureGroups []*CaptureGroup) []*CaptureGroup {
deduplicatedGroups := make([]*CaptureGroup, 0)
for _, group := range captureGroups {
overlaps := false
logger.Debug("Checking capture group: %s with range %v", group.Name, group.Range)
for _, existingGroup := range deduplicatedGroups {
logger.Debug("Comparing with existing group: %s with range %v", existingGroup.Name, existingGroup.Range)
if group.Range[0] < existingGroup.Range[1] && group.Range[1] > existingGroup.Range[0] {
overlaps = true
logger.Warning("Detected overlap between capture group '%s' and existing group '%s' in range %v-%v and %v-%v", group.Name, existingGroup.Name, group.Range[0], group.Range[1], existingGroup.Range[0], existingGroup.Range[1])
break
}
}
if overlaps {
// We CAN just continue despite this fuckup
logger.Error("Overlapping capture group: %s", group.Name)
continue
}
logger.Debug("No overlap detected for capture group: %s. Adding to deduplicated groups.", group.Name)
deduplicatedGroups = append(deduplicatedGroups, group)
}
return deduplicatedGroups
}
// The order of these replaces is important
// This one handles !num-s inside of named capture groups
// If it were not here our !num in a named capture group would
// Expand to another capture group in the capture group
// We really only want one (our named) capture group
func ResolveRegexPlaceholders(pattern string) string {
// Handle special pattern modifications
if !strings.HasPrefix(pattern, "(?s)") {
pattern = "(?s)" + pattern
// Use fmt.Printf for test compatibility
fmt.Printf("Pattern modified to include (?s): %s\n", pattern)
}
namedGroupNum := regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)
pattern = namedGroupNum.ReplaceAllStringFunc(pattern, func(match string) string {
parts := namedGroupNum.FindStringSubmatch(match)
if len(parts) != 3 {
return match
}
replacement := `-?\d*\.?\d+`
return parts[1] + replacement
})
pattern = strings.ReplaceAll(pattern, "!num", `"?(-?\d*\.?\d+)"?`)
pattern = strings.ReplaceAll(pattern, "!any", `.*?`)
repPattern := regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)
// !rep(pattern, count) repeats the pattern n times
// Inserting !any between each repetition
pattern = repPattern.ReplaceAllStringFunc(pattern, func(match string) string {
parts := repPattern.FindStringSubmatch(match)
if len(parts) != 3 {
return match
}
repeatedPattern := parts[1]
count := parts[2]
repetitions, _ := strconv.Atoi(count)
return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern
})
return pattern
}