Files
BigChef/processor/regex.go

253 lines
8.1 KiB
Go

package processor
import (
"fmt"
"log"
"regexp"
"strconv"
"strings"
lua "github.com/yuin/gopher-lua"
)
// RegexProcessor rewrites text by matching a regex and letting a Lua script
// modify the captured groups. It holds no state; its methods in this file are
// ToLua, FromLua and ProcessContent.
// NOTE(review): intended to satisfy the Processor interface, which is declared
// outside this view — confirm the method set against that definition.
type RegexProcessor struct{}
// ToLua exposes the capture groups of one match to Lua as globals.
//
// data must be a []string holding the captures in group order. For capture
// number n (1-based) the global s<n> is always set to the raw captured text.
// The global v<n> is set to the numeric value when the text parses as a
// float64 and is reset to nil otherwise. The reset matters when the same Lua
// state is reused for several matches: without it a number left behind by an
// earlier match would still be visible and would be mistaken for the value of
// the current, non-numeric capture.
//
// It returns an error when data is not a []string.
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
	captures, ok := data.([]string)
	if !ok {
		return fmt.Errorf("expected []string for captures, got %T", data)
	}

	for i, text := range captures {
		n := i + 1

		// The string form is always available as s1, s2, ...
		L.SetGlobal(fmt.Sprintf("s%d", n), lua.LString(text))

		// The numeric form v1, v2, ... exists only for numeric captures;
		// assigning nil clears any stale value from a previous call.
		var numeric lua.LValue = lua.LNil
		if val, err := strconv.ParseFloat(text, 64); err == nil {
			numeric = lua.LNumber(val)
		}
		L.SetGlobal(fmt.Sprintf("v%d", n), numeric)
	}
	return nil
}
// FromLua reads the capture variables back out of the Lua globals after a
// script has run.
//
// Slots 1 through 12 are inspected. For slot n, v<n> is consulted first: when
// it holds a Lua number, that number's string form is recorded. Otherwise,
// when s<n> holds a Lua string, that string is recorded. Slots where neither
// condition holds are left out of the result.
//
// Because the numeric variable takes precedence, a script that only changes
// s<n> while v<n> still holds a number will not have that change picked up.
//
// The returned value is a map[int]string keyed by the zero-based capture
// index; the returned error is always nil.
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
	const slots = 12

	collected := make(map[int]string)
	for n := 1; n <= slots; n++ {
		numeric := L.GetGlobal(fmt.Sprintf("v%d", n))
		text := L.GetGlobal(fmt.Sprintf("s%d", n))

		switch {
		case numeric.Type() == lua.LTNumber:
			collected[n-1] = numeric.String()
		case text.Type() == lua.LTString:
			collected[n-1] = text.String()
		}
	}
	return collected, nil
}
// NamedCapture describes one named capture group of a single regex match.
type NamedCapture struct {
// Name is the group's name as reported by the compiled pattern.
Name string
// Value is the text the group captured.
Value string
// Range holds the group's start and end byte offsets. They are relative to
// the whole content string that was searched, not to the start of the match.
Range [2]int
}
// regexRepDirective matches the !rep(pattern, count) shorthand. It is compiled
// once at package scope instead of on every ProcessContent call.
var regexRepDirective = regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)

// expandRegexShorthands turns the pattern shorthands accepted by
// ProcessContent into plain regexp syntax:
//
//   - the pattern is prefixed with (?s) unless it already starts with it
//   - !num becomes an optionally quoted number capture
//   - !any becomes a lazy match-anything
//   - !rep(pattern, count) repeats pattern count times, joined by a lazy
//     match-anything
//
// It returns an error when a !rep count is not a positive integer; such a
// count previously caused a panic inside strings.Repeat.
func expandRegexShorthands(pattern string) (string, error) {
	if !strings.HasPrefix(pattern, "(?s)") {
		pattern = "(?s)" + pattern
		log.Printf("Pattern modified to include (?s): %s", pattern)
	}
	pattern = strings.ReplaceAll(pattern, "!num", `"?(\d*\.?\d+)"?`)
	pattern = strings.ReplaceAll(pattern, "!any", `.*?`)

	var repErr error
	pattern = regexRepDirective.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := regexRepDirective.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		repetitions, err := strconv.Atoi(parts[2])
		if err != nil || repetitions < 1 {
			// Remember only the first problem; the directive is left as is.
			if repErr == nil {
				repErr = fmt.Errorf("invalid repetition count %q in %q", parts[2], match)
			}
			return match
		}
		return strings.Repeat(parts[1]+".*?", repetitions-1) + parts[1]
	})
	if repErr != nil {
		return "", repErr
	}
	return pattern, nil
}

// ProcessContent finds every match of pattern in content, hands the capture
// groups of each match to the Lua script luaExpr, and splices the values the
// script leaves behind back into the text.
//
// The pattern may use the shorthands !num, !any and !rep(pattern, count), and
// is always matched with the (?s) flag. For each match the captures are
// published to Lua through ToLua (s1, v1, ...); named groups are additionally
// published under their own name, as a number when the text parses as one and
// as a string otherwise. After the script runs:
//
//   - if the Lua global "replacement" is set and its string form is not
//     empty, it replaces the whole match;
//   - otherwise each capture group is replaced individually. A named group
//     takes the string form of its named global; an unnamed group takes the
//     value reported by FromLua for its index. Groups that did not take part
//     in the match are left alone.
//
// Matches without capture groups, matches whose script fails, and matches
// for which FromLua reports nothing are skipped and left unchanged.
//
// It returns the rewritten content, the number of matches that were
// rewritten, the total number of matches found, and an error when the
// pattern is invalid or the Lua state cannot be created.
//
// NOTE: groups are rewritten from the highest index to the lowest using their
// original offsets. This is only correct when groups do not nest and appear
// in the match in index order.
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
	pattern, err := expandRegexShorthands(pattern)
	if err != nil {
		log.Printf("Error expanding pattern: %v", err)
		return "", 0, 0, fmt.Errorf("error expanding pattern: %w", err)
	}

	compiledPattern, err := regexp.Compile(pattern)
	if err != nil {
		log.Printf("Error compiling pattern: %v", err)
		return "", 0, 0, fmt.Errorf("error compiling pattern: %w", err)
	}
	log.Printf("Compiled pattern successfully: %s", pattern)

	previous := luaExpr
	luaExpr = BuildLuaScript(luaExpr)
	log.Printf("Changing Lua expression from: %s to: %s", previous, luaExpr)

	L, err := NewLuaState()
	if err != nil {
		log.Printf("Error creating Lua state: %v", err)
		return "", 0, 0, fmt.Errorf("error creating Lua state: %w", err)
	}
	defer L.Close()
	log.Printf("Lua state created successfully")

	modificationCount := 0
	result := content
	indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
	log.Printf("Found %d matches in the content", len(indices))

	// Group names do not change between matches; index g here corresponds to
	// capture group g+1 of the pattern.
	groupNames := compiledPattern.SubexpNames()[1:]

	// Matches are processed from last to first. A replacement may be longer
	// or shorter than the text it replaces, which shifts every offset after
	// it. Working backwards means only already-processed offsets shift.
	for m := len(indices) - 1; m >= 0; m-- {
		matchIndices := indices[m]
		log.Printf("Processing match indices: %v", matchIndices)

		// Offsets rather than string replacement are used so that empty
		// matches (start == end) can still be spliced into.
		matchStart, matchEnd := matchIndices[0], matchIndices[1]
		match := content[matchStart:matchEnd]
		log.Printf("Matched content: %s", match)

		groups := matchIndices[2:]
		if len(groups) == 0 {
			log.Println("No capture groups for lua to chew on")
			continue
		}
		// Defensive only: the regexp package reports offsets in pairs.
		if len(groups)%2 == 1 {
			log.Println("Odd number of indices of groups, what the fuck?")
			continue
		}
		numGroups := len(groups) / 2

		captures := make([]string, 0, numGroups)
		for g := 0; g < numGroups; g++ {
			start, end := groups[g*2], groups[g*2+1]
			if start < 0 || end < 0 {
				// The group did not take part in this match (for example
				// an optional group); expose it to Lua as an empty string.
				captures = append(captures, "")
				continue
			}
			captures = append(captures, content[start:end])
		}
		log.Printf("Captured groups: %v", captures)

		// A slice keeps the named captures in group order.
		namedCaptures := make([]NamedCapture, 0, numGroups)
		for g, name := range groupNames {
			if name == "" {
				continue
			}
			namedCaptures = append(namedCaptures, NamedCapture{
				Name:  name,
				Value: captures[g],
				Range: [2]int{groups[g*2], groups[g*2+1]},
			})
		}
		log.Printf("Named captures: %v", namedCaptures)

		if err := p.ToLua(L, captures); err != nil {
			log.Printf("Error setting Lua variables: %v", err)
			continue
		}
		log.Println("Lua variables set successfully")

		// Named groups are also reachable under their own name.
		for _, capture := range namedCaptures {
			if val, err := strconv.ParseFloat(capture.Value, 64); err == nil {
				L.SetGlobal(capture.Name, lua.LNumber(val))
			} else {
				L.SetGlobal(capture.Name, lua.LString(capture.Value))
			}
		}

		if err := L.DoString(luaExpr); err != nil {
			log.Printf("Error executing Lua code %s for group %s: %v", luaExpr, captures, err)
			continue
		}
		log.Println("Lua code executed successfully")

		modResult, err := p.FromLua(L)
		if err != nil {
			log.Printf("Error getting modifications: %v", err)
			continue
		}
		modsMap, ok := modResult.(map[int]string)
		if !ok || len(modsMap) == 0 {
			log.Println("No modifications to apply")
			continue
		}

		replacement := ""
		if replacementVar := L.GetGlobal("replacement"); replacementVar.Type() != lua.LTNil {
			replacement = replacementVar.String()
		}
		if replacement == "" {
			// Rewrite the groups inside the original match text. A single
			// reverse pass keeps every offset that is still to be used
			// valid, because each edit only moves text that lies after it.
			replacement = match
			for g := numGroups - 1; g >= 0; g-- {
				start, end := groups[g*2], groups[g*2+1]
				if start < 0 || end < 0 {
					continue // group did not take part in the match
				}

				var newVal string
				if name := groupNames[g]; name != "" {
					// The named global takes precedence for named groups.
					newVal = L.GetGlobal(name).String()
				} else if val, found := modsMap[g]; found {
					newVal = val
				} else {
					continue // nothing reported for this group
				}
				log.Printf("Applying modification: %s", newVal)

				// Group offsets are relative to content; shift them so
				// they are relative to the match.
				replacement = replacement[:start-matchStart] + newVal + replacement[end-matchStart:]
			}
		}

		modificationCount++
		result = result[:matchStart] + replacement + result[matchEnd:]
		log.Printf("Modification count updated: %d", modificationCount)
	}

	log.Printf("Process completed with %d modifications", modificationCount)
	return result, modificationCount, len(indices), nil
}