Files
binary-susser/main.go
2025-03-12 21:47:22 +01:00

218 lines
4.6 KiB
Go

package main
import (
"bufio"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
)
// Package-level loggers for messages that should stand out from the
// regular log stream. Both are assigned in init.
var (
	// Error writes to stderr with a highlighted "ERROR:" prefix.
	Error *log.Logger
	// Warning writes to stdout with a highlighted "Warning:" prefix.
	Warning *log.Logger
)

// init points the standard logger at stdout and builds the Error and Warning
// loggers. All three share the same flags (microsecond timestamps plus
// file:line of the call site).
func init() {
	logFlags := log.Lmicroseconds | log.Lshortfile

	log.SetOutput(os.Stdout)
	log.SetFlags(logFlags)

	// The prefixes wrap the label in ANSI colour escapes followed by a reset.
	errorPrefix := fmt.Sprintf("%sERROR:%s ", "\033[0;101m", "\033[0m")
	warningPrefix := fmt.Sprintf("%sWarning:%s ", "\033[0;93m", "\033[0m")

	Warning = log.New(os.Stdout, warningPrefix, logFlags)
	Error = log.New(os.Stderr, errorPrefix, logFlags)
}
// ExtData holds the per-extension tally built up while scanning: how many
// files with this extension were classified as binary and how many as text.
type ExtData struct {
	// ext is the file extension as returned by filepath.Ext.
	ext string
	// binaryCount and textCount are the number of files seen in each class.
	binaryCount, textCount int
}
// debug enables verbose diagnostic logging throughout the program.
// It is set once in main from the -d command-line flag.
var debug bool
// main scans a directory tree (first positional argument, default ".") and
// reports every file extension whose files are predominantly binary.
//
// Flags:
//
//	-r  print only the matching extensions, one per line
//	-d  enable debug logging
//
// A file is classified by its first line only; files that cannot be opened,
// are empty, or whose first line cannot be read are not counted. Files
// without an extension are never reported.
func main() {
	// maxOpenFiles bounds how many files are being read at the same time so a
	// large tree cannot spawn an unbounded number of goroutines or exhaust
	// the process's file descriptors.
	const maxOpenFiles = 128

	raw := flag.Bool("r", false, "More application friendly output")
	debugF := flag.Bool("d", false, "Debug mode")
	flag.Parse()
	debug = *debugF

	dir := flag.Arg(0)
	if dir == "" {
		dir = "."
	}
	dir = NormalizePath(dir)
	if debug {
		log.Printf("Scanning directory: %s", dir)
	}

	files := make(chan string, 10000)
	// status is handed to the walker, which closes it when done; nothing is
	// read from it here.
	status := make(chan error)
	go GetSyncFilesRecursively(dir, files, status)

	var (
		mu     sync.Mutex // guards counts and the ExtData values it points to
		counts = make(map[string]*ExtData)
		wg     sync.WaitGroup
	)
	slots := make(chan struct{}, maxOpenFiles)

	for file := range files {
		// Acquire before spawning so the number of goroutines is bounded too.
		slots <- struct{}{}
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			defer func() { <-slots }()

			isBinary, ok := firstLineIsBinary(file)
			if !ok {
				return
			}
			ext := filepath.Ext(file)

			// The counters are plain ints shared by every worker, so all
			// updates happen under the mutex.
			mu.Lock()
			entry, found := counts[ext]
			if !found {
				entry = &ExtData{ext: ext}
				counts[ext] = entry
			}
			if isBinary {
				entry.binaryCount++
			} else {
				entry.textCount++
			}
			mu.Unlock()

			if debug {
				if isBinary {
					log.Printf("Binary file detected: %s (%s)", file, ext) // Log binary file detection
				} else {
					log.Printf("Text file detected: %s (%s)", file, ext) // Log text file detection
				}
			}
		}(file)
	}
	wg.Wait()

	// All workers have finished, so counts can be read without locking.
	for _, extData := range counts {
		if extData.ext == "" {
			continue
		}
		// Report only extensions with more than twice as many binary files
		// as text files.
		if extData.binaryCount <= extData.textCount*2 {
			continue
		}
		if *raw {
			fmt.Println(extData.ext)
		} else if debug {
			// NOTE(review): without -r the result is only visible in debug
			// mode; confirm that printing nothing otherwise is intended.
			log.Printf("Extension: %q, Binary Count: %d, Text Count: %d", extData.ext, extData.binaryCount, extData.textCount)
		}
	}
}

// firstLineIsBinary reads the first line of the file at path and reports
// whether IsStringBinary considers it binary. ok is false when the file could
// not be opened, is empty, or its first line could not be read.
func firstLineIsBinary(path string) (isBinary, ok bool) {
	if debug {
		log.Printf("Processing file: %s", path) // Log the file being processed
	}
	f, err := os.Open(path)
	if err != nil {
		if debug {
			log.Printf("Error opening file %s: %v", path, err)
		}
		return false, false
	}
	defer f.Close() // Ensure the file is closed after processing

	scanner := bufio.NewScanner(f)
	if !scanner.Scan() {
		if err := scanner.Err(); err != nil && debug {
			log.Printf("Error reading line from file %s: %v", path, err)
		}
		return false, false
	}
	return IsStringBinary(scanner.Text()), true
}
// IsStringBinary reports whether s looks like binary data rather than text.
//
// s is treated as binary when it contains any of:
//   - an ASCII control character other than tab, line feed, carriage return
//     or form feed (this includes NUL and DEL),
//   - a C1 control character (U+0080..U+009F),
//   - a byte sequence that is not valid UTF-8.
//
// Everything else, including a byte-order mark (U+FEFF) and printable
// non-ASCII characters, counts as text. The empty string is text.
func IsStringBinary(s string) bool {
	if debug {
		log.Printf("Checking if string is binary: %q", s)
	}
	for i, c := range s {
		binary := false
		switch {
		case c == '\t', c == '\n', c == '\r', c == '\f':
			// Whitespace controls that routinely appear in text files.
		case c < ' ', c == 0x7f, c >= 0x80 && c <= 0x9f:
			binary = true
		case c == '\uFFFD':
			// range yields U+FFFD for invalid UTF-8 as well as for a real
			// replacement character; only the former indicates binary data.
			binary = !strings.HasPrefix(s[i:], "\uFFFD")
		}
		if binary {
			if debug {
				log.Printf("Found non-printable character: '%c' with ASCII value %d", c, c)
			}
			return true
		}
	}
	if debug {
		log.Println("String is not binary.")
	}
	return false
}
func GetSyncFilesRecursively(input string, output chan string, status chan error) {
defer close(output)
defer close(status)
var filesProcessed int32
var foldersProcessed int32
var activeWorkers int32
done := make(chan struct{})
defer close(done)
directories := make(chan string, 100000)
workerPool := make(chan struct{}, 4000)
directories <- input
allDone := make(chan struct{})
go func() {
var wg sync.WaitGroup
go func() {
for {
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
time.Sleep(10 * time.Millisecond)
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
close(allDone)
return
}
}
time.Sleep(50 * time.Millisecond)
}
}()
for {
select {
case directory, ok := <-directories:
if !ok {
wg.Wait()
return
}
atomic.AddInt32(&activeWorkers, 1)
go func(dir string) {
workerPool <- struct{}{}
atomic.AddInt32(&foldersProcessed, 1)
processDirectory(dir, directories, output, &filesProcessed)
<-workerPool
atomic.AddInt32(&activeWorkers, -1)
}(directory)
}
}
}()
<-allDone
}
// processDirectory reads a single directory and fans its entries out:
// sub-directories are sent to directories for later processing, and regular
// files are sent to output, incrementing *filesProcessed for each one.
//
// Symbolic links are resolved: a link to a regular file is emitted like a
// file, any other link (to a directory, broken, ...) is skipped, so links are
// never followed into directories. FIFOs, sockets and device nodes are
// skipped as well, because opening them for reading can block indefinitely.
//
// A directory that cannot be read is skipped silently; the walk is
// best-effort.
func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32) {
	entries, err := os.ReadDir(directory)
	if err != nil {
		return
	}

	for _, entry := range entries {
		path := filepath.Join(directory, entry.Name())
		mode := entry.Type()

		switch {
		case mode.IsDir():
			directories <- path
			continue
		case mode.IsRegular():
			// Emitted below.
		case mode&os.ModeSymlink != 0:
			// os.Stat follows the link; keep it only if the target is a
			// regular file.
			info, statErr := os.Stat(path)
			if statErr != nil || !info.Mode().IsRegular() {
				continue
			}
		default:
			continue
		}

		output <- path
		atomic.AddInt32(filesProcessed, 1)
	}
}
// NormalizePath returns input with all double-quote characters removed,
// lexically cleaned (see filepath.Clean) and with forward slashes as the
// separator.
//
// Quotes are removed before cleaning so that a quoted argument such as
// `"dir/"` is normalised exactly like `dir/`, and an argument that is empty
// once the quotes are gone becomes "." rather than an empty path.
func NormalizePath(input string) string {
	unquoted := strings.ReplaceAll(input, "\"", "")
	return filepath.ToSlash(filepath.Clean(unquoted))
}