diff --git a/go.mod b/go.mod index fca369b..72e9e3d 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,18 @@ module binarysusser go 1.24.1 -require github.com/bmatcuk/doublestar/v4 v4.8.1 +require ( + git.site.quack-lab.dev/dave/cylogger v1.5.0 + git.site.quack-lab.dev/dave/cyutils v1.5.0 + github.com/bmatcuk/doublestar/v4 v4.8.1 +) + +require ( + github.com/google/go-cmp v0.5.9 // indirect + github.com/hexops/valast v1.5.0 // indirect + golang.org/x/mod v0.7.0 // indirect + golang.org/x/sys v0.3.0 // indirect + golang.org/x/time v0.12.0 // indirect + golang.org/x/tools v0.4.0 // indirect + mvdan.cc/gofumpt v0.4.0 // indirect +) diff --git a/go.sum b/go.sum index 7faa1ee..4511a3e 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,34 @@ +git.site.quack-lab.dev/dave/cylogger v1.5.0 h1:9H/eEMD1dqJ9hEudwbszxrzE9lN0P0iCeYOzYRPMWOA= +git.site.quack-lab.dev/dave/cylogger v1.5.0/go.mod h1:wctgZplMvroA4X6p8f4B/LaCKtiBcT1Pp+L14kcS8jk= +git.site.quack-lab.dev/dave/cyutils v1.5.0 h1:U5pojDNoXV4Kj/dlPaGm2COaT4aX6zu88gBF+nTYeJw= +git.site.quack-lab.dev/dave/cyutils v1.5.0/go.mod h1:fBjALu2Cp2u2bDr+E4zbGVMBeIgFzROg+4TCcTNAiQU= github.com/bmatcuk/doublestar/v4 v4.8.1 h1:54Bopc5c2cAvhLRAzqOGCYHYyhcDHsFF4wWIR5wKP38= github.com/bmatcuk/doublestar/v4 v4.8.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= +github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= +github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hexops/autogold v0.8.1 h1:wvyd/bAJ+Dy+DcE09BoLk6r4Fa5R5W+O+GUzmR985WM= +github.com/hexops/autogold v0.8.1/go.mod h1:97HLDXyG23akzAoRYJh/2OBs3kd80eHyKPvZw0S5ZBY= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/hexops/valast v1.5.0 h1:FBTuvVi0wjTngtXJRZXMbkN/Dn6DgsUsBwch2DUJU8Y= +github.com/hexops/valast v1.5.0/go.mod h1:Jcy1pNH7LNraVaAZDLyv21hHg2WBv9Nf9FL6fGxU7o4= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +golang.org/x/mod v0.7.0 h1:LapD9S96VoQRhi/GrNTqeBJFrUjs5UHCAtTlgwA5oZA= +golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= +golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/tools v0.4.0 h1:7mTAgkunk3fr4GAloyyCasadO6h9zSsQZbwvcaIciV4= +golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= +mvdan.cc/gofumpt v0.4.0 h1:JVf4NN1mIpHogBj7ABpgOyZc65/UUOkKQFkoURsz4MM= +mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ= diff --git a/main.go b/main.go index e2e0cc8..6e07030 100644 --- a/main.go +++ b/main.go @@ -11,23 +11,10 @@ import ( "sync" "github.com/bmatcuk/doublestar/v4" + logger "git.site.quack-lab.dev/dave/cylogger" + utils "git.site.quack-lab.dev/dave/cyutils" ) -var Error *log.Logger -var Warning *log.Logger - -func init() { - log.SetFlags(log.Lmicroseconds | log.Lshortfile) - log.SetOutput(os.Stdout) - - Error = log.New(os.Stderr, - fmt.Sprintf("%sERROR:%s ", "\033[0;101m", "\033[0m"), - log.Lmicroseconds|log.Lshortfile) - Warning = log.New(os.Stdout, - fmt.Sprintf("%sWarning:%s ", "\033[0;93m", "\033[0m"), - log.Lmicroseconds|log.Lshortfile) -} - type ExtData struct { ext string binaryCount int @@ -37,70 +24,58 @@ type ExtData struct { var debug bool func main() { - raw := flag.Bool("r", false, "More application friendly output") - debugF := flag.Bool("d", false, "Debug mode") + track := flag.Bool("t", false, "Git add and commit the results") flag.Parse() - debug = *debugF + logger.InitFlag() + logger.Info("Starting binarysusser") + dir := flag.Arg(0) if dir == "" { dir = "." } + logger.Info("Scanning directory: %s", dir) + dir = NormalizePath(dir) - if debug { - log.Printf("Scanning directory: %s", dir) - } + logger.Info("Normalized directory: %s", dir) files, err := doublestar.Glob(os.DirFS(dir), "**/*") if err != nil { - log.Fatalf("Error globbing directory: %v", err) + logger.Error("Error globbing directory: %v", err) + os.Exit(1) } + logger.Info("Found %d files", len(files)) + extensionTypeCount := sync.Map{} - wg := sync.WaitGroup{} - for _, file := range files { - wg.Add(1) - go func(file string) { - defer wg.Done() - if debug { - log.Printf("[%s] Processing file", file) // Log the file being processed - } - - isBinary, err := IsBinaryFile(file) - if err != nil { - if debug { - log.Printf("[%s] Error analyzing file: %v", file, err) - } - return - } - - ext := filepath.Ext(file) - extData, _ := extensionTypeCount.LoadOrStore(ext, &ExtData{ext: ext, binaryCount: 0, textCount: 0}) - if isBinary { - extData.(*ExtData).binaryCount++ - if debug { - log.Printf("[%s] Binary file detected: (%s)", file, ext) - } - } else { - extData.(*ExtData).textCount++ - if debug { - log.Printf("[%s] Text file detected: (%s)", file, ext) - } - } - }(file) - } - wg.Wait() + utils.WithWorkers(20, files, func(worker, i int, file string) { + filelog := logger.Default.WithPrefix(fmt.Sprintf("file=%s", file)) + filelog.Debug("Processing file") + isBinary, err := IsBinaryFile(file) + if err != nil { + filelog.Error("Error analyzing file: %v", err) + return + } + ext := filepath.Ext(file) + extData, _ := extensionTypeCount.LoadOrStore(ext, &ExtData{ext: ext, binaryCount: 0, textCount: 0}) + if isBinary { + extData.(*ExtData).binaryCount++ + filelog.Debug("Binary file detected: (%s)", ext) + } else { + extData.(*ExtData).textCount++ + filelog.Debug("Text file detected: (%s)", ext) + } + }) + logger.Info("Processing complete") extensionTypeCount.Range(func(key, value any) bool { extData := value.(*ExtData) + extlog := logger.Default.WithPrefix(fmt.Sprintf("ext=%s", extData.ext)) if extData.ext == "" { + extlog.Debug("Skipping empty extension") return true } if extData.binaryCount > extData.textCount*2 { - if *raw { - fmt.Println(extData.ext) - } else { - log.Printf("Extension: %q, Binary Count: %d, Text Count: %d", extData.ext, extData.binaryCount, extData.textCount) - } + extlog.Info("Extension: %q, Binary Count: %d, Text Count: %d", extData.ext, extData.binaryCount, extData.textCount) } return true }) @@ -109,16 +84,13 @@ func main() { // IsBinaryFile detects if a file is binary by analyzing a sample of its content // It uses multiple heuristics for more reliable detection func IsBinaryFile(filename string) (bool, error) { - if debug { - log.Printf("[%s] Starting binary detection for file", filename) - } + filelog := logger.Default.WithPrefix(fmt.Sprintf("file=%s", filename)) + filelog.Debug("Starting binary detection for file") // Open the file file, err := os.Open(filename) if err != nil { - if debug { - log.Printf("[%s] Failed to open file: %v", filename, err) - } + filelog.Error("Failed to open file: %v", err) return false, err } defer file.Close() @@ -126,12 +98,10 @@ func IsBinaryFile(filename string) (bool, error) { // Get file info for size fileInfo, err := file.Stat() if err != nil { - if debug { - log.Printf("[%s] Failed to get file stats: %v", filename, err) - } - } else if debug { - log.Printf("[%s] File size: %d bytes", filename, fileInfo.Size()) + filelog.Error("Failed to get file stats: %v", err) + return false, err } + filelog.Debug("File size: %d bytes", fileInfo.Size()) // Create a buffer to read a sample (first 8KB is usually enough) // Adjust the buffer size as needed @@ -139,22 +109,16 @@ func IsBinaryFile(filename string) (bool, error) { buffer := make([]byte, sampleSize) // Read a sample from the file - if debug { - log.Printf("[%s] Reading %d byte sample from file", filename, sampleSize) - } + filelog.Debug("Reading %d byte sample from file", sampleSize) bytesRead, err := file.Read(buffer) if err != nil && err.Error() != "EOF" { - if debug { - log.Printf("[%s] Error reading from file: %v", filename, err) - } + filelog.Error("Error reading from file: %v", err) return false, err } // Adjust buffer to actual bytes read buffer = buffer[:bytesRead] - if debug { - log.Printf("[%s] Actually read %d bytes from file", filename, bytesRead) - } + filelog.Debug("Actually read %d bytes from file", bytesRead) // Null byte check - common in binary files, rare in text files nullCount := 0 @@ -168,16 +132,12 @@ func IsBinaryFile(filename string) (bool, error) { // Count of characters analyzed totalBytes := bytesRead - if debug { - log.Printf("[%s] Analyzing bytes for binary detection...", filename) - } + filelog.Debug("Analyzing bytes for binary detection...") // Check each byte in the sample for _, b := range buffer { // Update character frequency (debug only) - if debug { - charFreq[b]++ - } + charFreq[b]++ // Count null bytes if b == 0 { @@ -219,17 +179,12 @@ func IsBinaryFile(filename string) (bool, error) { extendedAsciiPercentage = 100.0 * float64(extendedAsciiCount) / float64(totalBytes) } - if debug { - log.Printf("[%s] File", filename) - log.Printf("[%s] Size analyzed: %d bytes", filename, totalBytes) - log.Printf("[%s] Null bytes: %d (%.2f%%)", filename, nullCount, nullPercentage) - log.Printf("[%s] Non-printable: %d (%.2f%%)", filename, nonPrintableCount, nonPrintablePercentage) - log.Printf("[%s] Control chars: %d (%.2f%%)", filename, controlCharCount, controlCharPercentage) - log.Printf("[%s] Extended ASCII: %d (%.2f%%)", filename, extendedAsciiCount, extendedAsciiPercentage) - log.Printf("[%s] Thresholds: nulls > %.2f%%, non-printable > %.2f%%", - filename, - 100.0*nullThreshold/float64(totalBytes), - 100.0*nonPrintableThreshold/float64(totalBytes)) + filelog.Trace("File size analyzed: %d bytes", totalBytes) + filelog.Trace("Null bytes: %d (%.2f%%)", nullCount, nullPercentage) + filelog.Trace("Non-printable: %d (%.2f%%)", nonPrintableCount, nonPrintablePercentage) + filelog.Trace("Control chars: %d (%.2f%%)", controlCharCount, controlCharPercentage) + filelog.Trace("Extended ASCII: %d (%.2f%%)", extendedAsciiCount, extendedAsciiPercentage) + filelog.Trace("Thresholds: nulls > %.2f%%, non-printable > %.2f%%", 100.0*nullThreshold/float64(totalBytes), 100.0*nonPrintableThreshold/float64(totalBytes)) // Print top 10 most frequent non-printable characters if any were found if nonPrintableCount > 0 { @@ -266,25 +221,20 @@ func IsBinaryFile(filename string) (bool, error) { } } } - } isBinary := float64(nullCount) > nullThreshold || float64(nonPrintableCount) > nonPrintableThreshold - if debug { if isBinary { - log.Printf("[%s] RESULT: %s is detected as BINARY file", filename, filename) + filelog.Debug("File is detected as BINARY file") if float64(nullCount) > nullThreshold { - log.Printf("[%s] - Detected as binary due to null bytes: %.2f%% > threshold %.2f%%", - filename, nullPercentage, 100.0*nullThreshold/float64(totalBytes)) + filelog.Trace("Detected as binary due to null bytes: %.2f%% > threshold %.2f%%", nullPercentage, 100.0*nullThreshold/float64(totalBytes)) } if float64(nonPrintableCount) > nonPrintableThreshold { - log.Printf("[%s] - Detected as binary due to non-printable chars: %.2f%% > threshold %.2f%%", - filename, nonPrintablePercentage, 100.0*nonPrintableThreshold/float64(totalBytes)) + filelog.Trace("Detected as binary due to non-printable chars: %.2f%% > threshold %.2f%%", nonPrintablePercentage, 100.0*nonPrintableThreshold/float64(totalBytes)) } } else { - log.Printf("[%s] RESULT: %s is detected as TEXT file", filename, filename) + filelog.Debug("File is detected as TEXT file") } - } return isBinary, nil } @@ -301,21 +251,16 @@ func isWhitespace(b byte) bool { // IsStringBinary is kept for backwards compatibility func IsStringBinary(s string) bool { - if debug { - log.Printf("Checking if string is binary: %q", s) - } + filelog := logger.Default.WithPrefix(fmt.Sprintf("file=%s", s)) + filelog.Debug("Checking if string is binary: %q", s) for _, c := range s { // 65279 is GOD DAMNED BOM dogshit if (c < ' ' || c > '~') && c != 65279 { - if debug { - log.Printf("Found non-printable character: '%c' with ASCII value %d", c, c) - } + filelog.Trace("Found non-printable character: '%c' with ASCII value %d", c, c) return true } } - if debug { - log.Println("String is not binary.") - } + filelog.Debug("String is not binary.") return false }