Entirely rework logging
This commit is contained in:
16
go.mod
16
go.mod
@@ -2,4 +2,18 @@ module binarysusser
|
||||
|
||||
go 1.24.1
|
||||
|
||||
require github.com/bmatcuk/doublestar/v4 v4.8.1
|
||||
require (
|
||||
git.site.quack-lab.dev/dave/cylogger v1.5.0
|
||||
git.site.quack-lab.dev/dave/cyutils v1.5.0
|
||||
github.com/bmatcuk/doublestar/v4 v4.8.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/hexops/valast v1.5.0 // indirect
|
||||
golang.org/x/mod v0.7.0 // indirect
|
||||
golang.org/x/sys v0.3.0 // indirect
|
||||
golang.org/x/time v0.12.0 // indirect
|
||||
golang.org/x/tools v0.4.0 // indirect
|
||||
mvdan.cc/gofumpt v0.4.0 // indirect
|
||||
)
|
||||
|
||||
32
go.sum
32
go.sum
@@ -1,2 +1,34 @@
|
||||
git.site.quack-lab.dev/dave/cylogger v1.5.0 h1:9H/eEMD1dqJ9hEudwbszxrzE9lN0P0iCeYOzYRPMWOA=
|
||||
git.site.quack-lab.dev/dave/cylogger v1.5.0/go.mod h1:wctgZplMvroA4X6p8f4B/LaCKtiBcT1Pp+L14kcS8jk=
|
||||
git.site.quack-lab.dev/dave/cyutils v1.5.0 h1:U5pojDNoXV4Kj/dlPaGm2COaT4aX6zu88gBF+nTYeJw=
|
||||
git.site.quack-lab.dev/dave/cyutils v1.5.0/go.mod h1:fBjALu2Cp2u2bDr+E4zbGVMBeIgFzROg+4TCcTNAiQU=
|
||||
github.com/bmatcuk/doublestar/v4 v4.8.1 h1:54Bopc5c2cAvhLRAzqOGCYHYyhcDHsFF4wWIR5wKP38=
|
||||
github.com/bmatcuk/doublestar/v4 v4.8.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
|
||||
github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
|
||||
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/hexops/autogold v0.8.1 h1:wvyd/bAJ+Dy+DcE09BoLk6r4Fa5R5W+O+GUzmR985WM=
|
||||
github.com/hexops/autogold v0.8.1/go.mod h1:97HLDXyG23akzAoRYJh/2OBs3kd80eHyKPvZw0S5ZBY=
|
||||
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
||||
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
||||
github.com/hexops/valast v1.5.0 h1:FBTuvVi0wjTngtXJRZXMbkN/Dn6DgsUsBwch2DUJU8Y=
|
||||
github.com/hexops/valast v1.5.0/go.mod h1:Jcy1pNH7LNraVaAZDLyv21hHg2WBv9Nf9FL6fGxU7o4=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
golang.org/x/mod v0.7.0 h1:LapD9S96VoQRhi/GrNTqeBJFrUjs5UHCAtTlgwA5oZA=
|
||||
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ=
|
||||
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
|
||||
golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
|
||||
golang.org/x/tools v0.4.0 h1:7mTAgkunk3fr4GAloyyCasadO6h9zSsQZbwvcaIciV4=
|
||||
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
|
||||
mvdan.cc/gofumpt v0.4.0 h1:JVf4NN1mIpHogBj7ABpgOyZc65/UUOkKQFkoURsz4MM=
|
||||
mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ=
|
||||
|
||||
151
main.go
151
main.go
@@ -11,23 +11,10 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
logger "git.site.quack-lab.dev/dave/cylogger"
|
||||
utils "git.site.quack-lab.dev/dave/cyutils"
|
||||
)
|
||||
|
||||
var Error *log.Logger
|
||||
var Warning *log.Logger
|
||||
|
||||
func init() {
|
||||
log.SetFlags(log.Lmicroseconds | log.Lshortfile)
|
||||
log.SetOutput(os.Stdout)
|
||||
|
||||
Error = log.New(os.Stderr,
|
||||
fmt.Sprintf("%sERROR:%s ", "\033[0;101m", "\033[0m"),
|
||||
log.Lmicroseconds|log.Lshortfile)
|
||||
Warning = log.New(os.Stdout,
|
||||
fmt.Sprintf("%sWarning:%s ", "\033[0;93m", "\033[0m"),
|
||||
log.Lmicroseconds|log.Lshortfile)
|
||||
}
|
||||
|
||||
type ExtData struct {
|
||||
ext string
|
||||
binaryCount int
|
||||
@@ -37,70 +24,58 @@ type ExtData struct {
|
||||
var debug bool
|
||||
|
||||
func main() {
|
||||
raw := flag.Bool("r", false, "More application friendly output")
|
||||
debugF := flag.Bool("d", false, "Debug mode")
|
||||
track := flag.Bool("t", false, "Git add and commit the results")
|
||||
flag.Parse()
|
||||
debug = *debugF
|
||||
logger.InitFlag()
|
||||
logger.Info("Starting binarysusser")
|
||||
|
||||
dir := flag.Arg(0)
|
||||
if dir == "" {
|
||||
dir = "."
|
||||
}
|
||||
logger.Info("Scanning directory: %s", dir)
|
||||
|
||||
dir = NormalizePath(dir)
|
||||
if debug {
|
||||
log.Printf("Scanning directory: %s", dir)
|
||||
}
|
||||
logger.Info("Normalized directory: %s", dir)
|
||||
|
||||
files, err := doublestar.Glob(os.DirFS(dir), "**/*")
|
||||
if err != nil {
|
||||
log.Fatalf("Error globbing directory: %v", err)
|
||||
logger.Error("Error globbing directory: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
logger.Info("Found %d files", len(files))
|
||||
|
||||
extensionTypeCount := sync.Map{}
|
||||
wg := sync.WaitGroup{}
|
||||
for _, file := range files {
|
||||
wg.Add(1)
|
||||
go func(file string) {
|
||||
defer wg.Done()
|
||||
if debug {
|
||||
log.Printf("[%s] Processing file", file) // Log the file being processed
|
||||
}
|
||||
|
||||
utils.WithWorkers(20, files, func(worker, i int, file string) {
|
||||
filelog := logger.Default.WithPrefix(fmt.Sprintf("file=%s", file))
|
||||
filelog.Debug("Processing file")
|
||||
isBinary, err := IsBinaryFile(file)
|
||||
if err != nil {
|
||||
if debug {
|
||||
log.Printf("[%s] Error analyzing file: %v", file, err)
|
||||
}
|
||||
filelog.Error("Error analyzing file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
ext := filepath.Ext(file)
|
||||
extData, _ := extensionTypeCount.LoadOrStore(ext, &ExtData{ext: ext, binaryCount: 0, textCount: 0})
|
||||
if isBinary {
|
||||
extData.(*ExtData).binaryCount++
|
||||
if debug {
|
||||
log.Printf("[%s] Binary file detected: (%s)", file, ext)
|
||||
}
|
||||
filelog.Debug("Binary file detected: (%s)", ext)
|
||||
} else {
|
||||
extData.(*ExtData).textCount++
|
||||
if debug {
|
||||
log.Printf("[%s] Text file detected: (%s)", file, ext)
|
||||
filelog.Debug("Text file detected: (%s)", ext)
|
||||
}
|
||||
}
|
||||
}(file)
|
||||
}
|
||||
wg.Wait()
|
||||
})
|
||||
logger.Info("Processing complete")
|
||||
|
||||
extensionTypeCount.Range(func(key, value any) bool {
|
||||
extData := value.(*ExtData)
|
||||
extlog := logger.Default.WithPrefix(fmt.Sprintf("ext=%s", extData.ext))
|
||||
if extData.ext == "" {
|
||||
extlog.Debug("Skipping empty extension")
|
||||
return true
|
||||
}
|
||||
if extData.binaryCount > extData.textCount*2 {
|
||||
if *raw {
|
||||
fmt.Println(extData.ext)
|
||||
} else {
|
||||
log.Printf("Extension: %q, Binary Count: %d, Text Count: %d", extData.ext, extData.binaryCount, extData.textCount)
|
||||
}
|
||||
extlog.Info("Extension: %q, Binary Count: %d, Text Count: %d", extData.ext, extData.binaryCount, extData.textCount)
|
||||
}
|
||||
return true
|
||||
})
|
||||
@@ -109,16 +84,13 @@ func main() {
|
||||
// IsBinaryFile detects if a file is binary by analyzing a sample of its content
|
||||
// It uses multiple heuristics for more reliable detection
|
||||
func IsBinaryFile(filename string) (bool, error) {
|
||||
if debug {
|
||||
log.Printf("[%s] Starting binary detection for file", filename)
|
||||
}
|
||||
filelog := logger.Default.WithPrefix(fmt.Sprintf("file=%s", filename))
|
||||
filelog.Debug("Starting binary detection for file")
|
||||
|
||||
// Open the file
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
if debug {
|
||||
log.Printf("[%s] Failed to open file: %v", filename, err)
|
||||
}
|
||||
filelog.Error("Failed to open file: %v", err)
|
||||
return false, err
|
||||
}
|
||||
defer file.Close()
|
||||
@@ -126,12 +98,10 @@ func IsBinaryFile(filename string) (bool, error) {
|
||||
// Get file info for size
|
||||
fileInfo, err := file.Stat()
|
||||
if err != nil {
|
||||
if debug {
|
||||
log.Printf("[%s] Failed to get file stats: %v", filename, err)
|
||||
}
|
||||
} else if debug {
|
||||
log.Printf("[%s] File size: %d bytes", filename, fileInfo.Size())
|
||||
filelog.Error("Failed to get file stats: %v", err)
|
||||
return false, err
|
||||
}
|
||||
filelog.Debug("File size: %d bytes", fileInfo.Size())
|
||||
|
||||
// Create a buffer to read a sample (first 8KB is usually enough)
|
||||
// Adjust the buffer size as needed
|
||||
@@ -139,22 +109,16 @@ func IsBinaryFile(filename string) (bool, error) {
|
||||
buffer := make([]byte, sampleSize)
|
||||
|
||||
// Read a sample from the file
|
||||
if debug {
|
||||
log.Printf("[%s] Reading %d byte sample from file", filename, sampleSize)
|
||||
}
|
||||
filelog.Debug("Reading %d byte sample from file", sampleSize)
|
||||
bytesRead, err := file.Read(buffer)
|
||||
if err != nil && err.Error() != "EOF" {
|
||||
if debug {
|
||||
log.Printf("[%s] Error reading from file: %v", filename, err)
|
||||
}
|
||||
filelog.Error("Error reading from file: %v", err)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Adjust buffer to actual bytes read
|
||||
buffer = buffer[:bytesRead]
|
||||
if debug {
|
||||
log.Printf("[%s] Actually read %d bytes from file", filename, bytesRead)
|
||||
}
|
||||
filelog.Debug("Actually read %d bytes from file", bytesRead)
|
||||
|
||||
// Null byte check - common in binary files, rare in text files
|
||||
nullCount := 0
|
||||
@@ -168,16 +132,12 @@ func IsBinaryFile(filename string) (bool, error) {
|
||||
// Count of characters analyzed
|
||||
totalBytes := bytesRead
|
||||
|
||||
if debug {
|
||||
log.Printf("[%s] Analyzing bytes for binary detection...", filename)
|
||||
}
|
||||
filelog.Debug("Analyzing bytes for binary detection...")
|
||||
|
||||
// Check each byte in the sample
|
||||
for _, b := range buffer {
|
||||
// Update character frequency (debug only)
|
||||
if debug {
|
||||
charFreq[b]++
|
||||
}
|
||||
|
||||
// Count null bytes
|
||||
if b == 0 {
|
||||
@@ -219,17 +179,12 @@ func IsBinaryFile(filename string) (bool, error) {
|
||||
extendedAsciiPercentage = 100.0 * float64(extendedAsciiCount) / float64(totalBytes)
|
||||
}
|
||||
|
||||
if debug {
|
||||
log.Printf("[%s] File", filename)
|
||||
log.Printf("[%s] Size analyzed: %d bytes", filename, totalBytes)
|
||||
log.Printf("[%s] Null bytes: %d (%.2f%%)", filename, nullCount, nullPercentage)
|
||||
log.Printf("[%s] Non-printable: %d (%.2f%%)", filename, nonPrintableCount, nonPrintablePercentage)
|
||||
log.Printf("[%s] Control chars: %d (%.2f%%)", filename, controlCharCount, controlCharPercentage)
|
||||
log.Printf("[%s] Extended ASCII: %d (%.2f%%)", filename, extendedAsciiCount, extendedAsciiPercentage)
|
||||
log.Printf("[%s] Thresholds: nulls > %.2f%%, non-printable > %.2f%%",
|
||||
filename,
|
||||
100.0*nullThreshold/float64(totalBytes),
|
||||
100.0*nonPrintableThreshold/float64(totalBytes))
|
||||
filelog.Trace("File size analyzed: %d bytes", totalBytes)
|
||||
filelog.Trace("Null bytes: %d (%.2f%%)", nullCount, nullPercentage)
|
||||
filelog.Trace("Non-printable: %d (%.2f%%)", nonPrintableCount, nonPrintablePercentage)
|
||||
filelog.Trace("Control chars: %d (%.2f%%)", controlCharCount, controlCharPercentage)
|
||||
filelog.Trace("Extended ASCII: %d (%.2f%%)", extendedAsciiCount, extendedAsciiPercentage)
|
||||
filelog.Trace("Thresholds: nulls > %.2f%%, non-printable > %.2f%%", 100.0*nullThreshold/float64(totalBytes), 100.0*nonPrintableThreshold/float64(totalBytes))
|
||||
|
||||
// Print top 10 most frequent non-printable characters if any were found
|
||||
if nonPrintableCount > 0 {
|
||||
@@ -266,24 +221,19 @@ func IsBinaryFile(filename string) (bool, error) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
isBinary := float64(nullCount) > nullThreshold || float64(nonPrintableCount) > nonPrintableThreshold
|
||||
|
||||
if debug {
|
||||
if isBinary {
|
||||
log.Printf("[%s] RESULT: %s is detected as BINARY file", filename, filename)
|
||||
filelog.Debug("File is detected as BINARY file")
|
||||
if float64(nullCount) > nullThreshold {
|
||||
log.Printf("[%s] - Detected as binary due to null bytes: %.2f%% > threshold %.2f%%",
|
||||
filename, nullPercentage, 100.0*nullThreshold/float64(totalBytes))
|
||||
filelog.Trace("Detected as binary due to null bytes: %.2f%% > threshold %.2f%%", nullPercentage, 100.0*nullThreshold/float64(totalBytes))
|
||||
}
|
||||
if float64(nonPrintableCount) > nonPrintableThreshold {
|
||||
log.Printf("[%s] - Detected as binary due to non-printable chars: %.2f%% > threshold %.2f%%",
|
||||
filename, nonPrintablePercentage, 100.0*nonPrintableThreshold/float64(totalBytes))
|
||||
filelog.Trace("Detected as binary due to non-printable chars: %.2f%% > threshold %.2f%%", nonPrintablePercentage, 100.0*nonPrintableThreshold/float64(totalBytes))
|
||||
}
|
||||
} else {
|
||||
log.Printf("[%s] RESULT: %s is detected as TEXT file", filename, filename)
|
||||
}
|
||||
filelog.Debug("File is detected as TEXT file")
|
||||
}
|
||||
|
||||
return isBinary, nil
|
||||
@@ -301,21 +251,16 @@ func isWhitespace(b byte) bool {
|
||||
|
||||
// IsStringBinary is kept for backwards compatibility
|
||||
func IsStringBinary(s string) bool {
|
||||
if debug {
|
||||
log.Printf("Checking if string is binary: %q", s)
|
||||
}
|
||||
filelog := logger.Default.WithPrefix(fmt.Sprintf("file=%s", s))
|
||||
filelog.Debug("Checking if string is binary: %q", s)
|
||||
for _, c := range s {
|
||||
// 65279 is GOD DAMNED BOM dogshit
|
||||
if (c < ' ' || c > '~') && c != 65279 {
|
||||
if debug {
|
||||
log.Printf("Found non-printable character: '%c' with ASCII value %d", c, c)
|
||||
}
|
||||
filelog.Trace("Found non-printable character: '%c' with ASCII value %d", c, c)
|
||||
return true
|
||||
}
|
||||
}
|
||||
if debug {
|
||||
log.Println("String is not binary.")
|
||||
}
|
||||
filelog.Debug("String is not binary.")
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user