From 1d0517ab66f32370c48a436558a9a34c83987895 Mon Sep 17 00:00:00 2001
From: PhatPhuckDave
Date: Sat, 8 Mar 2025 11:01:23 +0100
Subject: [PATCH] Implement type sussination

---
Review notes (revised patch):
- IsStringBinary no longer treats tabs, CRs or well-formed non-ASCII text
  as binary; it flags other control characters and malformed UTF-8.
- Per-extension results live in plain maps behind one mutex, so the
  counter increment is atomic (the sync.Map Load+Store pair lost updates).
- At most 64 files are open at once, and each file is closed via defer.
- A first line longer than bufio's 64 KiB token limit counts as binary
  instead of being reported as a read error.
- The copy this was reconstructed from had its line breaks collapsed, so
  blank context lines are inferred and the post-image blob id on the index
  line is stale; check the context against main.go at 53ddf19 before
  applying.

 main.go | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 98 insertions(+), 3 deletions(-)

diff --git a/main.go b/main.go
index 53ddf19..d45dd6e 100644
--- a/main.go
+++ b/main.go
@@ -1,6 +1,8 @@
 package main
 
 import (
+	"bufio"
+	"flag"
 	"fmt"
 	"log"
 	"os"
@@ -27,14 +29,107 @@ func init() {
 }
 
 func main() {
+	flag.Parse()
+	dir := flag.Arg(0)
+	if dir == "" {
+		dir = "."
+	}
+	dir = NormalizePath(dir)
+	log.Printf("Scanning directory: %s", dir)
+
 	files := make(chan string, 10000)
 	status := make(chan error)
 
-	go GetSyncFilesRecursively(".", files, status)
+	go GetSyncFilesRecursively(dir, files, status)
 
+	// Plain maps behind one mutex: bumping a counter is a read-modify-write,
+	// which a sync.Map Load followed by a Store cannot do atomically.
+	var mu sync.Mutex
+	typeByExtension := map[string]string{}
+	countByExtension := map[string]int{}
+
+	// Cap the number of files open at once so a large tree cannot exhaust
+	// the process's file descriptors.
+	const maxOpenFiles = 64
+	sem := make(chan struct{}, maxOpenFiles)
+	var wg sync.WaitGroup
 	for file := range files {
-		log.Println(file)
+		wg.Add(1)
+		sem <- struct{}{}
+		go func(file string) {
+			defer wg.Done()
+			defer func() { <-sem }()
+
+			f, err := os.Open(file)
+			if err != nil {
+				log.Printf("Error opening file %s: %v", file, err)
+				return
+			}
+			defer f.Close()
+
+			typ, err := sniffType(f)
+			if err != nil {
+				log.Printf("Error reading line from file %s: %v", file, err)
+				return
+			}
+			ext := filepath.Ext(file)
+			switch typ {
+			case "binary":
+				log.Printf("Binary file: %s (%s)", file, ext)
+			case "text":
+				log.Printf("Text file: %s (%s)", file, ext)
+			default:
+				return // empty file: no evidence either way
+			}
+
+			mu.Lock()
+			typeByExtension[ext] = typ
+			countByExtension[ext]++
+			mu.Unlock()
+		}(file)
 	}
+	wg.Wait()
+
+	for ext, count := range countByExtension {
+		log.Printf("Extension: %s, Type: %s, Count: %d", ext, typeByExtension[ext], count)
+	}
+}
+
+// sniffType classifies f by its first line and returns "binary", "text",
+// or "" when the file is empty and there is nothing to judge.
+func sniffType(f *os.File) (string, error) {
+	scanner := bufio.NewScanner(f)
+	if !scanner.Scan() {
+		err := scanner.Err()
+		if err == bufio.ErrTooLong {
+			// No newline within the scanner's 64 KiB token limit: treat as binary.
+			return "binary", nil
+		}
+		return "", err // nil for an empty file
+	}
+	if IsStringBinary(scanner.Text()) {
+		return "binary", nil
+	}
+	return "text", nil
+}
+
+// IsStringBinary reports whether s looks like binary data rather than text.
+// Malformed UTF-8 and control characters other than tab, carriage return and
+// form feed count as binary; well-formed non-ASCII text does not.
+func IsStringBinary(s string) bool {
+	for i, c := range s {
+		switch {
+		case c == '\t' || c == '\r' || c == '\f':
+			// ordinary whitespace found in text files
+		case c < ' ' || c == 0x7f:
+			return true
+		case c == '\uFFFD' && !strings.HasPrefix(s[i:], "\uFFFD"):
+			// range yields U+FFFD for a malformed byte; a genuine U+FFFD
+			// is spelled with its full three-byte encoding.
+			return true
+		}
+	}
+	return false
 }
 
 func GetSyncFilesRecursively(input string, output chan string, status chan error) {
@@ -141,7 +236,7 @@ func processDirectory(directory string, directories chan<- string, output chan<-
 	}
 }
 
-func NormalizePath(input, workdir string) string {
+func NormalizePath(input string) string {
 	input = filepath.Clean(input)
 	input = filepath.ToSlash(input)
 	input = strings.ReplaceAll(input, "\"", "")