Files
binary-susser/main.go

209 lines
4.5 KiB
Go

package main
import (
"bufio"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
)
// Error and Warning are package-level loggers created in init. Error writes
// to os.Stderr and Warning writes to os.Stdout; each carries an ANSI-coloured
// prefix.
var (
	Error   *log.Logger
	Warning *log.Logger
)

// init configures the standard logger (stdout, microsecond timestamps, short
// file names) and builds the Error and Warning loggers with the same flags.
func init() {
	const loggerFlags = log.Lmicroseconds | log.Lshortfile
	const colourReset = "\033[0m"

	log.SetOutput(os.Stdout)
	log.SetFlags(loggerFlags)

	errorPrefix := fmt.Sprintf("%sERROR:%s ", "\033[0;101m", colourReset)
	warningPrefix := fmt.Sprintf("%sWarning:%s ", "\033[0;93m", colourReset)

	Error = log.New(os.Stderr, errorPrefix, loggerFlags)
	Warning = log.New(os.Stdout, warningPrefix, loggerFlags)
}
// main scans the directory given as the first positional argument (default
// ".") and classifies every file found as text or binary by inspecting its
// first line. Once the scan completes it logs, per file extension, the last
// classification recorded for that extension and the number of files
// classified.
func main() {
	flag.Parse()
	dir := flag.Arg(0)
	if dir == "" {
		dir = "."
	}
	dir = NormalizePath(dir)
	log.Printf("Scanning directory: %s", dir)

	// Every in-flight classification holds an open file, so the fan-out is
	// bounded to avoid exhausting the process's file descriptors on large
	// trees. When all slots are busy the loop below stops draining files,
	// which in turn applies back-pressure to the directory walker.
	const maxOpenFiles = 64

	files := make(chan string, 10000)
	// status is required by GetSyncFilesRecursively, which closes it when the
	// walk ends; nothing is read from it here.
	status := make(chan error)
	go GetSyncFilesRecursively(dir, files, status)

	var (
		mu               sync.Mutex // guards both maps below
		typeByExtension  = make(map[string]string)
		countByExtension = make(map[string]int)
		wg               sync.WaitGroup
	)
	slots := make(chan struct{}, maxOpenFiles)

	for file := range files {
		slots <- struct{}{}
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			defer func() { <-slots }()

			f, err := os.Open(file)
			if err != nil {
				log.Printf("Error opening file %s: %v", file, err)
				return
			}
			defer f.Close()

			scanner := bufio.NewScanner(f)
			if !scanner.Scan() {
				// Empty files end up here with a nil error and are not counted.
				if err := scanner.Err(); err != nil {
					log.Printf("Error reading line from file %s: %v", file, err)
				}
				return
			}

			ext := filepath.Ext(file)
			kind, label := "text", "Text"
			if IsStringBinary(scanner.Text()) {
				kind, label = "binary", "Binary"
			}
			log.Printf("%s file: %s (%s)", label, file, ext)

			// Update type and count under one lock so concurrent files with
			// the same extension cannot lose an increment.
			mu.Lock()
			typeByExtension[ext] = kind
			countByExtension[ext]++
			mu.Unlock()
		}(file)
	}
	wg.Wait()

	for ext, count := range countByExtension {
		log.Printf("Extension: %s, Type: %s, Count: %d", ext, typeByExtension[ext], count)
	}
}
// IsStringBinary reports whether s looks like binary data rather than text.
//
// s is considered binary when it contains an ASCII control character other
// than tab, line feed, carriage return or form feed (this includes NUL and
// DEL), or when it is not valid UTF-8. Printable ASCII and correctly encoded
// non-ASCII text are treated as text. The empty string is not binary.
func IsStringBinary(s string) bool {
	for i, c := range s {
		switch {
		case c == '\t' || c == '\n' || c == '\r' || c == '\f':
			// Whitespace controls that routinely appear in text files.
		case c < ' ' || c == 0x7f:
			return true
		case c == '\uFFFD' && !strings.HasPrefix(s[i:], "\uFFFD"):
			// Ranging over a string yields U+FFFD for each invalid byte. If
			// the bytes at i are not the real encoding of U+FFFD, the input
			// is malformed UTF-8.
			return true
		}
	}
	return false
}
func GetSyncFilesRecursively(input string, output chan string, status chan error) {
defer close(output)
defer close(status)
var filesProcessed int32
var foldersProcessed int32
var activeWorkers int32
progressTicker := time.NewTicker(200 * time.Millisecond)
defer progressTicker.Stop()
done := make(chan struct{})
defer close(done)
directories := make(chan string, 100000)
workerPool := make(chan struct{}, 4000)
directories <- input
go func() {
for {
select {
case <-progressTicker.C:
dirCount := len(directories)
workers := atomic.LoadInt32(&activeWorkers)
fmt.Printf("\rFiles processed: %8d; Folders processed: %8d; Active workers: %8d; Directory queue: %8d",
atomic.LoadInt32(&filesProcessed),
atomic.LoadInt32(&foldersProcessed),
workers,
dirCount)
case <-done:
// Final progress update
fmt.Printf("\nFiles processed: %8d; Folders processed: %8d; Completed successfully\n",
atomic.LoadInt32(&filesProcessed),
atomic.LoadInt32(&foldersProcessed))
return
}
}
}()
allDone := make(chan struct{})
go func() {
var wg sync.WaitGroup
go func() {
for {
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
time.Sleep(10 * time.Millisecond)
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
close(allDone)
return
}
}
time.Sleep(50 * time.Millisecond)
}
}()
for {
select {
case directory, ok := <-directories:
if !ok {
wg.Wait()
return
}
atomic.AddInt32(&activeWorkers, 1)
go func(dir string) {
workerPool <- struct{}{}
atomic.AddInt32(&foldersProcessed, 1)
processDirectory(dir, directories, output, &filesProcessed)
<-workerPool
atomic.AddInt32(&activeWorkers, -1)
}(directory)
}
}
}()
<-allDone
log.Printf("Files processed: %d; Folders processed: %d",
atomic.LoadInt32(&filesProcessed),
atomic.LoadInt32(&foldersProcessed))
}
// processDirectory lists the entries of directory once (it does not recurse).
// The joined path of each sub-directory is sent to directories; the joined
// path of every other entry is sent to output, after which *filesProcessed is
// atomically incremented. If the directory cannot be read, the error is
// logged and nothing is sent.
func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32) {
	entries, readErr := os.ReadDir(directory)
	if readErr != nil {
		log.Printf("Error reading directory %s: %+v", directory, readErr)
		return
	}

	for i := range entries {
		entry := entries[i]
		fullPath := filepath.Join(directory, entry.Name())

		if entry.IsDir() {
			directories <- fullPath
			continue
		}

		output <- fullPath
		atomic.AddInt32(filesProcessed, 1)
	}
}
// NormalizePath returns input with all double-quote characters removed,
// lexically cleaned with filepath.Clean, and with OS path separators
// converted to forward slashes.
//
// Quotes are stripped before cleaning so that a quoted argument such as
// `"./foo/"` is cleaned like its unquoted form, and so that an input made up
// only of quotes yields "." rather than an empty path.
func NormalizePath(input string) string {
	input = strings.ReplaceAll(input, "\"", "")
	input = filepath.Clean(input)
	return filepath.ToSlash(input)
}