diff --git a/main.go b/main.go index a372ca5..06cf2f4 100644 --- a/main.go +++ b/main.go @@ -2,7 +2,9 @@ package main import ( "flag" + "image" "image/jpeg" + "image/png" "os" "path/filepath" "sync" @@ -14,6 +16,7 @@ import ( func main() { thresh := flag.Int("thresh", 10, "Threshold for distance") + workers := flag.Int("workers", 100, "Number of workers") flag.Parse() logger.InitFlag() hashes := &sync.Map{} @@ -39,11 +42,14 @@ func main() { } logger.Info("Patterns expanded to %d files", len(files)) + workerChan := make(chan struct{}, *workers) wg := sync.WaitGroup{} for _, file := range files { + workerChan <- struct{}{} wg.Add(1) go func(file string) { defer wg.Done() + defer func() { <-workerChan }() log := logger.Default.WithPrefix(file) ext := filepath.Ext(file) if ext != ".jpg" && ext != ".jpeg" && ext != ".png" { @@ -57,11 +63,19 @@ func main() { return } defer imgfile.Close() - img, err := jpeg.Decode(imgfile) + + isPng := ext == ".png" + var img image.Image + if isPng { + img, err = png.Decode(imgfile) + } else { + img, err = jpeg.Decode(imgfile) + } if err != nil { log.Error("Failed to decode image: %v", err) return } + hash, err := goimagehash.ExtPerceptionHash(img, 8, 8) if err != nil { log.Error("Failed to calculate hash: %v", err) @@ -75,41 +89,48 @@ func main() { groupedImages := make(map[string][]string) wg.Wait() - processed := make(map[string]bool) + processed := &sync.Map{} hashes.Range(func(key, value interface{}) bool { - filea := key.(string) - hasha := value.(*goimagehash.ExtImageHash) + workerChan <- struct{}{} + wg.Add(1) + go func(key, value interface{}) { + defer wg.Done() + defer func() { <-workerChan }() + filea := key.(string) + hasha := value.(*goimagehash.ExtImageHash) - if processed[filea] { - return true - } + if _, ok := processed.Load(filea); ok { + return + } - var group []string - hashes.Range(func(key, value interface{}) bool { - fileb := key.(string) - hashb := value.(*goimagehash.ExtImageHash) - if filea == fileb { + var group []string + hashes.Range(func(key, value interface{}) bool { + fileb := key.(string) + hashb := value.(*goimagehash.ExtImageHash) + if filea == fileb { + return true + } + distance, err := hasha.Distance(hashb) + if err != nil { + logger.Error("Failed to calculate distance: %v", err) + return true + } + logger.Debug("Distance between %v and %v: %v", filea, fileb, distance) + if distance <= *thresh { + group = append(group, fileb) + processed.Store(fileb, true) + } return true - } - distance, err := hasha.Distance(hashb) - if err != nil { - logger.Error("Failed to calculate distance: %v", err) - return true - } - logger.Debug("Distance between %v and %v: %v", filea, fileb, distance) - if distance <= *thresh { - group = append(group, fileb) - processed[fileb] = true - } - return true - }) + }) - if len(group) > 0 { - groupedImages[filea] = group - processed[filea] = true - } + if len(group) > 0 { + groupedImages[filea] = group + processed.Store(filea, true) + } + }(key, value) return true }) + wg.Wait() // Deduplicate by keeping the largest file in each group for file, group := range groupedImages {