package main

import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"
)

// Error and Warning are package-level loggers with ANSI-colored prefixes,
// writing to stderr and stdout respectively.
var (
	Error   *log.Logger
	Warning *log.Logger
)

func init() {
	log.SetFlags(log.Lmicroseconds | log.Lshortfile)
	log.SetOutput(os.Stdout)
	Error = log.New(os.Stderr, fmt.Sprintf("%sERROR:%s ", "\033[0;101m", "\033[0m"), log.Lmicroseconds|log.Lshortfile)
	Warning = log.New(os.Stdout, fmt.Sprintf("%sWarning:%s ", "\033[0;93m", "\033[0m"), log.Lmicroseconds|log.Lshortfile)
}

// main walks the directory given as the first argument (default "."),
// classifies each regular file as text or binary by sniffing its first
// line, and prints the extensions whose files are more often binary
// than text.
func main() {
	flag.Parse()
	dir := flag.Arg(0)
	if dir == "" {
		dir = "."
	}
	dir = NormalizePath(dir)
	log.Printf("Scanning directory: %s", dir)

	files := make(chan string, 10000)
	status := make(chan error)
	go GetSyncFilesRecursively(dir, files, status)

	// extCounts maps a raw extension (e.g. ".exe") to its observed
	// [binary, text] counts. A plain map guarded by a mutex replaces the
	// earlier sync.Map whose LoadOrStore-then-Store pair was a lost-update
	// race under concurrent increments. Counting under the bare extension
	// also fixes the earlier aggregation bug where binary tallies were
	// keyed as "<ext> (binary)" and therefore never merged with the text
	// tallies for the same extension.
	var (
		mu        sync.Mutex
		extCounts = make(map[string][2]int)
	)

	var wg sync.WaitGroup
	// Bound the number of concurrently open files so huge trees do not
	// exhaust file descriptors (previously one unbounded goroutine per file).
	sem := make(chan struct{}, 128)
	for file := range files {
		wg.Add(1)
		sem <- struct{}{}
		go func(file string) {
			defer wg.Done()
			defer func() { <-sem }()
			isBinary, ok := sniffFile(file)
			if !ok {
				// Unreadable or empty files are not counted,
				// matching the original behavior.
				return
			}
			ext := filepath.Ext(file)
			mu.Lock()
			c := extCounts[ext]
			if isBinary {
				c[0]++
			} else {
				c[1]++
			}
			extCounts[ext] = c
			mu.Unlock()
		}(file)
	}
	wg.Wait()

	// Extensions with strictly more binary than text occurrences.
	var binaryDominantExts []string
	for ext, c := range extCounts {
		if c[0] > c[1] {
			binaryDominantExts = append(binaryDominantExts, ext)
		}
	}
	sort.Strings(binaryDominantExts)

	fmt.Println("Extensions that are predominantly binary:")
	for _, ext := range binaryDominantExts {
		c := extCounts[ext]
		fmt.Printf("Extension: %s, Binary Count: %d, Text Count: %d\n", ext, c[0], c[1])
	}
}

// sniffFile reads the first line of path and reports whether it looks
// binary. ok is false when the file cannot be opened or yields no line
// (empty file, or a first line exceeding bufio.Scanner's default 64 KiB
// token limit — a known limitation of this heuristic); such files are
// simply skipped by the caller.
func sniffFile(path string) (isBinary, ok bool) {
	f, err := os.Open(path)
	if err != nil {
		log.Printf("Error opening file %s: %v", path, err)
		return false, false
	}
	defer f.Close() // deferred so the handle is released even on a mid-scan panic
	scanner := bufio.NewScanner(f)
	if scanner.Scan() {
		return IsStringBinary(scanner.Text()), true
	}
	if err := scanner.Err(); err != nil {
		log.Printf("Error reading line from file %s: %v", path, err)
	}
	return false, false
}

// IsStringBinary reports whether s contains any byte outside printable
// ASCII (space through '~'). Note this deliberately coarse heuristic also
// flags tabs and any non-ASCII UTF-8 text as "binary".
func IsStringBinary(s string) bool {
	for _, c := range s {
		if c < ' ' || c > '~' {
			return true
		}
	}
	return false
}

// GetSyncFilesRecursively walks the tree rooted at input with a bounded
// pool of concurrent directory readers, sending every regular file path on
// output. Both output and status are closed when the walk completes; status
// is currently reserved and never carries a value.
//
// Completion is detected with an atomic "pending" counter of directories
// that are queued or in flight: the worker that drops it to zero closes the
// queue. This replaces the previous sleep-and-recheck polling loop, which
// could fire in the window between a directory being dequeued and its
// worker being counted, and whose WaitGroup was never Add-ed.
func GetSyncFilesRecursively(input string, output chan string, status chan error) {
	defer close(output)
	defer close(status)

	var filesProcessed, foldersProcessed, activeWorkers int32

	directories := make(chan string, 100000)
	workerPool := make(chan struct{}, 4000) // caps concurrent ReadDir calls

	// pending counts directories queued or currently being processed.
	// Seed with the root before any worker can decrement it.
	var pending int32 = 1
	directories <- input

	progressTicker := time.NewTicker(200 * time.Millisecond)
	defer progressTicker.Stop()
	done := make(chan struct{})
	reporterDone := make(chan struct{})

	// Progress reporter: periodic one-line status, then a final summary.
	go func() {
		defer close(reporterDone)
		for {
			select {
			case <-progressTicker.C:
				fmt.Printf("\rFiles processed: %8d; Folders processed: %8d; Active workers: %8d; Directory queue: %8d",
					atomic.LoadInt32(&filesProcessed), atomic.LoadInt32(&foldersProcessed),
					atomic.LoadInt32(&activeWorkers), len(directories))
			case <-done:
				fmt.Printf("\nFiles processed: %8d; Folders processed: %8d; Completed successfully\n",
					atomic.LoadInt32(&filesProcessed), atomic.LoadInt32(&foldersProcessed))
				return
			}
		}
	}()

	// Dispatcher: one worker goroutine per dequeued directory, throttled by
	// workerPool. The loop ends when the last worker closes directories.
	var wg sync.WaitGroup
	for directory := range directories {
		wg.Add(1)
		atomic.AddInt32(&activeWorkers, 1)
		go func(dir string) {
			defer wg.Done()
			workerPool <- struct{}{}
			atomic.AddInt32(&foldersProcessed, 1)
			processDirectory(dir, directories, output, &filesProcessed, &pending)
			<-workerPool
			atomic.AddInt32(&activeWorkers, -1)
			// Subdirectory increments happened inside processDirectory,
			// strictly before this decrement, so pending can only reach
			// zero when the entire tree has been visited.
			if atomic.AddInt32(&pending, -1) == 0 {
				close(directories)
			}
		}(directory)
	}
	wg.Wait()
	close(done)
	<-reporterDone // let the final summary print before output closes

	log.Printf("Files processed: %d; Folders processed: %d",
		atomic.LoadInt32(&filesProcessed), atomic.LoadInt32(&foldersProcessed))
}

// processDirectory lists one directory, enqueueing subdirectories (bumping
// pending once per subdirectory before the send) and emitting file paths on
// output. Read errors are logged and the directory is skipped.
func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32, pending *int32) {
	entries, err := os.ReadDir(directory)
	if err != nil {
		log.Printf("Error reading directory %s: %+v", directory, err)
		return
	}
	for _, entry := range entries {
		full := filepath.Join(directory, entry.Name())
		if entry.IsDir() {
			atomic.AddInt32(pending, 1)
			directories <- full
		} else {
			output <- full
			atomic.AddInt32(filesProcessed, 1)
		}
	}
}

// NormalizePath cleans input, converts separators to forward slashes, and
// strips any double-quote characters (e.g. from shell-quoted arguments).
func NormalizePath(input string) string {
	input = filepath.Clean(input)
	input = filepath.ToSlash(input)
	input = strings.ReplaceAll(input, "\"", "")
	return input
}