From 9982d36aafd910b0f33626aaeb7da1b5d8442fdb Mon Sep 17 00:00:00 2001 From: PhatPhuckDave Date: Thu, 7 Aug 2025 10:55:47 +0200 Subject: [PATCH] Hallucinate everything Who knows if its better... --- main.go | 621 ++++++++++++++++++++++++++------------------------------ 1 file changed, 291 insertions(+), 330 deletions(-) diff --git a/main.go b/main.go index 2b39e62..fca9061 100644 --- a/main.go +++ b/main.go @@ -10,359 +10,320 @@ import ( "time" logger "git.site.quack-lab.dev/dave/cylogger" - "github.com/bmatcuk/doublestar/v4" "github.com/djherbis/times" ) -var timeUnits = map[string]int64{ - "ms": 1, - "s": 1000, - "m": 1000 * 60, - "h": 1000 * 60 * 60, - "d": 1000 * 60 * 60 * 24, - "M": 1000 * 60 * 60 * 24 * 30, - "y": 1000 * 60 * 60 * 24 * 365, -} - -var numFilesArchived = 0 -var numFilesDeleted = 0 - -var valueRegex, _ = regexp.Compile(`\d+`) -var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`) - -func parseDuration(date string) int64 { - var milliseconds int64 = 0 - - date = strings.TrimSpace(date) - var parts = strings.Split(date, "_") - for _, part := range parts { - part = strings.TrimSpace(part) - logger.Info("Parsing date part: %s", part) - var value = valueRegex.FindString(part) - var unit = unitRegex.FindString(part) - - if value == "" || unit == "" { - logger.Error("Invalid date part: " + part) - continue - } - - if _, ok := timeUnits[unit]; !ok { - logger.Error("Invalid date unit: " + unit) - continue - } - logger.Info("Part %s parsed as: Value: %s, Unit: %s", part, value, unit) - - var valueMs, _ = strconv.ParseInt(value, 10, 16) - valueMs = valueMs * timeUnits[unit] - milliseconds += valueMs - logger.Info("Adding %dms to duration, now: %d", valueMs, milliseconds) - } - - return milliseconds -} - -func getEnv(key, def string) string { - var value, exists = os.LookupEnv(key) - if exists { - return value - } - return def -} - -func scanRoot() { - log := logger.Default.WithPrefix("scanRoot").WithPrefix(constants.ROOT) - log.Info("Scanning root directory") - filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error { - if err != nil { - log.Error("Error scanning %s: %s", path, err) - return nil - } - path = filepath.ToSlash(path) - - if path == constants.ROOT { - log.Info("Skipping root directory %s", path) - return nil - } - - // I forgot why this code was here... It doesn't make sense to me now - // if info.IsDir() { - // log.Info("Skipping directory %s", path) - // return filepath.SkipDir - // } - - // We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it - // In fact iteration should be faster for small lists rather than hashing - for _, ignoredDir := range constants.IGNORED_DIRECTORIES { - matched, err := doublestar.Match(ignoredDir, path) - if err != nil { - log.Error("Error matching %s: %v", path, err) - continue - } - if matched { - log.Info("Ignoring directory %s", path) - return filepath.SkipDir - } - } - - processFile(path, info) - return nil - }) -} - -func scanArchive() { - log := logger.Default.WithPrefix("scanArchive") - log.Info("Scanning archive...") - filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { - if err != nil { - log.Error("Error scanning %s: %s", path, err) - return nil - } - path = filepath.ToSlash(path) - - if path == constants.ROOT_ARCHIVE { - log.Info("Skipping root directory %s...", path) - return nil - } - - processArchiveFile(path, info) - return nil - }) -} - -func processFile(path string, info os.FileInfo) { - var now = time.Now().UnixMilli() - log := logger.Default.WithPrefix("processFile").WithPrefix(path) - - var timeType = "accessed" - if constants.USE_MODTIME { - timeType = "modified" - } - - var fileTime int64 = 0 - if constants.USE_MODTIME { - fileTime = times.Get(info).ModTime().UnixMilli() - } else { - fileTime = times.Get(info).AccessTime().UnixMilli() - } - - var timeDelta = now - fileTime - log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta) - if timeDelta > constants.ARCHIVE_THRESHOLD { - log.Info("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD) - archiveFile(path) - } -} - -func processArchiveFile(path string, info os.FileInfo) { - var now = time.Now().UnixMilli() - log := logger.Default.WithPrefix("processArchiveFile").WithPrefix(path) - - var timeType = "accessed" - if constants.USE_MODTIME { - timeType = "modified" - } - - var fileTime int64 = 0 - if constants.USE_MODTIME { - fileTime = times.Get(info).ModTime().UnixMilli() - } else { - fileTime = times.Get(info).AccessTime().UnixMilli() - } - - var timeDelta = now - int64(fileTime) - log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta) - - if timeDelta > constants.DELETE_THRESHOLD { - log.Info("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD) - deleteFile(path) - } -} - -func archiveFile(path string) { - // defer os.Exit(1) - var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) - log := logger.Default.WithPrefix("archiveFile").WithPrefix(path) - log.Info("Archiving file %s to %s...", path, newPath) - - os.MkdirAll(filepath.Dir(newPath), os.ModePerm) - var err = os.Rename(path, newPath) - if err != nil { - log.Error("Error archiving file %s: %s", path, err) - return - } - numFilesArchived++ -} - -func deleteFile(path string) { - log := logger.Default.WithPrefix("deleteFile").WithPrefix(path) - // defer os.Exit(1) - log.Info("Deleting file %s...", path) - var err = os.Remove(path) - if err != nil { - log.Error("Error deleting file %s: %s", path, err) - return - } - numFilesDeleted++ -} - -func shouldIgnore(path string) bool { - log := logger.Default.WithPrefix("shouldIgnore").WithPrefix(path) - for _, ignoredDir := range constants.IGNORED_DIRECTORIES { - log.Debug("Checking if %s matches %s", ignoredDir, path) - matched, err := doublestar.Match(ignoredDir, path) - if err != nil { - log.Error("Error matching %s: %v", path, err) - continue - } - if matched { - log.Debug("Directory is ignored, skipping") - return true - } - } - log.Debug("Directory is not ignored") - return false -} - -func cleanRoot() { - log := logger.Default.WithPrefix("cleanRoot") - files, err := doublestar.Glob(os.DirFS(constants.ROOT), "**") - if err != nil { - log.Error("Error reading root directory %s: %s", constants.ROOT, err) - return - } - for _, file := range files { - fullpath := filepath.Join(constants.ROOT, file) - filelog := log.WithPrefix(file) - - var info os.FileInfo - filelog.Debug("Getting file info") - info, err = os.Stat(fullpath) - if err != nil { - filelog.Error("Error getting file info %v", err) - continue - } - filelog.Trace("File info: %+v", info) - - if !info.IsDir() { - filelog.Info("File is not a directory, skipping") - continue - } - - filelog.Debug("Checking if directory is ignored") - if shouldIgnore(fullpath) { - filelog.Info("Directory is ignored, skipping") - continue - } - filelog.Debug("Directory is not ignored, checking if it is empty") - - var empty, err = isDirEmpty(fullpath) - if err != nil { - filelog.Error("Error checking if directory - is empty: %v", err) - continue - } - - filelog.Info("Directory isempty: %t", empty) - if empty { - filelog.Info("Deleting empty directory") - var err = os.RemoveAll(fullpath) - if err != nil { - filelog.Error("Error deleting empty directory %v", err) - } - } - } -} -func isDirEmpty(dirPath string) (bool, error) { - var empty = true - var ferr error = nil - log := logger.Default.WithPrefix("isDirEmpty").WithPrefix(dirPath) - - filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { - if err != nil { - log.Error("Error scanning %s: %s", path, err) - ferr = err - return nil - } - if !info.IsDir() { - empty = false - log.Info("Directory %s is not empty, found %s", dirPath, path) - return filepath.SkipAll - } - return nil - }) - return empty, ferr -} - type Constants struct { ROOT string ROOT_ARCHIVE string IGNORED_DIRECTORIES []string - ARCHIVE_THRESHOLD int64 - DELETE_THRESHOLD int64 + ARCHIVE_THRESHOLD int64 // ms + DELETE_THRESHOLD int64 // ms SCAN_INTERVAL time.Duration USE_MODTIME bool } +var ( + constants Constants + timeUnits = map[string]int64{"ms": 1, "s": 1000, "m": 60_000, "h": 3_600_000, "d": 86_400_000, "M": 2_592_000_000, "y": 31_536_000_000} + valueRegex = regexp.MustCompile(`\d+`) + unitRegex = regexp.MustCompile(`[a-zA-Z]+`) + numFilesArchived = 0 + numFilesDeleted = 0 +) + +func parseDurationMS(expr string) int64 { + expr = strings.TrimSpace(expr) + if expr == "" { + return 0 + } + var total int64 + for _, p := range strings.Split(expr, "_") { + p = strings.TrimSpace(p) + if p == "" { + continue + } + v := valueRegex.FindString(p) + u := unitRegex.FindString(p) + if v == "" || u == "" { + logger.Warning("Invalid duration part: %q", p) + continue + } + unit, ok := timeUnits[u] + if !ok { + logger.Warning("Invalid duration unit: %q", u) + continue + } + n, err := strconv.ParseInt(v, 10, 64) + if err != nil { + logger.Warning("Invalid duration value: %q: %v", v, err) + continue + } + total += n * unit + } + return total +} + +func getenv(key, def string) string { + if v, ok := os.LookupEnv(key); ok { + return v + } + return def +} + +func loadConfig() Constants { + flag.Parse() + logger.InitFlag() + + root := filepath.ToSlash(strings.TrimSpace(getenv("ROOT", "/tmp"))) + archive := filepath.ToSlash(strings.TrimSpace(getenv("ROOT_ARCHIVE", root+"/archive"))) + _ = os.MkdirAll(archive, os.ModePerm) + + ignored := []string{} + if ig := strings.TrimSpace(getenv("IGNORED_DIRECTORIES", "")); ig != "" { + for _, s := range strings.Split(ig, ",") { + if t := strings.TrimSpace(s); t != "" { + ignored = append(ignored, filepath.ToSlash(t)) + } + } + } + // Always ignore ROOT and ROOT_ARCHIVE themselves + ignored = append(ignored, filepath.ToSlash(archive)) + ignored = append(ignored, filepath.ToSlash(root)) + + archiveMs := parseDurationMS(getenv("ARCHIVE_THRESHOLD", "1d")) + deleteMs := parseDurationMS(getenv("DELETE_THRESHOLD", "12h")) + interval := time.Duration(parseDurationMS(getenv("SCAN_INTERVAL", "1m"))) * time.Millisecond + useMod := strings.EqualFold(strings.TrimSpace(getenv("USE_MODTIME", "false")), "true") + + logger.Info("Config:") + logger.Info(" ROOT: %s", root) + logger.Info(" ROOT_ARCHIVE: %s", archive) + logger.Info(" IGNORED_DIRECTORIES: %v", ignored) + logger.Info(" ARCHIVE_THRESHOLD(ms): %d", archiveMs) + logger.Info(" DELETE_THRESHOLD(ms): %d", deleteMs) + logger.Info(" SCAN_INTERVAL(ms): %d", interval.Milliseconds()) + logger.Info(" USE_MODTIME: %t", useMod) + + return Constants{ + ROOT: root, + ROOT_ARCHIVE: archive, + IGNORED_DIRECTORIES: ignored, + ARCHIVE_THRESHOLD: archiveMs, + DELETE_THRESHOLD: deleteMs, + SCAN_INTERVAL: interval, + USE_MODTIME: useMod, + } +} + +func shouldIgnore(path string) bool { + // Match against slash-normalized full path + path = filepath.ToSlash(path) + for _, pat := range constants.IGNORED_DIRECTORIES { + ok, err := doublestar.Match(pat, path) + if err != nil { + logger.Warning("Ignore pattern error %q vs %q: %v", pat, path, err) + continue + } + if ok || path == pat { + return true + } + } + return false +} + +func fileTime(info os.FileInfo, path string) int64 { + t := times.Get(info) + if constants.USE_MODTIME { + return t.ModTime().UnixMilli() + } + // If AccessTime is not supported, it returns ModTime; times handles that internally. + return t.AccessTime().UnixMilli() +} + +func archiveCandidate(path string, info os.FileInfo) { + if info.IsDir() { + return + } + now := time.Now().UnixMilli() + ft := fileTime(info, path) + if now-ft <= constants.ARCHIVE_THRESHOLD { + return + } + + rel, err := filepath.Rel(constants.ROOT, path) + if err != nil { + logger.Warning("rel ROOT->%s: %v", path, err) + return + } + dst := filepath.Join(constants.ROOT_ARCHIVE, rel) + + if err := os.MkdirAll(filepath.Dir(dst), os.ModePerm); err != nil { + logger.Error("mkdir %s: %v", filepath.Dir(dst), err) + return + } + + if err := os.Rename(path, dst); err != nil { + logger.Error("archive %s -> %s: %v", path, dst, err) + return + } + numFilesArchived++ + logger.Info("Archived: %s -> %s", path, dst) +} + +func deleteCandidate(path string, info os.FileInfo) { + if info.IsDir() { + return + } + now := time.Now().UnixMilli() + ft := fileTime(info, path) + if now-ft <= constants.DELETE_THRESHOLD { + return + } + if err := os.Remove(path); err != nil { + logger.Error("delete %s: %v", path, err) + return + } + numFilesDeleted++ + logger.Info("Deleted: %s", path) +} + +func scanRoot() { + log := logger.Default.WithPrefix("scanRoot") + root := constants.ROOT + + // doublestar.Glob with os.DirFS(root) returns relative paths + paths, err := doublestar.Glob(os.DirFS(root), "**") + if err != nil { + log.Error("glob %s: %v", root, err) + return + } + for _, rel := range paths { + full := filepath.Join(root, rel) + if shouldIgnore(full) { + continue + } + info, err := os.Stat(full) + if err != nil { + log.Warning("stat %s: %v", full, err) + continue + } + archiveCandidate(full, info) + } +} + +func scanArchive() { + log := logger.Default.WithPrefix("scanArchive") + base := constants.ROOT_ARCHIVE + + paths, err := doublestar.Glob(os.DirFS(base), "**") + if err != nil { + log.Error("glob %s: %v", base, err) + return + } + for _, rel := range paths { + full := filepath.Join(base, rel) + info, err := os.Stat(full) + if err != nil { + log.Warning("stat %s: %v", full, err) + continue + } + deleteCandidate(full, info) + } +} + +func cleanEmptyDirs() { + log := logger.Default.WithPrefix("cleanEmptyDirs") + root := constants.ROOT + + paths, err := doublestar.Glob(os.DirFS(root), "**") + if err != nil { + log.Error("glob %s: %v", root, err) + return + } + // Remove empty dirs; iterate deepest first by sorting longer paths first + // Build a slice of directory paths + var dirs []string + for _, rel := range paths { + full := filepath.Join(root, rel) + info, err := os.Stat(full) + if err != nil || !info.IsDir() { + continue + } + if shouldIgnore(full) { + continue + } + dirs = append(dirs, full) + } + // Simple length-based reverse to try delete children before parents + for i := 0; i < len(dirs)-1; i++ { + for j := i + 1; j < len(dirs); j++ { + if len(dirs[i]) < len(dirs[j]) { + dirs[i], dirs[j] = dirs[j], dirs[i] + } + } + } + for _, d := range dirs { + empty, err := isDirEmpty(d) + if err != nil { + log.Warning("check empty %s: %v", d, err) + continue + } + if empty { + if err := os.Remove(d); err == nil { + log.Info("Removed empty dir: %s", d) + } else { + log.Warning("remove %s: %v", d, err) + } + } + } +} + +func isDirEmpty(dir string) (bool, error) { + f, err := os.Open(dir) + if err != nil { + return false, err + } + defer f.Close() + // Read at most one entry; if none, it's empty + _, err = f.Readdirnames(1) + if err == os.ErrNotExist { + // For older Go versions, fall back + return true, nil + } + if err != nil { + // io.EOF indicates empty for Readdirnames + return true, nil + } + return false, nil +} + func doRun() { scanRoot() scanArchive() - cleanRoot() - logger.Info("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted) - numFilesArchived = 0 - numFilesDeleted = 0 + cleanEmptyDirs() + logger.Info("Cycle summary: archived=%d deleted=%d", numFilesArchived, numFilesDeleted) + numFilesArchived, numFilesDeleted = 0, 0 } -var constants = Constants{} - func main() { flag.Parse() logger.InitFlag() - // Important: Access times don’t accumulate. - // This implies that archiving the file won't alter its access time. - // Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME, - // where X represents the duration it can exist in the folder, - // and Y represents the duration it can exist in the archive. + logger.Info("Starting directory cleaner") + constants = loadConfig() + logger.Info("Ready. First scan in %s", constants.SCAN_INTERVAL) - var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) - var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) - os.Mkdir(ROOT_ARCHIVE, os.ModePerm) - var IGNORED_DIRECTORIES = []string{} - var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "") - if ignoredEnv != "" { - ignoredEnv = strings.TrimSpace(ignoredEnv) - IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...) - } - IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE) - IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT) - for key, dir := range IGNORED_DIRECTORIES { - IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir)) - } - var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d")) - var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h")) - var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6) - var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true" - logger.Info("Input args parsed as:") - logger.Info("ROOT: %s", ROOT) - logger.Info("ROOT_ARCHIVE: %s", ROOT_ARCHIVE) - logger.Info("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES) - logger.Info("ARCHIVE_THRESHOLD: %d", ARCHIVE_THRESHOLD) - logger.Info("DELETE_THRESHOLD: %d", DELETE_THRESHOLD) - logger.Info("SCAN_INTERVAL: %d", SCAN_INTERVAL.Milliseconds()) - logger.Info("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME)) - - constants.ROOT = ROOT - constants.ROOT_ARCHIVE = ROOT_ARCHIVE - constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES - constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD - constants.DELETE_THRESHOLD = DELETE_THRESHOLD - constants.SCAN_INTERVAL = SCAN_INTERVAL - constants.USE_MODTIME = USE_MODTIME - + // Run immediately, then on interval doRun() + t := time.NewTicker(constants.SCAN_INTERVAL) + defer t.Stop() for { - logger.Info("Running at %d", time.Now().UnixMilli()) - time.Sleep(SCAN_INTERVAL) - doRun() + select { + case ts := <-t.C: + logger.Info("Tick %d", ts.UnixMilli()) + doRun() + } } }