package main import ( "flag" "os" "path/filepath" "regexp" "strconv" "strings" "time" logger "git.site.quack-lab.dev/dave/cylogger" "github.com/djherbis/times" ) var timeUnits = map[string]int64{ "ms": 1, "s": 1000, "m": 1000 * 60, "h": 1000 * 60 * 60, "d": 1000 * 60 * 60 * 24, "M": 1000 * 60 * 60 * 24 * 30, "y": 1000 * 60 * 60 * 24 * 365, } var numFilesArchived = 0 var numFilesDeleted = 0 var valueRegex, _ = regexp.Compile(`\d+`) var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`) func parseDuration(date string) int64 { var milliseconds int64 = 0 date = strings.TrimSpace(date) var parts = strings.Split(date, "_") for _, part := range parts { part = strings.TrimSpace(part) logger.Info("Parsing date part: %s", part) var value = valueRegex.FindString(part) var unit = unitRegex.FindString(part) if value == "" || unit == "" { logger.Error("Invalid date part: " + part) continue } if _, ok := timeUnits[unit]; !ok { logger.Error("Invalid date unit: " + unit) continue } logger.Info("Part %s parsed as: Value: %s, Unit: %s", part, value, unit) var valueMs, _ = strconv.ParseInt(value, 10, 16) valueMs = valueMs * timeUnits[unit] milliseconds += valueMs logger.Info("Adding %dms to duration, now: %d", valueMs, milliseconds) } return milliseconds } func getEnv(key, def string) string { var value, exists = os.LookupEnv(key) if exists { return value } return def } func scanRoot() { log := logger.Default.WithPrefix("scanRoot") log.Info("Scanning root directory...") filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error { if err != nil { log.Error("Error scanning %s: %s", path, err) return nil } path = filepath.ToSlash(path) if path == constants.ROOT { log.Info("Skipping root directory %s...", path) return nil } // I forgot why this code was here... It doesn't make sense to me now // if info.IsDir() { // log.Info("Skipping directory %s...", path) // return filepath.SkipDir // } // We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it // In fact iteration should be faster for small lists rather than hashing for _, ignoredDir := range constants.IGNORED_DIRECTORIES { log.Info("Ignored directories: %s", constants.IGNORED_DIRECTORIES) if strings.HasPrefix(path, ignoredDir) { log.Info("Ignoring directory %s", path) return filepath.SkipDir } } processFile(path, info) return nil }) } func scanArchive() { log := logger.Default.WithPrefix("scanArchive") log.Info("Scanning archive...") filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { if err != nil { log.Error("Error scanning %s: %s", path, err) return nil } path = filepath.ToSlash(path) if path == constants.ROOT_ARCHIVE { log.Info("Skipping root directory %s...", path) return nil } processArchiveFile(path, info) return nil }) } func processFile(path string, info os.FileInfo) { var now = time.Now().UnixMilli() log := logger.Default.WithPrefix("processFile").WithPrefix(path) var timeType = "accessed" if constants.USE_MODTIME { timeType = "modified" } var fileTime int64 = 0 if constants.USE_MODTIME { fileTime = times.Get(info).ModTime().UnixMilli() } else { fileTime = times.Get(info).AccessTime().UnixMilli() } var timeDelta = now - fileTime log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta) if timeDelta > constants.ARCHIVE_THRESHOLD { log.Info("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD) archiveFile(path) } } func processArchiveFile(path string, info os.FileInfo) { var now = time.Now().UnixMilli() log := logger.Default.WithPrefix("processArchiveFile").WithPrefix(path) var timeType = "accessed" if constants.USE_MODTIME { timeType = "modified" } var fileTime int64 = 0 if constants.USE_MODTIME { fileTime = times.Get(info).ModTime().UnixMilli() } else { fileTime = times.Get(info).AccessTime().UnixMilli() } var timeDelta = now - int64(fileTime) log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta) if timeDelta > constants.DELETE_THRESHOLD { log.Info("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD) deleteFile(path) } } func archiveFile(path string) { // defer os.Exit(1) var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) log := logger.Default.WithPrefix("archiveFile").WithPrefix(path) log.Info("Archiving file %s to %s...", path, newPath) os.MkdirAll(filepath.Dir(newPath), os.ModePerm) var err = os.Rename(path, newPath) if err != nil { log.Error("Error archiving file %s: %s", path, err) return } numFilesArchived++ } func deleteFile(path string) { log := logger.Default.WithPrefix("deleteFile").WithPrefix(path) // defer os.Exit(1) log.Info("Deleting file %s...", path) var err = os.Remove(path) if err != nil { log.Error("Error deleting file %s: %s", path, err) return } numFilesDeleted++ } func cleanRoot() { log := logger.Default.WithPrefix("cleanRoot") var files, err = os.ReadDir(constants.ROOT) if err != nil { log.Error("Error reading root directory %s: %s", constants.ROOT, err) return } for _, file := range files { if !file.IsDir() { continue } var empty, err = isDirEmpty(constants.ROOT + "/" + file.Name()) if err != nil { log.Error("Error checking if directory %s is empty: %s", file.Name(), err) continue } log.Info("Directory %s isempty: %t", file.Name(), empty) if empty { log.Info("Deleting empty directory %s", file.Name()) var err = os.RemoveAll(constants.ROOT + "/" + file.Name()) if err != nil { log.Error("Error deleting empty directory %s: %s", file.Name(), err) } } } } func isDirEmpty(dirPath string) (bool, error) { var empty = true var ferr error = nil log := logger.Default.WithPrefix("isDirEmpty").WithPrefix(dirPath) filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { if err != nil { log.Error("Error scanning %s: %s", path, err) ferr = err return nil } if !info.IsDir() { empty = false log.Info("Directory %s is not empty, found %s", dirPath, path) return filepath.SkipAll } return nil }) return empty, ferr } type Constants struct { ROOT string ROOT_ARCHIVE string IGNORED_DIRECTORIES []string ARCHIVE_THRESHOLD int64 DELETE_THRESHOLD int64 SCAN_INTERVAL time.Duration USE_MODTIME bool } func doRun() { scanRoot() scanArchive() cleanRoot() logger.Info("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted) numFilesArchived = 0 numFilesDeleted = 0 } var constants = Constants{} func main() { flag.Parse() logger.InitFlag() // Important: Access times don’t accumulate. // This implies that archiving the file won't alter its access time. // Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME, // where X represents the duration it can exist in the folder, // and Y represents the duration it can exist in the archive. logger.Info("Starting directory cleaner") var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) os.Mkdir(ROOT_ARCHIVE, os.ModePerm) var IGNORED_DIRECTORIES = []string{} var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "") if ignoredEnv != "" { ignoredEnv = strings.TrimSpace(ignoredEnv) IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...) } IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE) for key, dir := range IGNORED_DIRECTORIES { IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir)) } var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d")) var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h")) var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6) var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true" logger.Info("Input args parsed as:") logger.Info("ROOT: %s", ROOT) logger.Info("ROOT_ARCHIVE: %s", ROOT_ARCHIVE) logger.Info("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES) logger.Info("ARCHIVE_THRESHOLD: %d", ARCHIVE_THRESHOLD) logger.Info("DELETE_THRESHOLD: %d", DELETE_THRESHOLD) logger.Info("SCAN_INTERVAL: %d", SCAN_INTERVAL.Milliseconds()) logger.Info("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME)) constants.ROOT = ROOT constants.ROOT_ARCHIVE = ROOT_ARCHIVE constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD constants.DELETE_THRESHOLD = DELETE_THRESHOLD constants.SCAN_INTERVAL = SCAN_INTERVAL constants.USE_MODTIME = USE_MODTIME doRun() for { logger.Info("Running at %d", time.Now().UnixMilli()) time.Sleep(SCAN_INTERVAL) doRun() } }