package main import ( "log" "os" "path/filepath" "regexp" "strconv" "strings" "time" "github.com/djherbis/times" ) var timeUnits = map[string]int64{ "ms": 1, "s": 1000, "m": 1000 * 60, "h": 1000 * 60 * 60, "d": 1000 * 60 * 60 * 24, "M": 1000 * 60 * 60 * 24 * 30, "y": 1000 * 60 * 60 * 24 * 365, } var numFilesArchived = 0 var numFilesDeleted = 0 var valueRegex, _ = regexp.Compile(`\d+`) var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`) func parseDuration(date string) int64 { var milliseconds int64 = 0 date = strings.TrimSpace(date) var parts = strings.Split(date, "_") for _, part := range parts { part = strings.TrimSpace(part) log.Printf("Parsing date part: %s", part) var value = valueRegex.FindString(part) var unit = unitRegex.FindString(part) if value == "" || unit == "" { log.Println("Invalid date part: " + part) continue } if _, ok := timeUnits[unit]; !ok { log.Println("Invalid date unit: " + unit) continue } log.Printf("Part %s parsed as: Value: %s, Unit: %s", part, value, unit) var valueMs, _ = strconv.ParseInt(value, 10, 16) valueMs = valueMs * timeUnits[unit] milliseconds += valueMs log.Printf("Adding %dms to duration, now: %d", valueMs, milliseconds) } return milliseconds } func getEnv(key, def string) string { var value, exists = os.LookupEnv(key) if exists { return value } return def } func scanRoot() { log.Println("Scanning root directory...") filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error { if err != nil { log.Printf("Error scanning %s: %s", path, err) return nil } path = filepath.ToSlash(path) if path == constants.ROOT { log.Printf("Skipping root directory %s...", path) return nil } // I forgot why this code was here... It doesn't make sense to me now // if info.IsDir() { // log.Printf("Skipping directory %s...", path) // return filepath.SkipDir // } // We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it // In fact iteration should be faster for small lists rather than hashing for _, ignoredDir := range constants.IGNORED_DIRECTORIES { log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES)) if strings.HasPrefix(path, ignoredDir) { log.Printf("Ignoring directory %s", path) return filepath.SkipDir } } processFile(path, info) return nil }) } func scanArchive() { log.Println("Scanning archive...") filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { log.Printf("Scanning archive file %s...", path) if err != nil { log.Printf("Error scanning %s: %s", path, err) return nil } path = filepath.ToSlash(path) if path == constants.ROOT_ARCHIVE { log.Printf("Skipping root directory %s...", path) return nil } processArchiveFile(path, info) return nil }) } func processFile(path string, info os.FileInfo) { now := time.Now().UnixMilli() log.Printf("Processing file %s...", path) timeType := "accessed" if constants.USE_MODTIME { timeType = "modified" } var fileTime int64 if constants.USE_MODTIME { fileTime = times.Get(info).ModTime().UnixMilli() } else { fileTime = times.Get(info).AccessTime().UnixMilli() } timeDelta := now - fileTime log.Printf("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta) if timeDelta > constants.ARCHIVE_THRESHOLD { log.Printf("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD) archiveFile(path) } } func processArchiveFile(path string, info os.FileInfo) { now := time.Now().UnixMilli() timeType := "accessed" if constants.USE_MODTIME { timeType = "modified" } var fileTime int64 if constants.USE_MODTIME { fileTime = times.Get(info).ModTime().UnixMilli() } else { fileTime = times.Get(info).AccessTime().UnixMilli() } timeDelta := now - int64(fileTime) fileTimeFormatted := time.UnixMilli(fileTime).Format("15:04:05.000000") timeDeltaFormatted := time.Duration(timeDelta) * time.Millisecond log.Printf("File %s last %s at %s, %s ago", path, timeType, fileTimeFormatted, timeDeltaFormatted) if timeDelta > constants.DELETE_THRESHOLD { log.Printf("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD) deleteFile(path) } } func archiveFile(path string) { newPath := constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) log.Printf("Archiving file %s to %s...", path, newPath) err := os.MkdirAll(filepath.Dir(newPath), os.ModePerm) if err != nil { log.Printf("Error creating directory %s: %s", filepath.Dir(newPath), err) return } // err := os.Rename(path, newPath) // if err != nil { // log.Printf("Error archiving file %s: %s", path, err) // return // } numFilesArchived++ } func deleteFile(path string) { log.Printf("Deleting file %s...", path) // err := os.Remove(path) // if err != nil { // log.Printf("Error deleting file %s: %s", path, err) // return // } numFilesDeleted++ } func cleanRoot() { files, err := os.ReadDir(constants.ROOT) if err != nil { log.Printf("Error reading root directory %s: %s", constants.ROOT, err) return } for _, file := range files { if !file.IsDir() { continue } empty, err := isDirEmpty(constants.ROOT + "/" + file.Name()) if err != nil { log.Printf("Error checking if directory %s is empty: %s", file.Name(), err) continue } log.Printf("Directory %s isempty: %t", file.Name(), empty) if empty { log.Printf("Deleting empty directory %s", file.Name()) err := os.RemoveAll(constants.ROOT + "/" + file.Name()) if err != nil { log.Printf("Error deleting empty directory %s: %s", file.Name(), err) } } } } func isDirEmpty(dirPath string) (bool, error) { var empty = true var ferr error = nil filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { if err != nil { log.Printf("Error scanning %s: %s", path, err) ferr = err return nil } if !info.IsDir() { empty = false log.Printf("Directory %s is not empty, found %s", dirPath, path) return filepath.SkipAll } return nil }) return empty, ferr } type Constants struct { ROOT string ROOT_ARCHIVE string IGNORED_DIRECTORIES []string ARCHIVE_THRESHOLD int64 DELETE_THRESHOLD int64 SCAN_INTERVAL time.Duration USE_MODTIME bool } var constants = &Constants{} func main() { log.SetFlags(log.Lmicroseconds) // Important: Access times don’t accumulate. // This implies that archiving the file won't alter its access time. // Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME, // where X represents the duration it can exist in the folder, // and Y represents the duration it can exist in the archive. ROOT := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "C:/tmp"))) ROOT_ARCHIVE := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) os.Mkdir(ROOT_ARCHIVE, os.ModePerm) IGNORED_DIRECTORIES := []string{} ignoredEnv := getEnv("IGNORED_DIRECTORIES", "") if ignoredEnv != "" { ignoredEnv = strings.TrimSpace(ignoredEnv) IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...) } IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE) for key, dir := range IGNORED_DIRECTORIES { IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir)) } ARCHIVE_THRESHOLD := parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d")) DELETE_THRESHOLD := parseDuration(getEnv("DELETE_THRESHOLD", "12h")) SCAN_INTERVAL := time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m"))) * time.Millisecond USE_MODTIME := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true" constants.ROOT = ROOT constants.ROOT_ARCHIVE = ROOT_ARCHIVE constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD constants.DELETE_THRESHOLD = DELETE_THRESHOLD constants.SCAN_INTERVAL = SCAN_INTERVAL constants.USE_MODTIME = USE_MODTIME log.Println("Input args parsed as:") log.Printf("ROOT: %s", ROOT) log.Printf("ROOT_ARCHIVE: %s", ROOT_ARCHIVE) log.Printf("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES) log.Printf("ARCHIVE_THRESHOLD: %s", time.Duration(ARCHIVE_THRESHOLD) * time.Millisecond) log.Printf("DELETE_THRESHOLD: %s", time.Duration(DELETE_THRESHOLD) * time.Millisecond) log.Printf("SCAN_INTERVAL: %s", SCAN_INTERVAL) log.Printf("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME)) doRun() for { os.Exit(0) time.Sleep(SCAN_INTERVAL) doRun() } } func doRun() { log.Printf("Running at %s", time.Now().Format("15:04:05")) scanRoot() scanArchive() cleanRoot() log.Printf("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted) numFilesArchived = 0 numFilesDeleted = 0 }