From 7c8c92be9a2ead5f31447875016231b5ca093418 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Majdand=C5=BEi=C4=87?=
Date: Sat, 22 Jun 2024 13:45:20 +0200
Subject: [PATCH] Cleanup

---
Review notes (text between "---" and the diff is dropped by git am):
 - archiveFile/deleteFile: removed the stray `defer os.Exit(1)`; it ended the
   process with status 1 right after the first archived or deleted file.
 - parseDuration: ParseInt bitSize 16 -> 64. The range error is discarded, so
   values above 32767 (e.g. "60000ms") were silently clamped to 32767.
 - valueRegex/unitRegex: regexp.MustCompile instead of discarding the error
   returned by regexp.Compile.
 - The post-image blob id on the "index" line predates these fixes.

 main.go | 622 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 311 insertions(+), 311 deletions(-)

diff --git a/main.go b/main.go
index ed54d86..8fd1c66 100644
--- a/main.go
+++ b/main.go
@@ -1,311 +1,311 @@
-package main
-
-import (
-	"log"
-	"os"
-	"path/filepath"
-	"regexp"
-	"strconv"
-	"strings"
-	"time"
-
-	"github.com/djherbis/times"
-)
-
-var timeUnits = map[string]int64{
-	"ms": 1,
-	"s":  1000,
-	"m":  1000 * 60,
-	"h":  1000 * 60 * 60,
-	"d":  1000 * 60 * 60 * 24,
-	"M":  1000 * 60 * 60 * 24 * 30,
-	"y":  1000 * 60 * 60 * 24 * 365,
-}
-
-var numFilesArchived = 0
-var numFilesDeleted = 0
-
-var valueRegex, _ = regexp.Compile(`\d+`)
-var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`)
-
-func parseDuration(date string) int64 {
-	var milliseconds int64 = 0
-
-	date = strings.TrimSpace(date)
-	var parts = strings.Split(date, "_")
-	for _, part := range parts {
-		part = strings.TrimSpace(part)
-		log.Printf("Parsing date part: %s\n", part)
-		var value = valueRegex.FindString(part)
-		var unit = unitRegex.FindString(part)
-
-		if value == "" || unit == "" {
-			log.Println("Invalid date part: " + part)
-			continue
-		}
-
-		if _, ok := timeUnits[unit]; !ok {
-			log.Println("Invalid date unit: " + unit)
-			continue
-		}
-		log.Printf("Part %s parsed as: Value: %s, Unit: %s\n", part, value, unit)
-
-		var valueMs, _ = strconv.ParseInt(value, 10, 16)
-		valueMs = valueMs * timeUnits[unit]
-		milliseconds += valueMs
-		log.Printf("Adding %dms to duration, now: %d\n", valueMs, milliseconds)
-	}
-
-	return milliseconds
-}
-
-func getEnv(key, def string) string {
-	var value, exists = os.LookupEnv(key)
-	if exists {
-		return value
-	}
-	return def
-}
-
-func scanRoot() {
-	log.Println("Scanning root directory...")
-	filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
-		if err != nil {
-			log.Printf("Error scanning %s: %s\n", path, err)
-			return nil
-		}
-		path = filepath.ToSlash(path)
-
-		if path == constants.ROOT {
-			log.Printf("Skipping root directory %s...\n", path)
-			return nil
-		}
-
-		// I forgot why this code was here... It doesn't make sense to me now
-		// if info.IsDir() {
-		// 	log.Printf("Skipping directory %s...\n", path)
-		// 	return filepath.SkipDir
-		// }
-
-		// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
-		// In fact iteration should be faster for small lists rather than hashing
-		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
-			log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES))
-			if strings.HasPrefix(path, ignoredDir) {
-				log.Printf("Ignoring directory %s\n", path)
-				return filepath.SkipDir
-			}
-		}
-
-		processFile(path, info)
-		return nil
-	})
-}
-
-func scanArchive() {
-	log.Println("Scanning archive...")
-	filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error {
-		if err != nil {
-			log.Printf("Error scanning %s: %s\n", path, err)
-			return nil
-		}
-		path = filepath.ToSlash(path)
-
-		if path == constants.ROOT_ARCHIVE {
-			log.Printf("Skipping root directory %s...\n", path)
-			return nil
-		}
-
-		processArchiveFile(path, info)
-		return nil
-	})
-}
-
-func processFile(path string, info os.FileInfo) {
-	var now = time.Now().UnixMilli()
-
-	var timeType = "accessed"
-	if constants.USE_MODTIME {
-		timeType = "modified"
-	}
-
-	var fileTime int64 = 0
-	if constants.USE_MODTIME {
-		fileTime = times.Get(info).ModTime().UnixMilli()
-	} else {
-		fileTime = times.Get(info).AccessTime().UnixMilli()
-	}
-
-	var timeDelta = now - fileTime
-	log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta)
-	if timeDelta > constants.ARCHIVE_THRESHOLD {
-		log.Printf("File %s was %s more than %dms ago, archiving...\n", path, timeType, constants.ARCHIVE_THRESHOLD)
-		archiveFile(path)
-	}
-}
-
-func processArchiveFile(path string, info os.FileInfo) {
-	var now = time.Now().UnixMilli()
-
-	var timeType = "accessed"
-	if constants.USE_MODTIME {
-		timeType = "modified"
-	}
-
-	var fileTime int64 = 0
-	if constants.USE_MODTIME {
-		fileTime = times.Get(info).ModTime().UnixMilli()
-	} else {
-		fileTime = times.Get(info).AccessTime().UnixMilli()
-	}
-
-	var timeDelta = now - int64(fileTime)
-	log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta)
-
-	if timeDelta > constants.DELETE_THRESHOLD {
-		log.Printf("File %s was %s more than %dms ago, deleting...\n", path, timeType, constants.DELETE_THRESHOLD)
-		deleteFile(path)
-	}
-}
-
-func archiveFile(path string) {
-	// defer os.Exit(1)
-	var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
-	log.Printf("Archiving file %s to %s...\n", path, newPath)
-
-	os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
-	var err = os.Rename(path, newPath)
-	if err != nil {
-		log.Printf("Error archiving file %s: %s\n", path, err)
-		return
-	}
-	numFilesArchived++
-}
-
-func deleteFile(path string) {
-	// defer os.Exit(1)
-	log.Printf("Deleting file %s...\n", path)
-	var err = os.Remove(path)
-	if err != nil {
-		log.Printf("Error deleting file %s: %s\n", path, err)
-		return
-	}
-	numFilesDeleted++
-}
-
-func cleanRoot() {
-	var files, err = os.ReadDir(constants.ROOT)
-	if err != nil {
-		log.Printf("Error reading root directory %s: %s\n", constants.ROOT, err)
-		return
-	}
-	for _, file := range files {
-		if !file.IsDir() {
-			continue
-		}
-		var empty, err = isDirEmpty(constants.ROOT + "/" + file.Name())
-		if err != nil {
-			log.Printf("Error checking if directory %s is empty: %s\n", file.Name(), err)
-			continue
-		}
-		log.Printf("Directory %s isempty: %t\n", file.Name(), empty)
-		if empty {
-			log.Printf("Deleting empty directory %s\n", file.Name())
-			var err = os.RemoveAll(constants.ROOT + "/" + file.Name())
-			if err != nil {
-				log.Printf("Error deleting empty directory %s: %s\n", file.Name(), err)
-			}
-		}
-	}
-}
-func isDirEmpty(dirPath string) (bool, error) {
-	var empty = true
-	var ferr error = nil
-
-	filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
-		if err != nil {
-			log.Printf("Error scanning %s: %s\n", path, err)
-			ferr = err
-			return nil
-		}
-		if !info.IsDir() {
-			empty = false
-			log.Printf("Directory %s is not empty, found %s\n", dirPath, path)
-			return filepath.SkipAll
-		}
-		return nil
-	})
-	return empty, ferr
-}
-
-type Constants struct {
-	ROOT                string
-	ROOT_ARCHIVE        string
-	IGNORED_DIRECTORIES []string
-	ARCHIVE_THRESHOLD   int64
-	DELETE_THRESHOLD    int64
-	SCAN_INTERVAL       time.Duration
-	USE_MODTIME         bool
-}
-
-func doRun() {
-	scanRoot()
-	scanArchive()
-	cleanRoot()
-	log.Printf("Archived %d files, deleted %d files\n", numFilesArchived, numFilesDeleted)
-	numFilesArchived = 0
-	numFilesDeleted = 0
-}
-
-var constants = Constants{}
-
-func main() {
-	log.SetFlags(0b111)
-	// Important: Access times don’t accumulate.
-	// This implies that archiving the file won't alter its access time.
-	// Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
-	// where X represents the duration it can exist in the folder,
-	// and Y represents the duration it can exist in the archive.
-
-	var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp")))
-	var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive")))
-	os.Mkdir(ROOT_ARCHIVE, os.ModePerm)
-	var IGNORED_DIRECTORIES = []string{}
-	var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "")
-	if ignoredEnv != "" {
-		ignoredEnv = strings.TrimSpace(ignoredEnv)
-		IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...)
-	}
-	IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE)
-	for key, dir := range IGNORED_DIRECTORIES {
-		IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir))
-	}
-	var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
-	var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h"))
-	var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6)
-	var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"
-
-	constants.ROOT = ROOT
-	constants.ROOT_ARCHIVE = ROOT_ARCHIVE
-	constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES
-	constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD
-	constants.DELETE_THRESHOLD = DELETE_THRESHOLD
-	constants.SCAN_INTERVAL = SCAN_INTERVAL
-	constants.USE_MODTIME = USE_MODTIME
-
-	log.Println("Input args parsed as:")
-	log.Printf("ROOT: %s\n", ROOT)
-	log.Printf("ROOT_ARCHIVE: %s\n", ROOT_ARCHIVE)
-	log.Printf("IGNORED_DIRECTORIES: %s\n", IGNORED_DIRECTORIES)
-	log.Printf("ARCHIVE_THRESHOLD: %d\n", ARCHIVE_THRESHOLD)
-	log.Printf("DELETE_THRESHOLD: %d\n", DELETE_THRESHOLD)
-	log.Printf("SCAN_INTERVAL: %d\n", SCAN_INTERVAL.Milliseconds())
-	log.Printf("USE_MODTIME: %s\n", strconv.FormatBool(USE_MODTIME))
-
-	doRun()
-	for {
-		log.Printf("Running at %d", time.Now().UnixMilli())
-		time.Sleep(SCAN_INTERVAL)
-		doRun()
-	}
-}
+package main
+
+import (
+	"log"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/djherbis/times"
+)
+
+var timeUnits = map[string]int64{
+	"ms": 1,
+	"s":  1000,
+	"m":  1000 * 60,
+	"h":  1000 * 60 * 60,
+	"d":  1000 * 60 * 60 * 24,
+	"M":  1000 * 60 * 60 * 24 * 30,
+	"y":  1000 * 60 * 60 * 24 * 365,
+}
+
+var numFilesArchived = 0
+var numFilesDeleted = 0
+
+var valueRegex = regexp.MustCompile(`\d+`)
+var unitRegex = regexp.MustCompile(`[a-zA-Z]+`)
+
+func parseDuration(date string) int64 {
+	var milliseconds int64 = 0
+
+	date = strings.TrimSpace(date)
+	var parts = strings.Split(date, "_")
+	for _, part := range parts {
+		part = strings.TrimSpace(part)
+		log.Printf("Parsing date part: %s", part)
+		var value = valueRegex.FindString(part)
+		var unit = unitRegex.FindString(part)
+
+		if value == "" || unit == "" {
+			log.Println("Invalid date part: " + part)
+			continue
+		}
+
+		if _, ok := timeUnits[unit]; !ok {
+			log.Println("Invalid date unit: " + unit)
+			continue
+		}
+		log.Printf("Part %s parsed as: Value: %s, Unit: %s", part, value, unit)
+
+		var valueMs, _ = strconv.ParseInt(value, 10, 64)
+		valueMs = valueMs * timeUnits[unit]
+		milliseconds += valueMs
+		log.Printf("Adding %dms to duration, now: %d", valueMs, milliseconds)
+	}
+
+	return milliseconds
+}
+
+func getEnv(key, def string) string {
+	var value, exists = os.LookupEnv(key)
+	if exists {
+		return value
+	}
+	return def
+}
+
+func scanRoot() {
+	log.Println("Scanning root directory...")
+	filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			log.Printf("Error scanning %s: %s", path, err)
+			return nil
+		}
+		path = filepath.ToSlash(path)
+
+		if path == constants.ROOT {
+			log.Printf("Skipping root directory %s...", path)
+			return nil
+		}
+
+		// I forgot why this code was here... It doesn't make sense to me now
+		// if info.IsDir() {
+		// 	log.Printf("Skipping directory %s...", path)
+		// 	return filepath.SkipDir
+		// }
+
+		// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
+		// In fact iteration should be faster for small lists rather than hashing
+		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
+			log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES))
+			if strings.HasPrefix(path, ignoredDir) {
+				log.Printf("Ignoring directory %s", path)
+				return filepath.SkipDir
+			}
+		}
+
+		processFile(path, info)
+		return nil
+	})
+}
+
+func scanArchive() {
+	log.Println("Scanning archive...")
+	filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			log.Printf("Error scanning %s: %s", path, err)
+			return nil
+		}
+		path = filepath.ToSlash(path)
+
+		if path == constants.ROOT_ARCHIVE {
+			log.Printf("Skipping root directory %s...", path)
+			return nil
+		}
+
+		processArchiveFile(path, info)
+		return nil
+	})
+}
+
+func processFile(path string, info os.FileInfo) {
+	var now = time.Now().UnixMilli()
+
+	var timeType = "accessed"
+	if constants.USE_MODTIME {
+		timeType = "modified"
+	}
+
+	var fileTime int64 = 0
+	if constants.USE_MODTIME {
+		fileTime = times.Get(info).ModTime().UnixMilli()
+	} else {
+		fileTime = times.Get(info).AccessTime().UnixMilli()
+	}
+
+	var timeDelta = now - fileTime
+	log.Printf("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
+	if timeDelta > constants.ARCHIVE_THRESHOLD {
+		log.Printf("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD)
+		archiveFile(path)
+	}
+}
+
+func processArchiveFile(path string, info os.FileInfo) {
+	var now = time.Now().UnixMilli()
+
+	var timeType = "accessed"
+	if constants.USE_MODTIME {
+		timeType = "modified"
+	}
+
+	var fileTime int64 = 0
+	if constants.USE_MODTIME {
+		fileTime = times.Get(info).ModTime().UnixMilli()
+	} else {
+		fileTime = times.Get(info).AccessTime().UnixMilli()
+	}
+
+	var timeDelta = now - int64(fileTime)
+	log.Printf("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
+
+	if timeDelta > constants.DELETE_THRESHOLD {
+		log.Printf("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD)
+		deleteFile(path)
+	}
+}
+
+func archiveFile(path string) {
+	// Mirror the file's ROOT-relative path under ROOT_ARCHIVE, then move it there.
+	var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
+	log.Printf("Archiving file %s to %s...", path, newPath)
+
+	os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
+	var err = os.Rename(path, newPath)
+	if err != nil {
+		log.Printf("Error archiving file %s: %s", path, err)
+		return
+	}
+	numFilesArchived++
+}
+
+func deleteFile(path string) {
+	// A failed removal is logged and leaves numFilesDeleted untouched.
+	log.Printf("Deleting file %s...", path)
+	err := os.Remove(path)
+	if err != nil {
+		log.Printf("Error deleting file %s: %s", path, err)
+		return
+	}
+	numFilesDeleted++
+}
+
+func cleanRoot() {
+	files, err := os.ReadDir(constants.ROOT)
+	if err != nil {
+		log.Printf("Error reading root directory %s: %s", constants.ROOT, err)
+		return
+	}
+	for _, file := range files {
+		if !file.IsDir() {
+			continue
+		}
+		empty, err := isDirEmpty(constants.ROOT + "/" + file.Name())
+		if err != nil {
+			log.Printf("Error checking if directory %s is empty: %s", file.Name(), err)
+			continue
+		}
+		log.Printf("Directory %s isempty: %t", file.Name(), empty)
+		if empty {
+			log.Printf("Deleting empty directory %s", file.Name())
+			err := os.RemoveAll(constants.ROOT + "/" + file.Name())
+			if err != nil {
+				log.Printf("Error deleting empty directory %s: %s", file.Name(), err)
+			}
+		}
+	}
+}
+func isDirEmpty(dirPath string) (bool, error) {
+	var empty = true
+	var ferr error = nil
+
+	filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			log.Printf("Error scanning %s: %s", path, err)
+			ferr = err
+			return nil
+		}
+		if !info.IsDir() {
+			empty = false
+			log.Printf("Directory %s is not empty, found %s", dirPath, path)
+			return filepath.SkipAll
+		}
+		return nil
+	})
+	return empty, ferr
+}
+
+type Constants struct {
+	ROOT                string
+	ROOT_ARCHIVE        string
+	IGNORED_DIRECTORIES []string
+	ARCHIVE_THRESHOLD   int64
+	DELETE_THRESHOLD    int64
+	SCAN_INTERVAL       time.Duration
+	USE_MODTIME         bool
+}
+
+func doRun() {
+	scanRoot()
+	scanArchive()
+	cleanRoot()
+	log.Printf("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted)
+	numFilesArchived = 0
+	numFilesDeleted = 0
+}
+
+var constants = Constants{}
+
+func main() {
+	log.SetFlags(log.Lmicroseconds)
+	// Important: Access times don’t accumulate.
+	// This implies that archiving the file won't alter its access time.
+	// Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
+	// where X represents the duration it can exist in the folder,
+	// and Y represents the duration it can exist in the archive.
+
+	ROOT := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/c/tmp")))
+	ROOT_ARCHIVE := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive")))
+	os.Mkdir(ROOT_ARCHIVE, os.ModePerm)
+	IGNORED_DIRECTORIES := []string{}
+	ignoredEnv := getEnv("IGNORED_DIRECTORIES", "")
+	if ignoredEnv != "" {
+		ignoredEnv = strings.TrimSpace(ignoredEnv)
+		IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...)
+	}
+	IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE)
+	for key, dir := range IGNORED_DIRECTORIES {
+		IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir))
+	}
+	ARCHIVE_THRESHOLD := parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
+	DELETE_THRESHOLD := parseDuration(getEnv("DELETE_THRESHOLD", "12h"))
+	SCAN_INTERVAL := time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6)
+	USE_MODTIME := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"
+
+	constants.ROOT = ROOT
+	constants.ROOT_ARCHIVE = ROOT_ARCHIVE
+	constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES
+	constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD
+	constants.DELETE_THRESHOLD = DELETE_THRESHOLD
+	constants.SCAN_INTERVAL = SCAN_INTERVAL
+	constants.USE_MODTIME = USE_MODTIME
+
+	log.Println("Input args parsed as:")
+	log.Printf("ROOT: %s", ROOT)
+	log.Printf("ROOT_ARCHIVE: %s", ROOT_ARCHIVE)
+	log.Printf("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES)
+	log.Printf("ARCHIVE_THRESHOLD: %d", ARCHIVE_THRESHOLD)
+	log.Printf("DELETE_THRESHOLD: %d", DELETE_THRESHOLD)
+	log.Printf("SCAN_INTERVAL: %d", SCAN_INTERVAL.Milliseconds())
+	log.Printf("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME))
+
+	doRun()
+	for {
+		log.Printf("Running at %d", time.Now().UnixMilli())
+		time.Sleep(SCAN_INTERVAL)
+		doRun()
+	}
+}