diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5f4ebb8 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +main.exe diff --git a/deploy.sh b/deploy.sh new file mode 100644 index 0000000..5127006 --- /dev/null +++ b/deploy.sh @@ -0,0 +1 @@ +docker build -t directory-cleaner . \ No newline at end of file diff --git a/main.go b/main.go index 956e80d..61fe473 100644 --- a/main.go +++ b/main.go @@ -35,7 +35,7 @@ func parseDuration(date string) int64 { var parts = strings.Split(date, "_") for _, part := range parts { part = strings.TrimSpace(part) - log.Printf("Parsing date part: %s", part) + log.Printf("Parsing date part: %s\n", part) var value = valueRegex.FindString(part) var unit = unitRegex.FindString(part) @@ -48,12 +48,12 @@ func parseDuration(date string) int64 { log.Println("Invalid date unit: " + unit) continue } - log.Printf("Part %s parsed as: Value: %s, Unit: %s", part, value, unit) + log.Printf("Part %s parsed as: Value: %s, Unit: %s\n", part, value, unit) var valueMs, _ = strconv.ParseInt(value, 10, 16) valueMs = valueMs * timeUnits[unit] milliseconds += valueMs - log.Printf("Adding %dms to duration, now: %d", valueMs, milliseconds) + log.Printf("Adding %dms to duration, now: %d\n", valueMs, milliseconds) } return milliseconds @@ -67,32 +67,211 @@ func getEnv(key, def string) string { return def } +func scanRoot() { + log.Println("Scanning root directory...") + filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error { + if err != nil { + log.Printf("Error scanning %s: %s\n", path, err) + return nil + } + path = filepath.ToSlash(path) + + if path == constants.ROOT { + log.Printf("Skipping root directory %s...\n", path) + return nil + } + + // I forgot why this code was here... It doesn't make sense to me now + // if info.IsDir() { + // log.Printf("Skipping directory %s...\n", path) + // return filepath.SkipDir + // } + + // We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it + // In fact iteration should be faster for small lists rather than hashing + for _, ignoredDir := range constants.IGNORED_DIRECTORIES { + log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES)) + if strings.HasPrefix(path, ignoredDir) { + log.Printf("Ignoring directory %s\n", path) + return filepath.SkipDir + } + } + + processFile(path, info) + return nil + }) +} + +func scanArchive() { + log.Println("Scanning archive...") + filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { + if err != nil { + log.Printf("Error scanning %s: %s\n", path, err) + return nil + } + path = filepath.ToSlash(path) + + if path == constants.ROOT_ARCHIVE { + log.Printf("Skipping root directory %s...\n", path) + return nil + } + + processArchiveFile(path, info) + return nil + }) +} + +func processFile(path string, info os.FileInfo) { + var now = time.Now().UnixMilli() + + var timeType = "accessed" + if constants.USE_MODTIME { + timeType = "modified" + } + + var fileTime int64 = 0 + if constants.USE_MODTIME { + fileTime = times.Get(info).ModTime().UnixMilli() + } else { + fileTime = times.Get(info).AccessTime().UnixMilli() + } + + var timeDelta = now - fileTime + log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta) + if timeDelta > constants.ARCHIVE_THRESHOLD { + log.Printf("File %s was %s more than %dms ago, archiving...\n", path, timeType, constants.ARCHIVE_THRESHOLD) + archiveFile(path) + } +} + +func processArchiveFile(path string, info os.FileInfo) { + var now = time.Now().UnixMilli() + + var timeType = "accessed" + if constants.USE_MODTIME { + timeType = "modified" + } + + var fileTime int64 = 0 + if constants.USE_MODTIME { + fileTime = times.Get(info).ModTime().UnixMilli() + } else { + fileTime = times.Get(info).AccessTime().UnixMilli() + } + + var timeDelta = now - int64(fileTime) + log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta) + + if timeDelta > constants.DELETE_THRESHOLD { + log.Printf("File %s was %s more than %dms ago, deleting...\n", path, timeType, constants.DELETE_THRESHOLD) + deleteFile(path) + } +} + +func archiveFile(path string) { + // defer os.Exit(1) + var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) + log.Printf("Archiving file %s to %s...\n", path, newPath) + + os.MkdirAll(filepath.Dir(newPath), os.ModePerm) + var err = os.Rename(path, newPath) + if err != nil { + log.Printf("Error archiving file %s: %s\n", path, err) + return + } + numFilesArchived++ +} + +func deleteFile(path string) { + // defer os.Exit(1) + log.Printf("Deleting file %s...\n", path) + var err = os.Remove(path) + if err != nil { + log.Printf("Error deleting file %s: %s\n", path, err) + return + } + numFilesDeleted++ +} + +func cleanRoot() { + var files, err = os.ReadDir(constants.ROOT) + if err != nil { + log.Printf("Error reading root directory %s: %s\n", constants.ROOT, err) + return + } + for _, file := range files { + if !file.IsDir() { + continue + } + var empty, err = isDirEmpty(constants.ROOT + "/" + file.Name()) + if err != nil { + log.Printf("Error checking if directory %s is empty: %s\n", file.Name(), err) + continue + } + log.Printf("Directory %s isempty: %t\n", file.Name(), empty) + if empty { + log.Printf("Deleting empty directory %s\n", file.Name()) + var err = os.RemoveAll(constants.ROOT + "/" + file.Name()) + if err != nil { + log.Printf("Error deleting empty directory %s: %s\n", file.Name(), err) + } + } + } +} +func isDirEmpty(dirPath string) (bool, error) { + var empty = true + var ferr error = nil + + filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + log.Printf("Error scanning %s: %s\n", path, err) + ferr = err + return nil + } + if !info.IsDir() { + empty = false + log.Printf("Directory %s is not empty, found %s\n", dirPath, path) + return filepath.SkipAll + } + return nil + }) + return empty, ferr +} + type Constants struct { ROOT string ROOT_ARCHIVE string IGNORED_DIRECTORIES []string - ARCHIVE_THRESHOLD time.Duration - DELETE_THRESHOLD time.Duration + ARCHIVE_THRESHOLD int64 + DELETE_THRESHOLD int64 SCAN_INTERVAL time.Duration USE_MODTIME bool } -var constants = &Constants{} +func doRun() { + scanRoot() + scanArchive() + cleanRoot() + log.Printf("Archived %d files, deleted %d files\n", numFilesArchived, numFilesDeleted) + numFilesArchived = 0 + numFilesDeleted = 0 +} + +var constants = Constants{} -// region main func main() { - log.SetFlags(log.Lmicroseconds | log.Lshortfile) + log.SetFlags(0b111) // Important: Access times don’t accumulate. // This implies that archiving the file won't alter its access time. // Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME, // where X represents the duration it can exist in the folder, // and Y represents the duration it can exist in the archive. - ROOT := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) - ROOT_ARCHIVE := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) + var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) + var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) os.Mkdir(ROOT_ARCHIVE, os.ModePerm) - IGNORED_DIRECTORIES := []string{} - ignoredEnv := getEnv("IGNORED_DIRECTORIES", "") + var IGNORED_DIRECTORIES = []string{} + var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "") if ignoredEnv != "" { ignoredEnv = strings.TrimSpace(ignoredEnv) IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...) @@ -101,10 +280,10 @@ func main() { for key, dir := range IGNORED_DIRECTORIES { IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir)) } - ARCHIVE_THRESHOLD := time.Duration(parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))) * time.Millisecond - DELETE_THRESHOLD := time.Duration(parseDuration(getEnv("DELETE_THRESHOLD", "12h"))) * time.Millisecond - SCAN_INTERVAL := time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m"))) * time.Millisecond - USE_MODTIME := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true" + var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d")) + var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h")) + var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6) + var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true" constants.ROOT = ROOT constants.ROOT_ARCHIVE = ROOT_ARCHIVE @@ -115,213 +294,18 @@ func main() { constants.USE_MODTIME = USE_MODTIME log.Println("Input args parsed as:") - log.Printf("ROOT: %s", ROOT) - log.Printf("ROOT_ARCHIVE: %s", ROOT_ARCHIVE) - log.Printf("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES) - log.Printf("ARCHIVE_THRESHOLD: %s", ARCHIVE_THRESHOLD) - log.Printf("DELETE_THRESHOLD: %s", DELETE_THRESHOLD) - log.Printf("SCAN_INTERVAL: %s", SCAN_INTERVAL) - log.Printf("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME)) - + log.Printf("ROOT: %s\n", ROOT) + log.Printf("ROOT_ARCHIVE: %s\n", ROOT_ARCHIVE) + log.Printf("IGNORED_DIRECTORIES: %s\n", IGNORED_DIRECTORIES) + log.Printf("ARCHIVE_THRESHOLD: %d\n", ARCHIVE_THRESHOLD) + log.Printf("DELETE_THRESHOLD: %d\n", DELETE_THRESHOLD) + log.Printf("SCAN_INTERVAL: %d\n", SCAN_INTERVAL.Milliseconds()) + log.Printf("USE_MODTIME: %s\n", strconv.FormatBool(USE_MODTIME)) + doRun() for { - // os.Exit(0) + log.Printf("Running at %d", time.Now().UnixMilli()) time.Sleep(SCAN_INTERVAL) doRun() } } - -func doRun() { - log.Printf("Running at %s", time.Now().Format(time.DateTime)) - scanRoot() - scanArchive() - cleanRoot() - log.Printf("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted) - numFilesArchived = 0 - numFilesDeleted = 0 -} - -// region scanRoot -func scanRoot() { - log.Println("Scanning root directory...") - filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error { - log.Printf("Scanning file %s...", path) - if err != nil { - log.Printf("Error scanning %s: %s", path, err) - return nil - } - path = filepath.ToSlash(path) - - if path == constants.ROOT { - log.Printf("Skipping root directory %s...", path) - return nil - } - - // I forgot why this code was here... It doesn't make sense to me now - // if info.IsDir() { - // log.Printf("Skipping directory %s...", path) - // return filepath.SkipDir - // } - - // We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it - // In fact iteration should be faster for small lists rather than hashing - for _, ignoredDir := range constants.IGNORED_DIRECTORIES { - // log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES)) - if strings.HasPrefix(path, ignoredDir) { - log.Printf("Ignoring directory %s", path) - return filepath.SkipDir - } - } - - go processFile(path, info) - return nil - }) -} - -func processFile(path string, info os.FileInfo) { - now := time.Now().UnixMilli() - log.Printf("Processing file %s...", path) - - timeType := "accessed" - if constants.USE_MODTIME { - timeType = "modified" - } - - var fileTime int64 - if constants.USE_MODTIME { - fileTime = times.Get(info).ModTime().UnixMilli() - } else { - fileTime = times.Get(info).AccessTime().UnixMilli() - } - - timeDelta := now - fileTime - fileTimeFormatted := time.UnixMilli(fileTime).Format(time.DateTime) - timeDeltaFormatted := time.Duration(timeDelta) * time.Millisecond - log.Printf("File %s last %s at %s, %s ago", path, timeType, fileTimeFormatted, timeDeltaFormatted) - if timeDelta > constants.ARCHIVE_THRESHOLD.Milliseconds() { - log.Printf("File %s was %s more than %s ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD) - go archiveFile(path) - } -} - -func archiveFile(path string) { - newPath := constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) - log.Printf("Archiving file %s to %s...", path, newPath) - - err := os.MkdirAll(filepath.Dir(newPath), os.ModePerm) - if err != nil { - log.Printf("Error creating directory %s: %s", filepath.Dir(newPath), err) - return - } - err = os.Rename(path, newPath) - if err != nil { - log.Printf("Error archiving file %s: %s", path, err) - return - } - numFilesArchived++ -} - -// region scanArchive -func scanArchive() { - log.Println("Scanning archive...") - filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { - log.Printf("Scanning archive file %s...", path) - if err != nil { - log.Printf("Error scanning %s: %s", path, err) - return nil - } - path = filepath.ToSlash(path) - - if path == constants.ROOT_ARCHIVE { - log.Printf("Skipping root directory %s...", path) - return nil - } - - go processArchiveFile(path, info) - return nil - }) -} - -func processArchiveFile(path string, info os.FileInfo) { - now := time.Now().UnixMilli() - - timeType := "accessed" - if constants.USE_MODTIME { - timeType = "modified" - } - - var fileTime int64 - if constants.USE_MODTIME { - fileTime = times.Get(info).ModTime().UnixMilli() - } else { - fileTime = times.Get(info).AccessTime().UnixMilli() - } - - timeDelta := now - int64(fileTime) - - fileTimeFormatted := time.UnixMilli(fileTime).Format(time.DateTime) - timeDeltaFormatted := time.Duration(timeDelta) * time.Millisecond - log.Printf("File %s last %s at %s, %s ago", path, timeType, fileTimeFormatted, timeDeltaFormatted) - - if timeDelta > constants.DELETE_THRESHOLD.Milliseconds() { - log.Printf("File %s was %s more than %s ago, deleting...", path, timeType, constants.DELETE_THRESHOLD) - go deleteFile(path) - } -} - -func deleteFile(path string) { - log.Printf("Deleting file %s...", path) - err := os.Remove(path) - if err != nil { - log.Printf("Error deleting file %s: %s", path, err) - return - } - numFilesDeleted++ -} - -// region cleanRoot -func cleanRoot() { - files, err := os.ReadDir(constants.ROOT) - if err != nil { - log.Printf("Error reading root directory %s: %s", constants.ROOT, err) - return - } - for _, file := range files { - if !file.IsDir() { - continue - } - empty, err := isDirEmpty(constants.ROOT + "/" + file.Name()) - if err != nil { - log.Printf("Error checking if directory %s is empty: %s", file.Name(), err) - continue - } - log.Printf("Directory %s isempty: %t", file.Name(), empty) - if empty { - log.Printf("Deleting empty directory %s", file.Name()) - err := os.RemoveAll(constants.ROOT + "/" + file.Name()) - if err != nil { - log.Printf("Error deleting empty directory %s: %s", file.Name(), err) - } - } - } -} - -func isDirEmpty(dirPath string) (bool, error) { - var empty = true - var ferr error = nil - - filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { - if err != nil { - log.Printf("Error scanning %s: %s", path, err) - ferr = err - return nil - } - if !info.IsDir() { - empty = false - log.Printf("Directory %s is not empty, found %s", dirPath, path) - return filepath.SkipAll - } - return nil - }) - return empty, ferr -} \ No newline at end of file