Files
directory-cleaner/main.go

319 lines
9.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"log"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"github.com/djherbis/times"
)
// timeUnits maps a duration unit suffix to its length in milliseconds.
// A month ("M") is fixed at 30 days and a year ("y") at 365 days.
var timeUnits = map[string]int64{
	"ms": 1,
	"s":  1000,
	"m":  1000 * 60,
	"h":  1000 * 60 * 60,
	"d":  1000 * 60 * 60 * 24,
	"M":  1000 * 60 * 60 * 24 * 30,
	"y":  1000 * 60 * 60 * 24 * 365,
}

// Counters for the current pass; archiveFile and deleteFile increment them.
var numFilesArchived = 0
var numFilesDeleted = 0

// valueRegex and unitRegex pick the first run of digits and the first run of
// letters out of a single duration part such as "30m".
var valueRegex = regexp.MustCompile(`\d+`)
var unitRegex = regexp.MustCompile(`[a-zA-Z]+`)

// parseDuration converts a duration string into milliseconds.
//
// The input is a list of "<number><unit>" parts joined by underscores, for
// example "1d" or "1h_30m"; the units are the keys of timeUnits. Parts that
// have no number, no unit, an unknown unit, or a number that does not fit in
// an int64 are logged and skipped, so a completely invalid input yields 0.
func parseDuration(date string) int64 {
	var milliseconds int64
	for _, part := range strings.Split(strings.TrimSpace(date), "_") {
		part = strings.TrimSpace(part)
		log.Printf("Parsing date part: %s", part)
		value := valueRegex.FindString(part)
		unit := unitRegex.FindString(part)
		if value == "" || unit == "" {
			log.Println("Invalid date part: " + part)
			continue
		}
		unitMs, ok := timeUnits[unit]
		if !ok {
			log.Println("Invalid date unit: " + unit)
			continue
		}
		log.Printf("Part %s parsed as: Value: %s, Unit: %s", part, value, unit)
		// Parse with the full int64 range: a narrower bitSize makes ParseInt
		// clamp large values (e.g. "40000ms") instead of returning them.
		count, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			log.Printf("Invalid date value %s: %s", value, err)
			continue
		}
		valueMs := count * unitMs
		milliseconds += valueMs
		log.Printf("Adding %dms to duration, now: %d", valueMs, milliseconds)
	}
	return milliseconds
}
// getEnv returns the value of the environment variable key, or def when the
// variable is not set at all. A variable that is set to the empty string is
// returned as "" rather than replaced by def.
func getEnv(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}
// scanRoot walks constants.ROOT and passes every non-directory entry to
// processFile. The root itself is skipped, and anything equal to or located
// under one of constants.IGNORED_DIRECTORIES is left alone.
//
// Errors reported by the walk are logged and the walk continues. Because the
// callback never returns a real error, filepath.Walk always returns nil here,
// which is why its result is not checked.
func scanRoot() {
	log.Println("Scanning root directory...")
	filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("Error scanning %s: %s", path, err)
			return nil
		}
		path = filepath.ToSlash(path)
		if path == constants.ROOT {
			log.Printf("Skipping root directory %s...", path)
			return nil
		}
		// IGNORED_DIRECTORIES is expected to be a short list, so a linear scan
		// is cheaper than building a set.
		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
			ignoredDir = strings.TrimSuffix(ignoredDir, "/")
			if ignoredDir == "" {
				// An empty entry would be a prefix of every path.
				continue
			}
			// Match on a path boundary so ".../archive" does not also ignore
			// ".../archive2".
			if path != ignoredDir && !strings.HasPrefix(path, ignoredDir+"/") {
				continue
			}
			log.Printf("Ignoring directory %s", path)
			if info.IsDir() {
				return filepath.SkipDir
			}
			// Returning SkipDir for a file would skip the remaining entries
			// of its parent directory, so only skip this one entry.
			return nil
		}
		if info.IsDir() {
			// Directories are only descended into; their files are handled
			// one by one and empty directories are removed by cleanRoot.
			return nil
		}
		processFile(path, info)
		return nil
	})
}
// scanArchive walks constants.ROOT_ARCHIVE and passes every entry except the
// archive root itself to processArchiveFile. Walk errors are logged and the
// walk carries on with the next entry.
func scanArchive() {
	log.Println("Scanning archive...")
	archiveRoot := constants.ROOT_ARCHIVE
	visit := func(entry string, fi os.FileInfo, walkErr error) error {
		log.Printf("Scanning archive file %s...", entry)
		if walkErr != nil {
			log.Printf("Error scanning %s: %s", entry, walkErr)
			return nil
		}
		if slashed := filepath.ToSlash(entry); slashed != archiveRoot {
			processArchiveFile(slashed, fi)
		} else {
			log.Printf("Skipping root directory %s...", slashed)
		}
		return nil
	}
	filepath.Walk(archiveRoot, visit)
}
// processFile archives path when it is older than constants.ARCHIVE_THRESHOLD.
//
// The age is measured from the file's modification time when
// constants.USE_MODTIME is set and from its access time otherwise. Both the
// age and the threshold are compared in milliseconds.
func processFile(path string, info os.FileInfo) {
	now := time.Now().UnixMilli()
	log.Printf("Processing file %s...", path)

	timeType := "accessed"
	var fileTime int64
	if constants.USE_MODTIME {
		timeType = "modified"
		fileTime = times.Get(info).ModTime().UnixMilli()
	} else {
		fileTime = times.Get(info).AccessTime().UnixMilli()
	}

	timeDelta := now - fileTime
	// The threshold is a time.Duration; convert it once so it can be compared
	// with the millisecond age and logged with the right unit.
	thresholdMs := constants.ARCHIVE_THRESHOLD.Milliseconds()
	log.Printf("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
	if timeDelta > thresholdMs {
		log.Printf("File %s was %s more than %dms ago, archiving...", path, timeType, thresholdMs)
		archiveFile(path)
	}
}
// processArchiveFile deletes path when it is older than
// constants.DELETE_THRESHOLD.
//
// The age is measured from the entry's modification time when
// constants.USE_MODTIME is set and from its access time otherwise. The
// comparison is done in milliseconds.
func processArchiveFile(path string, info os.FileInfo) {
	now := time.Now().UnixMilli()

	timeType := "accessed"
	var fileTime int64
	if constants.USE_MODTIME {
		timeType = "modified"
		fileTime = times.Get(info).ModTime().UnixMilli()
	} else {
		fileTime = times.Get(info).AccessTime().UnixMilli()
	}

	timeDelta := now - fileTime
	// Convert the threshold once: logging the time.Duration itself with %d
	// would print nanoseconds under an "ms" label.
	thresholdMs := constants.DELETE_THRESHOLD.Milliseconds()
	fileTimeFormatted := time.UnixMilli(fileTime).Format("15:04:05.000000")
	timeDeltaFormatted := time.Duration(timeDelta) * time.Millisecond
	log.Printf("File %s last %s at %s, %s ago", path, timeType, fileTimeFormatted, timeDeltaFormatted)
	if timeDelta > thresholdMs {
		log.Printf("File %s was %s more than %dms ago, deleting...", path, timeType, thresholdMs)
		deleteFile(path)
	}
}
// archiveFile prepares the archive location for path and counts it as
// archived. The destination is constants.ROOT_ARCHIVE followed by path with
// the first occurrence of constants.ROOT removed; the destination's parent
// directory is created when missing.
//
// The move itself is currently disabled (see the commented-out os.Rename), so
// this only creates directories and increments numFilesArchived.
func archiveFile(path string) {
	relative := strings.Replace(path, constants.ROOT, "", 1)
	target := constants.ROOT_ARCHIVE + relative
	log.Printf("Archiving file %s to %s...", path, target)

	parent := filepath.Dir(target)
	if mkErr := os.MkdirAll(parent, os.ModePerm); mkErr != nil {
		log.Printf("Error creating directory %s: %s", parent, mkErr)
		return
	}

	// if err := os.Rename(path, target); err != nil {
	// 	log.Printf("Error archiving file %s: %s", path, err)
	// 	return
	// }
	numFilesArchived++
}
// deleteFile logs the deletion of path and counts it in numFilesDeleted.
//
// The removal itself is currently disabled (see the commented-out os.Remove),
// so nothing is deleted from disk.
func deleteFile(path string) {
	log.Printf("Deleting file %s...", path)

	// if err := os.Remove(path); err != nil {
	// 	log.Printf("Error deleting file %s: %s", path, err)
	// 	return
	// }
	numFilesDeleted++
}
// cleanRoot removes the top-level directories of constants.ROOT that contain
// no files anywhere below them (as reported by isDirEmpty).
//
// Directories listed in constants.IGNORED_DIRECTORIES, and directories that
// contain one of them, are never removed; this keeps the archive root and any
// user-ignored directory in place even while they hold no files.
func cleanRoot() {
	entries, err := os.ReadDir(constants.ROOT)
	if err != nil {
		log.Printf("Error reading root directory %s: %s", constants.ROOT, err)
		return
	}
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		name := entry.Name()
		// Paths in this file use forward slashes throughout, so build the
		// path the same way to keep it comparable with IGNORED_DIRECTORIES.
		dirPath := constants.ROOT + "/" + name

		protected := false
		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
			ignoredDir = strings.TrimSuffix(ignoredDir, "/")
			if ignoredDir == "" {
				continue
			}
			if dirPath == ignoredDir ||
				strings.HasPrefix(dirPath, ignoredDir+"/") ||
				strings.HasPrefix(ignoredDir, dirPath+"/") {
				protected = true
				break
			}
		}
		if protected {
			log.Printf("Ignoring directory %s", dirPath)
			continue
		}

		empty, err := isDirEmpty(dirPath)
		if err != nil {
			log.Printf("Error checking if directory %s is empty: %s", name, err)
			continue
		}
		log.Printf("Directory %s isempty: %t", name, empty)
		if !empty {
			continue
		}
		log.Printf("Deleting empty directory %s", name)
		// RemoveAll is needed because an "empty" directory may still contain
		// nested directories that hold no files.
		if err := os.RemoveAll(dirPath); err != nil {
			log.Printf("Error deleting empty directory %s: %s", name, err)
		}
	}
}
// isDirEmpty reports whether the tree rooted at dirPath contains no
// non-directory entries; nested directories alone do not make it non-empty.
//
// The walk stops at the first file found. Errors met during the walk are
// logged and the walk continues; the most recent one is returned next to the
// result, which is still true when nothing but errors was seen.
func isDirEmpty(dirPath string) (bool, error) {
	foundFile := false
	var lastErr error
	visit := func(entry string, fi os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			log.Printf("Error scanning %s: %s", entry, walkErr)
			lastErr = walkErr
		case !fi.IsDir():
			foundFile = true
			log.Printf("Directory %s is not empty, found %s", dirPath, entry)
			return filepath.SkipAll
		}
		return nil
	}
	filepath.Walk(dirPath, visit)
	return !foundFile, lastErr
}
// Constants holds the runtime configuration that main fills in from
// environment variables before the first scan.
type Constants struct {
// ROOT is the directory walked by scanRoot and listed by cleanRoot.
ROOT string
// ROOT_ARCHIVE is the directory walked by scanArchive; archiveFile builds
// destination paths underneath it.
ROOT_ARCHIVE string
// IGNORED_DIRECTORIES lists path prefixes that scanRoot skips. main always
// appends ROOT_ARCHIVE to this list.
IGNORED_DIRECTORIES []string
// ARCHIVE_THRESHOLD is the age beyond which processFile archives a file.
ARCHIVE_THRESHOLD time.Duration
// DELETE_THRESHOLD is the age beyond which processArchiveFile deletes an
// archived file.
DELETE_THRESHOLD time.Duration
// SCAN_INTERVAL is the duration main passes to time.Sleep between runs.
SCAN_INTERVAL time.Duration
// USE_MODTIME selects the modification time instead of the access time when
// computing a file's age.
USE_MODTIME bool
}
// constants is the shared configuration instance read by the scan functions.
var constants = &Constants{}
// main reads the configuration from environment variables, stores it in the
// package-level constants and then runs a cleaning pass every SCAN_INTERVAL.
//
// Environment variables (defaults in parentheses): ROOT ("C:/tmp"),
// ROOT_ARCHIVE (ROOT + "/archive"), IGNORED_DIRECTORIES (comma-separated,
// empty), ARCHIVE_THRESHOLD ("1d"), DELETE_THRESHOLD ("12h"),
// SCAN_INTERVAL ("1m") and USE_MODTIME ("false").
func main() {
	log.SetFlags(log.Lmicroseconds | log.Lshortfile)

	// Important: access times don't accumulate.
	// This implies that archiving the file won't alter its access time.
	// Therefore, assign X as the ARCHIVE_THRESHOLD and X + Y as the
	// DELETE_THRESHOLD, where X represents the duration a file can exist in
	// the folder, and Y represents the duration it can exist in the archive.
	root := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "C:/tmp")))
	rootArchive := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", root+"/archive")))
	if err := os.MkdirAll(rootArchive, os.ModePerm); err != nil {
		log.Printf("Error creating archive directory %s: %s", rootArchive, err)
	}

	// Blank entries (e.g. from "a,,b" or a whitespace-only value) are dropped:
	// an empty prefix would match every path and disable the scan entirely.
	var ignoredDirs []string
	for _, dir := range strings.Split(getEnv("IGNORED_DIRECTORIES", ""), ",") {
		dir = filepath.ToSlash(strings.TrimSpace(dir))
		if dir != "" {
			ignoredDirs = append(ignoredDirs, dir)
		}
	}
	ignoredDirs = append(ignoredDirs, rootArchive)

	archiveThreshold := durationFromEnv("ARCHIVE_THRESHOLD", "1d")
	deleteThreshold := durationFromEnv("DELETE_THRESHOLD", "12h")
	scanInterval := durationFromEnv("SCAN_INTERVAL", "1m")
	useModTime := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"

	if deleteThreshold <= archiveThreshold {
		// See the note above: the age keeps counting from before the move.
		log.Printf("Warning: DELETE_THRESHOLD (%s) is not greater than ARCHIVE_THRESHOLD (%s); archived files become eligible for deletion as soon as they are archived", deleteThreshold, archiveThreshold)
	}

	constants.ROOT = root
	constants.ROOT_ARCHIVE = rootArchive
	constants.IGNORED_DIRECTORIES = ignoredDirs
	constants.ARCHIVE_THRESHOLD = archiveThreshold
	constants.DELETE_THRESHOLD = deleteThreshold
	constants.SCAN_INTERVAL = scanInterval
	constants.USE_MODTIME = useModTime

	log.Println("Input args parsed as:")
	log.Printf("ROOT: %s", root)
	log.Printf("ROOT_ARCHIVE: %s", rootArchive)
	log.Printf("IGNORED_DIRECTORIES: %s", ignoredDirs)
	log.Printf("ARCHIVE_THRESHOLD: %s", archiveThreshold)
	log.Printf("DELETE_THRESHOLD: %s", deleteThreshold)
	log.Printf("SCAN_INTERVAL: %s", scanInterval)
	log.Printf("USE_MODTIME: %s", strconv.FormatBool(useModTime))

	// Run immediately, then once per scan interval, until the process is
	// stopped from outside.
	for {
		doRun()
		time.Sleep(scanInterval)
	}
}

// durationFromEnv reads the duration stored in the environment variable key
// (falling back to def) and aborts the program when it is not positive.
// parseDuration returns 0 for input it cannot parse, and a zero threshold
// would make every file eligible for archiving or deletion at once.
func durationFromEnv(key, def string) time.Duration {
	raw := getEnv(key, def)
	d := time.Duration(parseDuration(raw)) * time.Millisecond
	if d <= 0 {
		log.Fatalf("Invalid %s %q: expected a positive duration such as \"1d\" or \"1h_30m\"", key, raw)
	}
	return d
}
// doRun performs one full pass: it scans the root, scans the archive, cleans
// up the root, logs how many files were archived and deleted, and resets
// both counters for the next pass.
func doRun() {
	startedAt := time.Now().Format("15:04:05")
	log.Printf("Running at %s", startedAt)

	for _, step := range []func(){scanRoot, scanArchive, cleanRoot} {
		step()
	}

	log.Printf("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted)
	numFilesArchived, numFilesDeleted = 0, 0
}