Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-07-01 07:56:18

#!/usr/bin/env python3
"""Garbage-collect a Snakemake output file cache down to a target size.

Every entry of the cache directory is scored as ``size * seconds since last
access``. Entries are removed from the highest score downwards until the
total size drops below the requested target. Entries that are the resolved
target of a symlink found under ``--symlink-roots-dir`` are considered alive
and are never removed.
"""

import argparse
import os
import time
from pathlib import Path

# Multipliers for the binary suffixes accepted by --target-size.
SIZE_SUFFIXES = {
    "K": 1024,
    "M": 1024**2,
    "G": 1024**3,
}


def parse_size(text):
    """Convert a size string such as ``"512"``, ``"10M"`` or ``"2G"`` to bytes.

    Args:
        text: Integer number of bytes, optionally followed by a K, M or G
            suffix (case-insensitive, powers of 1024).

    Returns:
        The size in bytes as an ``int``.

    Raises:
        ValueError: If *text* is empty or its numeric part is not an integer.
            argparse turns this into a regular usage error when the function
            is used as a ``type=`` callable.
    """
    text = text.strip()
    if not text:
        raise ValueError("size must not be empty")
    suffix = text[-1].upper()
    if suffix in SIZE_SUFFIXES:
        return int(text[:-1]) * SIZE_SUFFIXES[suffix]
    return int(text)


def collect_alive(symlink_roots_dir):
    """Return the resolved targets of all symlinks below *symlink_roots_dir*.

    Args:
        symlink_roots_dir: Directory to scan recursively, or ``None`` to
            treat nothing as alive.

    Returns:
        A set of fully resolved ``Path`` objects.
    """
    if symlink_roots_dir is None:
        return set()
    return {
        path.resolve()
        for path in Path(symlink_roots_dir).rglob("*")
        if path.is_symlink()
    }


def scan_cache(cache_dir, now=None):
    """List the cache entries together with their eviction score and size.

    Args:
        cache_dir: ``Path`` of the cache directory (only direct children are
            examined).
        now: Reference timestamp used for the age computation; defaults to
            the current time. A single value is used for all entries so the
            scores are mutually consistent.

    Returns:
        A tuple ``(entries, total_size)`` where *entries* is a list of
        ``(path, score, size)`` tuples and *total_size* is the sum of sizes
        in bytes.
    """
    if now is None:
        now = time.time()
    entries = []
    total_size = 0
    for cache_path in cache_dir.iterdir():
        try:
            info = cache_path.stat()
        except FileNotFoundError:
            # Entry vanished since it was listed, or is a dangling symlink.
            continue
        size = info.st_size
        total_size += size
        age = now - info.st_atime
        entries.append((cache_path, size * age, size))
    return entries, total_size


def prune(entries, total_size, target_size, alive):
    """Delete the highest-scored entries until the cache is below the target.

    Args:
        entries: ``(path, score, size)`` tuples as produced by ``scan_cache``.
        total_size: Current total size of the cache in bytes.
        target_size: Pruning continues while the total is ``>=`` this value.
        alive: Set of resolved paths that must not be removed.

    Returns:
        The total size in bytes left after pruning. It can still be at or
        above *target_size* when only alive entries remain.
    """
    candidates = sorted(entries, key=lambda entry: entry[1])
    # Stop when the candidates run out: the remaining size may belong
    # entirely to alive entries, in which case the target is unreachable.
    while total_size >= target_size and candidates:
        cache_path, _, size = candidates.pop()
        # The alive set holds resolved paths, so resolve before comparing;
        # otherwise a relative or symlinked cache dir would never match.
        if cache_path.resolve() in alive:
            print(f"{cache_path} is alive")
            continue
        cache_path.unlink(missing_ok=True)
        print(f"Removing {cache_path} of {size / 1024. / 1024.:.1f} MiB")
        total_size -= size
    return total_size


def main(argv=None):
    """Parse the command line and run the garbage collection.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Performs garbage collection for Snakemake file cache",
    )
    parser.add_argument("--snakemake-output-cache-dir", type=str, help="Will try to use $SNAKEMAKE_OUTPUT_CACHE by default")
    parser.add_argument("--symlink-roots-dir", type=str)
    parser.add_argument("--target-size", type=parse_size, help="Target size of the cache after pruning (can use K,M,G suffixes)", required=True)
    args = parser.parse_args(argv)

    cache_dir_arg = args.snakemake_output_cache_dir
    if cache_dir_arg is None:
        cache_dir_arg = os.environ.get("SNAKEMAKE_OUTPUT_CACHE")
    if cache_dir_arg is None:
        parser.error("Must specify --snakemake-output-cache-dir or set $SNAKEMAKE_OUTPUT_CACHE")
    cache_dir = Path(cache_dir_arg)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not cache_dir.is_dir():
        parser.error(f"{cache_dir} is not a directory")

    alive = collect_alive(args.symlink_roots_dir)
    entries, total_size = scan_cache(cache_dir)

    print(f"Total cache size: {total_size / 1024. / 1024.:.1f} MiB")

    prune(entries, total_size, args.target_size, alive)


if __name__ == "__main__":
    main()