| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278 | 
							- #!/usr/bin/env python3
 
- # Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
 
- #
 
- # Permission to use, copy, modify, and/or distribute this software for any
 
- # purpose with or without fee is hereby granted, provided that the above
 
- # copyright notice and this permission notice appear in all copies.
 
- #
 
- # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 
- # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 
- # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 
- # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 
- # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 
- # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 
- # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
- import argparse
 
- import ctypes
 
- import errno
 
- import fnmatch
 
- import io
 
- import os
 
- import shlex
 
- import stat
 
- import subprocess
 
- import sys
 
# Bind lgetxattr from the C library.  use_errno=True makes ctypes keep a
# private copy of errno for calls made through this handle, which is what
# ctypes.get_errno() reads back after a failed call.
_libc = ctypes.CDLL("libc.so.6", use_errno=True)
_lgetxattr = _libc.lgetxattr
# C prototype:
#   ssize_t lgetxattr(const char *path, const char *name,
#                     void *value, size_t size);
_lgetxattr.argtypes = [ctypes.c_char_p,
                       ctypes.c_char_p,
                       ctypes.c_void_p,
                       ctypes.c_size_t]
# Without an explicit restype ctypes assumes a C int; declare the real
# ssize_t return type so the result is never truncated.
_lgetxattr.restype = ctypes.c_ssize_t
 
def lgetxattr(path, name):
    """Read the extended attribute *name* of *path* via libc's lgetxattr.

    Args:
        path: File path as ``str`` or ``bytes``.
        name: Attribute name as ``str`` or ``bytes``.

    Returns:
        The attribute value as ``bytes``, or ``None`` when the call fails
        with ``ENODATA`` (the attribute is not set).

    Raises:
        IOError: For any other errno reported by the call.
    """
    if isinstance(path, str):
        # Paths coming from os.walk() / sys.argv may carry surrogate
        # escapes for bytes that are not valid in the filesystem encoding.
        # os.fsencode() restores the original bytes, whereas a plain
        # encode(errors='backslashreplace') would produce a different path.
        path = os.fsencode(path)
    if isinstance(name, str):
        name = name.encode(errors='backslashreplace')

    # Start with a small buffer and double it until the value fits.
    length = 64
    while True:
        buf = ctypes.create_string_buffer(length)
        res = _lgetxattr(path, name, buf, ctypes.c_size_t(length))
        if res >= 0:
            return buf.raw[0:res]

        err = ctypes.get_errno()
        if err == errno.ERANGE:
            # Buffer too small for the value: retry with a larger one.
            length *= 2
        elif err == errno.ENODATA:
            return None
        else:
            raise IOError(err, os.strerror(err), path)
 
def xattr_relpath(fullpath):
    """Return the ``user.mergerfs.relpath`` attribute of *fullpath* as str.

    The value is decoded with ``os.fsdecode`` so that bytes which are not
    valid in the filesystem encoding survive a round trip back to the OS
    (e.g. when the result is later passed to a subprocess).

    Raises:
        IOError: If the attribute is missing (``ENODATA``) or cannot be
            read.
    """
    raw = lgetxattr(fullpath, 'user.mergerfs.relpath')
    if raw is None:
        # lgetxattr() signals a missing attribute with None; fail with a
        # meaningful error instead of an AttributeError on None.decode().
        raise IOError(errno.ENODATA, os.strerror(errno.ENODATA), fullpath)
    return os.fsdecode(raw)
 
def xattr_basepath(fullpath):
    """Return the ``user.mergerfs.basepath`` attribute of *fullpath* as str.

    The value is decoded with ``os.fsdecode`` so that bytes which are not
    valid in the filesystem encoding survive a round trip back to the OS.

    Raises:
        IOError: If the attribute is missing (``ENODATA``) or cannot be
            read.
    """
    raw = lgetxattr(fullpath, 'user.mergerfs.basepath')
    if raw is None:
        # lgetxattr() signals a missing attribute with None; fail with a
        # meaningful error instead of an AttributeError on None.decode().
        raise IOError(errno.ENODATA, os.strerror(errno.ENODATA), fullpath)
    return os.fsdecode(raw)
 
def ismergerfs(path):
    """Return True if *path* exposes the ``user.mergerfs.version`` xattr.

    Args:
        path: Path to probe; ``None`` is accepted and yields False.

    Returns:
        True only when the attribute can be read.  A missing attribute
        (``lgetxattr`` returning None) or any IOError yields False.
    """
    if path is None:
        return False
    try:
        # lgetxattr() returns None (it does not raise) when the attribute
        # is absent, so the result itself has to be checked.
        return lgetxattr(path, 'user.mergerfs.version') is not None
    except IOError:
        return False
 
def mergerfs_control_file(basedir):
    """Find the nearest ``.mergerfs`` entry at or above *basedir*.

    Walks from *basedir* towards the filesystem root and returns the first
    ``<dir>/.mergerfs`` path for which ``os.path.exists`` is true.

    Args:
        basedir: Directory to start from.

    Returns:
        The path of the ``.mergerfs`` entry, or ``None`` if the walk
        reaches ``'/'`` (which itself is not probed) or can climb no
        further without finding one.
    """
    current = basedir
    while current != '/':
        candidate = os.path.join(current, '.mergerfs')
        if os.path.exists(candidate):
            return candidate
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() reached a fixpoint other than '/' (e.g. '' for a
            # relative path, or '//'); stop instead of looping forever.
            return None
        current = parent
    return None
 
def mergerfs_srcmounts(ctrlfile):
    """Return the list of paths stored in ``user.mergerfs.srcmounts``.

    The attribute of *ctrlfile* is read, decoded with ``os.fsdecode`` (so
    paths containing bytes that are invalid in the filesystem encoding
    remain usable with ``os`` functions) and split on ``':'``.

    Raises:
        IOError: If the attribute is missing (``ENODATA``) or cannot be
            read.
    """
    raw = lgetxattr(ctrlfile, 'user.mergerfs.srcmounts')
    if raw is None:
        # lgetxattr() signals a missing attribute with None; fail with a
        # meaningful error instead of an AttributeError on None.decode().
        raise IOError(errno.ENODATA, os.strerror(errno.ENODATA), ctrlfile)
    return os.fsdecode(raw).split(':')
 
def match(filename, matches):
    """Return True if *filename* matches any fnmatch pattern in *matches*.

    An empty *matches* sequence never matches.
    """
    return any(fnmatch.fnmatch(filename, pattern) for pattern in matches)
 
def execute_cmd(args):
    """Run the command given as the argument list *args*.

    Returns:
        Whatever ``subprocess.call`` returns for the command (its return
        code).
    """
    returncode = subprocess.call(args)
    return returncode
 
def print_args(args):
    """Print *args* as one shell-quoted, space-separated command line."""
    print(' '.join(map(shlex.quote, args)))
 
def human_to_bytes(s):
    """Convert a size string such as ``'512'``, ``'64K'`` or ``'16g'`` to bytes.

    A trailing ``K``, ``M``, ``G`` or ``T`` (either case) multiplies the
    leading integer by 1024, 1024**2, 1024**3 or 1024**4 respectively.
    Without such a suffix the whole string is parsed as an integer.

    Args:
        s: The size string.

    Returns:
        The size in bytes as an ``int``.

    Raises:
        ValueError: If the string is empty or its numeric part is not a
            valid integer.  (ValueError is what argparse turns into a
            usage error when this function is used as a ``type=``.)
    """
    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }

    text = s.strip()
    if not text:
        # Indexing text[-1] below would otherwise raise IndexError.
        raise ValueError('empty size string')

    factor = multipliers.get(text[-1].upper())
    if factor is None:
        return int(text)
    return int(text[:-1]) * factor
 
def get_stats(branches):
    """Map each path in *branches* to ``f_bavail * f_frsize`` from statvfs.

    Returns:
        A dict keyed by the branch path as given, whose values are the
        product of the ``f_bavail`` and ``f_frsize`` fields reported by
        ``os.statvfs`` for that path.
    """
    def available_bytes(mount):
        info = os.statvfs(mount)
        return info.f_bavail * info.f_frsize

    return {mount: available_bytes(mount) for mount in branches}
 
def build_move_file(src, tgt, rel):
    """Build the rsync argument list that moves *rel* from *src* to *tgt*.

    Args:
        src: Source base directory.
        tgt: Target base directory; emitted with exactly one trailing '/'.
        rel: Path of the file relative to *src*; surrounding '/' are
            stripped.

    Returns:
        A list of arguments starting with ``'rsync'``.  The source operand
        is ``<src>/./<rel>`` and is passed together with ``--relative``
        and ``--remove-source-files``.
    """
    # The './' component sits between the base directory and the relative
    # path (presumably the anchor rsync's --relative uses to decide which
    # part of the path to recreate under the target; see the rsync manual).
    source_operand = os.path.join(src, './', rel.strip('/'))
    target_operand = '%s/' % tgt.rstrip('/')

    command = ['rsync']
    command.extend(('-avHAXWE',
                    '--numeric-ids',
                    '--progress',
                    '--relative',
                    '--remove-source-files'))
    command.append(source_operand)
    command.append(target_operand)
    return command
 
def print_help():
    """Print the usage text for mergerfs.consolidate to standard output."""
    usage_text = '''
usage: mergerfs.consolidate [<options>] <dir>
Consolidate files in a single mergerfs directory onto a single drive.
positional arguments:
  dir                    starting directory
optional arguments:
  -m, --max-files=       Skip directories with more than N files.
                         (default: 256)
  -M, --max-size=        Skip directories with files adding up to more
                         than N. (default: 16G)
  -I, --include-path=    fnmatch compatible path include filter.
                         Can be used multiple times.
  -E, --exclude-path=    fnmatch compatible path exclude filter.
                         Can be used multiple times.
  -e, --execute          Execute `rsync` commands as well as print them.
  -h, --help             Print this help.
'''
    print(usage_text)
 
def buildargparser():
    """Create the command-line parser for mergerfs.consolidate.

    argparse's built-in help is disabled (``add_help=False``); ``-h`` /
    ``--help`` is registered as a plain boolean flag instead.

    Returns:
        An ``argparse.ArgumentParser`` with the optional positional ``dir``
        and the ``-m``, ``-M``, ``-I``, ``-E``, ``-e`` and ``-h`` options.
    """
    parser = argparse.ArgumentParser(add_help=False)

    # Optional positional: the starting directory.
    parser.add_argument('dir', type=str, nargs='?', default=None)

    # (flag names, keyword settings) for every option, in registration
    # order.
    option_table = (
        (('-m', '--max-files'),
         {'dest': 'max_files', 'type': int, 'default': 256}),
        (('-M', '--max-size'),
         {'dest': 'max_size', 'type': human_to_bytes, 'default': '16G'}),
        (('-I', '--include-path'),
         {'dest': 'includepath', 'type': str, 'action': 'append',
          'default': []}),
        (('-E', '--exclude-path'),
         {'dest': 'excludepath', 'type': str, 'action': 'append',
          'default': []}),
        (('-e', '--execute'),
         {'dest': 'execute', 'action': 'store_true'}),
        (('-h', '--help'),
         {'action': 'store_true'}),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser
 
def main():
    """Entry point: plan (and optionally run) rsync moves per directory.

    Steps:
      1. Re-wrap stdout/stderr as UTF-8 with ``backslashreplace`` so that
         unencodable characters in paths cannot abort printing.
      2. Parse the command line; print help and exit 0 when asked to or
         when no directory was given.
      3. Locate the ``.mergerfs`` control file at or above the directory
         and exit 1 if the directory is not on a mergerfs mount.
      4. Walk the directory tree.  For each directory that passes the
         file-count, size and include/exclude filters, pick the branch
         with the largest value in the free-space table and emit one
         rsync command per regular file that is not already on it.
         Commands are executed only with ``--execute``.

    Exits with status 0 when the walk finishes or is interrupted by
    ``KeyboardInterrupt`` / ``BrokenPipeError``.
    """
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
                                  encoding='utf8',
                                  errors='backslashreplace',
                                  line_buffering=True)
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
                                  encoding='utf8',
                                  errors='backslashreplace',
                                  line_buffering=True)

    parser = buildargparser()
    args = parser.parse_args()

    if args.help or not args.dir:
        print_help()
        sys.exit(0)

    args.dir = os.path.realpath(args.dir)

    ctrlfile = mergerfs_control_file(args.dir)
    # mergerfs_control_file() returns None when nothing was found; test
    # that explicitly rather than relying on the xattr call to fail.
    if ctrlfile is None or not ismergerfs(ctrlfile):
        print("%s is not a mergerfs mount" % args.dir)
        sys.exit(1)

    basedir       = args.dir
    execute       = args.execute
    max_files     = args.max_files
    max_size      = args.max_size
    path_includes = ['*'] if not args.includepath else args.includepath
    path_excludes = args.excludepath
    srcmounts     = mergerfs_srcmounts(ctrlfile)

    # Free-space table per branch; updated as moves are planned so that
    # later directories see the effect of earlier ones.
    mount_stats = get_stats(srcmounts)

    try:
        for (root, _dirs, files) in os.walk(basedir):
            # Nothing to consolidate with zero or one entry.
            if len(files) <= 1:
                continue
            if len(files) > max_files:
                continue
            if match(root, path_excludes):
                continue
            if not match(root, path_includes):
                continue

            # Collect regular files only and total up their sizes.
            total_size = 0
            file_stats = {}
            for file in files:
                fullpath = os.path.join(root, file)
                st = os.lstat(fullpath)
                if not stat.S_ISREG(st.st_mode):
                    continue
                total_size += st.st_size
                file_stats[fullpath] = st

            if total_size >= max_size:
                continue

            # Branch with the largest free-space figure; on ties max()
            # keeps the first one, like a stable descending sort would.
            tgtpath = max(mount_stats.items(), key=lambda item: item[1])[0]

            for (fullpath, st) in sorted(file_stats.items()):
                srcpath = xattr_basepath(fullpath)
                if srcpath == tgtpath:
                    continue

                relpath = xattr_relpath(fullpath)

                # Use a separate name so the parsed options in `args`
                # are not clobbered by the rsync command line.
                cmd = build_move_file(srcpath, tgtpath, relpath)
                print_args(cmd)
                if execute:
                    status = execute_cmd(cmd)
                    if status != 0:
                        # The move did not complete: report it and leave
                        # the free-space table untouched for this file.
                        print("rsync exited with status %d for %s"
                              % (status, fullpath),
                              file=sys.stderr)
                        continue

                # Account for the (planned or completed) move.
                mount_stats[srcpath] += st.st_size
                mount_stats[tgtpath] -= st.st_size
    except (KeyboardInterrupt, BrokenPipeError):
        pass

    sys.exit(0)
 
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
 
 
  |