hai 3 anos · 1b3a9bd121
--- a/roles/mergerfs/files/mergerfs.balance
+++ b/roles/mergerfs/files/mergerfs.balance
@@ -0,0 +1,293 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
			
 
				+#
			
 
				+# Permission to use, copy, modify, and/or distribute this software for any
			
 
				+# purpose with or without fee is hereby granted, provided that the above
			
 
				+# copyright notice and this permission notice appear in all copies.
			
 
				+#
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
			
 
				+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
			
 
				+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
			
 
				+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
			
 
				+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
			
 
				+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
			
 
				+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+import argparse
			
 
				+import ctypes
			
 
				+import errno
			
 
				+import fnmatch
			
 
				+import io
			
 
				+import os
			
 
				+import shlex
			
 
				+import subprocess
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
			
 
				+_lgetxattr = _libc.lgetxattr
			
 
				+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
			
 
				+def lgetxattr(path,name):
			
 
				+    if type(path) == str:
			
 
				+        path = path.encode(errors='backslashreplace')
			
 
				+    if type(name) == str:
			
 
				+        name = name.encode(errors='backslashreplace')
			
 
				+    length = 64
			
 
				+    while True:
			
 
				+        buf = ctypes.create_string_buffer(length)
			
 
				+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
			
 
				+        if res >= 0:
			
 
				+            return buf.raw[0:res].decode(errors='backslashreplace')
			
 
				+        else:
			
 
				+            err = ctypes.get_errno()
			
 
				+            if err == errno.ERANGE:
			
 
				+                length *= 2
			
 
				+            elif err == errno.ENODATA:
			
 
				+                return None
			
 
				+            else:
			
 
				+                raise IOError(err,os.strerror(err),path)
			
 
				+
			
 
				+
			
 
				+def ismergerfs(path):
			
 
				+    try:
			
 
				+        lgetxattr(path,'user.mergerfs.version')
			
 
				+        return True
			
 
				+    except IOError as e:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def mergerfs_control_file(basedir):
			
 
				+    if basedir == '/':
			
 
				+        return None
			
 
				+    ctrlfile = os.path.join(basedir,'.mergerfs')
			
 
				+    if os.path.exists(ctrlfile):
			
 
				+        return ctrlfile
			
 
				+    else:
			
 
				+        dirname = os.path.dirname(basedir)
			
 
				+        return mergerfs_control_file(dirname)
			
 
				+
			
 
				+
			
 
				+def mergerfs_srcmounts(ctrlfile):
			
 
				+    srcmounts = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
			
 
				+    srcmounts = srcmounts.split(':')
			
 
				+    return srcmounts
			
 
				+
			
 
				+
			
 
				+def match(filename,matches):
			
 
				+    for match in matches:
			
 
				+        if fnmatch.fnmatch(filename,match):
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def exclude_by_size(filepath,exclude_lt,exclude_gt):
			
 
				+    try:
			
 
				+        st = os.lstat(filepath)
			
 
				+        if exclude_lt and st.st_size < exclude_lt:
			
 
				+            return True
			
 
				+        if exclude_gt and st.st_size > exclude_gt:
			
 
				+            return True
			
 
				+        return False
			
 
				+    except:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def find_a_file(src,
			
 
				+                relpath,
			
 
				+                file_includes,file_excludes,
			
 
				+                path_includes,path_excludes,
			
 
				+                exclude_lt,exclude_gt):
			
 
				+    basepath = os.path.join(src,relpath)
			
 
				+    for (dirpath,dirnames,filenames) in os.walk(basepath):
			
 
				+        for filename in filenames:
			
 
				+            filepath = os.path.join(dirpath,filename)
			
 
				+            if match(filename,file_excludes):
			
 
				+                continue
			
 
				+            if match(filepath,path_excludes):
			
 
				+                continue
			
 
				+            if not match(filename,file_includes):
			
 
				+                continue
			
 
				+            if not match(filepath,path_includes):
			
 
				+                continue
			
 
				+            if exclude_by_size(filepath,exclude_lt,exclude_gt):
			
 
				+                continue
			
 
				+            return os.path.relpath(filepath,src)
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def execute(args):
			
 
				+    return subprocess.call(args)
			
 
				+
			
 
				+
			
 
				+def print_args(args):
			
 
				+    quoted = [shlex.quote(arg) for arg in args]
			
 
				+    print(' '.join(quoted))
			
 
				+
			
 
				+
			
 
				+def build_move_file(src,dst,relfile):
			
 
				+    frompath = os.path.join(src,'./',relfile)
			
 
				+    topath   = dst+'/'
			
 
				+    args = ['rsync',
			
 
				+            '-avlHAXWE',
			
 
				+            '--relative',
			
 
				+            '--progress',
			
 
				+            '--remove-source-files',
			
 
				+            frompath,
			
 
				+            topath]
			
 
				+    return args
			
 
				+
			
 
				+
			
 
				+def freespace_percentage(srcmounts):
			
 
				+    lfsp = []
			
 
				+    for srcmount in srcmounts:
			
 
				+        vfs = os.statvfs(srcmount)
			
 
				+        avail = vfs.f_bavail * vfs.f_frsize
			
 
				+        total = vfs.f_blocks * vfs.f_frsize
			
 
				+        per = avail / total
			
 
				+        lfsp.append((srcmount,per))
			
 
				+    return sorted(lfsp, key=lambda x: x[1])
			
 
				+
			
 
				+
			
 
				+def all_within_range(l,n):
			
 
				+    if len(l) == 0 or len(l) == 1:
			
 
				+        return True
			
 
				+    return (abs(l[0][1] - l[-1][1]) <= n)
			
 
				+
			
 
				+
			
 
				+def human_to_bytes(s):
			
 
				+    m = s[-1]
			
 
				+    if   m == 'K':
			
 
				+        i = int(s[0:-1]) * 1024
			
 
				+    elif m == 'M':
			
 
				+        i = int(s[0:-1]) * 1024 * 1024
			
 
				+    elif m == 'G':
			
 
				+        i = int(s[0:-1]) * 1024 * 1024 * 1024
			
 
				+    elif m == 'T':
			
 
				+        i = int(s[0:-1]) * 1024 * 1024 * 1024 * 1024
			
 
				+    else:
			
 
				+        i = int(s)
			
 
				+
			
 
				+    return i
			
 
				+
			
 
				+
			
 
				+def buildargparser():
			
 
				+    parser = argparse.ArgumentParser(description='balance files on a mergerfs mount based on percentage drive filled')
			
 
				+    parser.add_argument('dir',
			
 
				+                        type=str,
			
 
				+                        help='starting directory')
			
 
				+    parser.add_argument('-p',
			
 
				+                        dest='percentage',
			
 
				+                        type=float,
			
 
				+                        default=2.0,
			
 
				+                        help='percentage range of freespace (default 2.0)')
			
 
				+    parser.add_argument('-i','--include',
			
 
				+                        dest='include',
			
 
				+                        type=str,
			
 
				+                         action='append',
			
 
				+                        default=[],
			
 
				+                        help='fnmatch compatible file filter (can use multiple times)')
			
 
				+    parser.add_argument('-e','--exclude',
			
 
				+                        dest='exclude',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[],
			
 
				+                        help='fnmatch compatible file filter (can use multiple times)')
			
 
				+    parser.add_argument('-I','--include-path',
			
 
				+                        dest='includepath',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[],
			
 
				+                        help='fnmatch compatible path filter (can use multiple times)')
			
 
				+    parser.add_argument('-E','--exclude-path',
			
 
				+                        dest='excludepath',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[],
			
 
				+                        help='fnmatch compatible path filter (can use multiple times)')
			
 
				+    parser.add_argument('-s',
			
 
				+                        dest='excludelt',
			
 
				+                        type=str,
			
 
				+                        default='0',
			
 
				+                        help='exclude files smaller than <int>[KMGT] bytes')
			
 
				+    parser.add_argument('-S',
			
 
				+                        dest='excludegt',
			
 
				+                        type=str,
			
 
				+                        default='0',
			
 
				+                        help='exclude files larger than <int>[KMGT] bytes')
			
 
				+    return parser
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors="backslashreplace",
			
 
				+                                  line_buffering=True)
			
 
				+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors="backslashreplace",
			
 
				+                                  line_buffering=True)
			
 
				+
			
 
				+    parser = buildargparser()
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    args.dir = os.path.realpath(args.dir)
			
 
				+
			
 
				+    ctrlfile = mergerfs_control_file(args.dir)
			
 
				+    if not ismergerfs(ctrlfile):
			
 
				+        print("%s is not a mergerfs mount" % args.dir)
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    relpath = ''
			
 
				+    mntpoint = os.path.dirname(ctrlfile)
			
 
				+    if args.dir != mntpoint:
			
 
				+        relpath = os.path.relpath(args.dir,mntpoint)
			
 
				+
			
 
				+    file_includes = ['*'] if not args.include else args.include
			
 
				+    file_excludes = args.exclude
			
 
				+    path_includes = ['*'] if not args.includepath else args.includepath
			
 
				+    path_excludes = args.excludepath
			
 
				+    exclude_lt    = human_to_bytes(args.excludelt)
			
 
				+    exclude_gt    = human_to_bytes(args.excludegt)
			
 
				+    srcmounts     = mergerfs_srcmounts(ctrlfile)
			
 
				+    percentage    = args.percentage / 100
			
 
				+
			
 
				+    try:
			
 
				+        l = freespace_percentage(srcmounts)
			
 
				+        while not all_within_range(l,percentage):
			
 
				+            todrive     = l[-1][0]
			
 
				+            relfilepath = None
			
 
				+            while not relfilepath and len(l):
			
 
				+                fromdrive = l[0][0]
			
 
				+                del l[0]
			
 
				+                relfilepath = find_a_file(fromdrive,
			
 
				+                                          relpath,
			
 
				+                                          file_includes,file_excludes,
			
 
				+                                          path_includes,path_excludes,
			
 
				+                                          exclude_lt,exclude_gt)
			
 
				+            if len(l) == 0:
			
 
				+                print('Could not find file to transfer: exiting...')
			
 
				+                break
			
 
				+            if fromdrive == todrive:
			
 
				+                print('Source drive == target drive: exiting...')
			
 
				+                break
			
 
				+
			
 
				+            args = build_move_file(fromdrive,todrive,relfilepath)
			
 
				+            print('file: {}\nfrom: {}\nto:   {}'.format(relfilepath,fromdrive,todrive))
			
 
				+            print_args(args)
			
 
				+            rv = execute(args)
			
 
				+            if rv:
			
 
				+                print('ERROR - exited with exit code: {}'.format(rv))
			
 
				+                break
			
 
				+            l = freespace_percentage(srcmounts)
			
 
				+        print('Branches within {:.1%} range: '.format(percentage))
			
 
				+        for (branch,percentage) in l:
			
 
				+            print(' * {}: {:.2%} free'.format(branch,percentage))
			
 
				+    except KeyboardInterrupt:
			
 
				+        print("exiting: CTRL-C pressed")
			
 
				+
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+   main()
			
--- a/roles/mergerfs/files/mergerfs.consolidate
+++ b/roles/mergerfs/files/mergerfs.consolidate
@@ -0,0 +1,278 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
			
 
				+#
			
 
				+# Permission to use, copy, modify, and/or distribute this software for any
			
 
				+# purpose with or without fee is hereby granted, provided that the above
			
 
				+# copyright notice and this permission notice appear in all copies.
			
 
				+#
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
			
 
				+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
			
 
				+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
			
 
				+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
			
 
				+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
			
 
				+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
			
 
				+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+import argparse
			
 
				+import ctypes
			
 
				+import errno
			
 
				+import fnmatch
			
 
				+import io
			
 
				+import os
			
 
				+import shlex
			
 
				+import stat
			
 
				+import subprocess
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
			
 
				+_lgetxattr = _libc.lgetxattr
			
 
				+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
			
 
				+def lgetxattr(path,name):
			
 
				+    if type(path) == str:
			
 
				+        path = path.encode(errors='backslashreplace')
			
 
				+    if type(name) == str:
			
 
				+        name = name.encode(errors='backslashreplace')
			
 
				+    length = 64
			
 
				+    while True:
			
 
				+        buf = ctypes.create_string_buffer(length)
			
 
				+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
			
 
				+        if res >= 0:
			
 
				+            return buf.raw[0:res]
			
 
				+        else:
			
 
				+            err = ctypes.get_errno()
			
 
				+            if err == errno.ERANGE:
			
 
				+                length *= 2
			
 
				+            elif err == errno.ENODATA:
			
 
				+                return None
			
 
				+            else:
			
 
				+                raise IOError(err,os.strerror(err),path)
			
 
				+
			
 
				+
			
 
				+def xattr_relpath(fullpath):
			
 
				+    return lgetxattr(fullpath,'user.mergerfs.relpath').decode(errors='backslashreplace')
			
 
				+
			
 
				+
			
 
				+def xattr_basepath(fullpath):
			
 
				+    return lgetxattr(fullpath,'user.mergerfs.basepath').decode(errors='backslashreplace')
			
 
				+
			
 
				+
			
 
				+def ismergerfs(path):
			
 
				+    try:
			
 
				+        lgetxattr(path,'user.mergerfs.version')
			
 
				+        return True
			
 
				+    except IOError as e:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def mergerfs_control_file(basedir):
			
 
				+    if basedir == '/':
			
 
				+        return None
			
 
				+    ctrlfile = os.path.join(basedir,'.mergerfs')
			
 
				+    if os.path.exists(ctrlfile):
			
 
				+        return ctrlfile
			
 
				+    else:
			
 
				+        dirname = os.path.dirname(basedir)
			
 
				+        return mergerfs_control_file(dirname)
			
 
				+
			
 
				+
			
 
				+def mergerfs_srcmounts(ctrlfile):
			
 
				+    srcmounts = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
			
 
				+    srcmounts = srcmounts.decode(errors='backslashreplace').split(':')
			
 
				+    return srcmounts
			
 
				+
			
 
				+
			
 
				+def match(filename,matches):
			
 
				+    for match in matches:
			
 
				+        if fnmatch.fnmatch(filename,match):
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def execute_cmd(args):
			
 
				+    return subprocess.call(args)
			
 
				+
			
 
				+
			
 
				+def print_args(args):
			
 
				+    quoted = [shlex.quote(arg) for arg in args]
			
 
				+    print(' '.join(quoted))
			
 
				+
			
 
				+
			
 
				+def human_to_bytes(s):
			
 
				+    m = s[-1]
			
 
				+    if   m == 'K':
			
 
				+        i = int(s[0:-1]) * 1024
			
 
				+    elif m == 'M':
			
 
				+        i = int(s[0:-1]) * 1024 * 1024
			
 
				+    elif m == 'G':
			
 
				+        i = int(s[0:-1]) * 1024 * 1024 * 1024
			
 
				+    elif m == 'T':
			
 
				+        i = int(s[0:-1]) * 1024 * 1024 * 1024 * 1024
			
 
				+    else:
			
 
				+        i = int(s)
			
 
				+
			
 
				+    return i
			
 
				+
			
 
				+
			
 
				+def get_stats(branches):
			
 
				+    sizes = {}
			
 
				+    for branch in branches:
			
 
				+        vfs = os.statvfs(branch)
			
 
				+        sizes[branch] = vfs.f_bavail * vfs.f_frsize
			
 
				+    return sizes
			
 
				+
			
 
				+
			
 
				+def build_move_file(src,tgt,rel):
			
 
				+    rel = rel.strip('/')
			
 
				+    srcpath = os.path.join(src,'./',rel)
			
 
				+    tgtpath = tgt.rstrip('/') + '/'
			
 
				+    return ['rsync',
			
 
				+            '-avHAXWE',
			
 
				+            '--numeric-ids',
			
 
				+            '--progress',
			
 
				+            '--relative',
			
 
				+            '--remove-source-files',
			
 
				+            srcpath,
			
 
				+            tgtpath]
			
 
				+
			
 
				+
			
 
				+def print_help():
			
 
				+    help = \
			
 
				+'''
			
 
				+usage: mergerfs.consolidate [<options>] <dir>
			
 
				+
			
 
				+Consolidate files in a single mergerfs directory onto a single drive.
			
 
				+
			
 
				+positional arguments:
			
 
				+  dir                    starting directory
			
 
				+
			
 
				+optional arguments:
			
 
				+  -m, --max-files=       Skip directories with more than N files.
			
 
				+                         (default: 256)
			
 
				+  -M, --max-size=        Skip directories with files adding up to more
			
 
				+                         than N. (default: 16G)
			
 
				+  -I, --include-path=    fnmatch compatible path include filter.
			
 
				+                         Can be used multiple times.
			
 
				+  -E, --exclude-path=    fnmatch compatible path exclude filter.
			
 
				+                         Can be used multiple times.
			
 
				+  -e, --execute          Execute `rsync` commands as well as print them.
			
 
				+  -h, --help             Print this help.
			
 
				+'''
			
 
				+    print(help)
			
 
				+
			
 
				+
			
 
				+def buildargparser():
			
 
				+    parser = argparse.ArgumentParser(add_help=False)
			
 
				+    parser.add_argument('dir',
			
 
				+                        type=str,
			
 
				+                        nargs='?',
			
 
				+                        default=None)
			
 
				+    parser.add_argument('-m','--max-files',
			
 
				+                        dest='max_files',
			
 
				+                        type=int,
			
 
				+                        default=256)
			
 
				+    parser.add_argument('-M','--max-size',
			
 
				+                        dest='max_size',
			
 
				+                        type=human_to_bytes,
			
 
				+                        default='16G')
			
 
				+    parser.add_argument('-I','--include-path',
			
 
				+                        dest='includepath',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[])
			
 
				+    parser.add_argument('-E','--exclude-path',
			
 
				+                        dest='excludepath',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[])
			
 
				+    parser.add_argument('-e','--execute',
			
 
				+                        dest='execute',
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('-h','--help',
			
 
				+                        action='store_true')
			
 
				+
			
 
				+    return parser
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+
			
 
				+    parser = buildargparser()
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.help or not args.dir:
			
 
				+        print_help()
			
 
				+        sys.exit(0)
			
 
				+
			
 
				+    args.dir = os.path.realpath(args.dir)
			
 
				+
			
 
				+    ctrlfile = mergerfs_control_file(args.dir)
			
 
				+    if not ismergerfs(ctrlfile):
			
 
				+        print("%s is not a mergerfs mount" % args.dir)
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    basedir       = args.dir
			
 
				+    execute       = args.execute
			
 
				+    max_files     = args.max_files
			
 
				+    max_size      = args.max_size
			
 
				+    path_includes = ['*'] if not args.includepath else args.includepath
			
 
				+    path_excludes = args.excludepath
			
 
				+    srcmounts     = mergerfs_srcmounts(ctrlfile)
			
 
				+
			
 
				+    mount_stats = get_stats(srcmounts)
			
 
				+    try:
			
 
				+        for (root,dirs,files) in os.walk(basedir):
			
 
				+            if len(files) <= 1:
			
 
				+                continue
			
 
				+            if len(files) > max_files:
			
 
				+                continue
			
 
				+            if match(root,path_excludes):
			
 
				+                continue
			
 
				+            if not match(root,path_includes):
			
 
				+                continue
			
 
				+
			
 
				+            total_size = 0
			
 
				+            file_stats = {}
			
 
				+            for file in files:
			
 
				+                fullpath = os.path.join(root,file)
			
 
				+                st = os.lstat(fullpath)
			
 
				+                if not stat.S_ISREG(st.st_mode):
			
 
				+                    continue
			
 
				+                total_size += st.st_size
			
 
				+                file_stats[fullpath] = st
			
 
				+
			
 
				+            if total_size >= max_size:
			
 
				+                continue
			
 
				+
			
 
				+            tgtpath = sorted(mount_stats.items(),key=lambda x: x[1],reverse=True)[0][0]
			
 
				+            for (fullpath,st) in sorted(file_stats.items()):
			
 
				+                srcpath = xattr_basepath(fullpath)
			
 
				+                if srcpath == tgtpath:
			
 
				+                    continue
			
 
				+
			
 
				+                relpath = xattr_relpath(fullpath)
			
 
				+
			
 
				+                mount_stats[srcpath] += st.st_size
			
 
				+                mount_stats[tgtpath] -= st.st_size
			
 
				+
			
 
				+                args = build_move_file(srcpath,tgtpath,relpath)
			
 
				+
			
 
				+                print_args(args)
			
 
				+                if execute:
			
 
				+                    execute_cmd(args)
			
 
				+    except (KeyboardInterrupt,BrokenPipeError):
			
 
				+        pass
			
 
				+
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+   main()
			
--- a/roles/mergerfs/files/mergerfs.ctl
+++ b/roles/mergerfs/files/mergerfs.ctl
@@ -0,0 +1,275 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
			
 
				+
			
 
				+# Permission to use, copy, modify, and/or distribute this software for any
			
 
				+# purpose with or without fee is hereby granted, provided that the above
			
 
				+# copyright notice and this permission notice appear in all copies.
			
 
				+
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
			
 
				+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
			
 
				+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
			
 
				+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
			
 
				+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
			
 
				+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
			
 
				+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+import argparse
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+def find_mergerfs():
			
 
				+    rv = []
			
 
				+    with open('/proc/self/mountinfo','r') as f:
			
 
				+        for line in f:
			
 
				+            values = line.split()
			
 
				+            mountroot, mountpoint = values[3:5]
			
 
				+            separator = values.index('-', 6)
			
 
				+            fstype = values[separator + 1]
			
 
				+            if fstype == 'fuse.mergerfs' and mountroot == '/':
			
 
				+                rv.append(mountpoint.encode().decode('unicode_escape'))
			
 
				+    return rv
			
 
				+
			
 
				+
			
 
				+def ask_about_path(paths):
			
 
				+    prompt = 'Available mergerfs mounts:\n'
			
 
				+    for i in range(0,len(paths)):
			
 
				+        prompt += ' {0}: {1}\n'.format(i,paths[i])
			
 
				+    prompt += 'Choose which mount to act on: '
			
 
				+    path = input(prompt)
			
 
				+    return paths[int(path)]
			
 
				+
			
 
				+
			
 
				+def device2mount(device):
			
 
				+    with open('/proc/mounts','r') as f:
			
 
				+        for line in f:
			
 
				+            columns = line.split()
			
 
				+            if columns[0] == device:
			
 
				+                return columns[1]
			
 
				+    with open('/etc/fstab','r') as f:
			
 
				+        for line in f:
			
 
				+            columns = line.split()
			
 
				+            try:
			
 
				+                if columns[0] == device:
			
 
				+                    return columns[1]
			
 
				+                realpath = os.path.realpath(columns[0])
			
 
				+                if realpath == device:
			
 
				+                    return columns[1]
			
 
				+            except:
			
 
				+                pass
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def control_file(path):
			
 
				+    return os.path.join(path,'.mergerfs')
			
 
				+
			
 
				+
			
 
				+def add_srcmount(ctrlfile,srcmount):
			
 
				+    key   = b'user.mergerfs.srcmounts'
			
 
				+    value = b'+' + srcmount.encode()
			
 
				+    try:
			
 
				+        os.setxattr(ctrlfile,key,value)
			
 
				+    except Exception as e:
			
 
				+        print(e)
			
 
				+
			
 
				+
			
 
				+def remove_srcmount(ctrlfile,srcmount):
			
 
				+    key   = b'user.mergerfs.srcmounts'
			
 
				+    value = b'-' + srcmount.encode()
			
 
				+    try:
			
 
				+        os.setxattr(ctrlfile,key,value)
			
 
				+    except Exception as e:
			
 
				+        print(e)
			
 
				+
			
 
				+
			
 
				+def normalize_key(key):
			
 
				+    if type(key) == bytes:
			
 
				+        if key.startswith(b'user.mergerfs.'):
			
 
				+            return key
			
 
				+        return b'user.mergerfs.' + key
			
 
				+    elif type(key) == str:
			
 
				+        if key.startswith('user.mergerfs.'):
			
 
				+            return key
			
 
				+        return 'user.mergerfs.' + key
			
 
				+
			
 
				+
			
 
				+def print_mergerfs_info(fspaths):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile  = control_file(fspath)
			
 
				+        version   = os.getxattr(ctrlfile,'user.mergerfs.version')
			
 
				+        pid       = os.getxattr(ctrlfile,'user.mergerfs.pid')
			
 
				+        srcmounts = os.getxattr(ctrlfile,'user.mergerfs.srcmounts')
			
 
				+        output = ('- mount: {0}\n'
			
 
				+                  '  version: {1}\n'
			
 
				+                  '  pid: {2}\n'
			
 
				+                  '  srcmounts:\n'
			
 
				+                  '    - ').format(fspath,
			
 
				+                                   version.decode(),
			
 
				+                                   pid.decode())
			
 
				+        srcmounts = srcmounts.decode().split(':')
			
 
				+        output += '\n    - '.join(srcmounts)
			
 
				+        print(output)
			
 
				+
			
 
				+
			
 
				+def build_arg_parser():
			
 
				+    desc = 'a tool for runtime manipulation of mergerfs'
			
 
				+    parser = argparse.ArgumentParser(description=desc)
			
 
				+
			
 
				+    subparsers = parser.add_subparsers(dest='command')
			
 
				+
			
 
				+    parser.add_argument('-m','--mount',
			
 
				+                        type=str,
			
 
				+                        help='mergerfs mount to act on')
			
 
				+
			
 
				+    addopt = subparsers.add_parser('add')
			
 
				+    addopt.add_argument('type',choices=['path','device'])
			
 
				+    addopt.add_argument('path',type=str)
			
 
				+    addopt.set_defaults(func=cmd_add)
			
 
				+
			
 
				+    removeopt = subparsers.add_parser('remove')
			
 
				+    removeopt.add_argument('type',choices=['path','device'])
			
 
				+    removeopt.add_argument('path',type=str)
			
 
				+    removeopt.set_defaults(func=cmd_remove)
			
 
				+
			
 
				+    listopt = subparsers.add_parser('list')
			
 
				+    listopt.add_argument('type',choices=['options','values'])
			
 
				+    listopt.set_defaults(func=cmd_list)
			
 
				+
			
 
				+    getopt = subparsers.add_parser('get')
			
 
				+    getopt.add_argument('option',type=str,nargs='+')
			
 
				+    getopt.set_defaults(func=cmd_get)
			
 
				+
			
 
				+    setopt = subparsers.add_parser('set')
			
 
				+    setopt.add_argument('option',type=str)
			
 
				+    setopt.add_argument('value',type=str)
			
 
				+    setopt.set_defaults(func=cmd_set)
			
 
				+
			
 
				+    infoopt = subparsers.add_parser('info')
			
 
				+    infoopt.set_defaults(func=cmd_info)
			
 
				+
			
 
				+    return parser
			
 
				+
			
 
				+
			
 
				+def cmd_add(fspaths,args):
			
 
				+    if args.type == 'device':
			
 
				+        return cmd_add_device(fspaths,args)
			
 
				+    elif args.type == 'path':
			
 
				+        return cmd_add_path(fspaths,args)
			
 
				+
			
 
				+def cmd_add_device(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        mount = device2mount(args.path)
			
 
				+        if mount:
			
 
				+            add_srcmount(ctrlfile,mount)
			
 
				+        else:
			
 
				+            print('{0} not found'.format(args.path))
			
 
				+
			
 
				+def cmd_add_path(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        add_srcmount(ctrlfile,args.path)
			
 
				+
			
 
				+
			
 
				+def cmd_remove(fspaths,args):
			
 
				+    if args.type == 'device':
			
 
				+        return cmd_remove_device(fspaths,args)
			
 
				+    elif args.type == 'path':
			
 
				+        return cmd_remove_path(fspaths,args)
			
 
				+
			
 
				+def cmd_remove_device(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        mount = device2mount(args.path)
			
 
				+        if mount:
			
 
				+            remove_srcmount(ctrlfile,mount)
			
 
				+        else:
			
 
				+            print('{0} not found'.format(args.path.decode()))
			
 
				+
			
 
				+def cmd_remove_path(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        remove_srcmount(ctrlfile,args.path)
			
 
				+
			
 
				+
			
 
				+def cmd_list(fspaths,args):
			
 
				+    if args.type == 'values':
			
 
				+        return cmd_list_values(fspaths,args)
			
 
				+    if args.type == 'options':
			
 
				+        return cmd_list_options(fspaths,args)
			
 
				+
			
 
				+def cmd_list_options(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        keys = os.listxattr(ctrlfile)
			
 
				+        output = ('- mount: {0}\n'
			
 
				+                  '  options:\n').format(fspath)
			
 
				+        for key in keys:
			
 
				+            output += '    - {0}\n'.format(key)
			
 
				+        print(output,end='')
			
 
				+
			
 
				+def cmd_list_values(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        keys = os.listxattr(ctrlfile)
			
 
				+        output = ('- mount: {0}\n'
			
 
				+                  '  options:\n').format(fspath)
			
 
				+        for key in keys:
			
 
				+            value = os.getxattr(ctrlfile,key)
			
 
				+            output += '    {0}: {1}\n'.format(key,value.decode())
			
 
				+        print(output,end='')
			
 
				+
			
 
				+
			
 
				+def cmd_get(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        print('- mount: {0}'.format(fspath))
			
 
				+        for key in args.option:
			
 
				+            key   = normalize_key(key)
			
 
				+            value = os.getxattr(ctrlfile,key).decode()
			
 
				+            print('    {0}: {1}'.format(key,value))
			
 
				+
			
 
				+
			
 
				+def cmd_set(fspaths,args):
			
 
				+    for fspath in fspaths:
			
 
				+        ctrlfile = control_file(fspath)
			
 
				+        key = normalize_key(args.option)
			
 
				+        value = args.value.encode()
			
 
				+        try:
			
 
				+            os.setxattr(ctrlfile,key,value)
			
 
				+        except Exception as e:
			
 
				+            print(e)
			
 
				+
			
 
				+
			
 
				+def cmd_info(fspaths,args):
			
 
				+    print_mergerfs_info(fspaths)
			
 
				+
			
 
				+
			
 
				+def print_and_exit(string,rv):
			
 
				+    print(string)
			
 
				+    sys.exit(rv)
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    parser = build_arg_parser()
			
 
				+    args   = parser.parse_args()
			
 
				+
			
 
				+    fspaths = find_mergerfs()
			
 
				+    if args.mount and args.mount in fspaths:
			
 
				+        fspaths = [args.mount]
			
 
				+    elif not args.mount and not fspaths:
			
 
				+        print_and_exit('no mergerfs mounts found',1)
			
 
				+    elif args.mount and args.mount not in fspaths:
			
 
				+        print_and_exit('{0} is not a mergerfs mount'.format(args.mount),1)
			
 
				+
			
 
				+    if hasattr(args, 'func'):
			
 
				+        args.func(fspaths,args)
			
 
				+    else:
			
 
				+        parser.print_help()
			
 
				+
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/roles/mergerfs/files/mergerfs.dedup
+++ b/roles/mergerfs/files/mergerfs.dedup
@@ -0,0 +1,548 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
			
 
				+
			
 
				+# Permission to use, copy, modify, and/or distribute this software for any
			
 
				+# purpose with or without fee is hereby granted, provided that the above
			
 
				+# copyright notice and this permission notice appear in all copies.
			
 
				+
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
			
 
				+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
			
 
				+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
			
 
				+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
			
 
				+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
			
 
				+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
			
 
				+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+import argparse
			
 
				+import ctypes
			
 
				+import errno
			
 
				+import fnmatch
			
 
				+import hashlib
			
 
				+import io
			
 
				+import os
			
 
				+import random
			
 
				+import shlex
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
			
 
				+_lgetxattr = _libc.lgetxattr
			
 
				+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
			
 
				+def lgetxattr(path,name):
			
 
				+    if type(path) == str:
			
 
				+        path = path.encode(errors='backslashreplace')
			
 
				+    if type(name) == str:
			
 
				+        name = name.encode(errors='backslashreplace')
			
 
				+    length = 64
			
 
				+    while True:
			
 
				+        buf = ctypes.create_string_buffer(length)
			
 
				+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
			
 
				+        if res >= 0:
			
 
				+            return buf.raw[0:res]
			
 
				+        else:
			
 
				+            err = ctypes.get_errno()
			
 
				+            if err == errno.ERANGE:
			
 
				+                length *= 2
			
 
				+            elif err == errno.ENODATA:
			
 
				+                return None
			
 
				+            else:
			
 
				+                raise IOError(err,os.strerror(err),path)
			
 
				+
			
 
				+
			
 
				+def ismergerfs(path):
			
 
				+    try:
			
 
				+        lgetxattr(path,b'user.mergerfs.fullpath')
			
 
				+        return True
			
 
				+    except IOError as e:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def hash_file(filepath, hasher=None, blocksize=65536):
			
 
				+    if not hasher:
			
 
				+        hasher = hashlib.md5()
			
 
				+
			
 
				+    with open(filepath,'rb') as afile:
			
 
				+        buf = afile.read(blocksize)
			
 
				+        while buf:
			
 
				+            hasher.update(buf)
			
 
				+            buf = afile.read(blocksize)
			
 
				+
			
 
				+    return hasher.hexdigest()
			
 
				+
			
 
				+
			
 
				+def short_hash_file(filepath, hasher=None, blocksize=65536, blocks=16):
			
 
				+    if not hasher:
			
 
				+        hasher = hashlib.md5()
			
 
				+
			
 
				+    with open(filepath,'rb') as f:
			
 
				+        size = os.fstat(f.fileno()).st_size
			
 
				+        if size <= blocksize:
			
 
				+            size = 1
			
 
				+            blocks = 1
			
 
				+
			
 
				+        random.seed(size,version=2)
			
 
				+        for _ in range(blocks):
			
 
				+            offset = random.randrange(size)
			
 
				+            f.seek(offset)
			
 
				+            buf = f.read(blocksize)
			
 
				+            if buf:
			
 
				+                hasher.update(buf)
			
 
				+            else:
			
 
				+                break
			
 
				+
			
 
				+    return hasher.hexdigest()
			
 
				+
			
 
				+
			
 
				+def sizeof_fmt(num):
			
 
				+    for unit in ['','K','M','G','T','P','E','Z']:
			
 
				+        if abs(num) < 1024.0:
			
 
				+            return "%3.1f%sB" % (num,unit)
			
 
				+        num /= 1024.0
			
 
				+    return "%.1f%sB" % (num,'Y')
			
 
				+
			
 
				+
			
 
				+def stat_files(paths):
			
 
				+    rv = []
			
 
				+    for path in paths:
			
 
				+        try:
			
 
				+            st = os.stat(path)
			
 
				+            rv.append((path,st))
			
 
				+        except:
			
 
				+            pass
			
 
				+
			
 
				+    return rv
			
 
				+
			
 
				+
			
 
				+def remove(files,execute,verbose):
			
 
				+    for (path,stat) in files:
			
 
				+        try:
			
 
				+            print('rm -vf',shlex.quote(path))
			
 
				+            if execute:
			
 
				+                os.remove(path)
			
 
				+        except Exception as e:
			
 
				+            print("%s" % e)
			
 
				+
			
 
				+
			
 
				+def print_stats(stats):
			
 
				+    for i in range(0,len(stats)):
			
 
				+        print("#  %i: %s" % (i+1,stats[i][0]))
			
 
				+        data = ("#   - uid: {0:5}; gid: {1:5}; mode: {2:6o}; "
			
 
				+                "size: {3}; mtime: {4}").format(
			
 
				+            stats[i][1].st_uid,
			
 
				+            stats[i][1].st_gid,
			
 
				+            stats[i][1].st_mode,
			
 
				+            sizeof_fmt(stats[i][1].st_size),
			
 
				+            stats[i][1].st_mtime)
			
 
				+        print(data)
			
 
				+
			
 
				+
			
 
				+def total_size(stats):
			
 
				+    total = 0
			
 
				+    for (name,stat) in stats:
			
 
				+        total = total + stat.st_size
			
 
				+    return total
			
 
				+
			
 
				+
			
 
				+def manual_dedup(fullpath,stats):
			
 
				+    done = False
			
 
				+    while not done:
			
 
				+        value = input("# Which to keep? ('s' to skip):")
			
 
				+
			
 
				+        if value.lower() == 's':
			
 
				+            stats.clear()
			
 
				+            done = True
			
 
				+            continue
			
 
				+
			
 
				+        try:
			
 
				+            value = int(value) - 1
			
 
				+            if value < 0 or value >= len(stats):
			
 
				+                raise ValueError
			
 
				+            stats.remove(stats[value])
			
 
				+            done = True
			
 
				+        except NameError:
			
 
				+            print("Input error: enter a value [1-{0}] or skip by entering 's'".format(len(stats)))
			
 
				+        except ValueError:
			
 
				+            print("Input error: enter a value [1-{0}] or skip by entering 's'".format(len(stats)))
			
 
				+
			
 
				+
			
 
				+def mtime_all(stats):
			
 
				+    mtime = stats[0][1].st_mtime
			
 
				+    return all(x[1].st_mtime == mtime for x in stats)
			
 
				+
			
 
				+
			
 
				+def mtime_any(mtime,stats):
			
 
				+    return any([st.st_mtime == mtime for (path,st) in stats])
			
 
				+
			
 
				+
			
 
				+def size_all(stats):
			
 
				+    size = stats[0][1].st_size
			
 
				+    return all(x[1].st_size == size for x in stats)
			
 
				+
			
 
				+
			
 
				+def size_any(size,stats):
			
 
				+    return any([st.st_size == size for (path,st) in stats])
			
 
				+
			
 
				+
			
 
				+def md5sums_all(stats):
			
 
				+    if size_all(stats):
			
 
				+        hashval = hash_file(stats[0][0])
			
 
				+        return all(hash_file(path) == hashval for (path,st) in stats[1:])
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def short_md5sums_all(stats):
			
 
				+    if size_all(stats):
			
 
				+        hashval = short_hash_file(stats[0][0])
			
 
				+        return all(short_hash_file(path) == hashval for (path,st) in stats[1:])
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def oldest_dedup(fullpath,stats):
			
 
				+    if size_all(stats) and mtime_all(stats):
			
 
				+        drive_with_most_space_dedup(fullpath,stats)
			
 
				+        return
			
 
				+
			
 
				+    stats.sort(key=lambda st: st[1].st_mtime)
			
 
				+    oldest = stats[0]
			
 
				+    stats.remove(oldest)
			
 
				+
			
 
				+
			
 
				+def strict_oldest_dedup(fullpath,stats):
			
 
				+    stats.sort(key=lambda st: st[1].st_mtime,reverse=False)
			
 
				+
			
 
				+    oldest = stats[0]
			
 
				+    stats.remove(oldest)
			
 
				+    if mtime_any(oldest[1].st_mtime,stats):
			
 
				+        stats.clear()
			
 
				+
			
 
				+
			
 
				+def newest_dedup(fullpath,stats):
			
 
				+    if size_all(stats) and mtime_all(stats):
			
 
				+        drive_with_most_space_dedup(fullpath,stats)
			
 
				+        return
			
 
				+
			
 
				+    stats.sort(key=lambda st: st[1].st_mtime,reverse=True)
			
 
				+    newest = stats[0]
			
 
				+    stats.remove(newest)
			
 
				+
			
 
				+
			
 
				+def strict_newest_dedup(fullpath,stats):
			
 
				+    stats.sort(key=lambda st: st[1].st_mtime,reverse=True)
			
 
				+
			
 
				+    newest = stats[0]
			
 
				+    stats.remove(newest)
			
 
				+    if mtime_any(newest[1].st_mtime,stats):
			
 
				+        stats.clear()
			
 
				+
			
 
				+
			
 
				+def largest_dedup(fullpath,stats):
			
 
				+    if size_all(stats) and mtime_all(stats):
			
 
				+        drive_with_most_space_dedup(fullpath,stats)
			
 
				+        return
			
 
				+
			
 
				+    stats.sort(key=lambda st: st[1].st_size,reverse=True)
			
 
				+    largest = stats[0]
			
 
				+    stats.remove(largest)
			
 
				+
			
 
				+
			
 
				+def strict_largest_dedup(fullpath,stats):
			
 
				+    stats.sort(key=lambda st: st[1].st_size,reverse=True)
			
 
				+
			
 
				+    largest = stats[0]
			
 
				+    stats.remove(largest)
			
 
				+    if size_any(largest[1].st_size,stats):
			
 
				+        stats.clear()
			
 
				+
			
 
				+
			
 
				+def smallest_dedup(fullpath,stats):
			
 
				+    if size_all(stats) and mtime_all(stats):
			
 
				+        drive_with_most_space_dedup(fullpath,stats)
			
 
				+        return
			
 
				+
			
 
				+    stats.sort(key=lambda st: st[1].st_size)
			
 
				+    smallest = stats[0]
			
 
				+    stats.remove(smallest)
			
 
				+
			
 
				+
			
 
				+def strict_smallest_dedup(fullpath,stats):
			
 
				+    stats.sort(key=lambda st: st[1].st_size,reverse=False)
			
 
				+
			
 
				+    smallest = stats[0]
			
 
				+    stats.remove(smallest)
			
 
				+    if size_any(smallest[1].st_size,stats):
			
 
				+        stats.clear()
			
 
				+
			
 
				+
			
 
				+def calc_space_free(stat):
			
 
				+    st = os.statvfs(stat[0])
			
 
				+    return st.f_frsize * st.f_bfree
			
 
				+
			
 
				+
			
 
				+def drive_with_most_space_dedup(fullpath,stats):
			
 
				+    stats.sort(key=calc_space_free,reverse=True)
			
 
				+    largest = stats[0]
			
 
				+    stats.remove(largest)
			
 
				+
			
 
				+
			
 
				+def mergerfs_getattr_dedup(origpath,stats):
			
 
				+    fullpath = getxattr(origpath,b'user.mergerfs.fullpath')
			
 
				+    for (path,stat) in stats:
			
 
				+        if path != fullpath:
			
 
				+            continue
			
 
				+        stats.remove((path,stat))
			
 
				+        break
			
 
				+
			
 
				+
			
 
				+def get_dedupfun(name,strict):
			
 
				+    if strict:
			
 
				+        name = 'strict-' + name
			
 
				+    funs = {
			
 
				+        'manual': manual_dedup,
			
 
				+        'strict-manual': manual_dedup,
			
 
				+        'mostfreespace': drive_with_most_space_dedup,
			
 
				+        'strict-mostfreespace': drive_with_most_space_dedup,
			
 
				+        'newest': newest_dedup,
			
 
				+        'strict-newest': strict_newest_dedup,
			
 
				+        'oldest': oldest_dedup,
			
 
				+        'strict-oldest': strict_oldest_dedup,
			
 
				+        'largest': largest_dedup,
			
 
				+        'strict-largest': strict_largest_dedup,
			
 
				+        'smallest': smallest_dedup,
			
 
				+        'strict-smallest': strict_smallest_dedup,
			
 
				+        'mergerfs': mergerfs_getattr_dedup,
			
 
				+        'strict-mergerfs': mergerfs_getattr_dedup
			
 
				+    }
			
 
				+    return funs[name]
			
 
				+
			
 
				+
			
 
				+def get_ignorefun(name):
			
 
				+    funs = {
			
 
				+        None: lambda x: None,
			
 
				+        'same-time': mtime_all,
			
 
				+        'diff-time': lambda x: not mtime_all(x),
			
 
				+        'same-size': size_all,
			
 
				+        'diff-size': lambda x: not size_all(x),
			
 
				+        'same-hash': md5sums_all,
			
 
				+        'diff-hash': lambda x: not md5sums_all(x),
			
 
				+        'same-short-hash': short_md5sums_all,
			
 
				+        'diff-short-hash': lambda x: not short_md5sums_all(x)
			
 
				+    }
			
 
				+
			
 
				+    return funs[name]
			
 
				+
			
 
				+
			
 
				+def getxattr(path,key):
			
 
				+    try:
			
 
				+        attr = lgetxattr(path,key)
			
 
				+        if attr:
			
 
				+            return attr.decode('utf-8')
			
 
				+        return ''
			
 
				+    except IOError as e:
			
 
				+        if e.errno == errno.ENODATA:
			
 
				+            return ''
			
 
				+        raise
			
 
				+    except UnicodeDecodeError as e:
			
 
				+        print(e)
			
 
				+        print(attr)
			
 
				+    return ''
			
 
				+
			
 
				+
			
 
				+def match(filename,matches):
			
 
				+    for match in matches:
			
 
				+        if fnmatch.fnmatch(filename,match):
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def dedup(fullpath,verbose,ignorefun,execute,dedupfun):
			
 
				+    paths = getxattr(fullpath,b'user.mergerfs.allpaths').split('\0')
			
 
				+    if len(paths) <= 1:
			
 
				+        return 0
			
 
				+
			
 
				+    stats = stat_files(paths)
			
 
				+
			
 
				+    if ignorefun(stats):
			
 
				+        if verbose >= 2:
			
 
				+            print('# ignored:',fullpath)
			
 
				+        return 0
			
 
				+
			
 
				+    if (dedupfun == manual_dedup):
			
 
				+        print('#',fullpath)
			
 
				+        print_stats(stats)
			
 
				+
			
 
				+    try:
			
 
				+        dedupfun(fullpath,stats)
			
 
				+        if not stats:
			
 
				+            if verbose >= 2:
			
 
				+                print('# skipped:',fullpath)
			
 
				+            return 0
			
 
				+
			
 
				+        if (dedupfun != manual_dedup):
			
 
				+            if verbose >= 2:
			
 
				+                print('#',fullpath)
			
 
				+            if verbose >= 3:
			
 
				+                print_stats(stats)
			
 
				+
			
 
				+        for (path,stat) in stats:
			
 
				+            try:
			
 
				+                if verbose:
			
 
				+                    print('rm -vf',shlex.quote(path))
			
 
				+                if execute:
			
 
				+                    os.remove(path)
			
 
				+            except Exception as e:
			
 
				+                print('#',e)
			
 
				+
			
 
				+        return total_size(stats)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(e)
			
 
				+
			
 
				+    return 0
			
 
				+
			
 
				+
			
 
				+def print_help():
			
 
				+    help = \
			
 
				+'''
			
 
				+usage: mergerfs.dedup [<options>] <dir>
			
 
				+
			
 
				+Remove duplicate files across branches of a mergerfs pool. Provides
			
 
				+multiple algos for determining which file to keep and what to skip.
			
 
				+
			
 
				+positional arguments:
			
 
				+  dir                    Starting directory
			
 
				+
			
 
				+optional arguments:
			
 
				+  -v, --verbose          Once to print `rm` commands
			
 
				+                         Twice for status info
			
 
				+                         Three for file info
			
 
				+  -i, --ignore=          Ignore files if... (default: none)
			
 
				+                         * same-size       : have the same size
			
 
				+                         * diff-size       : have different sizes
			
 
				+                         * same-time       : have the same mtime
			
 
				+                         * diff-time       : have different mtimes
			
 
				+                         * same-hash       : have the same md5sum
			
 
				+                         * diff-hash       : have different md5sums
			
 
				+                         * same-short-hash : have the same short md5sums
			
 
				+                         * diff-short-hash : have different short md5sums
			
 
				+                         'hash' is expensive. 'short-hash' far less
			
 
				+                         expensive, not as safe, but pretty good.
			
 
				+  -d, --dedup=           What file to *keep* (default: mergerfs)
			
 
				+                         * manual        : ask user
			
 
				+                         * oldest        : file with smallest mtime
			
 
				+                         * newest        : file with largest mtime
			
 
				+                         * largest       : file with largest size
			
 
				+                         * smallest      : file with smallest size
			
 
				+                         * mostfreespace : file on drive with most free space
			
 
				+                         * mergerfs      : file selected by the mergerfs
			
 
				+                                           getattr policy
			
 
				+  -s, --strict           Skip dedup if all files have same (mtime,size) value.
			
 
				+                         Only applies to oldest, newest, largest, smallest.
			
 
				+  -e, --execute          Will not perform file removal without this.
			
 
				+  -I, --include=         fnmatch compatible filter to include files.
			
 
				+                         Can be used multiple times.
			
 
				+  -E, --exclude=         fnmatch compatible filter to exclude files.
			
 
				+                         Can be used multiple times.
			
 
				+
			
 
				+'''
			
 
				+    print(help)
			
 
				+
			
 
				+
			
 
				+def buildargparser():
			
 
				+    desc = 'dedup files across branches in a mergerfs pool'
			
 
				+    usage = 'mergerfs.dedup [<options>] <dir>'
			
 
				+    parser = argparse.ArgumentParser(add_help=False)
			
 
				+
			
 
				+    parser.add_argument('dir',
			
 
				+                        type=str,
			
 
				+                        nargs='?',
			
 
				+                        default=None,
			
 
				+                        help='starting directory')
			
 
				+    parser.add_argument('-v','--verbose',
			
 
				+                        action='count',
			
 
				+                        default=0)
			
 
				+    parser.add_argument('-i','--ignore',
			
 
				+                        choices=['same-size','diff-size',
			
 
				+                                 'same-time','diff-time',
			
 
				+                                 'same-hash','diff-hash',
			
 
				+                                 'same-short-hash',
			
 
				+                                 'diff-short-hash'])
			
 
				+    parser.add_argument('-d','--dedup',
			
 
				+                        choices=['manual',
			
 
				+                                 'oldest','newest',
			
 
				+                                 'smallest','largest',
			
 
				+                                 'mostfreespace',
			
 
				+                                 'mergerfs'],
			
 
				+                        default='mergerfs')
			
 
				+    parser.add_argument('-s','--strict',
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('-e','--execute',
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('-I','--include',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[])
			
 
				+    parser.add_argument('-E','--exclude',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[])
			
 
				+    parser.add_argument('-h','--help',
			
 
				+                        action='store_true')
			
 
				+
			
 
				+    return parser
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+
			
 
				+    parser = buildargparser()
			
 
				+    args   = parser.parse_args()
			
 
				+
			
 
				+    if args.help or not args.dir:
			
 
				+        print_help()
			
 
				+        sys.exit(0)
			
 
				+
			
 
				+    args.dir = os.path.realpath(args.dir)
			
 
				+    if not ismergerfs(args.dir):
			
 
				+        print("%s is not a mergerfs directory" % args.dir)
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    dedupfun  = get_dedupfun(args.dedup,args.strict)
			
 
				+    ignorefun = get_ignorefun(args.ignore)
			
 
				+    verbose   = args.verbose
			
 
				+    execute   = args.execute
			
 
				+    includes  = ['*'] if not args.include else args.include
			
 
				+    excludes  = args.exclude
			
 
				+
			
 
				+    total_size = 0
			
 
				+    try:
			
 
				+        for (dirname,dirnames,filenames) in os.walk(args.dir):
			
 
				+            for filename in filenames:
			
 
				+                if match(filename,excludes):
			
 
				+                    continue
			
 
				+                if not match(filename,includes):
			
 
				+                    continue
			
 
				+                fullpath    = os.path.join(dirname,filename)
			
 
				+                total_size += dedup(fullpath,verbose,ignorefun,execute,dedupfun)
			
 
				+    except KeyboardInterrupt:
			
 
				+        print("# exiting: CTRL-C pressed")
			
 
				+    except IOError as e:
			
 
				+        if e.errno == errno.EPIPE:
			
 
				+            pass
			
 
				+        else:
			
 
				+            raise
			
 
				+
			
 
				+    print('# Total savings:',sizeof_fmt(total_size))
			
 
				+
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/roles/mergerfs/files/mergerfs.dup
+++ b/roles/mergerfs/files/mergerfs.dup
@@ -0,0 +1,399 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
			
 
				+#
			
 
				+# Permission to use, copy, modify, and/or distribute this software for any
			
 
				+# purpose with or without fee is hereby granted, provided that the above
			
 
				+# copyright notice and this permission notice appear in all copies.
			
 
				+#
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
			
 
				+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
			
 
				+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
			
 
				+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
			
 
				+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
			
 
				+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
			
 
				+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+import argparse
			
 
				+import ctypes
			
 
				+import errno
			
 
				+import fnmatch
			
 
				+import io
			
 
				+import os
			
 
				+import shlex
			
 
				+import subprocess
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
			
 
				+_lgetxattr = _libc.lgetxattr
			
 
				+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
			
 
				+def lgetxattr(path,name):
			
 
				+    if type(path) == str:
			
 
				+        path = path.encode(errors='backslashreplace')
			
 
				+    if type(name) == str:
			
 
				+        name = name.encode(errors='backslashreplace')
			
 
				+    length = 64
			
 
				+    while True:
			
 
				+        buf = ctypes.create_string_buffer(length)
			
 
				+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
			
 
				+        if res >= 0:
			
 
				+            return buf.raw[0:res].decode(errors='backslashreplace')
			
 
				+        else:
			
 
				+            err = ctypes.get_errno()
			
 
				+            if err == errno.ERANGE:
			
 
				+                length *= 2
			
 
				+            elif err == errno.ENODATA:
			
 
				+                return None
			
 
				+            else:
			
 
				+                raise IOError(err,os.strerror(err),path)
			
 
				+
			
 
				+
			
 
				+def ismergerfs(path):
			
 
				+    try:
			
 
				+        lgetxattr(path,'user.mergerfs.basepath')
			
 
				+        return True
			
 
				+    except IOError as e:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def mergerfs_control_file(basedir):
			
 
				+    if basedir == '/':
			
 
				+        return None
			
 
				+    ctrlfile = os.path.join(basedir,'.mergerfs')
			
 
				+    if os.path.exists(ctrlfile):
			
 
				+        return ctrlfile
			
 
				+    basedir = os.path.dirname(basedir)
			
 
				+    return mergerfs_control_file(basedir)
			
 
				+
			
 
				+
			
 
				+def mergerfs_branches(ctrlfile):
			
 
				+    branches = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
			
 
				+    branches = branches.split(':')
			
 
				+    return branches
			
 
				+
			
 
				+
			
 
				+def match(filename,matches):
			
 
				+    for match in matches:
			
 
				+        if fnmatch.fnmatch(filename,match):
			
 
				+            return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def execute_cmd(args):
			
 
				+    return subprocess.call(args)
			
 
				+
			
 
				+
			
 
				+def print_args(args):
			
 
				+    quoted = [shlex.quote(arg) for arg in args]
			
 
				+    print(' '.join(quoted))
			
 
				+
			
 
				+
			
 
				+def build_copy_file(src,tgt,rel):
			
 
				+    srcpath = os.path.join(src,'./',rel)
			
 
				+    tgtpath = tgt + '/'
			
 
				+    return ['rsync',
			
 
				+            '-avHAXWE',
			
 
				+            '--numeric-ids',
			
 
				+            '--progress',
			
 
				+            '--relative',
			
 
				+            srcpath,
			
 
				+            tgtpath]
			
 
				+
			
 
				+
			
 
				+def build_branches_freespace(branches):
			
 
				+    rv = dict()
			
 
				+    for branch in branches:
			
 
				+        st = os.statvfs(branch)
			
 
				+        rv[branch] = st.f_bavail * st.f_frsize
			
 
				+    return rv
			
 
				+
			
 
				+
			
 
				+def print_help():
			
 
				+    help = \
			
 
				+'''
			
 
				+usage: mergerfs.dup [<options>] <dir>
			
 
				+
			
 
				+Duplicate files & directories across multiple drives in a pool.
			
 
				+Will print out commands for inspection and out of band use.
			
 
				+
			
 
				+positional arguments:
			
 
				+  dir                    starting directory
			
 
				+
			
 
				+optional arguments:
			
 
				+  -c, --count=           Number of copies to create. (default: 2)
			
 
				+  -d, --dup=             Which file (if more than one exists) to choose to
			
 
				+                         duplicate. Each one falls back to `mergerfs` if
			
 
				+                         all files have the same value. (default: newest)
			
 
				+                         * newest   : file with largest mtime
			
 
				+                         * oldest   : file with smallest mtime
			
 
				+                         * smallest : file with smallest size
			
 
				+                         * largest  : file with largest size
			
 
				+                         * mergerfs : file chosen by mergerfs' getattr
			
 
				+  -p, --prune            Remove files above `count`. Without this enabled
			
 
				+                         it will update all existing files.
			
 
				+  -e, --execute          Execute `rsync` and `rm` commands. Not just
			
 
				+                         print them.
			
 
				+  -I, --include=         fnmatch compatible filter to include files.
			
 
				+                         Can be used multiple times.
			
 
				+  -E, --exclude=         fnmatch compatible filter to exclude files.
			
 
				+                         Can be used multiple times.
			
 
				+'''
			
 
				+    print(help)
			
 
				+
			
 
				+
			
 
				+def buildargparser():
			
 
				+    parser = argparse.ArgumentParser(add_help=False)
			
 
				+    parser.add_argument('dir',
			
 
				+                        type=str,
			
 
				+                        nargs='?',
			
 
				+                        default=None)
			
 
				+    parser.add_argument('-c','--count',
			
 
				+                        dest='count',
			
 
				+                        type=int,
			
 
				+                        default=2)
			
 
				+    parser.add_argument('-p','--prune',
			
 
				+                        dest='prune',
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('-d','--dup',
			
 
				+                        choices=['newest','oldest',
			
 
				+                                 'smallest','largest',
			
 
				+                                 'mergerfs'],
			
 
				+                        default='newest')
			
 
				+    parser.add_argument('-e','--execute',
			
 
				+                        dest='execute',
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('-I','--include',
			
 
				+                        dest='include',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[])
			
 
				+    parser.add_argument('-E','--exclude',
			
 
				+                        dest='exclude',
			
 
				+                        type=str,
			
 
				+                        action='append',
			
 
				+                        default=[])
			
 
				+    parser.add_argument('-h','--help',
			
 
				+                        action='store_true')
			
 
				+
			
 
				+    return parser
			
 
				+
			
 
				+
			
 
				+def xattr_basepath(fullpath):
			
 
				+    return lgetxattr(fullpath,'user.mergerfs.basepath')
			
 
				+
			
 
				+
			
 
				+def xattr_allpaths(fullpath):
			
 
				+    return lgetxattr(fullpath,'user.mergerfs.allpaths')
			
 
				+
			
 
				+
			
 
				+def xattr_relpath(fullpath):
			
 
				+    return lgetxattr(fullpath,'user.mergerfs.relpath')
			
 
				+
			
 
				+
			
 
				+def exists(base,rel,name):
			
 
				+    fullpath = os.path.join(base,rel,name)
			
 
				+    return os.path.lexists(fullpath)
			
 
				+
			
 
				+
			
 
				+def mergerfs_all_basepaths(fullpath,relpath):
			
 
				+    attr = xattr_allpaths(fullpath)
			
 
				+    if not attr:
			
 
				+        dirname  = os.path.dirname(fullpath)
			
 
				+        basename = os.path.basename(fullpath)
			
 
				+        attr     = xattr_allpaths(dirname)
			
 
				+        attr     = attr.split('\0')
			
 
				+        attr     = [os.path.join(path,basename)
			
 
				+                    for path in attr
			
 
				+                    if os.path.lexists(os.path.join(path,basename))]
			
 
				+    else:
			
 
				+        attr = attr.split('\0')
			
 
				+    return [x[:-len(relpath)].rstrip('/') for x in attr]
			
 
				+
			
 
				+
			
 
				+def mergerfs_basepath(fullpath):
			
 
				+    attr = xattr_basepath(fullpath)
			
 
				+    if not attr:
			
 
				+        dirname  = os.path.dirname(fullpath)
			
 
				+        basename = os.path.basename(fullpath)
			
 
				+        attr     = xattr_allpaths(dirname)
			
 
				+        attr     = attr.split('\0')
			
 
				+        for path in attr:
			
 
				+            fullpath = os.path.join(path,basename)
			
 
				+            if os.path.lexists(fullpath):
			
 
				+                relpath = xattr_relpath(dirname)
			
 
				+                return path[:-len(relpath)].rstrip('/')
			
 
				+    return attr
			
 
				+
			
 
				+
			
 
				+def mergerfs_relpath(fullpath):
			
 
				+    attr = xattr_relpath(fullpath)
			
 
				+    if not attr:
			
 
				+        dirname  = os.path.dirname(fullpath)
			
 
				+        basename = os.path.basename(fullpath)
			
 
				+        attr     = xattr_relpath(dirname)
			
 
				+        attr     = os.path.join(attr,basename)
			
 
				+    return attr.lstrip('/')
			
 
				+
			
 
				+
			
 
				+def newest_dupfun(default_basepath,relpath,basepaths):
			
 
				+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
			
 
				+
			
 
				+    mtime = sts[basepaths[0]].st_mtime
			
 
				+    if not all([st.st_mtime == mtime for st in sts.values()]):
			
 
				+        return sorted(sts,key=lambda x: sts.get(x).st_mtime,reverse=True)[0]
			
 
				+
			
 
				+    ctime = sts[basepaths[0]].st_ctime
			
 
				+    if not all([st.st_ctime == ctime for st in sts.values()]):
			
 
				+        return sorted(sts,key=lambda x: sts.get(x).st_ctime,reverse=True)[0]
			
 
				+
			
 
				+    return default_basepath
			
 
				+
			
 
				+
			
 
				+def oldest_dupfun(default_basepath,relpath,basepaths):
			
 
				+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
			
 
				+
			
 
				+    mtime = sts[basepaths[0]].st_mtime
			
 
				+    if not all([st.st_mtime == mtime for st in sts.values()]):
			
 
				+        return sorted(sts,key=lambda x: sts.get(x).st_mtime,reverse=False)[0]
			
 
				+
			
 
				+    ctime = sts[basepaths[0]].st_ctime
			
 
				+    if not all([st.st_ctime == ctime for st in sts.values()]):
			
 
				+        return sorted(sts,key=lambda x: sts.get(x).st_ctime,reverse=False)[0]
			
 
				+
			
 
				+    return default_basepath
			
 
				+
			
 
				+
			
 
				+def largest_dupfun(default_basepath,relpath,basepaths):
			
 
				+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
			
 
				+
			
 
				+    size = sts[basepaths[0]].st_size
			
 
				+    if not all([st.st_size == size for st in sts.values()]):
			
 
				+        return sorted(sts,key=lambda x: sts.get(x).st_size,reverse=True)[0]
			
 
				+
			
 
				+    return default_basepath
			
 
				+
			
 
				+
			
 
				+def smallest_dupfun(default_basepath,relpath,basepaths):
			
 
				+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
			
 
				+
			
 
				+    size = sts[basepaths[0]].st_size
			
 
				+    if not all([st.st_size == size for st in sts.values()]):
			
 
				+        return sorted(sts,key=lambda x: sts.get(x).st_size,reverse=False)[0]
			
 
				+
			
 
				+    return default_basepath
			
 
				+
			
 
				+
			
 
				+def mergerfs_dupfun(default_basepath,relpath,basepaths):
			
 
				+    return default_basepath
			
 
				+
			
 
				+
			
 
				+def getdupfun(name):
			
 
				+    funs = {'newest': newest_dupfun,
			
 
				+            'oldest': oldest_dupfun,
			
 
				+            'smallest': smallest_dupfun,
			
 
				+            'largest': largest_dupfun,
			
 
				+            'mergerfs': mergerfs_dupfun}
			
 
				+    return funs[name]
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+
			
 
				+    parser = buildargparser()
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.help or not args.dir:
			
 
				+        print_help()
			
 
				+        sys.exit(0)
			
 
				+
			
 
				+    args.dir = os.path.realpath(args.dir)
			
 
				+
			
 
				+    if not ismergerfs(args.dir):
			
 
				+        print("%s is not a mergerfs mount" % args.dir)
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    prune     = args.prune
			
 
				+    execute   = args.execute
			
 
				+    includes  = ['*'] if not args.include else args.include
			
 
				+    excludes  = args.exclude
			
 
				+    dupfun    = getdupfun(args.dup)
			
 
				+    ctrlfile  = mergerfs_control_file(args.dir)
			
 
				+    branches  = mergerfs_branches(ctrlfile)
			
 
				+    branches  = build_branches_freespace(branches)
			
 
				+    count     = min(args.count,len(branches))
			
 
				+
			
 
				+    try:
			
 
				+        for (dirpath,dirnames,filenames) in os.walk(args.dir):
			
 
				+            for filename in filenames:
			
 
				+                if match(filename,excludes):
			
 
				+                    continue
			
 
				+                if not match(filename,includes):
			
 
				+                    continue
			
 
				+
			
 
				+                fullpath = os.path.join(dirpath,filename)
			
 
				+                basepath = mergerfs_basepath(fullpath)
			
 
				+                relpath  = mergerfs_relpath(fullpath)
			
 
				+                existing = mergerfs_all_basepaths(fullpath,relpath)
			
 
				+
			
 
				+                srcpath  = dupfun(basepath,relpath,existing)
			
 
				+                srcfile  = os.path.join(srcpath,relpath)
			
 
				+                srcfile_size = os.lstat(srcfile).st_size
			
 
				+                existing.remove(srcpath)
			
 
				+
			
 
				+                i = 1
			
 
				+                copies = []
			
 
				+                for tgtpath in existing:
			
 
				+                    if prune and i >= count:
			
 
				+                        break
			
 
				+                    copies.append(tgtpath)
			
 
				+                    args = build_copy_file(srcpath,tgtpath,relpath)
			
 
				+                    print('# overwrite')
			
 
				+                    print_args(args)
			
 
				+                    if execute:
			
 
				+                        execute_cmd(args)
			
 
				+                    i += 1
			
 
				+
			
 
				+                for _ in range(i,count):
			
 
				+                    for branch in sorted(branches,key=branches.get,reverse=True):
			
 
				+                        tgtfile = os.path.join(branch,relpath)
			
 
				+                        if branch in copies or os.path.exists(tgtfile):
			
 
				+                            continue
			
 
				+                        copies.append(branch)
			
 
				+                        branches[branch] -= srcfile_size
			
 
				+                        args = build_copy_file(srcpath,branch,relpath)
			
 
				+                        print('# copy')
			
 
				+                        print_args(args)
			
 
				+                        if execute:
			
 
				+                            execute_cmd(args)
			
 
				+                        break
			
 
				+
			
 
				+                if prune:
			
 
				+                    leftovers = set(existing) - set(copies)
			
 
				+                    for branch in leftovers:
			
 
				+                        branches[branch] += srcfile_size
			
 
				+                        tgtfile = os.path.join(branch,relpath)
			
 
				+                        print('# remove')
			
 
				+                        args = ['rm','-vf',tgtfile]
			
 
				+                        print_args(args)
			
 
				+                        if execute:
			
 
				+                            execute_cmd(args)
			
 
				+
			
 
				+
			
 
				+    except KeyboardInterrupt:
			
 
				+        print("exiting: CTRL-C pressed")
			
 
				+    except BrokenPipeError:
			
 
				+        pass
			
 
				+
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+   main()
			
--- a/roles/mergerfs/files/mergerfs.fsck
+++ b/roles/mergerfs/files/mergerfs.fsck
@@ -0,0 +1,225 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
			
 
				+
			
 
				+# Permission to use, copy, modify, and/or distribute this software for any
			
 
				+# purpose with or without fee is hereby granted, provided that the above
			
 
				+# copyright notice and this permission notice appear in all copies.
			
 
				+
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
			
 
				+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
			
 
				+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
			
 
				+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
			
 
				+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
			
 
				+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
			
 
				+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
			
 
				+
			
 
				+import argparse
			
 
				+import ctypes
			
 
				+import errno
			
 
				+import io
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
			
 
				+_lgetxattr = _libc.lgetxattr
			
 
				+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
			
 
				+def lgetxattr(path,name):
			
 
				+    if type(path) == str:
			
 
				+        path = path.encode(errors='backslashreplace')
			
 
				+    if type(name) == str:
			
 
				+        name = name.encode(errors='backslashreplace')
			
 
				+    length = 64
			
 
				+    while True:
			
 
				+        buf = ctypes.create_string_buffer(length)
			
 
				+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
			
 
				+        if res >= 0:
			
 
				+            return buf.raw[0:res]
			
 
				+        else:
			
 
				+            err = ctypes.get_errno()
			
 
				+            if err == errno.ERANGE:
			
 
				+                length *= 2
			
 
				+            elif err == errno.ENODATA:
			
 
				+                return None
			
 
				+            else:
			
 
				+                raise IOError(err,os.strerror(err),path)
			
 
				+
			
 
				+
			
 
				+def ismergerfs(path):
			
 
				+    try:
			
 
				+        lgetxattr(path,"user.mergerfs.fullpath")
			
 
				+        return True
			
 
				+    except IOError as e:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def setstat(stat,paths):
			
 
				+    for path in paths:
			
 
				+        try:
			
 
				+            os.chmod(path,stat.st_mode)
			
 
				+            os.chown(path,stat.st_uid,stat.st_gid);
			
 
				+            print("set %s > uid: %d gid: %d mode: %o" %
			
 
				+                  (path,stat.st_uid,stat.st_gid,stat.st_mode))
			
 
				+        except Exception as e:
			
 
				+            print("%s" % e)
			
 
				+
			
 
				+
			
 
				+def stats_different(stats):
			
 
				+    base = stats[0]
			
 
				+    for stat in stats:
			
 
				+        if ((stat.st_mode == base.st_mode) and
			
 
				+            (stat.st_uid  == base.st_uid)  and
			
 
				+            (stat.st_gid  == base.st_gid)):
			
 
				+            continue
			
 
				+        return True
			
 
				+    return False
			
 
				+
			
 
				+def size_equal(stats):
			
 
				+    base = stats[0]
			
 
				+    for stat in stats:
			
 
				+        if stat.st_size != base.st_size:
			
 
				+            return False
			
 
				+    return True
			
 
				+
			
 
				+def print_stats(Files,Stats):
			
 
				+    for i in range(0,len(Files)):
			
 
				+        print("  %i: %s" % (i,Files[i].decode(errors='backslashreplace')))
			
 
				+        data = ("   - uid: {0:5}; gid: {1:5}; mode: {2:6o}; "
			
 
				+                "size: {3:10}; mtime: {4}").format(
			
 
				+            Stats[i].st_uid,
			
 
				+            Stats[i].st_gid,
			
 
				+            Stats[i].st_mode,
			
 
				+            Stats[i].st_size,
			
 
				+            Stats[i].st_mtime)
			
 
				+        print (data)
			
 
				+
			
 
				+
			
 
				+def noop_fix(paths,stats):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+def manual_fix(paths,stats):
			
 
				+    done = False
			
 
				+    while not done:
			
 
				+        try:
			
 
				+            value = input('Which is correct?: ')
			
 
				+            value = int(value)
			
 
				+            if((value >= len(paths)) or (value < 0)):
			
 
				+                print("Input error: enter a value [0,%d]" % (len(paths)-1))
			
 
				+                continue
			
 
				+            setstat(stats[value],paths)
			
 
				+            done = True
			
 
				+        except Exception as e:
			
 
				+            print("%s" % e)
			
 
				+            done = True
			
 
				+
			
 
				+
			
 
				+def newest_fix(paths,stats):
			
 
				+    stats.sort(key=lambda stat: stat.st_mtime)
			
 
				+    try:
			
 
				+        newest = stats[-1]
			
 
				+        setstat(newest,paths)
			
 
				+    except Exception as e:
			
 
				+        print("%s" % e)
			
 
				+
			
 
				+
			
 
				+def nonroot_fix(paths,stats):
			
 
				+    try:
			
 
				+        for stat in stats:
			
 
				+            if stat.st_uid != 0:
			
 
				+                setstat(stat,paths)
			
 
				+                return
			
 
				+        return newest_fix(paths,stats)
			
 
				+    except Exception as e:
			
 
				+        print("%s" % e)
			
 
				+
			
 
				+
			
 
				+def getfixfun(name):
			
 
				+    if name == 'manual':
			
 
				+        return manual_fix
			
 
				+    elif name == 'newest':
			
 
				+        return newest_fix
			
 
				+    elif name == 'nonroot':
			
 
				+        return nonroot_fix
			
 
				+    return noop_fix
			
 
				+
			
 
				+
			
 
				+def check_consistancy(fullpath,verbose,size,fix):
			
 
				+    paths = lgetxattr(fullpath,"user.mergerfs.allpaths")
			
 
				+    if not paths:
			
 
				+        return
			
 
				+    paths = paths.split(b'\0')
			
 
				+    if len(paths) <= 1:
			
 
				+        return
			
 
				+
			
 
				+    stats = [os.stat(path) for path in paths]
			
 
				+    if (size and not size_equal(stats)):
			
 
				+        return
			
 
				+    if not stats_different(stats):
			
 
				+        return
			
 
				+
			
 
				+    print("%s" % fullpath)
			
 
				+    if verbose:
			
 
				+        print_stats(paths,stats)
			
 
				+    fix(paths,stats)
			
 
				+
			
 
				+
			
 
				+def buildargparser():
			
 
				+    parser = argparse.ArgumentParser(description='audit a mergerfs mount for inconsistencies')
			
 
				+    parser.add_argument('dir',type=str,
			
 
				+                        help='starting directory')
			
 
				+    parser.add_argument('-v','--verbose',action='store_true',
			
 
				+                        help='print details of audit item')
			
 
				+    parser.add_argument('-s','--size',action='store_true',
			
 
				+                        help='only consider if the size is the same')
			
 
				+    parser.add_argument('-f','--fix',choices=['manual','newest','nonroot'],
			
 
				+                        help='fix policy')
			
 
				+    return parser
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
			
 
				+                                  encoding='utf8',
			
 
				+                                  errors='backslashreplace',
			
 
				+                                  line_buffering=True)
			
 
				+
			
 
				+    parser = buildargparser()
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.fix:
			
 
				+        args.verbose = True
			
 
				+
			
 
				+    fix = getfixfun(args.fix)
			
 
				+
			
 
				+    args.dir = os.path.realpath(args.dir)
			
 
				+    if not ismergerfs(args.dir):
			
 
				+        print("%s is not a mergerfs directory" % args.dir)
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    try:
			
 
				+        size = args.size
			
 
				+        verbose = args.verbose
			
 
				+        for (dirname,dirnames,filenames) in os.walk(args.dir):
			
 
				+            fulldirpath = os.path.join(args.dir,dirname)
			
 
				+            check_consistancy(fulldirpath,verbose,size,fix)
			
 
				+            for filename in filenames:
			
 
				+                fullpath = os.path.join(fulldirpath,filename)
			
 
				+                check_consistancy(fullpath,verbose,size,fix)
			
 
				+    except KeyboardInterrupt:
			
 
				+        pass
			
 
				+    except IOError as e:
			
 
				+        if e.errno == errno.EPIPE:
			
 
				+            pass
			
 
				+        else:
			
 
				+            raise
			
 
				+
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/roles/mergerfs/files/mergerfs.mktrash
+++ b/roles/mergerfs/files/mergerfs.mktrash
@@ -0,0 +1,28 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+TRASHDIR=".Trash"
			
 
				+MOUNTPOINT="${1}"
			
 
				+
			
 
				+if [ "${MOUNTPOINT}" = "" ]; then
			
 
				+    echo "usage: ${0} <mountpoint>"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ $EUID -ne 0 ]; then
			
 
				+    echo "You must run ${0} as root"
			
 
				+    exit 2
			
 
				+fi
			
 
				+
			
 
				+if [ ! -e "${MOUNTPOINT}/.mergerfs" ]; then
			
 
				+    echo "ERROR: ${MOUNTPOINT} does not appear to be a mergerfs mountpoint"
			
 
				+    exit 3
			
 
				+fi
			
 
				+
			
 
				+SRCMOUNTS=$(xattr -p user.mergerfs.srcmounts "${MOUNTPOINT}/.mergerfs" | tr : " ")
			
 
				+for mount in ${SRCMOUNTS}
			
 
				+do
			
 
				+    DIR="${mount}/${TRASHDIR}"
			
 
				+    mkdir -v --mode=1777 "${DIR}"
			
 
				+done
			
 
				+
			
 
				+exit 0
			
--- a/roles/mergerfs/tasks/main.yml
+++ b/roles/mergerfs/tasks/main.yml
@@ -0,0 +1,68 @@
 
				+---
			
 
				+- name: Install git
			
 
				+  yum:
			
 
				+    name: git-core
			
 
				+    state: installed
			
 
				+
			
 
				+- name: Clone mergerfs repo
			
 
				+  shell:
			
 
				+    cmd: git clone http://github.com/trapexit/mergerfs.git
			
 
				+    chdir: /root
			
 
				+    creates: /root/mergerfs
			
 
				+
			
 
				+- name: Query latest mergerfs version
			
 
				+  shell:
			
 
				+    cmd: git tag | sort -V | tail -1
			
 
				+    chdir: /root/mergerfs
			
 
				+  register: mergerfs_version
			
 
				+  changed_when: false
			
 
				+
			
 
				+- name: Set mergerfs RPM path
			
 
				+  set_fact:
			
 
				+    mergerfs_rpm: /root/mergerfs/rpmbuild/RPMS/x86_64/mergerfs-{{ mergerfs_version.stdout }}-1.{{ 'el' if ansible_distribution == 'CentOS' else 'fc' }}{{ ansible_distribution_major_version }}.{{ ansible_architecture }}.rpm
			
 
				+
			
 
				+- name: Download and build mergerfs
			
 
				+  shell:
			
 
				+    cmd: >
			
 
				+         git checkout {{ mergerfs_version.stdout }} &&
			
 
				+         tools/install-build-pkgs &&
			
 
				+         make rpm
			
 
				+    chdir: /root/mergerfs
			
 
				+    creates: "{{ mergerfs_rpm }}"
			
 
				+
			
 
				+- name: Stat mergerfs build folder
			
 
				+  find:
			
 
				+    path: /root/mergerfs/rpmbuild/RPMS/x86_64
			
 
				+  register: mergerfs_build_folder
			
 
				+
			
 
				+- name: Install mergerfs
			
 
				+  yum:
			
 
				+    name: "{{ mergerfs_build_folder.files[0].path }}"
			
 
				+    state: installed
			
 
				+    disable_gpg_check: yes
			
 
				+  when:
			
 
				+    - mergerfs_build_folder.files[0].path is defined
			
 
				+
			
 
				+         
			
 
				+- name: Install mergerfs-tools prereqs
			
 
				+  package:
			
 
				+    name:
			
 
				+      - python3
			
 
				+      - rsync
			
 
				+    state: present
			
 
				+
			
 
				+- name: Install mergerfs-tools
			
 
				+  copy:
			
 
				+    src: "{{ item }}"
			
 
				+    dest: /usr/local/bin/{{ item }}
			
 
				+    owner: root
			
 
				+    group: root
			
 
				+    mode: '0755'
			
 
				+  loop:
			
 
				+    - mergerfs.balance
			
 
				+    - mergerfs.consolidate
			
 
				+    - mergerfs.ctl
			
 
				+    - mergerfs.dedup
			
 
				+    - mergerfs.dup
			
 
				+    - mergerfs.fsck
			
 
				+    - mergerfs.mktrash