Explorar o código

add mergerfs role

Blaine Story hai 3 anos
pai
achega
1b3a9bd121

+ 293 - 0
roles/mergerfs/files/mergerfs.balance

@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+import argparse
+import ctypes
+import errno
+import fnmatch
+import io
+import os
+import shlex
+import subprocess
+import sys
+
+
+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
+_lgetxattr = _libc.lgetxattr
+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
+# lgetxattr(2) returns ssize_t; without this ctypes assumes a C int.
+_lgetxattr.restype = ctypes.c_ssize_t
+def lgetxattr(path,name):
+    """Return the extended attribute `name` of `path` as text.
+
+    `path` and `name` may be str or bytes.  Returns None when the
+    attribute does not exist (ENODATA).  Any other failure raises
+    IOError(errno, message, path).
+    """
+    if isinstance(path,str):
+        path = path.encode(errors='backslashreplace')
+    if isinstance(name,str):
+        name = name.encode(errors='backslashreplace')
+    length = 64
+    while True:
+        buf = ctypes.create_string_buffer(length)
+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
+        if res >= 0:
+            return buf.raw[0:res].decode(errors='backslashreplace')
+        err = ctypes.get_errno()
+        if err == errno.ERANGE:
+            # Value did not fit: retry with a buffer twice as large.
+            length *= 2
+        elif err == errno.ENODATA:
+            return None
+        else:
+            raise IOError(err,os.strerror(err),path)
+
+
+def ismergerfs(path):
+    """Return True if `path` carries the 'user.mergerfs.version' xattr.
+
+    lgetxattr() signals a missing attribute by returning None rather
+    than raising, so the value is checked explicitly.  An IOError
+    (bad path, xattrs unsupported, ...) also counts as "not mergerfs".
+    """
+    try:
+        return lgetxattr(path,'user.mergerfs.version') is not None
+    except IOError:
+        return False
+
+
+def mergerfs_control_file(basedir):
+    """Locate the '.mergerfs' control file at or above `basedir`.
+
+    Checks '<dir>/.mergerfs' for `basedir` and then each parent in turn,
+    returning the first one that exists.  Returns None when '/' is
+    reached ('/' itself is never probed) or when the path has no further
+    parent, so a relative path terminates instead of recursing forever.
+    """
+    while basedir != '/':
+        ctrlfile = os.path.join(basedir,'.mergerfs')
+        if os.path.exists(ctrlfile):
+            return ctrlfile
+        parent = os.path.dirname(basedir)
+        if parent == basedir:
+            # dirname() made no progress (e.g. '' for a relative path).
+            return None
+        basedir = parent
+    return None
+
+
+def mergerfs_srcmounts(ctrlfile):
+    """Return the list of paths stored, ':'-separated, in the
+    'user.mergerfs.srcmounts' xattr of `ctrlfile`."""
+    return lgetxattr(ctrlfile,'user.mergerfs.srcmounts').split(':')
+
+
+def match(filename,matches):
+    """Return True if `filename` matches at least one fnmatch pattern in
+    `matches`; an empty pattern list never matches."""
+    return any(fnmatch.fnmatch(filename,pattern) for pattern in matches)
+
+
+def exclude_by_size(filepath,exclude_lt,exclude_gt):
+    """Return True if `filepath` should be skipped because of its size.
+
+    A file is excluded when it is smaller than `exclude_lt` or larger
+    than `exclude_gt` bytes; a limit of 0 disables that check.  Files
+    that cannot be stat'ed are not excluded.  Only OSError is swallowed,
+    so Ctrl-C still reaches the caller.
+    """
+    try:
+        size = os.lstat(filepath).st_size
+    except OSError:
+        return False
+    if exclude_lt and size < exclude_lt:
+        return True
+    if exclude_gt and size > exclude_gt:
+        return True
+    return False
+
+
+def find_a_file(src,
+                relpath,
+                file_includes,file_excludes,
+                path_includes,path_excludes,
+                exclude_lt,exclude_gt):
+    """Return the first acceptable file under `src`/`relpath`, or None.
+
+    Walks the tree with os.walk() and returns the path, relative to
+    `src`, of the first file that passes every filter: name/path not
+    excluded, name/path included, and size within the limits.
+    """
+    top = os.path.join(src,relpath)
+    for (root,_subdirs,names) in os.walk(top):
+        for name in names:
+            candidate = os.path.join(root,name)
+            rejected = (match(name,file_excludes) or
+                        match(candidate,path_excludes) or
+                        not match(name,file_includes) or
+                        not match(candidate,path_includes) or
+                        exclude_by_size(candidate,exclude_lt,exclude_gt))
+            if not rejected:
+                return os.path.relpath(candidate,src)
+    return None
+
+
+def execute(args):
+    """Run the command given as argv list `args`; return its exit status."""
+    status = subprocess.call(args)
+    return status
+
+
+def print_args(args):
+    """Print `args` as one shell-quoted, space-separated command line."""
+    print(' '.join(map(shlex.quote,args)))
+
+
+def build_move_file(src,dst,relfile):
+    """Return the rsync argv that moves `relfile` from `src` to `dst`.
+
+    The source is written as '<src>/./<relfile>' and the destination as
+    '<dst>/'; the command uses --relative and --remove-source-files.
+    """
+    return ['rsync',
+            '-avlHAXWE',
+            '--relative',
+            '--progress',
+            '--remove-source-files',
+            os.path.join(src,'./',relfile),
+            dst+'/']
+
+
+def freespace_percentage(srcmounts):
+    """Return [(mount, free_fraction), ...] sorted by ascending free space.
+
+    free_fraction is available bytes divided by total bytes as reported
+    by os.statvfs(), i.e. a fraction rather than a 0-100 percentage.
+    """
+    def free_fraction(mount):
+        vfs = os.statvfs(mount)
+        return (vfs.f_bavail * vfs.f_frsize) / (vfs.f_blocks * vfs.f_frsize)
+
+    pairs = [(mount,free_fraction(mount)) for mount in srcmounts]
+    pairs.sort(key=lambda pair: pair[1])
+    return pairs
+
+
+def all_within_range(l,n):
+    """Return True if the first and last entries of `l` differ by at most `n`.
+
+    `l` is a list of (name, value) pairs; only l[0] and l[-1] are
+    compared, so the caller is expected to pass it sorted by value.
+    Lists with fewer than two entries are always within range.
+    """
+    if len(l) < 2:
+        return True
+    first = l[0][1]
+    last = l[-1][1]
+    return abs(first - last) <= n
+
+
+def human_to_bytes(s):
+    """Convert a size such as '512', '4K', '16G' to a number of bytes.
+
+    The optional suffix K/M/G/T (either case) multiplies by the matching
+    power of 1024; without a suffix the value is plain bytes.  Raises
+    ValueError for malformed input, including the empty string.
+    """
+    multipliers = {'K': 1024,
+                   'M': 1024 ** 2,
+                   'G': 1024 ** 3,
+                   'T': 1024 ** 4}
+    s = s.strip()
+    # s[-1:] is '' for an empty string, so int('') reports the error.
+    suffix = s[-1:].upper()
+    if suffix in multipliers:
+        return int(s[:-1]) * multipliers[suffix]
+    return int(s)
+
+
+def buildargparser():
+    """Build the argparse parser for the mergerfs.balance command line."""
+    parser = argparse.ArgumentParser(description='balance files on a mergerfs mount based on percentage drive filled')
+    parser.add_argument('dir',type=str,help='starting directory')
+    parser.add_argument('-p',dest='percentage',type=float,default=2.0,
+                        help='percentage range of freespace (default 2.0)')
+
+    # Repeatable fnmatch filters: (short flag, long flag, dest, help text).
+    file_help = 'fnmatch compatible file filter (can use multiple times)'
+    path_help = 'fnmatch compatible path filter (can use multiple times)'
+    filters = [('-i','--include','include',file_help),
+               ('-e','--exclude','exclude',file_help),
+               ('-I','--include-path','includepath',path_help),
+               ('-E','--exclude-path','excludepath',path_help)]
+    for (short,long_,dest,text) in filters:
+        parser.add_argument(short,long_,dest=dest,type=str,
+                            action='append',default=[],help=text)
+
+    # Size limits stay strings here; main() converts them to bytes.
+    parser.add_argument('-s',dest='excludelt',type=str,default='0',
+                        help='exclude files smaller than <int>[KMGT] bytes')
+    parser.add_argument('-S',dest='excludegt',type=str,default='0',
+                        help='exclude files larger than <int>[KMGT] bytes')
+    return parser
+
+
+def main():
+    """Entry point of mergerfs.balance.
+
+    Resolves the mergerfs control file for the given directory, then
+    repeatedly moves one file at a time with rsync from the branch with
+    the least free space to the one with the most, until the free-space
+    fractions of all branches are within the requested range, no
+    movable file is found, or rsync fails.  Always ends with
+    sys.exit(): 1 if the directory is not on mergerfs, otherwise 0.
+    """
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
+                                  encoding='utf8',
+                                  errors="backslashreplace",
+                                  line_buffering=True)
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
+                                  encoding='utf8',
+                                  errors="backslashreplace",
+                                  line_buffering=True)
+
+    parser = buildargparser()
+    args = parser.parse_args()
+
+    args.dir = os.path.realpath(args.dir)
+
+    ctrlfile = mergerfs_control_file(args.dir)
+    # mergerfs_control_file() returns None when nothing was found.
+    if ctrlfile is None or not ismergerfs(ctrlfile):
+        print("%s is not a mergerfs mount" % args.dir)
+        sys.exit(1)
+
+    relpath = ''
+    mntpoint = os.path.dirname(ctrlfile)
+    if args.dir != mntpoint:
+        relpath = os.path.relpath(args.dir,mntpoint)
+
+    file_includes = ['*'] if not args.include else args.include
+    file_excludes = args.exclude
+    path_includes = ['*'] if not args.includepath else args.includepath
+    path_excludes = args.excludepath
+    exclude_lt    = human_to_bytes(args.excludelt)
+    exclude_gt    = human_to_bytes(args.excludegt)
+    srcmounts     = mergerfs_srcmounts(ctrlfile)
+    percentage    = args.percentage / 100
+
+    try:
+        l = freespace_percentage(srcmounts)
+        while not all_within_range(l,percentage):
+            # l is sorted by free space: l[-1] has the most free space
+            # (target), l[0] the least (first candidate source).
+            todrive     = l[-1][0]
+            relfilepath = None
+            while not relfilepath and len(l):
+                fromdrive = l[0][0]
+                del l[0]
+                relfilepath = find_a_file(fromdrive,
+                                          relpath,
+                                          file_includes,file_excludes,
+                                          path_includes,path_excludes,
+                                          exclude_lt,exclude_gt)
+            if len(l) == 0:
+                print('Could not find file to transfer: exiting...')
+                break
+            if fromdrive == todrive:
+                print('Source drive == target drive: exiting...')
+                break
+
+            cmd = build_move_file(fromdrive,todrive,relfilepath)
+            print('file: {}\nfrom: {}\nto:   {}'.format(relfilepath,fromdrive,todrive))
+            print_args(cmd)
+            rv = execute(cmd)
+            if rv:
+                print('ERROR - exited with exit code: {}'.format(rv))
+                break
+            l = freespace_percentage(srcmounts)
+        # The search above deletes entries from l, so after an early break
+        # the list is incomplete; re-read it so every branch is reported.
+        l = freespace_percentage(srcmounts)
+        print('Branches within {:.1%} range: '.format(percentage))
+        for (branch,free) in l:
+            print(' * {}: {:.2%} free'.format(branch,free))
+    except KeyboardInterrupt:
+        print("exiting: CTRL-C pressed")
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()

+ 278 - 0
roles/mergerfs/files/mergerfs.consolidate

@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+import argparse
+import ctypes
+import errno
+import fnmatch
+import io
+import os
+import shlex
+import stat
+import subprocess
+import sys
+
+
+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
+_lgetxattr = _libc.lgetxattr
+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
+# lgetxattr(2) returns ssize_t; without this ctypes assumes a C int.
+_lgetxattr.restype = ctypes.c_ssize_t
+def lgetxattr(path,name):
+    """Return the raw bytes of extended attribute `name` of `path`.
+
+    `path` and `name` may be str or bytes.  Returns None when the
+    attribute does not exist (ENODATA).  Any other failure raises
+    IOError(errno, message, path).
+    """
+    if isinstance(path,str):
+        path = path.encode(errors='backslashreplace')
+    if isinstance(name,str):
+        name = name.encode(errors='backslashreplace')
+    length = 64
+    while True:
+        buf = ctypes.create_string_buffer(length)
+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
+        if res >= 0:
+            return buf.raw[0:res]
+        err = ctypes.get_errno()
+        if err == errno.ERANGE:
+            # Value did not fit: retry with a buffer twice as large.
+            length *= 2
+        elif err == errno.ENODATA:
+            return None
+        else:
+            raise IOError(err,os.strerror(err),path)
+
+
+def xattr_relpath(fullpath):
+    """Return the 'user.mergerfs.relpath' xattr of `fullpath` as text."""
+    raw = lgetxattr(fullpath,'user.mergerfs.relpath')
+    return raw.decode(errors='backslashreplace')
+
+
+def xattr_basepath(fullpath):
+    """Return the 'user.mergerfs.basepath' xattr of `fullpath` as text."""
+    raw = lgetxattr(fullpath,'user.mergerfs.basepath')
+    return raw.decode(errors='backslashreplace')
+
+
+def ismergerfs(path):
+    """Return True if `path` carries the 'user.mergerfs.version' xattr.
+
+    lgetxattr() signals a missing attribute by returning None rather
+    than raising, so the value is checked explicitly.  An IOError
+    (bad path, xattrs unsupported, ...) also counts as "not mergerfs".
+    """
+    try:
+        return lgetxattr(path,'user.mergerfs.version') is not None
+    except IOError:
+        return False
+
+
+def mergerfs_control_file(basedir):
+    """Locate the '.mergerfs' control file at or above `basedir`.
+
+    Checks '<dir>/.mergerfs' for `basedir` and then each parent in turn,
+    returning the first one that exists.  Returns None when '/' is
+    reached ('/' itself is never probed) or when the path has no further
+    parent, so a relative path terminates instead of recursing forever.
+    """
+    while basedir != '/':
+        ctrlfile = os.path.join(basedir,'.mergerfs')
+        if os.path.exists(ctrlfile):
+            return ctrlfile
+        parent = os.path.dirname(basedir)
+        if parent == basedir:
+            # dirname() made no progress (e.g. '' for a relative path).
+            return None
+        basedir = parent
+    return None
+
+
+def mergerfs_srcmounts(ctrlfile):
+    """Return the list of paths stored, ':'-separated, in the
+    'user.mergerfs.srcmounts' xattr of `ctrlfile`."""
+    raw = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
+    return raw.decode(errors='backslashreplace').split(':')
+
+
+def match(filename,matches):
+    """Return True if `filename` matches at least one fnmatch pattern in
+    `matches`; an empty pattern list never matches."""
+    return any(fnmatch.fnmatch(filename,pattern) for pattern in matches)
+
+
+def execute_cmd(args):
+    """Run the command given as argv list `args`; return its exit status."""
+    status = subprocess.call(args)
+    return status
+
+
+def print_args(args):
+    """Print `args` as one shell-quoted, space-separated command line."""
+    print(' '.join(map(shlex.quote,args)))
+
+
+def human_to_bytes(s):
+    """Convert a size such as '512', '4K', '16G' to a number of bytes.
+
+    The optional suffix K/M/G/T (either case) multiplies by the matching
+    power of 1024; without a suffix the value is plain bytes.  Raises
+    ValueError for malformed input, including the empty string, which
+    argparse turns into a usage error when this is used as `type=`.
+    """
+    multipliers = {'K': 1024,
+                   'M': 1024 ** 2,
+                   'G': 1024 ** 3,
+                   'T': 1024 ** 4}
+    s = s.strip()
+    # s[-1:] is '' for an empty string, so int('') reports the error.
+    suffix = s[-1:].upper()
+    if suffix in multipliers:
+        return int(s[:-1]) * multipliers[suffix]
+    return int(s)
+
+
+def get_stats(branches):
+    """Return {branch: available bytes}, computed as f_bavail * f_frsize
+    from os.statvfs() for each path in `branches`."""
+    def available(path):
+        vfs = os.statvfs(path)
+        return vfs.f_bavail * vfs.f_frsize
+
+    return {branch: available(branch) for branch in branches}
+
+
+def build_move_file(src,tgt,rel):
+    """Return the rsync argv that moves `rel` from `src` to `tgt`.
+
+    Leading/trailing '/' are stripped from `rel`; the source is written
+    as '<src>/./<rel>' and the target is normalised to end in one '/'.
+    """
+    source = os.path.join(src,'./',rel.strip('/'))
+    target = tgt.rstrip('/') + '/'
+    cmd = ['rsync',
+           '-avHAXWE',
+           '--numeric-ids',
+           '--progress',
+           '--relative',
+           '--remove-source-files']
+    cmd.extend([source,target])
+    return cmd
+
+
+def print_help():
+    """Print the hand-written usage text of mergerfs.consolidate."""
+    usage_text = '''
+usage: mergerfs.consolidate [<options>] <dir>
+
+Consolidate files in a single mergerfs directory onto a single drive.
+
+positional arguments:
+  dir                    starting directory
+
+optional arguments:
+  -m, --max-files=       Skip directories with more than N files.
+                         (default: 256)
+  -M, --max-size=        Skip directories with files adding up to more
+                         than N. (default: 16G)
+  -I, --include-path=    fnmatch compatible path include filter.
+                         Can be used multiple times.
+  -E, --exclude-path=    fnmatch compatible path exclude filter.
+                         Can be used multiple times.
+  -e, --execute          Execute `rsync` commands as well as print them.
+  -h, --help             Print this help.
+'''
+    print(usage_text)
+
+
+def buildargparser():
+    """Build the argparse parser for mergerfs.consolidate.
+
+    Built-in help is disabled (add_help=False); '-h/--help' is a plain
+    flag so that the caller can print its own usage text.
+    """
+    parser = argparse.ArgumentParser(add_help=False)
+    add = parser.add_argument
+    add('dir',type=str,nargs='?',default=None)
+    add('-m','--max-files',dest='max_files',type=int,default=256)
+    # The string default is run through human_to_bytes by argparse.
+    add('-M','--max-size',dest='max_size',type=human_to_bytes,default='16G')
+    for (short,long_,dest) in (('-I','--include-path','includepath'),
+                               ('-E','--exclude-path','excludepath')):
+        add(short,long_,dest=dest,type=str,action='append',default=[])
+    add('-e','--execute',dest='execute',action='store_true')
+    add('-h','--help',action='store_true')
+    return parser
+
+
+def main():
+    """Entry point of mergerfs.consolidate.
+
+    Walks the given directory on a mergerfs mount.  For every directory
+    that passes the file-count, size and path filters, the branch with
+    the most available space is picked as the target, and an rsync
+    command is printed (and run when --execute is given) for each
+    regular file whose 'basepath' xattr differs from that target.
+    Always ends with sys.exit(): 1 if the directory is not on mergerfs,
+    otherwise 0.
+    """
+    def utf8_stream(stream):
+        # Re-wrap a standard stream so unencodable text is escaped.
+        return io.TextIOWrapper(stream.buffer,
+                                encoding='utf8',
+                                errors='backslashreplace',
+                                line_buffering=True)
+
+    sys.stdout = utf8_stream(sys.stdout)
+    sys.stderr = utf8_stream(sys.stderr)
+
+    opts = buildargparser().parse_args()
+    if opts.help or not opts.dir:
+        print_help()
+        sys.exit(0)
+
+    basedir = os.path.realpath(opts.dir)
+
+    ctrlfile = mergerfs_control_file(basedir)
+    if not ismergerfs(ctrlfile):
+        print("%s is not a mergerfs mount" % basedir)
+        sys.exit(1)
+
+    path_includes = opts.includepath or ['*']
+    path_excludes = opts.excludepath
+
+    # Available bytes per branch, adjusted below as moves are planned.
+    mount_stats = get_stats(mergerfs_srcmounts(ctrlfile))
+    try:
+        for (root,_dirs,files) in os.walk(basedir):
+            # Only directories holding 2..max_files entries are considered.
+            if not (1 < len(files) <= opts.max_files):
+                continue
+            if match(root,path_excludes) or not match(root,path_includes):
+                continue
+
+            file_stats = {}
+            for name in files:
+                fullpath = os.path.join(root,name)
+                st = os.lstat(fullpath)
+                if stat.S_ISREG(st.st_mode):
+                    file_stats[fullpath] = st
+
+            total_size = sum(st.st_size for st in file_stats.values())
+            if total_size >= opts.max_size:
+                continue
+
+            # Target: the branch with the most available space right now.
+            tgtpath = max(mount_stats.items(),key=lambda item: item[1])[0]
+            for fullpath in sorted(file_stats):
+                st = file_stats[fullpath]
+                srcpath = xattr_basepath(fullpath)
+                if srcpath == tgtpath:
+                    continue
+
+                relpath = xattr_relpath(fullpath)
+
+                # Account for the move so later choices see the new sizes.
+                mount_stats[srcpath] += st.st_size
+                mount_stats[tgtpath] -= st.st_size
+
+                cmd = build_move_file(srcpath,tgtpath,relpath)
+
+                print_args(cmd)
+                if opts.execute:
+                    execute_cmd(cmd)
+    except (KeyboardInterrupt,BrokenPipeError):
+        pass
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+   main()

+ 275 - 0
roles/mergerfs/files/mergerfs.ctl

@@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
+
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+import argparse
+import os
+import sys
+
+
+def _unescape_mountinfo(field):
+    """Decode the octal escapes (e.g. '\\040' for a space) in a mountinfo field."""
+    # unicode_escape treats every non-escape byte as Latin-1, which would
+    # mangle UTF-8 mount points.  Encoding the result back to Latin-1
+    # recovers the raw bytes, which are then decoded as UTF-8.
+    raw = field.encode('utf-8','surrogateescape').decode('unicode_escape').encode('latin-1')
+    return raw.decode('utf-8','surrogateescape')
+
+
+def find_mergerfs():
+    """Return the mount points of all mergerfs mounts on this system.
+
+    Parses /proc/self/mountinfo and keeps entries whose filesystem type
+    is 'fuse.mergerfs' and whose mount root is '/'.  Escaped characters
+    in the mount point are decoded.
+    """
+    rv = []
+    with open('/proc/self/mountinfo','r') as f:
+        for line in f:
+            values = line.split()
+            mountroot, mountpoint = values[3:5]
+            # A lone '-' ends the optional fields; the fs type follows it.
+            separator = values.index('-', 6)
+            fstype = values[separator + 1]
+            if fstype == 'fuse.mergerfs' and mountroot == '/':
+                rv.append(_unescape_mountinfo(mountpoint))
+    return rv
+
+
+def ask_about_path(paths):
+    """Show `paths` as a numbered menu, read an index from the user and
+    return the selected entry."""
+    menu = ['Available mergerfs mounts:\n']
+    menu.extend(' {0}: {1}\n'.format(index,path)
+                for (index,path) in enumerate(paths))
+    menu.append('Choose which mount to act on: ')
+    answer = input(''.join(menu))
+    return paths[int(answer)]
+
+
+def device2mount(device):
+    """Return the mount point of `device`, or None if it is unknown.
+
+    /proc/mounts is searched first for an exact device match.  Then
+    /etc/fstab is searched, matching either the literal first column or
+    its os.path.realpath().  A missing /etc/fstab and unusable fstab
+    lines (blank, comments) are skipped rather than raising.
+    """
+    with open('/proc/mounts','r') as f:
+        for line in f:
+            columns = line.split()
+            if columns[0] == device:
+                return columns[1]
+    try:
+        fstab = open('/etc/fstab','r')
+    except FileNotFoundError:
+        return None
+    with fstab as f:
+        for line in f:
+            columns = line.split()
+            # Blank lines and comments carry no device/mountpoint pair.
+            if len(columns) < 2 or columns[0].startswith('#'):
+                continue
+            if columns[0] == device:
+                return columns[1]
+            try:
+                realpath = os.path.realpath(columns[0])
+            except OSError:
+                continue
+            if realpath == device:
+                return columns[1]
+    return None
+
+
+def control_file(path):
+    """Return the path of the '.mergerfs' entry directly inside `path`."""
+    ctrl_name = '.mergerfs'
+    return os.path.join(path,ctrl_name)
+
+
+def add_srcmount(ctrlfile,srcmount):
+    """Write '+<srcmount>' to the 'user.mergerfs.srcmounts' xattr of
+    `ctrlfile`.  Any exception from setxattr is printed, not raised."""
+    request = b'+' + srcmount.encode()
+    try:
+        os.setxattr(ctrlfile,b'user.mergerfs.srcmounts',request)
+    except Exception as err:
+        print(err)
+
+
+def remove_srcmount(ctrlfile,srcmount):
+    """Write '-<srcmount>' to the 'user.mergerfs.srcmounts' xattr of
+    `ctrlfile`.  Any exception from setxattr is printed, not raised."""
+    request = b'-' + srcmount.encode()
+    try:
+        os.setxattr(ctrlfile,b'user.mergerfs.srcmounts',request)
+    except Exception as err:
+        print(err)
+
+
+def normalize_key(key):
+    """Return `key` with the 'user.mergerfs.' prefix ensured.
+
+    Accepts str or bytes and returns the same type; a key that already
+    has the prefix is returned unchanged.  Raises TypeError for any
+    other type instead of silently returning None.
+    """
+    if isinstance(key,bytes):
+        prefix = b'user.mergerfs.'
+    elif isinstance(key,str):
+        prefix = 'user.mergerfs.'
+    else:
+        raise TypeError('key must be str or bytes, not {0}'.format(type(key).__name__))
+    if key.startswith(prefix):
+        return key
+    return prefix + key
+
+
+def print_mergerfs_info(fspaths):
+    """Print mount, version, pid and srcmounts for each mount in `fspaths`.
+
+    The values are read from the 'user.mergerfs.*' xattrs of each
+    mount's control file and printed as an indented list.
+    """
+    for fspath in fspaths:
+        ctrlfile = control_file(fspath)
+        version = os.getxattr(ctrlfile,'user.mergerfs.version')
+        pid = os.getxattr(ctrlfile,'user.mergerfs.pid')
+        srcmounts = os.getxattr(ctrlfile,'user.mergerfs.srcmounts')
+        header = '- mount: {0}\n  version: {1}\n  pid: {2}\n  srcmounts:\n'.format(
+            fspath,version.decode(),pid.decode())
+        branches = '\n'.join('    - ' + mount
+                             for mount in srcmounts.decode().split(':'))
+        print(header + branches)
+
+
+def build_arg_parser():
+    """Build the argparse parser for mergerfs.ctl.
+
+    Defines the global '-m/--mount' option and the subcommands add,
+    remove, list, get, set and info.  Each subcommand stores its
+    handler function in the 'func' attribute of the parsed namespace.
+    """
+    parser = argparse.ArgumentParser(description='a tool for runtime manipulation of mergerfs')
+    subparsers = parser.add_subparsers(dest='command')
+
+    parser.add_argument('-m','--mount',
+                        type=str,
+                        help='mergerfs mount to act on')
+
+    # 'add' and 'remove' take the same arguments.
+    for (name,handler) in (('add',cmd_add),('remove',cmd_remove)):
+        sub = subparsers.add_parser(name)
+        sub.add_argument('type',choices=['path','device'])
+        sub.add_argument('path',type=str)
+        sub.set_defaults(func=handler)
+
+    sub = subparsers.add_parser('list')
+    sub.add_argument('type',choices=['options','values'])
+    sub.set_defaults(func=cmd_list)
+
+    sub = subparsers.add_parser('get')
+    sub.add_argument('option',type=str,nargs='+')
+    sub.set_defaults(func=cmd_get)
+
+    sub = subparsers.add_parser('set')
+    sub.add_argument('option',type=str)
+    sub.add_argument('value',type=str)
+    sub.set_defaults(func=cmd_set)
+
+    sub = subparsers.add_parser('info')
+    sub.set_defaults(func=cmd_info)
+
+    return parser
+
+
+def cmd_add(fspaths,args):
+    """'add' subcommand: dispatch on args.type ('path' or 'device')."""
+    if args.type == 'path':
+        return cmd_add_path(fspaths,args)
+    if args.type == 'device':
+        return cmd_add_device(fspaths,args)
+    return None
+
+def cmd_add_device(fspaths,args):
+    """For each mount in `fspaths`, resolve device args.path to its mount
+    point and add it as a srcmount; print '<device> not found' when the
+    device cannot be resolved."""
+    for fspath in fspaths:
+        ctrlfile = control_file(fspath)
+        mount = device2mount(args.path)
+        if not mount:
+            print('{0} not found'.format(args.path))
+            continue
+        add_srcmount(ctrlfile,mount)
+
+def cmd_add_path(fspaths,args):
+    """Add args.path as a srcmount to every mount in `fspaths`."""
+    for ctrlfile in map(control_file,fspaths):
+        add_srcmount(ctrlfile,args.path)
+
+
+def cmd_remove(fspaths,args):
+    """'remove' subcommand: dispatch on args.type ('path' or 'device')."""
+    if args.type == 'path':
+        return cmd_remove_path(fspaths,args)
+    if args.type == 'device':
+        return cmd_remove_device(fspaths,args)
+    return None
+
+def cmd_remove_device(fspaths,args):
+    """For each mount in `fspaths`, resolve device args.path to its mount
+    point and remove it from the srcmounts; print '<device> not found'
+    when the device cannot be resolved."""
+    for fspath in fspaths:
+        ctrlfile = control_file(fspath)
+        mount = device2mount(args.path)
+        if mount:
+            remove_srcmount(ctrlfile,mount)
+        else:
+            # args.path is a str (argparse type=str); it has no .decode().
+            print('{0} not found'.format(args.path))
+
+def cmd_remove_path(fspaths,args):
+    """Remove args.path from the srcmounts of every mount in `fspaths`."""
+    for ctrlfile in map(control_file,fspaths):
+        remove_srcmount(ctrlfile,args.path)
+
+
+def cmd_list(fspaths,args):
+    """'list' subcommand: dispatch on args.type ('options' or 'values')."""
+    if args.type == 'options':
+        return cmd_list_options(fspaths,args)
+    if args.type == 'values':
+        return cmd_list_values(fspaths,args)
+    return None
+
+def cmd_list_options(fspaths,args):
+    """Print the xattr names found on the control file of each mount in
+    `fspaths`.  `args` is not used."""
+    for fspath in fspaths:
+        keys = os.listxattr(control_file(fspath))
+        lines = ['- mount: {0}\n  options:\n'.format(fspath)]
+        lines.extend('    - {0}\n'.format(key) for key in keys)
+        print(''.join(lines),end='')
+
+def cmd_list_values(fspaths,args):
+    """Print every xattr name and decoded value found on the control
+    file of each mount in `fspaths`.  `args` is not used."""
+    for fspath in fspaths:
+        ctrlfile = control_file(fspath)
+        lines = ['- mount: {0}\n  options:\n'.format(fspath)]
+        for key in os.listxattr(ctrlfile):
+            value = os.getxattr(ctrlfile,key)
+            lines.append('    {0}: {1}\n'.format(key,value.decode()))
+        print(''.join(lines),end='')
+
+
+def cmd_get(fspaths,args):
+    """'get' subcommand: for each mount in `fspaths`, print the value of
+    every option in args.option, read from the control file's xattrs."""
+    for fspath in fspaths:
+        ctrlfile = control_file(fspath)
+        print('- mount: {0}'.format(fspath))
+        for option in args.option:
+            name = normalize_key(option)
+            raw = os.getxattr(ctrlfile,name)
+            print('    {0}: {1}'.format(name,raw.decode()))
+
+
+def cmd_set(fspaths,args):
+    """'set' subcommand: write args.value to option args.option on the
+    control file of each mount in `fspaths`.  Errors from setxattr are
+    printed, not raised."""
+    for fspath in fspaths:
+        ctrlfile = control_file(fspath)
+        name = normalize_key(args.option)
+        payload = args.value.encode()
+        try:
+            os.setxattr(ctrlfile,name,payload)
+        except Exception as err:
+            print(err)
+
+
+def cmd_info(fspaths,args):
+    """'info' subcommand: print details for each mount in `fspaths`.
+    `args` is accepted for a uniform handler signature and is unused."""
+    print_mergerfs_info(fspaths)
+    return None
+
+
+def print_and_exit(string,rv):
+    """Print `string`, then terminate the process with exit status `rv`."""
+    print(string)
+    raise SystemExit(rv)
+
+
+def main():
+    """Entry point of mergerfs.ctl.
+
+    Parses the command line and determines the mounts to act on: the one
+    given with --mount (which must be a mergerfs mount) or every
+    mergerfs mount found.  Then runs the chosen subcommand handler, or
+    prints help when no subcommand was given.  Exits 1 when there is no
+    usable mount, otherwise 0.
+    """
+    parser = build_arg_parser()
+    args = parser.parse_args()
+
+    fspaths = find_mergerfs()
+    if args.mount:
+        if args.mount not in fspaths:
+            print_and_exit('{0} is not a mergerfs mount'.format(args.mount),1)
+        fspaths = [args.mount]
+    elif not fspaths:
+        print_and_exit('no mergerfs mounts found',1)
+
+    handler = getattr(args,'func',None)
+    if handler is None:
+        parser.print_help()
+    else:
+        handler(fspaths,args)
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()

+ 548 - 0
roles/mergerfs/files/mergerfs.dedup

@@ -0,0 +1,548 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
+
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+import argparse
+import ctypes
+import errno
+import fnmatch
+import hashlib
+import io
+import os
+import random
+import shlex
+import sys
+
+
+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
+_lgetxattr = _libc.lgetxattr
+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
+def lgetxattr(path,name):
+    if type(path) == str:
+        path = path.encode(errors='backslashreplace')
+    if type(name) == str:
+        name = name.encode(errors='backslashreplace')
+    length = 64
+    while True:
+        buf = ctypes.create_string_buffer(length)
+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
+        if res >= 0:
+            return buf.raw[0:res]
+        else:
+            err = ctypes.get_errno()
+            if err == errno.ERANGE:
+                length *= 2
+            elif err == errno.ENODATA:
+                return None
+            else:
+                raise IOError(err,os.strerror(err),path)
+
+
+def ismergerfs(path):
+    try:
+        lgetxattr(path,b'user.mergerfs.fullpath')
+        return True
+    except IOError as e:
+        return False
+
+
+def hash_file(filepath, hasher=None, blocksize=65536):
+    if not hasher:
+        hasher = hashlib.md5()
+
+    with open(filepath,'rb') as afile:
+        buf = afile.read(blocksize)
+        while buf:
+            hasher.update(buf)
+            buf = afile.read(blocksize)
+
+    return hasher.hexdigest()
+
+
+def short_hash_file(filepath, hasher=None, blocksize=65536, blocks=16):
+    if not hasher:
+        hasher = hashlib.md5()
+
+    with open(filepath,'rb') as f:
+        size = os.fstat(f.fileno()).st_size
+        if size <= blocksize:
+            size = 1
+            blocks = 1
+
+        random.seed(size,version=2)
+        for _ in range(blocks):
+            offset = random.randrange(size)
+            f.seek(offset)
+            buf = f.read(blocksize)
+            if buf:
+                hasher.update(buf)
+            else:
+                break
+
+    return hasher.hexdigest()
+
+
+def sizeof_fmt(num):
+    for unit in ['','K','M','G','T','P','E','Z']:
+        if abs(num) < 1024.0:
+            return "%3.1f%sB" % (num,unit)
+        num /= 1024.0
+    return "%.1f%sB" % (num,'Y')
+
+
+def stat_files(paths):
+    rv = []
+    for path in paths:
+        try:
+            st = os.stat(path)
+            rv.append((path,st))
+        except:
+            pass
+
+    return rv
+
+
+def remove(files,execute,verbose):
+    for (path,stat) in files:
+        try:
+            print('rm -vf',shlex.quote(path))
+            if execute:
+                os.remove(path)
+        except Exception as e:
+            print("%s" % e)
+
+
+def print_stats(stats):
+    for i in range(0,len(stats)):
+        print("#  %i: %s" % (i+1,stats[i][0]))
+        data = ("#   - uid: {0:5}; gid: {1:5}; mode: {2:6o}; "
+                "size: {3}; mtime: {4}").format(
+            stats[i][1].st_uid,
+            stats[i][1].st_gid,
+            stats[i][1].st_mode,
+            sizeof_fmt(stats[i][1].st_size),
+            stats[i][1].st_mtime)
+        print(data)
+
+
+def total_size(stats):
+    total = 0
+    for (name,stat) in stats:
+        total = total + stat.st_size
+    return total
+
+
+def manual_dedup(fullpath,stats):
+    done = False
+    while not done:
+        value = input("# Which to keep? ('s' to skip):")
+
+        if value.lower() == 's':
+            stats.clear()
+            done = True
+            continue
+
+        try:
+            value = int(value) - 1
+            if value < 0 or value >= len(stats):
+                raise ValueError
+            stats.remove(stats[value])
+            done = True
+        except NameError:
+            print("Input error: enter a value [1-{0}] or skip by entering 's'".format(len(stats)))
+        except ValueError:
+            print("Input error: enter a value [1-{0}] or skip by entering 's'".format(len(stats)))
+
+
+def mtime_all(stats):
+    mtime = stats[0][1].st_mtime
+    return all(x[1].st_mtime == mtime for x in stats)
+
+
+def mtime_any(mtime,stats):
+    return any([st.st_mtime == mtime for (path,st) in stats])
+
+
+def size_all(stats):
+    size = stats[0][1].st_size
+    return all(x[1].st_size == size for x in stats)
+
+
+def size_any(size,stats):
+    return any([st.st_size == size for (path,st) in stats])
+
+
+def md5sums_all(stats):
+    if size_all(stats):
+        hashval = hash_file(stats[0][0])
+        return all(hash_file(path) == hashval for (path,st) in stats[1:])
+    return False
+
+
+def short_md5sums_all(stats):
+    if size_all(stats):
+        hashval = short_hash_file(stats[0][0])
+        return all(short_hash_file(path) == hashval for (path,st) in stats[1:])
+    return False
+
+
+def oldest_dedup(fullpath,stats):
+    if size_all(stats) and mtime_all(stats):
+        drive_with_most_space_dedup(fullpath,stats)
+        return
+
+    stats.sort(key=lambda st: st[1].st_mtime)
+    oldest = stats[0]
+    stats.remove(oldest)
+
+
+def strict_oldest_dedup(fullpath,stats):
+    stats.sort(key=lambda st: st[1].st_mtime,reverse=False)
+
+    oldest = stats[0]
+    stats.remove(oldest)
+    if mtime_any(oldest[1].st_mtime,stats):
+        stats.clear()
+
+
+def newest_dedup(fullpath,stats):
+    if size_all(stats) and mtime_all(stats):
+        drive_with_most_space_dedup(fullpath,stats)
+        return
+
+    stats.sort(key=lambda st: st[1].st_mtime,reverse=True)
+    newest = stats[0]
+    stats.remove(newest)
+
+
+def strict_newest_dedup(fullpath,stats):
+    stats.sort(key=lambda st: st[1].st_mtime,reverse=True)
+
+    newest = stats[0]
+    stats.remove(newest)
+    if mtime_any(newest[1].st_mtime,stats):
+        stats.clear()
+
+
+def largest_dedup(fullpath,stats):
+    if size_all(stats) and mtime_all(stats):
+        drive_with_most_space_dedup(fullpath,stats)
+        return
+
+    stats.sort(key=lambda st: st[1].st_size,reverse=True)
+    largest = stats[0]
+    stats.remove(largest)
+
+
+def strict_largest_dedup(fullpath,stats):
+    stats.sort(key=lambda st: st[1].st_size,reverse=True)
+
+    largest = stats[0]
+    stats.remove(largest)
+    if size_any(largest[1].st_size,stats):
+        stats.clear()
+
+
+def smallest_dedup(fullpath,stats):
+    if size_all(stats) and mtime_all(stats):
+        drive_with_most_space_dedup(fullpath,stats)
+        return
+
+    stats.sort(key=lambda st: st[1].st_size)
+    smallest = stats[0]
+    stats.remove(smallest)
+
+
+def strict_smallest_dedup(fullpath,stats):
+    stats.sort(key=lambda st: st[1].st_size,reverse=False)
+
+    smallest = stats[0]
+    stats.remove(smallest)
+    if size_any(smallest[1].st_size,stats):
+        stats.clear()
+
+
+def calc_space_free(stat):
+    st = os.statvfs(stat[0])
+    return st.f_frsize * st.f_bfree
+
+
+def drive_with_most_space_dedup(fullpath,stats):
+    stats.sort(key=calc_space_free,reverse=True)
+    largest = stats[0]
+    stats.remove(largest)
+
+
+def mergerfs_getattr_dedup(origpath,stats):
+    fullpath = getxattr(origpath,b'user.mergerfs.fullpath')
+    for (path,stat) in stats:
+        if path != fullpath:
+            continue
+        stats.remove((path,stat))
+        break
+
+
+def get_dedupfun(name,strict):
+    if strict:
+        name = 'strict-' + name
+    funs = {
+        'manual': manual_dedup,
+        'strict-manual': manual_dedup,
+        'mostfreespace': drive_with_most_space_dedup,
+        'strict-mostfreespace': drive_with_most_space_dedup,
+        'newest': newest_dedup,
+        'strict-newest': strict_newest_dedup,
+        'oldest': oldest_dedup,
+        'strict-oldest': strict_oldest_dedup,
+        'largest': largest_dedup,
+        'strict-largest': strict_largest_dedup,
+        'smallest': smallest_dedup,
+        'strict-smallest': strict_smallest_dedup,
+        'mergerfs': mergerfs_getattr_dedup,
+        'strict-mergerfs': mergerfs_getattr_dedup
+    }
+    return funs[name]
+
+
+def get_ignorefun(name):
+    funs = {
+        None: lambda x: None,
+        'same-time': mtime_all,
+        'diff-time': lambda x: not mtime_all(x),
+        'same-size': size_all,
+        'diff-size': lambda x: not size_all(x),
+        'same-hash': md5sums_all,
+        'diff-hash': lambda x: not md5sums_all(x),
+        'same-short-hash': short_md5sums_all,
+        'diff-short-hash': lambda x: not short_md5sums_all(x)
+    }
+
+    return funs[name]
+
+
+def getxattr(path,key):
+    try:
+        attr = lgetxattr(path,key)
+        if attr:
+            return attr.decode('utf-8')
+        return ''
+    except IOError as e:
+        if e.errno == errno.ENODATA:
+            return ''
+        raise
+    except UnicodeDecodeError as e:
+        print(e)
+        print(attr)
+    return ''
+
+
+def match(filename,matches):
+    for match in matches:
+        if fnmatch.fnmatch(filename,match):
+            return True
+    return False
+
+
+def dedup(fullpath,verbose,ignorefun,execute,dedupfun):
+    # Deduplicate one pool file and return the number of bytes accounted
+    # for by the copies selected for removal (0 when nothing is selected).
+    #
+    #   fullpath  - path of the file inside the pool
+    #   verbose   - 1+: print `rm` lines, 2+: status lines, 3+: file info
+    #   ignorefun - predicate over the (path,stat) list; truthy => skip
+    #   execute   - actually delete files instead of only printing
+    #   dedupfun  - policy that removes the copy to KEEP from the list
+    #
+    # The `user.mergerfs.allpaths` xattr holds the NUL-separated paths of
+    # every copy of the file.
+    paths = getxattr(fullpath,b'user.mergerfs.allpaths').split('\0')
+    if len(paths) <= 1:
+        return 0
+
+    stats = stat_files(paths)
+
+    if ignorefun(stats):
+        if verbose >= 2:
+            print('# ignored:',fullpath)
+        return 0
+
+    # The manual policy needs the listing before it prompts the user.
+    if (dedupfun == manual_dedup):
+        print('#',fullpath)
+        print_stats(stats)
+
+    try:
+        # The policy mutates `stats` in place; whatever remains afterwards
+        # is the set of copies to delete. An empty list means "skip".
+        dedupfun(fullpath,stats)
+        if not stats:
+            if verbose >= 2:
+                print('# skipped:',fullpath)
+            return 0
+
+        if (dedupfun != manual_dedup):
+            if verbose >= 2:
+                print('#',fullpath)
+            if verbose >= 3:
+                print_stats(stats)
+
+        for (path,stat) in stats:
+            try:
+                if verbose:
+                    print('rm -vf',shlex.quote(path))
+                if execute:
+                    os.remove(path)
+            except Exception as e:
+                print('#',e)
+
+        # Sizes are counted whether or not the removal ran or succeeded.
+        return total_size(stats)
+
+    except Exception as e:
+        print(e)
+
+    return 0
+
+
+def print_help():
+    # Print the hand-written usage text; the argparse parser is built with
+    # add_help=False, so this is the only help output.
+    help = \
+'''
+usage: mergerfs.dedup [<options>] <dir>
+
+Remove duplicate files across branches of a mergerfs pool. Provides
+multiple algos for determining which file to keep and what to skip.
+
+positional arguments:
+  dir                    Starting directory
+
+optional arguments:
+  -v, --verbose          Once to print `rm` commands
+                         Twice for status info
+                         Three for file info
+  -i, --ignore=          Ignore files if... (default: none)
+                         * same-size       : have the same size
+                         * diff-size       : have different sizes
+                         * same-time       : have the same mtime
+                         * diff-time       : have different mtimes
+                         * same-hash       : have the same md5sum
+                         * diff-hash       : have different md5sums
+                         * same-short-hash : have the same short md5sums
+                         * diff-short-hash : have different short md5sums
+                         'hash' is expensive. 'short-hash' far less
+                         expensive, not as safe, but pretty good.
+  -d, --dedup=           What file to *keep* (default: mergerfs)
+                         * manual        : ask user
+                         * oldest        : file with smallest mtime
+                         * newest        : file with largest mtime
+                         * largest       : file with largest size
+                         * smallest      : file with smallest size
+                         * mostfreespace : file on drive with most free space
+                         * mergerfs      : file selected by the mergerfs
+                                           getattr policy
+  -s, --strict           Skip dedup if all files have same (mtime,size) value.
+                         Only applies to oldest, newest, largest, smallest.
+  -e, --execute          Will not perform file removal without this.
+  -I, --include=         fnmatch compatible filter to include files.
+                         Can be used multiple times.
+  -E, --exclude=         fnmatch compatible filter to exclude files.
+                         Can be used multiple times.
+
+'''
+    print(help)
+
+
+def buildargparser():
+    desc = 'dedup files across branches in a mergerfs pool'
+    usage = 'mergerfs.dedup [<options>] <dir>'
+    parser = argparse.ArgumentParser(add_help=False)
+
+    parser.add_argument('dir',
+                        type=str,
+                        nargs='?',
+                        default=None,
+                        help='starting directory')
+    parser.add_argument('-v','--verbose',
+                        action='count',
+                        default=0)
+    parser.add_argument('-i','--ignore',
+                        choices=['same-size','diff-size',
+                                 'same-time','diff-time',
+                                 'same-hash','diff-hash',
+                                 'same-short-hash',
+                                 'diff-short-hash'])
+    parser.add_argument('-d','--dedup',
+                        choices=['manual',
+                                 'oldest','newest',
+                                 'smallest','largest',
+                                 'mostfreespace',
+                                 'mergerfs'],
+                        default='mergerfs')
+    parser.add_argument('-s','--strict',
+                        action='store_true')
+    parser.add_argument('-e','--execute',
+                        action='store_true')
+    parser.add_argument('-I','--include',
+                        type=str,
+                        action='append',
+                        default=[])
+    parser.add_argument('-E','--exclude',
+                        type=str,
+                        action='append',
+                        default=[])
+    parser.add_argument('-h','--help',
+                        action='store_true')
+
+    return parser
+
+
+def main():
+    # Entry point: walk the given mergerfs directory, dedup every file
+    # that passes the include/exclude filters and report the total size
+    # of the copies selected for removal.
+
+    # Re-wrap stdout/stderr so that names which cannot be encoded are
+    # printed with backslash escapes instead of raising.
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
+                                  encoding='utf8',
+                                  errors='backslashreplace',
+                                  line_buffering=True)
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
+                                  encoding='utf8',
+                                  errors='backslashreplace',
+                                  line_buffering=True)
+
+    parser = buildargparser()
+    args   = parser.parse_args()
+
+    if args.help or not args.dir:
+        print_help()
+        sys.exit(0)
+
+    args.dir = os.path.realpath(args.dir)
+    if not ismergerfs(args.dir):
+        print("%s is not a mergerfs directory" % args.dir)
+        sys.exit(1)
+
+    dedupfun  = get_dedupfun(args.dedup,args.strict)
+    ignorefun = get_ignorefun(args.ignore)
+    verbose   = args.verbose
+    execute   = args.execute
+    # No --include given means "include everything".
+    includes  = ['*'] if not args.include else args.include
+    excludes  = args.exclude
+
+    # NOTE: this local shadows the module-level total_size() function
+    # inside main() only.
+    total_size = 0
+    try:
+        for (dirname,dirnames,filenames) in os.walk(args.dir):
+            for filename in filenames:
+                # Excludes take precedence over includes.
+                if match(filename,excludes):
+                    continue
+                if not match(filename,includes):
+                    continue
+                fullpath    = os.path.join(dirname,filename)
+                total_size += dedup(fullpath,verbose,ignorefun,execute,dedupfun)
+    except KeyboardInterrupt:
+        print("# exiting: CTRL-C pressed")
+    except IOError as e:
+        # A closed output pipe ends the walk quietly; anything else is
+        # re-raised.
+        if e.errno == errno.EPIPE:
+            pass
+        else:
+            raise
+
+    print('# Total savings:',sizeof_fmt(total_size))
+
+    sys.exit(0)
+
+
+# Run main() only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

+ 399 - 0
roles/mergerfs/files/mergerfs.dup

@@ -0,0 +1,399 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+import argparse
+import ctypes
+import errno
+import fnmatch
+import io
+import os
+import shlex
+import subprocess
+import sys
+
+
+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
+_lgetxattr = _libc.lgetxattr
+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
+def lgetxattr(path,name):
+    if type(path) == str:
+        path = path.encode(errors='backslashreplace')
+    if type(name) == str:
+        name = name.encode(errors='backslashreplace')
+    length = 64
+    while True:
+        buf = ctypes.create_string_buffer(length)
+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
+        if res >= 0:
+            return buf.raw[0:res].decode(errors='backslashreplace')
+        else:
+            err = ctypes.get_errno()
+            if err == errno.ERANGE:
+                length *= 2
+            elif err == errno.ENODATA:
+                return None
+            else:
+                raise IOError(err,os.strerror(err),path)
+
+
+def ismergerfs(path):
+    try:
+        lgetxattr(path,'user.mergerfs.basepath')
+        return True
+    except IOError as e:
+        return False
+
+
+def mergerfs_control_file(basedir):
+    if basedir == '/':
+        return None
+    ctrlfile = os.path.join(basedir,'.mergerfs')
+    if os.path.exists(ctrlfile):
+        return ctrlfile
+    basedir = os.path.dirname(basedir)
+    return mergerfs_control_file(basedir)
+
+
+def mergerfs_branches(ctrlfile):
+    branches = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
+    branches = branches.split(':')
+    return branches
+
+
+def match(filename,matches):
+    for match in matches:
+        if fnmatch.fnmatch(filename,match):
+            return True
+    return False
+
+
+def execute_cmd(args):
+    # Run the command given as an argument list (no shell involved) and
+    # return its exit status.
+    return subprocess.call(args)
+
+
+def print_args(args):
+    quoted = [shlex.quote(arg) for arg in args]
+    print(' '.join(quoted))
+
+
+def build_copy_file(src,tgt,rel):
+    srcpath = os.path.join(src,'./',rel)
+    tgtpath = tgt + '/'
+    return ['rsync',
+            '-avHAXWE',
+            '--numeric-ids',
+            '--progress',
+            '--relative',
+            srcpath,
+            tgtpath]
+
+
+def build_branches_freespace(branches):
+    rv = dict()
+    for branch in branches:
+        st = os.statvfs(branch)
+        rv[branch] = st.f_bavail * st.f_frsize
+    return rv
+
+
+def print_help():
+    # Print the hand-written usage text; the argparse parser is built with
+    # add_help=False, so this is the only help output.
+    help = \
+'''
+usage: mergerfs.dup [<options>] <dir>
+
+Duplicate files & directories across multiple drives in a pool.
+Will print out commands for inspection and out of band use.
+
+positional arguments:
+  dir                    starting directory
+
+optional arguments:
+  -c, --count=           Number of copies to create. (default: 2)
+  -d, --dup=             Which file (if more than one exists) to choose to
+                         duplicate. Each one falls back to `mergerfs` if
+                         all files have the same value. (default: newest)
+                         * newest   : file with largest mtime
+                         * oldest   : file with smallest mtime
+                         * smallest : file with smallest size
+                         * largest  : file with largest size
+                         * mergerfs : file chosen by mergerfs' getattr
+  -p, --prune            Remove files above `count`. Without this enabled
+                         it will update all existing files.
+  -e, --execute          Execute `rsync` and `rm` commands. Not just
+                         print them.
+  -I, --include=         fnmatch compatible filter to include files.
+                         Can be used multiple times.
+  -E, --exclude=         fnmatch compatible filter to exclude files.
+                         Can be used multiple times.
+'''
+    print(help)
+
+
+def buildargparser():
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument('dir',
+                        type=str,
+                        nargs='?',
+                        default=None)
+    parser.add_argument('-c','--count',
+                        dest='count',
+                        type=int,
+                        default=2)
+    parser.add_argument('-p','--prune',
+                        dest='prune',
+                        action='store_true')
+    parser.add_argument('-d','--dup',
+                        choices=['newest','oldest',
+                                 'smallest','largest',
+                                 'mergerfs'],
+                        default='newest')
+    parser.add_argument('-e','--execute',
+                        dest='execute',
+                        action='store_true')
+    parser.add_argument('-I','--include',
+                        dest='include',
+                        type=str,
+                        action='append',
+                        default=[])
+    parser.add_argument('-E','--exclude',
+                        dest='exclude',
+                        type=str,
+                        action='append',
+                        default=[])
+    parser.add_argument('-h','--help',
+                        action='store_true')
+
+    return parser
+
+
+def xattr_basepath(fullpath):
+    # Value of the `user.mergerfs.basepath` xattr of `fullpath`, or None
+    # when lgetxattr reports the attribute as missing.
+    return lgetxattr(fullpath,'user.mergerfs.basepath')
+
+
+def xattr_allpaths(fullpath):
+    # Value of the `user.mergerfs.allpaths` xattr of `fullpath` (callers
+    # split it on NUL), or None when lgetxattr reports it as missing.
+    return lgetxattr(fullpath,'user.mergerfs.allpaths')
+
+
+def xattr_relpath(fullpath):
+    # Value of the `user.mergerfs.relpath` xattr of `fullpath`, or None
+    # when lgetxattr reports the attribute as missing.
+    return lgetxattr(fullpath,'user.mergerfs.relpath')
+
+
+def exists(base,rel,name):
+    fullpath = os.path.join(base,rel,name)
+    return os.path.lexists(fullpath)
+
+
+def mergerfs_all_basepaths(fullpath,relpath):
+    attr = xattr_allpaths(fullpath)
+    if not attr:
+        dirname  = os.path.dirname(fullpath)
+        basename = os.path.basename(fullpath)
+        attr     = xattr_allpaths(dirname)
+        attr     = attr.split('\0')
+        attr     = [os.path.join(path,basename)
+                    for path in attr
+                    if os.path.lexists(os.path.join(path,basename))]
+    else:
+        attr = attr.split('\0')
+    return [x[:-len(relpath)].rstrip('/') for x in attr]
+
+
+def mergerfs_basepath(fullpath):
+    attr = xattr_basepath(fullpath)
+    if not attr:
+        dirname  = os.path.dirname(fullpath)
+        basename = os.path.basename(fullpath)
+        attr     = xattr_allpaths(dirname)
+        attr     = attr.split('\0')
+        for path in attr:
+            fullpath = os.path.join(path,basename)
+            if os.path.lexists(fullpath):
+                relpath = xattr_relpath(dirname)
+                return path[:-len(relpath)].rstrip('/')
+    return attr
+
+
+def mergerfs_relpath(fullpath):
+    attr = xattr_relpath(fullpath)
+    if not attr:
+        dirname  = os.path.dirname(fullpath)
+        basename = os.path.basename(fullpath)
+        attr     = xattr_relpath(dirname)
+        attr     = os.path.join(attr,basename)
+    return attr.lstrip('/')
+
+
+def newest_dupfun(default_basepath,relpath,basepaths):
+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
+
+    mtime = sts[basepaths[0]].st_mtime
+    if not all([st.st_mtime == mtime for st in sts.values()]):
+        return sorted(sts,key=lambda x: sts.get(x).st_mtime,reverse=True)[0]
+
+    ctime = sts[basepaths[0]].st_ctime
+    if not all([st.st_ctime == ctime for st in sts.values()]):
+        return sorted(sts,key=lambda x: sts.get(x).st_ctime,reverse=True)[0]
+
+    return default_basepath
+
+
+def oldest_dupfun(default_basepath,relpath,basepaths):
+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
+
+    mtime = sts[basepaths[0]].st_mtime
+    if not all([st.st_mtime == mtime for st in sts.values()]):
+        return sorted(sts,key=lambda x: sts.get(x).st_mtime,reverse=False)[0]
+
+    ctime = sts[basepaths[0]].st_ctime
+    if not all([st.st_ctime == ctime for st in sts.values()]):
+        return sorted(sts,key=lambda x: sts.get(x).st_ctime,reverse=False)[0]
+
+    return default_basepath
+
+
+def largest_dupfun(default_basepath,relpath,basepaths):
+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
+
+    size = sts[basepaths[0]].st_size
+    if not all([st.st_size == size for st in sts.values()]):
+        return sorted(sts,key=lambda x: sts.get(x).st_size,reverse=True)[0]
+
+    return default_basepath
+
+
+def smallest_dupfun(default_basepath,relpath,basepaths):
+    sts = dict([(f,os.lstat(os.path.join(f,relpath))) for f in basepaths])
+
+    size = sts[basepaths[0]].st_size
+    if not all([st.st_size == size for st in sts.values()]):
+        return sorted(sts,key=lambda x: sts.get(x).st_size,reverse=False)[0]
+
+    return default_basepath
+
+
+def mergerfs_dupfun(default_basepath,relpath,basepaths):
+    # `mergerfs` policy: always use the base path passed in as the
+    # default; the other arguments exist for the shared dupfun signature.
+    return default_basepath
+
+
+def getdupfun(name):
+    funs = {'newest': newest_dupfun,
+            'oldest': oldest_dupfun,
+            'smallest': smallest_dupfun,
+            'largest': largest_dupfun,
+            'mergerfs': mergerfs_dupfun}
+    return funs[name]
+
+
+def main():
+    # Entry point: walk the given mergerfs directory and, for each file
+    # passing the include/exclude filters, print (and with --execute,
+    # run) the rsync/rm commands that bring it to `count` copies.
+
+    # Re-wrap stdout/stderr so that names which cannot be encoded are
+    # printed with backslash escapes instead of raising.
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
+                                  encoding='utf8',
+                                  errors='backslashreplace',
+                                  line_buffering=True)
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
+                                  encoding='utf8',
+                                  errors='backslashreplace',
+                                  line_buffering=True)
+
+    parser = buildargparser()
+    args = parser.parse_args()
+
+    if args.help or not args.dir:
+        print_help()
+        sys.exit(0)
+
+    args.dir = os.path.realpath(args.dir)
+
+    if not ismergerfs(args.dir):
+        print("%s is not a mergerfs mount" % args.dir)
+        sys.exit(1)
+
+    prune     = args.prune
+    execute   = args.execute
+    # No --include given means "include everything".
+    includes  = ['*'] if not args.include else args.include
+    excludes  = args.exclude
+    dupfun    = getdupfun(args.dup)
+    ctrlfile  = mergerfs_control_file(args.dir)
+    branches  = mergerfs_branches(ctrlfile)
+    # From here on `branches` maps branch path -> available bytes.
+    branches  = build_branches_freespace(branches)
+    # Never ask for more copies than there are branches.
+    count     = min(args.count,len(branches))
+
+    try:
+        for (dirpath,dirnames,filenames) in os.walk(args.dir):
+            for filename in filenames:
+                # Excludes take precedence over includes.
+                if match(filename,excludes):
+                    continue
+                if not match(filename,includes):
+                    continue
+
+                fullpath = os.path.join(dirpath,filename)
+                basepath = mergerfs_basepath(fullpath)
+                relpath  = mergerfs_relpath(fullpath)
+                existing = mergerfs_all_basepaths(fullpath,relpath)
+
+                # Choose the copy to replicate; the remaining entries of
+                # `existing` are the other branches already holding it.
+                srcpath  = dupfun(basepath,relpath,existing)
+                srcfile  = os.path.join(srcpath,relpath)
+                srcfile_size = os.lstat(srcfile).st_size
+                # NOTE(review): list.remove raises ValueError if srcpath
+                # is not in `existing` - confirm dupfun cannot return such
+                # a value.
+                existing.remove(srcpath)
+
+                # `i` counts copies accounted for so far, the source
+                # included.
+                i = 1
+                copies = []
+                for tgtpath in existing:
+                    if prune and i >= count:
+                        break
+                    copies.append(tgtpath)
+                    # NOTE: `args` is rebound to a command list from here
+                    # on; the parsed options were copied to locals above.
+                    args = build_copy_file(srcpath,tgtpath,relpath)
+                    print('# overwrite')
+                    print_args(args)
+                    if execute:
+                        execute_cmd(args)
+                    i += 1
+
+                # Create the missing copies, each on the branch with the
+                # most available space that does not hold the file yet.
+                for _ in range(i,count):
+                    for branch in sorted(branches,key=branches.get,reverse=True):
+                        tgtfile = os.path.join(branch,relpath)
+                        if branch in copies or os.path.exists(tgtfile):
+                            continue
+                        copies.append(branch)
+                        # Track the space the new copy will consume.
+                        branches[branch] -= srcfile_size
+                        args = build_copy_file(srcpath,branch,relpath)
+                        print('# copy')
+                        print_args(args)
+                        if execute:
+                            execute_cmd(args)
+                        break
+
+                if prune:
+                    # Existing copies that were not refreshed above are
+                    # surplus and get removed.
+                    leftovers = set(existing) - set(copies)
+                    for branch in leftovers:
+                        branches[branch] += srcfile_size
+                        tgtfile = os.path.join(branch,relpath)
+                        print('# remove')
+                        args = ['rm','-vf',tgtfile]
+                        print_args(args)
+                        if execute:
+                            execute_cmd(args)
+
+
+    except KeyboardInterrupt:
+        print("exiting: CTRL-C pressed")
+    except BrokenPipeError:
+        pass
+
+    sys.exit(0)
+
+
+# Run main() only when executed as a script, not when imported.
+if __name__ == "__main__":
+   main()

+ 225 - 0
roles/mergerfs/files/mergerfs.fsck

@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
+
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+import argparse
+import ctypes
+import errno
+import io
+import os
+import sys
+
+
+_libc = ctypes.CDLL("libc.so.6",use_errno=True)
+_lgetxattr = _libc.lgetxattr
+_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
+def lgetxattr(path,name):
+    if type(path) == str:
+        path = path.encode(errors='backslashreplace')
+    if type(name) == str:
+        name = name.encode(errors='backslashreplace')
+    length = 64
+    while True:
+        buf = ctypes.create_string_buffer(length)
+        res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
+        if res >= 0:
+            return buf.raw[0:res]
+        else:
+            err = ctypes.get_errno()
+            if err == errno.ERANGE:
+                length *= 2
+            elif err == errno.ENODATA:
+                return None
+            else:
+                raise IOError(err,os.strerror(err),path)
+
+
+def ismergerfs(path):
+    try:
+        lgetxattr(path,"user.mergerfs.fullpath")
+        return True
+    except IOError as e:
+        return False
+
+
+def setstat(stat,paths):
+    for path in paths:
+        try:
+            os.chmod(path,stat.st_mode)
+            os.chown(path,stat.st_uid,stat.st_gid);
+            print("set %s > uid: %d gid: %d mode: %o" %
+                  (path,stat.st_uid,stat.st_gid,stat.st_mode))
+        except Exception as e:
+            print("%s" % e)
+
+
+def stats_different(stats):
+    base = stats[0]
+    for stat in stats:
+        if ((stat.st_mode == base.st_mode) and
+            (stat.st_uid  == base.st_uid)  and
+            (stat.st_gid  == base.st_gid)):
+            continue
+        return True
+    return False
+
+def size_equal(stats):
+    base = stats[0]
+    for stat in stats:
+        if stat.st_size != base.st_size:
+            return False
+    return True
+
+def print_stats(Files,Stats):
+    for i in range(0,len(Files)):
+        print("  %i: %s" % (i,Files[i].decode(errors='backslashreplace')))
+        data = ("   - uid: {0:5}; gid: {1:5}; mode: {2:6o}; "
+                "size: {3:10}; mtime: {4}").format(
+            Stats[i].st_uid,
+            Stats[i].st_gid,
+            Stats[i].st_mode,
+            Stats[i].st_size,
+            Stats[i].st_mtime)
+        print (data)
+
+
+def noop_fix(paths,stats):
+    pass
+
+
+def manual_fix(paths,stats):
+    done = False
+    while not done:
+        try:
+            value = input('Which is correct?: ')
+            value = int(value)
+            if((value >= len(paths)) or (value < 0)):
+                print("Input error: enter a value [0,%d]" % (len(paths)-1))
+                continue
+            setstat(stats[value],paths)
+            done = True
+        except Exception as e:
+            print("%s" % e)
+            done = True
+
+
+def newest_fix(paths,stats):
+    stats.sort(key=lambda stat: stat.st_mtime)
+    try:
+        newest = stats[-1]
+        setstat(newest,paths)
+    except Exception as e:
+        print("%s" % e)
+
+
+def nonroot_fix(paths,stats):
+    try:
+        for stat in stats:
+            if stat.st_uid != 0:
+                setstat(stat,paths)
+                return
+        return newest_fix(paths,stats)
+    except Exception as e:
+        print("%s" % e)
+
+
+def getfixfun(name):
+    if name == 'manual':
+        return manual_fix
+    elif name == 'newest':
+        return newest_fix
+    elif name == 'nonroot':
+        return nonroot_fix
+    return noop_fix
+
+
+def check_consistancy(fullpath,verbose,size,fix):
+    paths = lgetxattr(fullpath,"user.mergerfs.allpaths")
+    if not paths:
+        return
+    paths = paths.split(b'\0')
+    if len(paths) <= 1:
+        return
+
+    stats = [os.stat(path) for path in paths]
+    if (size and not size_equal(stats)):
+        return
+    if not stats_different(stats):
+        return
+
+    print("%s" % fullpath)
+    if verbose:
+        print_stats(paths,stats)
+    fix(paths,stats)
+
+
+def buildargparser():
+    parser = argparse.ArgumentParser(description='audit a mergerfs mount for inconsistencies')
+    parser.add_argument('dir',type=str,
+                        help='starting directory')
+    parser.add_argument('-v','--verbose',action='store_true',
+                        help='print details of audit item')
+    parser.add_argument('-s','--size',action='store_true',
+                        help='only consider if the size is the same')
+    parser.add_argument('-f','--fix',choices=['manual','newest','nonroot'],
+                        help='fix policy')
+    return parser
+
+
+def main():
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
+                                  encoding='utf8',
+                                  errors='backslashreplace',
+                                  line_buffering=True)
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
+                                  encoding='utf8',
+                                  errors='backslashreplace',
+                                  line_buffering=True)
+
+    parser = buildargparser()
+    args = parser.parse_args()
+
+    if args.fix:
+        args.verbose = True
+
+    fix = getfixfun(args.fix)
+
+    args.dir = os.path.realpath(args.dir)
+    if not ismergerfs(args.dir):
+        print("%s is not a mergerfs directory" % args.dir)
+        sys.exit(1)
+
+    try:
+        size = args.size
+        verbose = args.verbose
+        for (dirname,dirnames,filenames) in os.walk(args.dir):
+            fulldirpath = os.path.join(args.dir,dirname)
+            check_consistancy(fulldirpath,verbose,size,fix)
+            for filename in filenames:
+                fullpath = os.path.join(fulldirpath,filename)
+                check_consistancy(fullpath,verbose,size,fix)
+    except KeyboardInterrupt:
+        pass
+    except IOError as e:
+        if e.errno == errno.EPIPE:
+            pass
+        else:
+            raise
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()

+ 28 - 0
roles/mergerfs/files/mergerfs.mktrash

@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Create a world-writable, sticky ".Trash" directory on every branch listed
+# in the mergerfs control file of the given mountpoint.
+TRASHDIR=".Trash"
+MOUNTPOINT="${1}"
+
+if [ "${MOUNTPOINT}" = "" ]; then
+    echo "usage: ${0} <mountpoint>"
+    exit 1
+fi
+
+if [ $EUID -ne 0 ]; then
+    echo "You must run ${0} as root"
+    exit 2
+fi
+
+if [ ! -e "${MOUNTPOINT}/.mergerfs" ]; then
+    echo "ERROR: ${MOUNTPOINT} does not appear to be a mergerfs mountpoint"
+    exit 3
+fi
+
+# Split the colon-separated branch list into an array rather than relying on
+# unquoted word splitting, so branch paths containing spaces or glob
+# characters are used verbatim.
+IFS=: read -r -a SRCMOUNTS < <(xattr -p user.mergerfs.srcmounts "${MOUNTPOINT}/.mergerfs")
+for mount in "${SRCMOUNTS[@]}"
+do
+    DIR="${mount}/${TRASHDIR}"
+    mkdir -v --mode=1777 "${DIR}"
+done
+
+exit 0

+ 68 - 0
roles/mergerfs/tasks/main.yml

@@ -0,0 +1,68 @@
+---
+# Build mergerfs from source as an RPM, install it, then install the
+# mergerfs-tools helper scripts shipped in this role's files/ directory.
+- name: Install git
+  yum:
+    name: git-core
+    state: installed
+
+# Fetch over HTTPS: the checkout is built and installed as root, so it must
+# not travel over an unauthenticated plain-HTTP connection.
+- name: Clone mergerfs repo
+  shell:
+    cmd: git clone https://github.com/trapexit/mergerfs.git
+    chdir: /root
+    creates: /root/mergerfs
+
+# The highest tag in version order is taken as the release to build.
+- name: Query latest mergerfs version
+  shell:
+    cmd: git tag | sort -V | tail -1
+    chdir: /root/mergerfs
+  register: mergerfs_version
+  changed_when: false
+
+- name: Set mergerfs RPM path
+  set_fact:
+    mergerfs_rpm: /root/mergerfs/rpmbuild/RPMS/x86_64/mergerfs-{{ mergerfs_version.stdout }}-1.{{ 'el' if ansible_distribution == 'CentOS' else 'fc' }}{{ ansible_distribution_major_version }}.{{ ansible_architecture }}.rpm
+
+# Skipped once the expected RPM already exists (see "creates").
+- name: Download and build mergerfs
+  shell:
+    cmd: >
+         git checkout {{ mergerfs_version.stdout }} &&
+         tools/install-build-pkgs &&
+         make rpm
+    chdir: /root/mergerfs
+    creates: "{{ mergerfs_rpm }}"
+
+- name: Stat mergerfs build folder
+  find:
+    path: /root/mergerfs/rpmbuild/RPMS/x86_64
+  register: mergerfs_build_folder
+
+# NOTE(review): installs whichever file the find task lists first, not
+# mergerfs_rpm; presumably the directory holds a single RPM - confirm.
+- name: Install mergerfs
+  yum:
+    name: "{{ mergerfs_build_folder.files[0].path }}"
+    state: installed
+    disable_gpg_check: yes
+  when:
+    - mergerfs_build_folder.files[0].path is defined
+
+
+- name: Install mergerfs-tools prereqs
+  package:
+    name:
+      - python3
+      - rsync
+    state: present
+
+- name: Install mergerfs-tools
+  copy:
+    src: "{{ item }}"
+    dest: /usr/local/bin/{{ item }}
+    owner: root
+    group: root
+    mode: '0755'
+  loop:
+    - mergerfs.balance
+    - mergerfs.consolidate
+    - mergerfs.ctl
+    - mergerfs.dedup
+    - mergerfs.dup
+    - mergerfs.fsck
+    - mergerfs.mktrash