mergerfs.consolidate 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
  3. #
  4. # Permission to use, copy, modify, and/or distribute this software for any
  5. # purpose with or without fee is hereby granted, provided that the above
  6. # copyright notice and this permission notice appear in all copies.
  7. #
  8. # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. import argparse
  16. import ctypes
  17. import errno
  18. import fnmatch
  19. import io
  20. import os
  21. import shlex
  22. import stat
  23. import subprocess
  24. import sys
  25. _libc = ctypes.CDLL("libc.so.6",use_errno=True)
  26. _lgetxattr = _libc.lgetxattr
  27. _lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
  28. def lgetxattr(path,name):
  29. if type(path) == str:
  30. path = path.encode(errors='backslashreplace')
  31. if type(name) == str:
  32. name = name.encode(errors='backslashreplace')
  33. length = 64
  34. while True:
  35. buf = ctypes.create_string_buffer(length)
  36. res = _lgetxattr(path,name,buf,ctypes.c_size_t(length))
  37. if res >= 0:
  38. return buf.raw[0:res]
  39. else:
  40. err = ctypes.get_errno()
  41. if err == errno.ERANGE:
  42. length *= 2
  43. elif err == errno.ENODATA:
  44. return None
  45. else:
  46. raise IOError(err,os.strerror(err),path)
  47. def xattr_relpath(fullpath):
  48. return lgetxattr(fullpath,'user.mergerfs.relpath').decode(errors='backslashreplace')
  49. def xattr_basepath(fullpath):
  50. return lgetxattr(fullpath,'user.mergerfs.basepath').decode(errors='backslashreplace')
  51. def ismergerfs(path):
  52. try:
  53. lgetxattr(path,'user.mergerfs.version')
  54. return True
  55. except IOError as e:
  56. return False
  57. def mergerfs_control_file(basedir):
  58. if basedir == '/':
  59. return None
  60. ctrlfile = os.path.join(basedir,'.mergerfs')
  61. if os.path.exists(ctrlfile):
  62. return ctrlfile
  63. else:
  64. dirname = os.path.dirname(basedir)
  65. return mergerfs_control_file(dirname)
  66. def mergerfs_srcmounts(ctrlfile):
  67. srcmounts = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
  68. srcmounts = srcmounts.decode(errors='backslashreplace').split(':')
  69. return srcmounts
  70. def match(filename,matches):
  71. for match in matches:
  72. if fnmatch.fnmatch(filename,match):
  73. return True
  74. return False
  75. def execute_cmd(args):
  76. return subprocess.call(args)
  77. def print_args(args):
  78. quoted = [shlex.quote(arg) for arg in args]
  79. print(' '.join(quoted))
  80. def human_to_bytes(s):
  81. m = s[-1]
  82. if m == 'K':
  83. i = int(s[0:-1]) * 1024
  84. elif m == 'M':
  85. i = int(s[0:-1]) * 1024 * 1024
  86. elif m == 'G':
  87. i = int(s[0:-1]) * 1024 * 1024 * 1024
  88. elif m == 'T':
  89. i = int(s[0:-1]) * 1024 * 1024 * 1024 * 1024
  90. else:
  91. i = int(s)
  92. return i
  93. def get_stats(branches):
  94. sizes = {}
  95. for branch in branches:
  96. vfs = os.statvfs(branch)
  97. sizes[branch] = vfs.f_bavail * vfs.f_frsize
  98. return sizes
  99. def build_move_file(src,tgt,rel):
  100. rel = rel.strip('/')
  101. srcpath = os.path.join(src,'./',rel)
  102. tgtpath = tgt.rstrip('/') + '/'
  103. return ['rsync',
  104. '-avHAXWE',
  105. '--numeric-ids',
  106. '--progress',
  107. '--relative',
  108. '--remove-source-files',
  109. srcpath,
  110. tgtpath]
  111. def print_help():
  112. help = \
  113. '''
  114. usage: mergerfs.consolidate [<options>] <dir>
  115. Consolidate files in a single mergerfs directory onto a single drive.
  116. positional arguments:
  117. dir starting directory
  118. optional arguments:
  119. -m, --max-files= Skip directories with more than N files.
  120. (default: 256)
  121. -M, --max-size= Skip directories with files adding up to more
  122. than N. (default: 16G)
  123. -I, --include-path= fnmatch compatible path include filter.
  124. Can be used multiple times.
  125. -E, --exclude-path= fnmatch compatible path exclude filter.
  126. Can be used multiple times.
  127. -e, --execute Execute `rsync` commands as well as print them.
  128. -h, --help Print this help.
  129. '''
  130. print(help)
  131. def buildargparser():
  132. parser = argparse.ArgumentParser(add_help=False)
  133. parser.add_argument('dir',
  134. type=str,
  135. nargs='?',
  136. default=None)
  137. parser.add_argument('-m','--max-files',
  138. dest='max_files',
  139. type=int,
  140. default=256)
  141. parser.add_argument('-M','--max-size',
  142. dest='max_size',
  143. type=human_to_bytes,
  144. default='16G')
  145. parser.add_argument('-I','--include-path',
  146. dest='includepath',
  147. type=str,
  148. action='append',
  149. default=[])
  150. parser.add_argument('-E','--exclude-path',
  151. dest='excludepath',
  152. type=str,
  153. action='append',
  154. default=[])
  155. parser.add_argument('-e','--execute',
  156. dest='execute',
  157. action='store_true')
  158. parser.add_argument('-h','--help',
  159. action='store_true')
  160. return parser
  161. def main():
  162. sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
  163. encoding='utf8',
  164. errors='backslashreplace',
  165. line_buffering=True)
  166. sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
  167. encoding='utf8',
  168. errors='backslashreplace',
  169. line_buffering=True)
  170. parser = buildargparser()
  171. args = parser.parse_args()
  172. if args.help or not args.dir:
  173. print_help()
  174. sys.exit(0)
  175. args.dir = os.path.realpath(args.dir)
  176. ctrlfile = mergerfs_control_file(args.dir)
  177. if not ismergerfs(ctrlfile):
  178. print("%s is not a mergerfs mount" % args.dir)
  179. sys.exit(1)
  180. basedir = args.dir
  181. execute = args.execute
  182. max_files = args.max_files
  183. max_size = args.max_size
  184. path_includes = ['*'] if not args.includepath else args.includepath
  185. path_excludes = args.excludepath
  186. srcmounts = mergerfs_srcmounts(ctrlfile)
  187. mount_stats = get_stats(srcmounts)
  188. try:
  189. for (root,dirs,files) in os.walk(basedir):
  190. if len(files) <= 1:
  191. continue
  192. if len(files) > max_files:
  193. continue
  194. if match(root,path_excludes):
  195. continue
  196. if not match(root,path_includes):
  197. continue
  198. total_size = 0
  199. file_stats = {}
  200. for file in files:
  201. fullpath = os.path.join(root,file)
  202. st = os.lstat(fullpath)
  203. if not stat.S_ISREG(st.st_mode):
  204. continue
  205. total_size += st.st_size
  206. file_stats[fullpath] = st
  207. if total_size >= max_size:
  208. continue
  209. tgtpath = sorted(mount_stats.items(),key=lambda x: x[1],reverse=True)[0][0]
  210. for (fullpath,st) in sorted(file_stats.items()):
  211. srcpath = xattr_basepath(fullpath)
  212. if srcpath == tgtpath:
  213. continue
  214. relpath = xattr_relpath(fullpath)
  215. mount_stats[srcpath] += st.st_size
  216. mount_stats[tgtpath] -= st.st_size
  217. args = build_move_file(srcpath,tgtpath,relpath)
  218. print_args(args)
  219. if execute:
  220. execute_cmd(args)
  221. except (KeyboardInterrupt,BrokenPipeError):
  222. pass
  223. sys.exit(0)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()