From 626e23f0a5c9d95b201daa665571cd5028214f50 Mon Sep 17 00:00:00 2001
From: Martin Stone
Date: Thu, 13 Sep 2018 23:03:46 +0100
Subject: [PATCH] Add simple support for importing rsync+hardlinks backups

---
 src/borg_import/main.py    | 73 ++++++++++++++++++++++++++++++++++++++
 src/borg_import/rsynchl.py | 20 +++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 src/borg_import/rsynchl.py

diff --git a/src/borg_import/main.py b/src/borg_import/main.py
index 66f3481..5f6cd47 100755
--- a/src/borg_import/main.py
+++ b/src/borg_import/main.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 from .rsnapshots import get_snapshots
+from .rsynchl import get_rsyncsnapshots
 
 log = logging.getLogger(__name__)
 
@@ -134,6 +135,78 @@ def import_rsnapshot(self, args):
             import_journal.unlink()
 
 
+class rsynchlImporter(Importer):
+    name = 'rsynchl'
+    description = 'import rsync+hardlink backups'
+    epilog = """
+        Imports from rsync backup sets by renaming each snapshot to a common
+        name independent of the snapshot, which allows the Borg files cache
+        to work with maximum efficiency.
+
+        An archive will be created for each folder in the rsync_root. The
+        archive name will be the folder name and the archive timestamp will
+        be the folder mtime. If the borg repository already contains an
+        archive with the folder name, that folder will be skipped.
+
+        The directory is called "borg-import-dir" inside the specified root,
+        and borg-import will note which snapshot is currently located there
+        in a file called "borg-import-dir.snapshot" besides it, in case
+        things go wrong.
+
+        Otherwise nothing in the rsync root is modified, and neither
+        are the contents of the snapshots.
+    """
+
+    def populate_parser(self, parser):
+        parser.add_argument('rsync_root', metavar='RSYNC_ROOT',
+                            help='Path to root directory', type=Path)
+        # TODO: support the full wealth of borg possibilities
+        parser.add_argument('repository', metavar='BORG_REPOSITORY', help='Borg repository', type=Path)
+        parser.set_defaults(function=self.import_rsynchl)
+
+    def import_rsynchl(self, args):
+        existing_archives = list_borg_archives(args)
+
+        import_path = args.rsync_root / 'borg-import-dir'
+        import_journal = args.rsync_root / 'borg-import-dir.snapshot'
+
+        if import_path.exists():
+            print('{} exists. Cannot continue.'.format(import_path))
+            return 1
+
+        for rsnapshot in get_rsyncsnapshots(args.rsync_root):
+            timestamp = rsnapshot['timestamp'].replace(microsecond=0)
+            snapshot_original_path = rsnapshot['path']
+            name = rsnapshot['name']
+            archive_name = args.prefix + name
+
+            if archive_name in existing_archives:
+                print('Skipping (already exists in repository):', name)
+                continue
+
+            print('Importing {} (timestamp {}) '.format(name, timestamp), end='')
+            if archive_name != name:
+                print('as', archive_name)
+            else:
+                print()
+            log.debug(' Moving {} -> {}'.format(rsnapshot['path'], import_path))
+
+            # We move the snapshots to import_path so that the files cache in Borg can work effectively.
+
+            with import_journal.open('w') as fd:
+                fd.write('Current snapshot: %s\n' % rsnapshot['name'])
+                fd.write('Original path: %s\n' % snapshot_original_path)
+
+            snapshot_original_path.rename(import_path)
+
+            try:
+                borg_import(args, archive_name, import_path, timestamp=timestamp)
+            finally:
+                log.debug(' Moving {} -> {}'.format(import_path, rsnapshot['path']))
+                import_path.rename(snapshot_original_path)
+                import_journal.unlink()
+
+
 def build_parser():
     common_parser = argparse.ArgumentParser(add_help=False)
     common_group = common_parser.add_argument_group('Common options')
diff --git a/src/borg_import/rsynchl.py b/src/borg_import/rsynchl.py
new file mode 100644
index 0000000..28f354d
--- /dev/null
+++ b/src/borg_import/rsynchl.py
@@ -0,0 +1,20 @@
+import re
+
+from .helpers.discover import discover, parser
+from .helpers.names import make_name
+from .helpers.timestamps import datetime_from_mtime
+
+
+def get_rsyncsnapshots(root):
+    """Get all snapshot metadata discovered in the rsync root directory."""
+    regex = re.compile(r'(?P<snapshot_name>.+)')
+    for path in discover(str(root), 1):
+        parsed = parser(path, regex)
+        if parsed is not None:
+            abs_path = root / path
+            meta = dict(
+                name=make_name(parsed['snapshot_name']),
+                path=abs_path,
+                timestamp=datetime_from_mtime(abs_path),
+            )
+            yield meta