From 626e23f0a5c9d95b201daa665571cd5028214f50 Mon Sep 17 00:00:00 2001
From: Martin Stone
Date: Thu, 13 Sep 2018 23:03:46 +0100
Subject: [PATCH] Add simple support for importing rsync+hardlinks backups

---
 src/borg_import/main.py    | 73 ++++++++++++++++++++++++++++++++++++++
 src/borg_import/rsynchl.py | 20 +++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 src/borg_import/rsynchl.py

diff --git a/src/borg_import/main.py b/src/borg_import/main.py
index 66f3481..5f6cd47 100755
--- a/src/borg_import/main.py
+++ b/src/borg_import/main.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 from .rsnapshots import get_snapshots
+from .rsynchl import get_rsyncsnapshots
 
 log = logging.getLogger(__name__)
 
@@ -134,6 +135,78 @@ def import_rsnapshot(self, args):
             import_journal.unlink()
 
 
+class rsynchlImporter(Importer):
+    name = 'rsynchl'
+    description = 'import rsync+hardlink backups'
+    epilog = """
+        Imports from rsync backup sets by renaming each snapshot to a common
+        name independent of the snapshot, which allows the Borg files cache
+        to work with maximum efficiency.
+
+        An archive will be created for each folder in the rsync_root. The
+        archive name will be the folder name and the archive timestamp will
+        be the folder mtime. If the borg repository already contains an
+        archive with the folder name, that folder will be skipped.
+
+        The directory is called "borg-import-dir" inside the specified root,
+        and borg-import will note which snapshot is currently located there
+        in a file called "borg-import-dir.snapshot" besides it, in case
+        things go wrong.
+
+        Otherwise nothing in the rsync root is modified, and neither
+        are the contents of the snapshots.
+    """
+
+    def populate_parser(self, parser):
+        parser.add_argument('rsync_root', metavar='RSYNC_ROOT',
+                            help='Path to root directory', type=Path)
+        # TODO: support the full wealth of borg possibilities
+        parser.add_argument('repository', metavar='BORG_REPOSITORY', help='Borg repository', type=Path)
+        parser.set_defaults(function=self.import_rsynchl)
+
+    def import_rsynchl(self, args):
+        existing_archives = list_borg_archives(args)
+
+        import_path = args.rsync_root / 'borg-import-dir'
+        import_journal = args.rsync_root / 'borg-import-dir.snapshot'
+
+        if import_path.exists():
+            print('{} exists. Cannot continue.'.format(import_path))
+            return 1
+
+        for rsnapshot in get_rsyncsnapshots(args.rsync_root):
+            timestamp = rsnapshot['timestamp'].replace(microsecond=0)
+            snapshot_original_path = rsnapshot['path']
+            name = rsnapshot['name']
+            archive_name = args.prefix + name
+
+            if archive_name in existing_archives:
+                print('Skipping (already exists in repository):', name)
+                continue
+
+            print('Importing {} (timestamp {}) '.format(name, timestamp), end='')
+            if archive_name != name:
+                print('as', archive_name)
+            else:
+                print()
+            log.debug(' Moving {} -> {}'.format(rsnapshot['path'], import_path))
+
+            # We move the snapshots to import_path so that the files cache in Borg can work effectively.
+
+            with import_journal.open('w') as fd:
+                fd.write('Current snapshot: %s\n' % rsnapshot['name'])
+                fd.write('Original path: %s\n' % snapshot_original_path)
+
+            snapshot_original_path.rename(import_path)
+
+            try:
+                borg_import(args, archive_name, import_path, timestamp=timestamp)
+            finally:
+                log.debug(' Moving {} -> {}'.format(import_path, rsnapshot['path']))
+                import_path.rename(snapshot_original_path)
+                import_journal.unlink()
+
+
 def build_parser():
     common_parser = argparse.ArgumentParser(add_help=False)
     common_group = common_parser.add_argument_group('Common options')
diff --git a/src/borg_import/rsynchl.py b/src/borg_import/rsynchl.py
new file mode 100644
index 0000000..28f354d
--- /dev/null
+++ b/src/borg_import/rsynchl.py
@@ -0,0 +1,20 @@
+import re
+
+from .helpers.discover import discover, parser
+from .helpers.names import make_name
+from .helpers.timestamps import datetime_from_mtime
+
+
+def get_rsyncsnapshots(root):
+    """Get all snapshot metadata discovered in the rsync root directory."""
+    regex = re.compile(r'(?P<snapshot_name>.+)')
+    for path in discover(str(root), 1):
+        parsed = parser(path, regex)
+        if parsed is not None:
+            abs_path = root / path
+            meta = dict(
+                name=make_name(parsed['snapshot_name']),
+                path=abs_path,
+                timestamp=datetime_from_mtime(abs_path),
+            )
+            yield meta