Add simple support for importing rsync+hardlinks backups #26

Merged · 1 commit · Sep 13, 2018
73 changes: 73 additions & 0 deletions src/borg_import/main.py
@@ -8,6 +8,7 @@
from pathlib import Path

from .rsnapshots import get_snapshots
from .rsynchl import get_rsyncsnapshots

log = logging.getLogger(__name__)

@@ -134,6 +135,78 @@ def import_rsnapshot(self, args):
                import_journal.unlink()


class rsynchlImporter(Importer):
    name = 'rsynchl'
    description = 'import rsync+hardlink backups'
    epilog = """
    Imports rsync backup sets by renaming each snapshot to a common
    name independent of the snapshot, which allows the Borg files cache
    to work with maximum efficiency.

    An archive will be created for each folder in the rsync_root. The
    archive name will be the folder name, and the archive timestamp will
    be the folder mtime. If the Borg repository already contains an
    archive with that folder name, the folder will be skipped.

    Each snapshot is temporarily moved to a directory called
    "borg-import-dir" inside the specified root, and borg-import notes
    which snapshot is currently located there in a file called
    "borg-import-dir.snapshot" beside it, in case things go wrong.

    Otherwise nothing in the rsync root is modified, and neither
    are the contents of the snapshots.
    """

    def populate_parser(self, parser):
        parser.add_argument('rsync_root', metavar='RSYNC_ROOT',
                            help='Path to root directory', type=Path)
        # TODO: support the full wealth of borg possibilities
        parser.add_argument('repository', metavar='BORG_REPOSITORY', help='Borg repository', type=Path)
        parser.set_defaults(function=self.import_rsynchl)

    def import_rsynchl(self, args):
        existing_archives = list_borg_archives(args)

        import_path = args.rsync_root / 'borg-import-dir'
        import_journal = args.rsync_root / 'borg-import-dir.snapshot'

        if import_path.exists():
            print('{} exists. Cannot continue.'.format(import_path))
            return 1

        for rsnapshot in get_rsyncsnapshots(args.rsync_root):
            timestamp = rsnapshot['timestamp'].replace(microsecond=0)
            snapshot_original_path = rsnapshot['path']
            name = rsnapshot['name']
            archive_name = args.prefix + name

            if archive_name in existing_archives:
                print('Skipping (already exists in repository):', name)
                continue

            print('Importing {} (timestamp {}) '.format(name, timestamp), end='')
            if archive_name != name:
                print('as', archive_name)
            else:
                print()
            log.debug('  Moving {} -> {}'.format(snapshot_original_path, import_path))

            # We move the snapshots to import_path so that the files cache in Borg can work effectively.

            with import_journal.open('w') as fd:
                fd.write('Current snapshot: %s\n' % rsnapshot['name'])
                fd.write('Original path: %s\n' % snapshot_original_path)

            snapshot_original_path.rename(import_path)

            try:
                borg_import(args, archive_name, import_path, timestamp=timestamp)
            finally:
                log.debug('  Moving {} -> {}'.format(import_path, snapshot_original_path))
                import_path.rename(snapshot_original_path)
                import_journal.unlink()


def build_parser():
    common_parser = argparse.ArgumentParser(add_help=False)
    common_group = common_parser.add_argument_group('Common options')
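Moving every snapshot to the same `borg-import-dir` path before archiving is what keeps file paths stable across `borg create` runs, so the Borg files cache can skip unchanged files. A minimal sketch of that rename/journal protocol, assuming a hypothetical standalone helper `import_one` (the real code goes through `borg_import()` and the `Importer` machinery above):

```python
import subprocess
from pathlib import Path


def import_one(root: Path, snapshot: Path, repo: str, archive: str, timestamp: str):
    """Sketch of the rename/journal protocol used by import_rsynchl."""
    import_path = root / 'borg-import-dir'
    journal = root / 'borg-import-dir.snapshot'

    # Record which snapshot occupies the import dir, so an interrupted
    # run can be untangled by hand.
    journal.write_text('Current snapshot: %s\nOriginal path: %s\n'
                       % (snapshot.name, snapshot))
    snapshot.rename(import_path)  # every snapshot gets the same path
    try:
        # Stand-in for borg_import(); borg create accepts --timestamp.
        subprocess.run(['borg', 'create', '--timestamp', timestamp,
                        '%s::%s' % (repo, archive), str(import_path)],
                       check=True)
    finally:
        import_path.rename(snapshot)  # always restore the original name
        journal.unlink()
```

The `try`/`finally` ensures the snapshot is renamed back and the journal removed even when `borg create` fails, so the rsync root is left as it was found.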
20 changes: 20 additions & 0 deletions src/borg_import/rsynchl.py
@@ -0,0 +1,20 @@
import re

from .helpers.discover import discover, parser
from .helpers.names import make_name
from .helpers.timestamps import datetime_from_mtime


def get_rsyncsnapshots(root):
    """Get all snapshot metadata discovered in the rsync root directory."""
    regex = re.compile(r'(?P<snapshot_name>.+)')
    for path in discover(str(root), 1):
        parsed = parser(path, regex)
        if parsed is not None:
            abs_path = root / path
            meta = dict(
                name=make_name(parsed['snapshot_name']),
                path=abs_path,
                timestamp=datetime_from_mtime(abs_path),
            )
            yield meta
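For illustration, `get_rsyncsnapshots()` treats every top-level directory under the rsync root as one snapshot. A small usage sketch; the root path and directory names are hypothetical:

```python
from pathlib import Path

from borg_import.rsynchl import get_rsyncsnapshots

# Assumed layout: one directory per rsync run, e.g.
#   /backups/host1-2018-09-01/
#   /backups/host1-2018-09-02/
for snap in get_rsyncsnapshots(Path('/backups')):
    # Each dict carries the archive name, the absolute snapshot path,
    # and the mtime-derived timestamp that import_rsynchl() uses above.
    print(snap['name'], snap['timestamp'], snap['path'])
```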