diff --git a/README.md b/README.md index 2050cc2..34f16e1 100644 --- a/README.md +++ b/README.md @@ -11,3 +11,70 @@ syncing backups of an AWS S3 bucket (including old versions of objects) by making use of the bucket's [Amazon S3 Inventory][inv] files. [inv]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-inventory.html + +**Warning:** This is an in-development program. There may be bugs, and some +planned features have not been implemented yet. + + +Building & Running +================== + +1. [Install Rust and Cargo](https://www.rust-lang.org/tools/install). + +2. Clone this repository and `cd` into it. + +3. Run `cargo build --release` to build the binary. The intermediate build + artifacts will be cached in `target/` in order to speed up subsequent + builds. + +4. Run with `cargo run --release -- <arguments>`. + +5. If necessary, the actual binary can be found in `target/debug/s3invsync` (or + `target/release/s3invsync` if built with `--release`). It should run on any + system with the same OS and architecture as it was built on. + + +Usage +===== + + cargo run --release -- [<options>] <inventory-base> <outdir> + +`s3invsync` downloads the contents of an S3 bucket, including old versions of +objects, to the directory `<outdir>` using S3 Inventory files located at +`<inventory-base>`. + +`<inventory-base>` must be of the form `s3://{bucket}/{prefix}/`, where +`{bucket}` is the destination bucket on which the inventory files are stored +and `{prefix}/` is the key prefix under which the [inventory manifest files][] +are located in the bucket (i.e., appending a string of the form +`YYYY-MM-DDTHH-MMZ/manifest.json` to `{prefix}/` should yield a key for a +manifest file). + +[inventory manifest files]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-inventory-location.html + +When downloading a given key from S3, the latest version (if not deleted) is +stored at `{outdir}/{key}`, and the versionIds and etags of all latest object +versions in a given directory are stored in `.s3invsync.versions.json` in that +directory. 
Each non-latest, non-deleted version of a given key is stored at +`{outdir}/{key}.old.{versionId}.{etag}`. + +Options +------- + +- `-d <DATE>`, `--date <DATE>` — Download objects from the inventory created at + the given date. + + By default, the most recent inventory is downloaded. + + The date must be in the format `YYYY-MM-DD` (in which case the latest + inventory for the given date is used) or in the format `YYYY-MM-DDTHH-MMZ` + (to specify a specific inventory). + +- `-I <INT>`, `--inventory-jobs <INT>` — Specify the maximum number of inventory + list files to download & process at once [default: 20] + +- `-O <INT>`, `--object-jobs <INT>` — Specify the maximum number of inventory + entries to download & process at once [default: 20] + +- `--path-filter <REGEX>` — Only download objects whose keys match the given + [regular expression](https://docs.rs/regex/latest/regex/#syntax) diff --git a/src/main.rs b/src/main.rs index 0d1dd47..c84acbd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,23 +17,47 @@ use std::path::PathBuf; use tracing::Level; use tracing_subscriber::{filter::Targets, fmt::time::OffsetTime, prelude::*}; +/// Back up an AWS S3 bucket using S3 Inventory files +/// +/// See for more information. #[derive(Clone, Debug, Parser)] #[command(version)] struct Arguments { + /// Download objects from the inventory created at the given date. + /// + /// By default, the most recent inventory is downloaded. + /// + /// The date must be in the format `YYYY-MM-DD` (in which case the latest + /// inventory for the given date is used) or in the format + /// `YYYY-MM-DDTHH-MMZ` (to specify a specific inventory). 
#[arg(short, long)] date: Option, + /// Set the maximum number of inventory list files to download & process at + /// once #[arg(short = 'I', long, default_value = "20")] inventory_jobs: NonZeroUsize, + /// Set the maximum number of inventory entries to download & process at + /// once #[arg(short = 'O', long, default_value = "20")] object_jobs: NonZeroUsize, - #[arg(long)] + /// Only download objects whose keys match the given regular expression + #[arg(long, value_name = "REGEX")] path_filter: Option, + /// The location of the manifest files for the S3 inventory to back up + /// + /// `<inventory-base>` must be of the form `s3://{bucket}/{prefix}/`, where + /// `{bucket}` is the destination bucket on which the inventory files are + /// stored and `{prefix}/` is the key prefix under which the inventory + /// manifest files are located in the bucket (i.e., appending a string of + /// the form `YYYY-MM-DDTHH-MMZ/manifest.json` to `{prefix}/` should yield + /// a key for a manifest file). inventory_base: S3Location, + /// Directory in which to download the S3 objects outdir: PathBuf, }