Skip to content

Commit

Permalink
Add --list-dates option
Browse files Browse the repository at this point in the history
  • Loading branch information
jwodder committed Jan 10, 2025
1 parent a0cf46c commit 2256221
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 20 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ In Development
--------------
- Add `--compress-filter-msgs` option
- Support all documented S3 Inventory fields in inventory lists
- Add `--list-dates` option
- The `<outdir>` command-line argument is now optional and defaults to the
current directory

v0.1.0-alpha.2 (2025-01-06)
---------------------------
Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ for further options.
Usage
=====

s3invsync [<options>] <inventory-base> <outdir>
s3invsync [<options>] <inventory-base> [<outdir>]

`s3invsync` downloads the contents of an S3 bucket, including old versions of
objects, to the directory `<outdir>` using S3 Inventory files located at
objects, to the directory `<outdir>` (defaulting to the current working
directory if not specified) using S3 Inventory files located at
`<inventory-base>`.

`<inventory-base>` must be of the form `s3://{bucket}/{prefix}/`, where
Expand Down Expand Up @@ -112,6 +113,9 @@ Options
- `-I <INT>`, `--inventory-jobs <INT>` — Specify the maximum number of inventory
list files to download & process at once [default: 20]

- `--list-dates` — List available inventory manifest dates instead of
backing anything up

- `-l <level>`, `--log-level <level>` — Set the log level to the given value.
Possible values are "`ERROR`", "`WARN`", "`INFO`", "`DEBUG`", and "`TRACE`"
(all case-insensitive). [default value: `DEBUG`]
Expand Down
51 changes: 34 additions & 17 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::syncer::Syncer;
use crate::timestamps::DateMaybeHM;
use anyhow::Context;
use clap::Parser;
use futures_util::TryStreamExt;
use std::io::{stderr, IsTerminal};
use std::num::NonZeroUsize;
use std::path::PathBuf;
Expand Down Expand Up @@ -43,6 +44,10 @@ struct Arguments {
#[arg(short = 'I', long, default_value = "20")]
inventory_jobs: NonZeroUsize,

/// List available inventory manifest dates instead of backing anything up
#[arg(long)]
list_dates: bool,

/// Set logging level
#[arg(
short,
Expand Down Expand Up @@ -75,8 +80,9 @@ struct Arguments {
/// a key for a manifest file).
inventory_base: S3Location,

/// Directory in which to download the S3 objects
outdir: PathBuf,
/// Directory in which to download the S3 objects. Defaults to the current
/// working directory.
outdir: Option<PathBuf>,
}

// See
Expand Down Expand Up @@ -111,20 +117,31 @@ async fn run(args: Arguments) -> anyhow::Result<()> {
let region = get_bucket_region(args.inventory_base.bucket()).await?;
tracing::info!(%bucket, %region, "Found S3 bucket region");
let client = S3Client::new(region, args.inventory_base, args.trace_progress).await?;
tracing::info!("Fetching manifest ...");
let (manifest, manifest_date) = client.get_manifest_for_date(args.date).await?;
let syncer = Syncer::new(
client,
args.outdir,
manifest_date,
start_time,
args.inventory_jobs,
args.object_jobs,
args.path_filter,
args.compress_filter_msgs,
);
tracing::info!("Starting backup ...");
syncer.run(manifest).await?;
tracing::info!("Backup complete");
if args.list_dates {
let mut stream = client.list_all_manifest_timestamps();
while let Some(date) = stream.try_next().await? {
println!("{date}");
}
} else {
let outdir = match args.outdir {
Some(p) => p,
None => std::env::current_dir().context("failed to determine current directory")?,
};
tracing::info!("Fetching manifest ...");
let (manifest, manifest_date) = client.get_manifest_for_date(args.date).await?;
let syncer = Syncer::new(
client,
outdir,
manifest_date,
start_time,
args.inventory_jobs,
args.object_jobs,
args.path_filter,
args.compress_filter_msgs,
);
tracing::info!("Starting backup ...");
syncer.run(manifest).await?;
tracing::info!("Backup complete");
}
Ok(())
}
5 changes: 5 additions & 0 deletions src/s3/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ impl S3Client {
Ok((manifest, ts))
}

/// Return a stream yielding all available inventory manifest timestamps.
///
/// The stream is a [`ListManifestDates`] built from this client and its
/// `inventory_base` location.  Calling this method only constructs the
/// stream; as with any stream, nothing happens until it is polled.
pub(crate) fn list_all_manifest_timestamps(&self) -> ListManifestDates {
ListManifestDates::new(self, &self.inventory_base)
}

/// Return the full timestamp for the latest manifest, either (if `day` is
/// `None`) out of all manifests or else the latest on the given date.
#[tracing::instrument(skip_all, fields(day = day.map(|d| d.to_string())))]
Expand Down
2 changes: 1 addition & 1 deletion src/s3/streams.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type InnerListError = SdkError<ListObjectsV2Error, HttpResponse>;
/// parses their names as [`DateHM`] values, yielding the successful parses.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub(super) struct ListManifestDates {
pub(crate) struct ListManifestDates {
url: S3Location,
inner: Option<PaginationStream<Result<ListObjectsV2Output, InnerListError>>>,
results: VecDeque<DateHM>,
Expand Down

0 comments on commit 2256221

Please sign in to comment.