From ca7c62bd908127c5875ba5a583b13c8ec23595a4 Mon Sep 17 00:00:00 2001 From: Vignesh Rao Date: Sun, 28 Jul 2024 23:25:12 -0500 Subject: [PATCH] Set `.wiki` as suffix for wiki pages after archiving Create new env var to store a local copy of the backup Remove cloned directory after successful S3 upload (default) --- README.md | 11 ++++++----- docs/README.html | 1 + docs/README.md | 11 ++++++----- docs/_sources/README.md.txt | 11 ++++++----- docs/genindex.html | 4 +++- docs/index.html | 15 ++++++++++++++- docs/objects.inv | Bin 844 -> 852 bytes docs/searchindex.js | 2 +- git2s3/__init__.py | 2 +- git2s3/config.py | 1 + git2s3/main.py | 18 ++++++++++++++---- git2s3/s3.py | 12 ++++++++++-- 12 files changed, 63 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 1830c8d..55d1690 100644 --- a/README.md +++ b/README.md @@ -61,12 +61,13 @@ git2s3 start - **GIT_TOKEN** - GitHub token to get ALL repos (including private). - **GIT_IGNORE** - List of repositories/gists to ignore. Defaults to `[]` - **SOURCE** - Source options `[repo, gist, wiki]` to back up. Defaults to all. -- **LOG** - Log options to log to a ``file`` or ``stdout``. _Does not apply when custom logger is used_ +- **LOG** - Log options to log to a `file` or `stdout`. _Does not apply when custom logger is used_ - **DEBUG** - Boolean flag to enable debug level logging. _Does not apply when custom logger is used_ -- **AWS_PROFILE_NAME** - AWS profile name. Uses the CLI config value ``AWS_DEFAULT_PROFILE`` by default. -- **AWS_ACCESS_KEY_ID** - AWS access key ID. Uses the CLI config value ``AWS_ACCESS_KEY_ID`` by default. -- **AWS_SECRET_ACCESS_KEY** - AWS secret key. Uses the CLI config value ``AWS_SECRET_ACCESS_KEY`` by default. -- **AWS_REGION_NAME** - S3 bucket's region. Uses the CLI config value ``AWS_DEFAULT_REGION`` by default. +- **STORE_LOCAL** - Boolean flag to store the backup locally. Defaults to `False` +- **AWS_PROFILE_NAME** - AWS profile name. 
Uses the CLI config value `AWS_DEFAULT_PROFILE` by default. +- **AWS_ACCESS_KEY_ID** - AWS access key ID. Uses the CLI config value `AWS_ACCESS_KEY_ID` by default. +- **AWS_SECRET_ACCESS_KEY** - AWS secret key. Uses the CLI config value `AWS_SECRET_ACCESS_KEY` by default. +- **AWS_REGION_NAME** - S3 bucket's region. Uses the CLI config value `AWS_DEFAULT_REGION` by default. - **AWS_BUCKET_NAME** - AWS bucket name to store the backups. - **AWS_S3_PREFIX** - S3 prefix _(folder like)_ for the backup. Defaults to `github` - **BOTO3_RETRY_ATTEMPTS** - Number of retries for Boto3 client config. Defaults to `10` diff --git a/docs/README.html b/docs/README.html index 17686e5..c74cf79 100644 --- a/docs/README.html +++ b/docs/README.html @@ -99,6 +99,7 @@

Environment Variables

SOURCE - Source options [repo, gist, wiki] to back up. Defaults to all.

  • LOG - Log options to log to a file or stdout. Does not apply when custom logger is used

  • DEBUG - Boolean flag to enable debug level logging. Does not apply when custom logger is used

  • +
  • STORE_LOCAL - Boolean flag to store the backup locally. Defaults to False

  • AWS_PROFILE_NAME - AWS profile name. Uses the CLI config value AWS_DEFAULT_PROFILE by default.

  • AWS_ACCESS_KEY_ID - AWS access key ID. Uses the CLI config value AWS_ACCESS_KEY_ID by default.

  • AWS_SECRET_ACCESS_KEY - AWS secret key. Uses the CLI config value AWS_SECRET_ACCESS_KEY by default.

  • diff --git a/docs/README.md b/docs/README.md index 1830c8d..55d1690 100644 --- a/docs/README.md +++ b/docs/README.md @@ -61,12 +61,13 @@ git2s3 start - **GIT_TOKEN** - GitHub token to get ALL repos (including private). - **GIT_IGNORE** - List of repositories/gists to ignore. Defaults to `[]` - **SOURCE** - Source options `[repo, gist, wiki]` to back up. Defaults to all. -- **LOG** - Log options to log to a ``file`` or ``stdout``. _Does not apply when custom logger is used_ +- **LOG** - Log options to log to a `file` or `stdout`. _Does not apply when custom logger is used_ - **DEBUG** - Boolean flag to enable debug level logging. _Does not apply when custom logger is used_ -- **AWS_PROFILE_NAME** - AWS profile name. Uses the CLI config value ``AWS_DEFAULT_PROFILE`` by default. -- **AWS_ACCESS_KEY_ID** - AWS access key ID. Uses the CLI config value ``AWS_ACCESS_KEY_ID`` by default. -- **AWS_SECRET_ACCESS_KEY** - AWS secret key. Uses the CLI config value ``AWS_SECRET_ACCESS_KEY`` by default. -- **AWS_REGION_NAME** - S3 bucket's region. Uses the CLI config value ``AWS_DEFAULT_REGION`` by default. +- **STORE_LOCAL** - Boolean flag to store the backup locally. Defaults to `False` +- **AWS_PROFILE_NAME** - AWS profile name. Uses the CLI config value `AWS_DEFAULT_PROFILE` by default. +- **AWS_ACCESS_KEY_ID** - AWS access key ID. Uses the CLI config value `AWS_ACCESS_KEY_ID` by default. +- **AWS_SECRET_ACCESS_KEY** - AWS secret key. Uses the CLI config value `AWS_SECRET_ACCESS_KEY` by default. +- **AWS_REGION_NAME** - S3 bucket's region. Uses the CLI config value `AWS_DEFAULT_REGION` by default. - **AWS_BUCKET_NAME** - AWS bucket name to store the backups. - **AWS_S3_PREFIX** - S3 prefix _(folder like)_ for the backup. Defaults to `github` - **BOTO3_RETRY_ATTEMPTS** - Number of retries for Boto3 client config. 
Defaults to `10` diff --git a/docs/_sources/README.md.txt b/docs/_sources/README.md.txt index 1830c8d..55d1690 100644 --- a/docs/_sources/README.md.txt +++ b/docs/_sources/README.md.txt @@ -61,12 +61,13 @@ git2s3 start - **GIT_TOKEN** - GitHub token to get ALL repos (including private). - **GIT_IGNORE** - List of repositories/gists to ignore. Defaults to `[]` - **SOURCE** - Source options `[repo, gist, wiki]` to back up. Defaults to all. -- **LOG** - Log options to log to a ``file`` or ``stdout``. _Does not apply when custom logger is used_ +- **LOG** - Log options to log to a `file` or `stdout`. _Does not apply when custom logger is used_ - **DEBUG** - Boolean flag to enable debug level logging. _Does not apply when custom logger is used_ -- **AWS_PROFILE_NAME** - AWS profile name. Uses the CLI config value ``AWS_DEFAULT_PROFILE`` by default. -- **AWS_ACCESS_KEY_ID** - AWS access key ID. Uses the CLI config value ``AWS_ACCESS_KEY_ID`` by default. -- **AWS_SECRET_ACCESS_KEY** - AWS secret key. Uses the CLI config value ``AWS_SECRET_ACCESS_KEY`` by default. -- **AWS_REGION_NAME** - S3 bucket's region. Uses the CLI config value ``AWS_DEFAULT_REGION`` by default. +- **STORE_LOCAL** - Boolean flag to store the backup locally. Defaults to `False` +- **AWS_PROFILE_NAME** - AWS profile name. Uses the CLI config value `AWS_DEFAULT_PROFILE` by default. +- **AWS_ACCESS_KEY_ID** - AWS access key ID. Uses the CLI config value `AWS_ACCESS_KEY_ID` by default. +- **AWS_SECRET_ACCESS_KEY** - AWS secret key. Uses the CLI config value `AWS_SECRET_ACCESS_KEY` by default. +- **AWS_REGION_NAME** - S3 bucket's region. Uses the CLI config value `AWS_DEFAULT_REGION` by default. - **AWS_BUCKET_NAME** - AWS bucket name to store the backups. - **AWS_S3_PREFIX** - S3 prefix _(folder like)_ for the backup. Defaults to `github` - **BOTO3_RETRY_ATTEMPTS** - Number of retries for Boto3 client config. 
Defaults to `10` diff --git a/docs/genindex.html b/docs/genindex.html index d1350d6..2386cab 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -243,10 +243,12 @@

    I

    L

    diff --git a/docs/index.html b/docs/index.html index d82d4fe..c4012ea 100644 --- a/docs/index.html +++ b/docs/index.html @@ -251,8 +251,16 @@

    Welcome to Git2S3’s documentation!
    -trigger() None
    +trigger() bool

    Trigger to upload all file objects concurrently to S3.

    +
    +
    Returns:
    +

    Returns True if at least one upload raised an exception, otherwise False.

    +
    +
    Return type:
    +

    bool

    +
    +
    @@ -428,6 +436,11 @@

    Configurationdebug: bool
    +
    +
    +local_store: bool
    +
    +
    aws_profile_name: str | None
    diff --git a/docs/objects.inv b/docs/objects.inv index 918b8eb0519fca327d35867517bb6d31c3d2ce6e..2256500d28a95119c743576d076c9625ce5ab22c 100644 GIT binary patch delta 745 zcmV-9+rCRr!dDhs$$6j^wF|ACnS#x2JWc2+YbSG=w7DAENg1C>5}WP(~--BesQu&>wH ziMQ{n>7;#`sF_d_h%+Lv6w&adLrCWvu*`6cT**QONjoS+wSVDTilwww&agaqF1Mfh zr{qXK6ZJ^22Dv5Lz>4q7ru?k4M?=R+ta$^3OYPAA6EH>@Tc}8PmQcpPTC#xy;mru0 zrDGdFl3h@jD%z+O7cf@FKTNfFCDR^X#+pQh~SfcyJdAsGS5ef=9IhS;z~; zs2~>0S0Ger{xmff$UjNQ&EX(2~q<>D$86GrbqSiaq6mu!0CYRYMvq_4FouwCo|HV>^%GV3w%((WaPTM9N@oh(&=QwnV zL-bRrc(;=Vus?CHqu=JYcMtQ*U$Gw>#j5E|6=Fwviz?REqy^byy_uj(5W(;?8cpxR z<76;9s-0Q<0Kt_1FhOn&=mHd3QRwW=NwhVdGi1FIE|FeQdeBmcT5BE{Y$qz`oNfU@UtJT7Mb2W^u?`jpYAicd$Gry(Z}=Sltrzc?^GK0 zT^Og7)E4ql#U-X0n`>Bk(bPl12^?pzrAwwxurMq?s0^5=BT1$8FWgoc9iOzPZP z*-k+wYJa>#g>5cnvCd@aA92gSEp$bmoreVAEdT6=BHz) z>Y28g-oD2E{faBwI66>B@9 zVEY&MI{IySd-t%ce2M)?C8_2&Rh;Q9s+e1|23kaq^=2Hefd|9$Y&5+O!`WbVQoFGD z0h~zxv%<=g?w?=3P37%e3&P^CzBe;}Y%jP{-TeF^YCXggk4`VXzJfh None: if datastore.private: wiki_dest = str( os.path.join( - self.clone_dir, datastore.source, "private", datastore.name + self.clone_dir, + datastore.source, + "private", + f"{datastore.name}.wiki", ) ) else: wiki_dest = str( - os.path.join(self.clone_dir, datastore.source, "public", datastore.name) + os.path.join( + self.clone_dir, datastore.source, "public", f"{datastore.name}.wiki" + ) ) if not os.path.isdir(wiki_dest): os.makedirs(wiki_dest) @@ -376,7 +381,12 @@ def start(self) -> None: if squire.check_file_presence(self.clone_dir): self.logger.info("Initiating S3 upload process...") s3_upload = s3.Uploader(self.env, self.logger) - s3_upload.trigger() - self.logger.info("S3 upload process completed.") + if s3_upload.trigger(): + self.logger.error("Some objects failed to upload.") + else: + self.logger.info("S3 upload process completed.") + if not self.env.local_store: + self.logger.info("Deleting local copy!") + shutil.rmtree(self.clone_dir) else: self.logger.warning("No files found for S3 upload process.") diff 
--git a/git2s3/s3.py b/git2s3/s3.py index 1d4fb13..413ee48 100644 --- a/git2s3/s3.py +++ b/git2s3/s3.py @@ -61,8 +61,13 @@ def upload_file( except (FileNotFoundError, BotoCoreError, ClientError) as error: raise exc.UploadError(error) - def trigger(self) -> None: - """Trigger to upload all file objects concurrently to S3.""" + def trigger(self) -> bool: + """Trigger to upload all file objects concurrently to S3. + + Returns: + bool: + Returns a boolean flag to indicate completion status. + """ futures = {} with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: for root, dirs, files in os.walk(self.base_path): @@ -74,10 +79,13 @@ def trigger(self) -> None: self.upload_file, local_file_path, s3_file_path ) futures[future] = s3_file_path + exception = False for future in as_completed(futures): if future.exception(): + exception = True self.logger.error( "Thread processing '%s' received an exception: %s", futures[future], future.exception(), ) + return exception