From d00fdedfe325152137a934f18dafb62d12dbdc60 Mon Sep 17 00:00:00 2001 From: Travis Clarke Date: Sat, 21 Mar 2020 18:11:30 -0700 Subject: [PATCH] v1.4.0 - limit default # of concurrent requests; fixes #6 --- Makefile | 2 +- README.md | 3 ++- docs/source/index.md | 4 +++- s3recon/__init__.py | 2 +- s3recon/s3recon.py | 55 ++++++++++++++++++++++++++------------------ 5 files changed, 40 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 367ed72..025ad7e 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ endif .PHONY: release release: tag ifdef version - curl -XPOST \ + curl -sSLf -XPOST \ -H "Authorization: token ${GITHUB_ACCESS_TOKEN}" \ -H "Content-Type: application/json" \ "https://api.github.com/repos/clarketm/${project}/releases" \ diff --git a/README.md b/README.md index 3ff6de1..3c3f947 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ $ pip install s3recon ## Usage ```text -usage: s3recon [-h] [-o file] [-t seconds] word_list [word_list ...] +usage: s3recon [-h] [-o file] [-d] [-p] [-t seconds] [-v] [-c num] word_list [word_list ...] positional arguments: word_list read words from one or more files @@ -36,6 +36,7 @@ optional arguments: -p, --public only include 'public' buckets in the output -t seconds, --timeout seconds http request timeout in (default: 30) -v, --version show program's version number and exit + -c num, --concurrency num maximum of concurrent requests (default: # of lcpus) ``` diff --git a/docs/source/index.md b/docs/source/index.md index 06161da..cf684ed 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -34,6 +34,7 @@ Version |version| ## Installation +> **NOTE**: s3recon requires python version **>=3.6** ```bash $ pip install s3recon @@ -42,7 +43,7 @@ $ pip install s3recon ## Usage ```text -usage: s3recon [-h] [-o file] [-t seconds] word_list [word_list ...] +usage: s3recon [-h] [-o file] [-d] [-p] [-t seconds] [-v] [-c num] word_list [word_list ...] positional arguments: word_list read words from one or more files @@ -54,6 +55,7 @@ optional arguments: -p, --public only include 'public' buckets in the output -t seconds, --timeout seconds http request timeout in (default: 30) -v, --version show program's version number and exit + -c num, --concurrency num maximum of concurrent requests (default: # of lcpus) ``` diff --git a/s3recon/__init__.py b/s3recon/__init__.py index 9c73af2..3e8d9f9 100644 --- a/s3recon/__init__.py +++ b/s3recon/__init__.py @@ -1 +1 @@ -__version__ = "1.3.1" +__version__ = "1.4.0" diff --git a/s3recon/s3recon.py b/s3recon/s3recon.py index a4a84c9..1ce3ddf 100644 --- a/s3recon/s3recon.py +++ b/s3recon/s3recon.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -from asyncio import get_event_loop, gather +from asyncio import get_event_loop, gather, Semaphore from collections import defaultdict from datetime import datetime from json import dumps from logging import getLogger, basicConfig, INFO -from os import environ +from os import environ, cpu_count from pathlib import Path from random import choice from sys import path @@ -24,6 +24,7 @@ from s3recon.mongodb import MongoDB, Hit, Access filterwarnings("ignore", category=InsecureRequestWarning) +cpus = cpu_count() or 1 logger = getLogger(__name__) @@ -52,21 +53,22 @@ def bucket_exists(url, timeout): return exists, public -def find_bucket(url, timeout, db): - exists, public = bucket_exists(url, timeout) +async def find_bucket(url, timeout, db, sem): + async with sem: + exists, public = bucket_exists(url, timeout) - if exists: - access = Access.PUBLIC if public else Access.PRIVATE - access_key = repr(access) - access_word = str(access).upper() - logger.info(f"{access_key} {access_word} {url}") + if exists: + access = Access.PUBLIC if public else Access.PRIVATE + access_key = repr(access) + access_word = str(access).upper() + logger.info(f"{access_key} {access_word} {url}") - hit = Hit(url, access) - if db and hit.is_valid(): - db.update({"url": url}, dict(hit)) - return Hit(url, access) + hit = Hit(url, access) + if db and hit.is_valid(): + db.update({"url": url}, dict(hit)) + return Hit(url, access) - return None + return None def collect_results(hits): @@ -105,7 +107,7 @@ def json_output_template(key, total, hits, exclude): return {} if exclude else {key: {"total": total, "hits": hits}} -def main(words, timeout, output, use_db, only_public): +def main(words, timeout, concurrency, output, use_db, only_public): start = datetime.now() loop = get_event_loop() @@ -129,16 +131,16 @@ def main(words, timeout, output, use_db, only_public): for env in environments } + db = MongoDB(host=database["host"], port=database["port"]) if use_db else None + sem = Semaphore(concurrency) + tasks = gather( *[ - loop.run_in_executor( - None, - find_bucket, + find_bucket( url, timeout, - MongoDB(host=database["host"], port=database["port"]) - if use_db - else None, + db, + sem ) for url in url_list ] @@ -198,6 +200,14 @@ def cli(): parser.add_argument( "-v", "--version", action="version", version=f"%(prog)s {__version__}" ) + parser.add_argument( + "-c", + "--concurrency", + type=int, + metavar="num", + default=cpus, + help=f"maximum of concurrent requests (default: {cpus})", + ) # parser.add_argument("words", nargs="?", type=argparse.FileType("r"), default=stdin, help="list of words to permute") parser.add_argument( "word_list", @@ -210,10 +220,11 @@ def cli(): output = args.output db = args.db timeout = args.timeout + concurrency = args.concurrency public = args.public words = {l.strip() for f in args.word_list for l in f} - main(words=words, timeout=timeout, output=output, use_db=db, only_public=public) + main(words=words, timeout=timeout, concurrency=max(1, concurrency), output=output, use_db=db, only_public=public) if __name__ == "__main__":