v1.4.0 - limit default # of concurrent requests; fixes #6

clarketm · Mar 22, 2020 · d00fded
1 parent 3e9bece
commit d00fded
Show file tree

Hide file tree

Showing 5 changed files with 40 additions and 26 deletions.
diff --git a/Makefile b/Makefile
@@ -41,7 +41,7 @@ endif
 .PHONY: release
 release: tag
 ifdef version
-	curl -XPOST \
+	curl -sSLf -XPOST \
 	-H "Authorization: token ${GITHUB_ACCESS_TOKEN}" \
 	-H "Content-Type: application/json" \
 	"https://api.github.com/repos/clarketm/${project}/releases" \

diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@ $ pip install s3recon
 ## Usage
 ```text
 
-usage: s3recon [-h] [-o file] [-t seconds] word_list [word_list ...]
+usage: s3recon [-h] [-o file] [-d] [-p] [-t seconds] [-v] [-c num] word_list [word_list ...]
 
 positional arguments:
   word_list                      read words from one or more <word-list> files
@@ -36,6 +36,7 @@ optional arguments:
   -p, --public                   only include 'public' buckets in the output
   -t seconds, --timeout seconds  http request timeout in <seconds> (default: 30)
   -v, --version                  show program's version number and exit
+  -c num, --concurrency num      maximum <num> of concurrent requests (default: # of lcpus)
   
 ```
 

diff --git a/docs/source/index.md b/docs/source/index.md
@@ -34,6 +34,7 @@ Version |version|
 </a>
 
 ## Installation
+> **NOTE**: s3recon requires python version **>=3.6**
 
 ```bash
 $ pip install s3recon
@@ -42,7 +43,7 @@ $ pip install s3recon
 ## Usage
 ```text
 
-usage: s3recon [-h] [-o file] [-t seconds] word_list [word_list ...]
+usage: s3recon [-h] [-o file] [-d] [-p] [-t seconds] [-v] [-c num] word_list [word_list ...]
 
 positional arguments:
   word_list                      read words from one or more <word-list> files
@@ -54,6 +55,7 @@ optional arguments:
   -p, --public                   only include 'public' buckets in the output
   -t seconds, --timeout seconds  http request timeout in <seconds> (default: 30)
   -v, --version                  show program's version number and exit
+  -c num, --concurrency num      maximum <num> of concurrent requests (default: # of lcpus)
   
 ```
 

diff --git a/s3recon/__init__.py b/s3recon/__init__.py
@@ -1 +1 @@
-__version__ = "1.3.1"
+__version__ = "1.4.0"
diff --git a/s3recon/s3recon.py b/s3recon/s3recon.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
-from asyncio import get_event_loop, gather
+from asyncio import get_event_loop, gather, Semaphore
 from collections import defaultdict
 from datetime import datetime
 from json import dumps
 from logging import getLogger, basicConfig, INFO
-from os import environ
+from os import environ, cpu_count
 from pathlib import Path
 from random import choice
 from sys import path
@@ -24,6 +24,7 @@
 from s3recon.mongodb import MongoDB, Hit, Access
 
 filterwarnings("ignore", category=InsecureRequestWarning)
+cpus = cpu_count() or 1
 
 logger = getLogger(__name__)
 
@@ -52,21 +53,22 @@ def bucket_exists(url, timeout):
     return exists, public
 
 
-def find_bucket(url, timeout, db):
-    exists, public = bucket_exists(url, timeout)
+async def find_bucket(url, timeout, db, sem):
+    async with sem:
+        exists, public = bucket_exists(url, timeout)
 
-    if exists:
-        access = Access.PUBLIC if public else Access.PRIVATE
-        access_key = repr(access)
-        access_word = str(access).upper()
-        logger.info(f"{access_key} {access_word} {url}")
+        if exists:
+            access = Access.PUBLIC if public else Access.PRIVATE
+            access_key = repr(access)
+            access_word = str(access).upper()
+            logger.info(f"{access_key} {access_word} {url}")
 
-        hit = Hit(url, access)
-        if db and hit.is_valid():
-            db.update({"url": url}, dict(hit))
-        return Hit(url, access)
+            hit = Hit(url, access)
+            if db and hit.is_valid():
+                db.update({"url": url}, dict(hit))
+            return Hit(url, access)
 
-    return None
+        return None
 
 
 def collect_results(hits):
@@ -105,7 +107,7 @@ def json_output_template(key, total, hits, exclude):
     return {} if exclude else {key: {"total": total, "hits": hits}}
 
 
-def main(words, timeout, output, use_db, only_public):
+def main(words, timeout, concurrency, output, use_db, only_public):
     start = datetime.now()
     loop = get_event_loop()
 
@@ -129,16 +131,16 @@ def main(words, timeout, output, use_db, only_public):
         for env in environments
     }
 
+    db = MongoDB(host=database["host"], port=database["port"]) if use_db else None
+    sem = Semaphore(concurrency)
+
     tasks = gather(
         *[
-            loop.run_in_executor(
-                None,
-                find_bucket,
+            find_bucket(
                 url,
                 timeout,
-                MongoDB(host=database["host"], port=database["port"])
-                if use_db
-                else None,
+                db,
+                sem
             )
             for url in url_list
         ]
@@ -198,6 +200,14 @@ def cli():
     parser.add_argument(
         "-v", "--version", action="version", version=f"%(prog)s {__version__}"
     )
+    parser.add_argument(
+        "-c",
+        "--concurrency",
+        type=int,
+        metavar="num",
+        default=cpus,
+        help=f"maximum <num> of concurrent requests (default: {cpus})",
+    )
     # parser.add_argument("words", nargs="?", type=argparse.FileType("r"), default=stdin, help="list of words to permute")
     parser.add_argument(
         "word_list",
@@ -210,10 +220,11 @@ def cli():
     output = args.output
     db = args.db
     timeout = args.timeout
+    concurrency = args.concurrency
     public = args.public
     words = {l.strip() for f in args.word_list for l in f}
 
-    main(words=words, timeout=timeout, output=output, use_db=db, only_public=public)
+    main(words=words, timeout=timeout, concurrency=max(1, concurrency), output=output, use_db=db, only_public=public)
 
 
 if __name__ == "__main__":