Merge pull request #40 from D4Vinci/dev

D4Vinci · web-flow · commit 573bfe030cbb · 2025-02-26T00:27:21.000+02:00
v0.2.95
diff --git a/README.md b/README.md
@@ -220,6 +220,8 @@ This class is built on top of [httpx](https://www.python-httpx.org/) with additi
 
 For all methods, you have `stealthy_headers` which makes `Fetcher` create and use real browser's headers then create a referer header as if this request came from Google's search of this URL's domain. It's enabled by default. You can also set the number of retries with the argument `retries` for all methods and this will make httpx retry requests if it failed for any reason. The default number of retries for all `Fetcher` methods is 3.
 
+> Note: All headers generated by the `stealthy_headers` argument can be overridden by you through the `headers` argument.
+
 You can route all traffic (HTTP and HTTPS) to a proxy for any of these methods in this format `http://username:password@localhost:8030`
 ```python
 >> page = Fetcher().get('https://httpbin.org/get', stealthy_headers=True, follow_redirects=True)
diff --git a/scrapling/__init__.py b/scrapling/__init__.py
@@ -5,7 +5,7 @@
 from scrapling.parser import Adaptor, Adaptors
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.94"
+__version__ = "0.2.95"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
diff --git a/scrapling/engines/static.py b/scrapling/engines/static.py
@@ -42,16 +42,19 @@ def _headers_job(self, headers: Optional[Dict]) -> Dict:
         :return: A dictionary of the new headers.
         """
         headers = headers or {}
-
-        # Validate headers
-        if not headers.get('user-agent') and not headers.get('User-Agent'):
-            headers['User-Agent'] = generate_headers(browser_mode=False).get('User-Agent')
-            log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
+        headers_keys = set(map(str.lower, headers.keys()))
 
         if self.stealth:
             extra_headers = generate_headers(browser_mode=False)
+            # Don't overwrite user supplied headers
+            extra_headers = {key: value for key, value in extra_headers.items() if key.lower() not in headers_keys}
             headers.update(extra_headers)
-            headers.update({'referer': generate_convincing_referer(self.url)})
+            if 'referer' not in headers_keys:
+                headers.update({'referer': generate_convincing_referer(self.url)})
+
+        elif 'user-agent' not in headers_keys:
+            headers['User-Agent'] = generate_headers(browser_mode=False).get('User-Agent')
+            log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
 
         return headers
 
diff --git a/setup.cfg b/setup.cfg
@@ -1,8 +1,8 @@
 [metadata]
 name = scrapling
-version = 0.2.94
+version = 0.2.95
 author = Karim Shoair
 author_email = karim.shoair@pm.me
-description = Scrapling is an undetectable, powerful, flexible, adaptive, and high-performance web scraping library for Python.
+description = Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again!
 license = BSD
 home_page = https://github.com/D4Vinci/Scrapling
diff --git a/setup.py b/setup.py
@@ -6,10 +6,9 @@
 
 setup(
     name="scrapling",
-    version="0.2.94",
-    description="""Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
-     simplifies the process of extracting data from websites, even when they undergo structural changes, and offers
-     impressive speed improvements over many popular scraping tools.""",
+    version="0.2.95",
+    description="""Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again! In an internet filled with complications,
+    it simplifies web scraping, even when websites' design changes, while providing impressive speed that surpasses almost all alternatives.""",
     long_description=long_description,
     long_description_content_type="text/markdown",
     author="Karim Shoair",