Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .ci/benchmark.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.zsh 6 872 12
.zsh-theme 1 97 1
TOTAL: 11493 16707548 15038 46718 4910
credsweeper result_cnt : 14629, lost_cnt : 0, true_cnt : 14438, false_cnt : 191
credsweeper result_cnt : 14632, lost_cnt : 0, true_cnt : 14441, false_cnt : 191
Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1
------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
API 239 3172 187 228 228 0 3359 11 0.000000 0.046025 0.996943 1.000000 0.953975 0.976445
Expand All @@ -243,7 +243,7 @@ AWS S3 Bucket 67 23 0 9
Atlassian Old PAT token 5 8 0 11 5 6 2 0 0.750000 0.000000 0.538462 0.454545 1.000000 0.625000
Auth 1075 2754 81 1059 1051 8 2827 24 0.002822 0.022326 0.991816 0.992446 0.977674 0.985005
Azure Access Token 21 0 0 13 13 0 0 8 0.380952 0.619048 1.000000 0.619048 0.764706
BASE64 Private Key 22 4 0 21 21 0 4 1 0.000000 0.045455 0.961538 1.000000 0.954545 0.976744
BASE64 Private Key 22 4 0 22 22 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
BASE64 encoded PEM Private Key 12 0 0 10 10 0 0 2 0.166667 0.833333 1.000000 0.833333 0.909091
Bitbucket Client ID 36 66 0 48 31 16 50 5 0.242424 0.138889 0.794118 0.659574 0.861111 0.746988
Bitbucket Client Secret 46 90 1 88 41 46 45 5 0.505495 0.108696 0.627737 0.471264 0.891304 0.616541
Expand All @@ -270,7 +270,7 @@ Jira 2FA 36 1 1 3
Key 4209 15726 483 4198 4184 14 16195 25 0.000864 0.005940 0.998090 0.996665 0.994060 0.995361
Nonce 113 49 0 111 111 0 49 2 0.000000 0.017699 0.987654 1.000000 0.982301 0.991071
Other 9 7446 5 0 0 7451 9 0.000000 1.000000 0.998794 0.000000
PEM Private Key 1142 1483 0 1144 1140 4 1479 2 0.002697 0.001751 0.997714 0.996503 0.998249 0.997375
PEM Private Key 1142 1483 0 1146 1142 4 1479 0 0.002697 0.000000 0.998476 0.996510 1.000000 0.998252
Password 2320 7545 2539 2275 2251 24 10060 69 0.002380 0.029741 0.992502 0.989451 0.970259 0.979761
SQL Password 44 13 0 41 41 0 13 3 0.000000 0.068182 0.947368 1.000000 0.931818 0.964706
Salesforce Credentials 6 0 0 5 5 0 0 1 0.166667 0.833333 1.000000 0.833333 0.909091
Expand All @@ -284,4 +284,4 @@ Token 929 4172 455 85
Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
URL Credentials 225 168 197 225 225 0 365 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
UUID 1857 265 0 1840 1839 1 264 18 0.003774 0.009693 0.991046 0.999457 0.990307 0.994861
15038 46718 4910 14638 14438 191 46527 600 0.004088 0.039899 0.987192 0.986944 0.960101 0.973337
15038 46718 4910 14641 14441 191 46527 597 0.004088 0.039699 0.987240 0.986946 0.960301 0.973441
28 changes: 17 additions & 11 deletions credsweeper/deep_scanner/deep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,21 @@
from .encoder_scanner import EncoderScanner
from .gzip_scanner import GzipScanner
from .html_scanner import HtmlScanner
from .jclass_scanner import JclassScanner
from .jks_scanner import JksScanner
from .lang_scanner import LangScanner
from .lzma_scanner import LzmaScanner
from .mxfile_scanner import MxfileScanner
from .pdf_scanner import PdfScanner
from .pkcs12_scanner import Pkcs12Scanner
from .pkcs_scanner import PkcsScanner
from .pptx_scanner import PptxScanner
from .tar_scanner import TarScanner
from .tmx_scanner import TmxScanner
from .xlsx_scanner import XlsxScanner
from .xml_scanner import XmlScanner
from .zip_scanner import ZipScanner
from ..common.constants import DEFAULT_ENCODING
from ..file_handler.descriptor import Descriptor
from ..file_handler.file_path_extractor import FilePathExtractor
from ..file_handler.struct_content_provider import StructContentProvider

Expand All @@ -49,11 +51,12 @@ class DeepScanner(
EncoderScanner, #
GzipScanner, #
HtmlScanner, #
JclassScanner, #
JksScanner, #
LangScanner, #
LzmaScanner, #
PdfScanner, #
Pkcs12Scanner, #
PkcsScanner, #
PptxScanner, #
TarScanner, #
DebScanner, #
Expand Down Expand Up @@ -82,7 +85,7 @@ def scanner(self) -> Scanner:
return self.__scanner

@staticmethod
def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any], List[Any]]:
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
deep_scanners: List[Any] = []
fallback_scanners: List[Any] = []
Expand All @@ -91,20 +94,20 @@ def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any
deep_scanners.append(ZipScanner)
# probably, there might be a docx, xlsx and so on.
# It might be scanned with text representation in third-party libraries.
if file_type in (".xlsx", ".ods"):
if descriptor.extension in (".xlsx", ".ods"):
deep_scanners.append(XlsxScanner)
else:
fallback_scanners.append(XlsxScanner)
if ".docx" == file_type:
if ".docx" == descriptor.extension:
deep_scanners.append(DocxScanner)
else:
fallback_scanners.append(DocxScanner)
if ".pptx" == file_type:
if ".pptx" == descriptor.extension:
deep_scanners.append(PptxScanner)
else:
fallback_scanners.append(PptxScanner)
elif Util.is_com(data):
if ".xls" == file_type:
if ".xls" == descriptor.extension:
deep_scanners.append(XlsxScanner)
else:
fallback_scanners.append(XlsxScanner)
Expand All @@ -125,10 +128,12 @@ def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any
deep_scanners.append(GzipScanner)
elif Util.is_pdf(data):
deep_scanners.append(PdfScanner)
elif Util.is_jclass(data):
deep_scanners.append(JclassScanner)
elif Util.is_jks(data):
deep_scanners.append(JksScanner)
elif Util.is_asn1(data):
deep_scanners.append(Pkcs12Scanner)
deep_scanners.append(PkcsScanner)
elif Util.is_xml(data):
if Util.is_html(data):
deep_scanners.append(HtmlScanner)
Expand All @@ -146,7 +151,7 @@ def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif Util.is_eml(data):
if ".eml" == file_type:
if ".eml" == descriptor.extension:
deep_scanners.append(EmlScanner)
else:
fallback_scanners.append(EmlScanner)
Expand All @@ -160,7 +165,8 @@ def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any
deep_scanners.append(LangScanner)
deep_scanners.append(ByteScanner)
else:
logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
repr(data[:MIN_DATA_LEN]), len(data))
return deep_scanners, fallback_scanners

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
Expand All @@ -178,7 +184,7 @@ def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int

"""
candidates: List[Candidate] = []
deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.descriptor, depth)
fallback = True
for scan_class in deep_scanners:
new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
Expand Down
74 changes: 74 additions & 0 deletions credsweeper/deep_scanner/jclass_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import io
import logging
import struct
from abc import ABC
from typing import List, Optional

from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.struct_content_provider import StructContentProvider

logger = logging.getLogger(__name__)


class JclassScanner(AbstractScanner, ABC):
    """Implements java .class scanning

    Only the constant pool of the ClassFile is parsed: its Utf8 entries are
    collected and handed over to the structure scanner.
    """

    # Payload size in bytes (the tag byte excluded) of every fixed-size
    # constant pool entry, keyed by tag. Utf8 (tag 1) has a variable size
    # and is handled separately.
    _JCLASS_FIXED_PAYLOAD = {
        3: 4,  # Integer
        4: 4,  # Float
        5: 8,  # Long
        6: 8,  # Double
        7: 2,  # Class
        8: 2,  # String
        9: 4,  # Fieldref
        10: 4,  # Methodref
        11: 4,  # InterfaceMethodref
        12: 4,  # NameAndType
        15: 3,  # MethodHandle
        16: 2,  # MethodType
        17: 4,  # Dynamic
        18: 4,  # InvokeDynamic
        19: 2,  # Module
        20: 2,  # Package
    }

    @staticmethod
    def u2(stream: io.BytesIO) -> int:
        """Extracts unsigned 16 bit big-endian

        Raises:
            struct.error: when fewer than 2 bytes are left in the stream

        """
        return struct.unpack(">H", stream.read(2))[0]

    @staticmethod
    def get_utf8_constants(stream: io.BytesIO) -> List[str]:
        """Extracts only Utf8 constants from java ClassFile

        Args:
            stream: binary stream positioned at the constant_pool_count field

        Return:
            decoded Utf8 constants whose declared length is at least MIN_DATA_LEN,
            in the order they appear in the pool

        Raises:
            struct.error: when the stream ends before the pool is fully read

        """
        result: List[str] = []
        # constant_pool_count is one greater than the number of entries:
        # valid pool indexes are 1 .. constant_pool_count - 1
        item_count = JclassScanner.u2(stream)
        index = 1
        while index < item_count:
            tag = struct.unpack("B", stream.read(1))[0]
            if 1 == tag:
                length = JclassScanner.u2(stream)
                data = stream.read(length)
                if MIN_DATA_LEN <= length:
                    result.append(data.decode(encoding=UTF_8, errors="replace"))
            elif tag in JclassScanner._JCLASS_FIXED_PAYLOAD:
                # the payload is not interesting - just skip it
                stream.read(JclassScanner._JCLASS_FIXED_PAYLOAD[tag])
            else:
                # the size of the entry is unknown, so the rest cannot be parsed
                logger.error("Unknown tag %s", tag)
                break
            # 8-byte constants (Long, Double) take up two entries in the pool
            index += 2 if tag in (5, 6) else 1
        return result

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts Utf8 constants from a java class binary and scans them as a structure

        Args:
            data_provider: provider with the raw bytes of the class file
            depth: recursion depth passed on to the structure scan
            recursive_limit_size: size budget; it is reduced by the total length
                of the extracted constants before the structure scan

        Return:
            result of the structure scan, or None when parsing or scanning raised

        """
        try:
            stream = io.BytesIO(data_provider.data)
            stream.read(4)  # magic
            # the version is used only to enrich the info of found candidates
            minor = JclassScanner.u2(stream)
            major = JclassScanner.u2(stream)
            constants = JclassScanner.get_utf8_constants(stream)
            struct_content_provider = StructContentProvider(struct=constants,
                                                            file_path=data_provider.file_path,
                                                            file_type=data_provider.file_type,
                                                            info=f"{data_provider.info}|Java.{major}.{minor}")
            new_limit = recursive_limit_size - sum(len(x) for x in constants)
            return self.structure_scan(struct_content_provider, depth, new_limit)
        except Exception as jclass_exc:
            # best effort: a broken class file must not stop the whole scan
            logger.error("%s:%s", data_provider.file_path, jclass_exc)
        return None
45 changes: 0 additions & 45 deletions credsweeper/deep_scanner/pkcs12_scanner.py

This file was deleted.

41 changes: 41 additions & 0 deletions credsweeper/deep_scanner/pkcs_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import base64
import logging
from abc import ABC
from typing import List, Optional

from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils import Util

logger = logging.getLogger(__name__)


class PkcsScanner(AbstractScanner, ABC):
    """Implements scanning of PKCS containers with a private key"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Tries to load a private key from the data with passwords from the bruteforce list

        Every item of ``self.config.bruteforce_list`` is probed in order; an empty
        item is passed as "no password" (None).

        Return:
            - list with one dummy candidate when a key was loaded and passed Util.check_pk
            - empty list when a key was loaded but did not pass Util.check_pk
            - None when no probe produced a key

        """
        raw = data_provider.data
        for pw_probe in self.config.bruteforce_list:
            try:
                password = pw_probe.encode() if pw_probe else None
                pkey = Util.load_pk(raw, password)
                if not pkey:
                    # nothing was loaded with this probe - try the next one
                    continue
                if not Util.check_pk(pkey):
                    logger.debug("False alarm %s", data_provider.info)
                    return []
                shown_password = repr(password)
                candidate = Candidate.get_dummy_candidate(
                    self.config,  #
                    data_provider.file_path,  #
                    data_provider.file_type,  #
                    f"{data_provider.info}|PKCS:{shown_password} is the password",  #
                    "PKCS")
                # the whole container is reported as the line and the password as the value
                encoded = base64.b64encode(raw).decode()
                candidate.line_data_list[0].line = encoded
                candidate.line_data_list[0].value = shown_password
                return [candidate]
            except Exception as pkcs_exc:
                # a failed probe is expected - continue with the next password
                logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
        return None
3 changes: 1 addition & 2 deletions credsweeper/file_handler/data_content_provider.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import logging
import string
import warnings
from functools import cached_property
from typing import List, Optional, Any, Generator, Callable, Tuple
Expand Down Expand Up @@ -385,7 +384,7 @@ def represent_as_encoded(self) -> Optional[bool]:
return False
try:
self.decoded = Util.decode_base64( #
self.text.translate(str.maketrans('', '', string.whitespace)), #
text=Util.PEM_CLEANING_PATTERN.sub(r'', self.text).replace('\\', ''), #
padding_safe=True, #
urlsafe_detect=True) #
except Exception as exc:
Expand Down
23 changes: 9 additions & 14 deletions credsweeper/filters/value_base64_key_check.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import contextlib
import string

from cryptography.hazmat.primitives import serialization

from credsweeper.config import Config
from credsweeper.credentials import LineData
Expand All @@ -13,6 +10,8 @@
class ValueBase64KeyCheck(Filter):
"""Check that candidate contains base64 encoded private key"""

EXTRA_TRANS_TABLE = str.maketrans('', '', "\",'\\")

def __init__(self, config: Config = None) -> None:
self.config = config

Expand All @@ -29,12 +28,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
"""

with contextlib.suppress(Exception):
text = line_data.value
# replace to space any escaped sequence except space from string.whitespace
for x in ["\\t", "\\n", "\\r", "\\v", "\\f"]:
text = text.replace(x, ' ')
for x in string.whitespace:
text = text.replace(x, '')
# remove backslash escaping sequences
text = Util.PEM_CLEANING_PATTERN.sub(r'', line_data.value)
# remove whitespaces
text = text.translate(Util.WHITESPACE_TRANS_TABLE)
# clean sequence concatenation case:
text = text.replace("'+'", '')
text = text.replace('"+"', '')
Expand All @@ -43,12 +40,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
text = text.replace('%2F', '/')
text = text.replace('%3D', '=')
# clean any other chars which should not appear
for x in ["'", '"', '\\', ',']:
text = text.replace(x, "")
text = text.translate(ValueBase64KeyCheck.EXTRA_TRANS_TABLE)
# only PEM standard encoding supported in regex pattern to cut off ending of the key
key = Util.decode_base64(text, padding_safe=True, urlsafe_detect=False)
private_key = serialization.load_der_private_key(key, password=None)
if 0 < private_key.key_size: # type: ignore
# access to size field check - some types have no size
private_key = Util.load_pk(key, password=None)
if Util.check_pk(private_key):
return False
return True
2 changes: 1 addition & 1 deletion credsweeper/secret/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
".aar",
".apk",
".bz2",
".class",
".gz",
".lzma",
".tar",
Expand All @@ -28,7 +29,6 @@
".avi",
".bin",
".bmp",
".class",
".css",
".dmg",
".ear",
Expand Down
Loading
Loading