-
Notifications
You must be signed in to change notification settings - Fork 2
/
ja4db-bots.py
102 lines (79 loc) · 3.08 KB
/
ja4db-bots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3
# Source: https://github.com/O-X-L/haproxy-ja4
# Copyright (C) 2024 Rath Pascal
# License: MIT
# script used to create a list of only bot-related fingerprints
# WARNING: there can be false-positives - use the BOT_SCORE_LIMIT to modify the output-db
# download raw db:
# curl -s https://ja4db.com/api/read/ -o ja4db.json
# pylint: disable=R0801
from re import sub as regex_replace
from json import loads as json_loads
from json import dumps as json_dumps
# When True, the script additionally dumps the per-fingerprint scores to
# 'ja4_bots_full.json' before filtering.
DEBUG = False

# Making sure most of the recorded clients are bots - else we might have too many false-positives.
# Increase the limit for fewer false-positives; lower it into the negative range to get more
# entries in the output-db. Fingerprints whose score is below this limit are dropped.
BOT_SCORE_LIMIT = 0

# NOTE: the search terms below are matched as substrings against the LOWERCASED client string,
# so every term has to be lowercase to be able to match at all.

# Scripting languages, HTTP libraries and command-line clients.
BOT_SCRIPT = [
    'golang', 'wget', 'curl', 'go-http-client', 'apache-httpclient', 'java', 'perl',
    'python', 'openssl', 'headless', 'cypress', 'mechanicalsoup', 'grpc-go', 'okhttp',
    'httpx', 'httpcore', 'aiohttp', 'httputil', 'urllib', 'guzzle', 'axios', 'ruby',
    'zend_http_client', 'wordpress', 'symfony', 'httpclient', 'cpp-httplib', 'ngrok',
    'malware', 'httprequest',
]

# Scanners and research tooling.
BOT_SCAN = [
    'scan', 'scanner', 'nessus', 'metasploit', 'zgrab', 'zmap', 'nmap', 'research', 'inspect',
]

# Crawlers, link-preview fetchers and search engines.
BOT_CRAWL = [
    'bot', 'mastodon', 'https://', 'http://', 'whatsapp', 'twitter', 'facebook', 'chatgpt',
    'telegram', 'crawler', 'colly', 'phpcrawl', 'nutch', 'spider', 'scrapy', 'elinks',
    # was 'imageVacuum' - the uppercase letter made it impossible to match a lowercased client
    'imagevacuum', 'apify', 'chrome-lighthouse', 'adsdefender', 'baidu', 'yandex', 'duckduckgo',
    'google', 'yahoo', 'bing', 'microsoftpreview',
]

# Everything else that is not expected to be a regular browser.
BOT_RANDOM = [
    'mozilla/4.', 'mozilla/3.', 'mozilla/2.', 'fidget-spinner-bot', 'test-bot', 'tiny-bot',
    'download', 'printer', 'router', 'camera', 'phillips hue', 'vpn', 'cisco', 'proxy', 'image',
    'office', 'fetcher', 'feed', 'photon', 'alittle client',
]

# Combined search list. Built as a NEW list so the per-category lists stay untouched
# (aliasing BOT_SCRIPT and extending it would append every other category to BOT_SCRIPT).
# Terms are lowercased defensively so a mixed-case entry cannot silently become dead.
BOT_SEARCH = [term.lower() for term in BOT_SCRIPT + BOT_SCAN + BOT_CRAWL + BOT_RANDOM]
CLIENT_KEYS = ['user_agent_string', 'application', 'notes', 'os']
bot_fp = {}
bot_fp_score = {}
def _get_client(_entry: dict) :
for k in CLIENT_KEYS:
if _entry[k] is not None:
return _entry[k].strip()
return None
# Load the raw database dump; it is iterated record by record below.
with open('ja4db.json', 'r', encoding='utf-8') as db_file:
    db = json_loads(db_file.read())

for entry in db:
    raw_fp = entry['ja4_fingerprint']
    if raw_fp is None:
        continue

    # Drop every character that is not a lowercase letter, digit or underscore.
    fp = regex_replace(r'[^a-z0-9_]', '', raw_fp)
    client = _get_client(entry)

    # Skip records without a client description and fingerprints that are not
    # exactly 36 characters long after the cleanup.
    if not client or len(fp) != 36:
        continue

    # Case-insensitive substring match against all known bot search terms.
    client_lc = client.lower()
    bot = any(term in client_lc for term in BOT_SEARCH)
    if bot:
        bot_fp[fp] = client

    # Every record votes on its fingerprint: +1 if bot-like, -1 otherwise.
    bot_fp_score[fp] = bot_fp_score.get(fp, 0) + (1 if bot else -1)

if DEBUG:
    with open('ja4_bots_full.json', 'w', encoding='utf-8') as f:
        f.write(json_dumps(bot_fp_score, indent=4))

# Remove fingerprints whose score is below the limit. Fingerprints that never
# matched a bot search term are not in bot_fp, hence the default for pop().
for fp, score in bot_fp_score.items():
    if score < BOT_SCORE_LIMIT:
        bot_fp.pop(fp, None)

with open('ja4_bots.json', 'w', encoding='utf-8') as f:
    f.write(json_dumps(bot_fp, indent=4))