This repository has been archived by the owner on Apr 5, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 104
/
Copy pathlog_clean.py
86 lines (74 loc) · 3.09 KB
/
log_clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import base64
import re
import sys
# A script to take an xmpppeek log of an Ecovacs app session with a Deebot N79 and strip out some of the nonsense,
# including any private identifiers.
# Log framing removed from every line, applied in this order: the leading
# "[YYYY-MM-DD " stamp, the fractional-seconds/offset tail of the timestamp,
# a trailing "]", the "(C2S)/(S2C) ip:port -> ip:port]" header (collapsed to
# just "C" or "S"), and the "{{{" / "}}}" markers.
NOISE_SUBSTITUTIONS = (
    (re.compile(r"\[\d{4}-\d{2}-\d{2} "), ''),
    (re.compile(r"\.\d{6}-\d{2}:\d{2}\] \["), ' '),
    (re.compile(r"]$"), ' '),
    (re.compile(r"\(([SC])2[SC]\) [.0-9]+:\d+ -> [.0-9]+:\d+\]"), r'\1'),
    (re.compile(r"\}\}\}"), ''),
    (re.compile(r"\{\{\{"), ''),
)

# Patterns that reveal the private identifiers the first time they appear.
SOURCE_IP_RE = re.compile(r'Client connect from ([.0-9]+)')
USER_RE = re.compile(r'(20\d{6}[0-9a-f]{13})@ecouser\.net/([0-9a-f]{8})')
ROBOT_RE = re.compile(r'(E\d{8,})@126\.ecorobot\.net/atom')
AUTH_RE = re.compile(
    r'<auth mechanism="PLAIN" xmlns="urn:ietf:params:xml:ns:xmpp-sasl">([-A-Za-z0-9+/=]+)</auth>')

# Stanza rewrites, applied in this order. They run AFTER the identifiers have
# been masked, so the addresses they match are the placeholder forms
# (ROBOTID@..., USERID@.../RESOURCEID), not the real ones.
STANZA_TRANSLATIONS = (
    # translate client commands
    (re.compile(
        r'<iq id="(\d+)" to="ROBOTID@126\.ecorobot\.net/atom" from="USERID@ecouser\.net/RESOURCEID" '
        r'type="set"><query xmlns="com:ctl">(<ctl .*>)</query></iq>'),
     r'id=\1 command=\2'),
    # translate server responses
    (re.compile(
        r'<iq to="USERID@ecouser\.net/RESOURCEID" type="result" id="(\d+)" '
        r'from="ROBOTID@126\.ecorobot\.net/atom"/>'),
     r'id=\1 result =empty'),
    (re.compile(
        r'<iq to="USERID@ecouser\.net/RESOURCEID" type="set" id="(\d+)" '
        r'from="ROBOTID@126\.ecorobot\.net/atom"><query xmlns="com:ctl">'
        r'<ctl id="(\d+)" ret="([^"]+)"/></query></iq>'),
     r'id=\1 id=\2 result=\3'),
    (re.compile(
        r'<iq to="USERID@ecouser\.net/RESOURCEID" type="set" id="(\d+)" '
        r'from="ROBOTID@126\.ecorobot\.net/atom"><query xmlns="com:ctl">(<ctl .*)</query></iq>'),
     r'id=\1 response=\2'),
)


class LogSanitizer:
    """Clean log lines one at a time, remembering private identifiers seen so far.

    Each identifier is captured from the first line that exposes it and is
    masked on that line and on every later line passed to ``clean``.
    """

    # (attribute holding the private value, placeholder written in its place),
    # in the order the replacements are applied.
    PLACEHOLDERS = (
        ('source_ip', 'SOURCEIP'),
        ('userid', 'USERID'),
        ('resourceid', 'RESOURCEID'),
        ('robotid', 'ROBOTID'),
        ('auth_glob', 'AUTHGLOB'),
    )

    def __init__(self):
        self.source_ip = None
        self.userid = None
        self.resourceid = None
        self.robotid = None
        self.auth_glob = None

    def clean(self, line):
        """Return ``line`` with framing stripped, identifiers masked and stanzas translated."""
        line = line.rstrip()
        for pattern, replacement in NOISE_SUBSTITUTIONS:
            line = pattern.sub(replacement, line)

        # find the private bits and remove them
        self.learn_identifiers(line)
        line = self.mask_identifiers(line)

        for pattern, replacement in STANZA_TRANSLATIONS:
            line = pattern.sub(replacement, line)
        return line

    def learn_identifiers(self, line):
        """Record any identifier not yet known that ``line`` reveals."""
        if self.source_ip is None:
            match = SOURCE_IP_RE.search(line)
            if match:
                self.source_ip = match.group(1)
        if self.userid is None:
            match = USER_RE.search(line)
            if match:
                self.userid = match.group(1)
                self.resourceid = match.group(2)
        if self.robotid is None:
            match = ROBOT_RE.search(line)
            if match:
                self.robotid = match.group(1)
        if self.auth_glob is None:
            match = AUTH_RE.search(line)
            if match:
                self.auth_glob = match.group(1)

    def mask_identifiers(self, line):
        """Replace every known identifier in ``line`` with its placeholder."""
        for attribute, placeholder in self.PLACEHOLDERS:
            value = getattr(self, attribute)
            if value is not None:
                # Literal replacement on purpose: the captured values contain
                # regex metacharacters ('.' in the IP, '+' in base64), so using
                # them as patterns would miss or over-match.
                line = line.replace(value, placeholder)
        return line


def decode_secret(auth_glob):
    """Return the secret embedded in a base64 SASL PLAIN auth blob.

    Raises ValueError if the blob is not valid base64/UTF-8 or does not have
    the expected three NUL-separated fields with a three-part password.
    """
    # per SASL plain auth: https://tools.ietf.org/html/rfc4616
    # (message = [authzid] NUL authcid NUL passwd)
    _authorization_id, _authentication_id, password = base64.b64decode(auth_glob).decode().split('\0')
    # no idea what the leading field is, and the resource appears to be the same
    _mystery, _resource, secret = password.split('/')
    return secret


def main():
    """Filter stdin to stdout, then print a sample config built from what was found."""
    sanitizer = LogSanitizer()
    for raw_line in sys.stdin:
        print(sanitizer.clean(raw_line))

    required = (
        ('user id', sanitizer.userid),
        ('resource id', sanitizer.resourceid),
        ('robot id', sanitizer.robotid),
        ('SASL PLAIN auth', sanitizer.auth_glob),
    )
    missing = [label for label, value in required if value is None]
    if missing:
        # Without these the sample config cannot be built; say so instead of
        # failing with a TypeError on None.
        sys.exit('log_clean: cannot print a sample config; not found in the log: ' + ', '.join(missing))

    secret = decode_secret(sanitizer.auth_glob)
    print("------------------")
    print("sample config:")
    print("user=" + sanitizer.userid)
    print("domain=ecouser.net")
    print("resource=" + sanitizer.resourceid)
    print("secret=" + secret)
    print("vacuum=" + sanitizer.robotid + "@126.ecorobot.net")


if __name__ == "__main__":
    main()