
Commit 5062043 (1 parent: 4e5fe0c)

Change the string append logic to use StringIO

File tree (4 files changed, +26 -23 lines):

  src/SimpleReplay/audit_logs_parsing.py (+6 -3)
  src/SimpleReplay/extract/extractor/extract_parser.py (+5 -5)
  src/SimpleReplay/extract/extractor/extractor.py (+10 -10)
  src/SimpleReplay/log_validation.py (+5 -5)


src/SimpleReplay/audit_logs_parsing.py (+6 -3)

@@ -27,6 +27,9 @@ def clear_and_set_text(self, new_value):
     def append_text(self, value):
         self.text.write(value)
 
+    def get_text_value(self):
+        return self.text.getvalue()
+
     def get_filename(self):
         base_name = (
             self.database_name
@@ -53,7 +56,7 @@ def __str__(self):
                 self.database_name,
                 self.pid,
                 self.xid,
-                self.text.getvalue(),
+                self.get_text_value(),
             )
         )
 
@@ -67,11 +70,11 @@ def __eq__(self, other):
             and self.database_name == other.database_name
             and self.pid == other.pid
             and self.xid == other.xid
-            and self.text.getvalue() == other.text.getvalue()
+            and self.get_text_value() == other.get_text_value()
         )
 
     def __hash__(self):
-        return hash((str(self.pid), str(self.xid), self.text.getvalue().strip("\n")))
+        return hash((str(self.pid), str(self.xid), self.get_text_value().strip("\n")))
 
 
 class ConnectionLog:
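
For orientation, the new get_text_value() accessor sits on a log-entry class whose text attribute is an io.StringIO buffer rather than a plain str. The constructor is not part of this diff, so the sketch below is an assumption about how such a buffer-backed class fits together; the class name LogTextBuffer is hypothetical, and only append_text, get_text_value, and clear_and_set_text correspond to methods shown above.

    import io

    class LogTextBuffer:
        # Hypothetical stand-in for the log-entry class touched in this commit.
        def __init__(self):
            # Assumed: the real class initializes self.text as a StringIO elsewhere.
            self.text = io.StringIO()

        def clear_and_set_text(self, new_value):
            # Replace the buffer contents instead of rebinding a plain str.
            self.text = io.StringIO()
            self.text.write(new_value)

        def append_text(self, value):
            # Appends go straight into the buffer; no intermediate str copies.
            self.text.write(value)

        def get_text_value(self):
            # Single accessor so callers never reach into .getvalue() directly.
            return self.text.getvalue()

Writing into a StringIO and materializing the string once via getvalue() avoids the worst-case quadratic copying of repeated str concatenation, which is the point of the commit; routing every read through get_text_value() (as the call-site changes below do) keeps that buffer detail in one place.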

src/SimpleReplay/extract/extractor/extract_parser.py (+5 -5)

@@ -64,11 +64,11 @@ def _parse_user_activity_log(file, logs, databases, start_time, end_time):
     if filename in logs:
         # Check if duplicate. This happens with JDBC connections.
         prev_query = logs[filename][-1]
-        if not is_duplicate(prev_query.text.getvalue(), user_activity_log.text.getvalue()):
+        if not is_duplicate(prev_query.get_text_value(), user_activity_log.get_text_value()):
             if fetch_pattern.search(
-                prev_query.text.getvalue()
-            ) and fetch_pattern.search(user_activity_log.text.getvalue()):
-                user_activity_log.clear_and_set_text(f"--{user_activity_log.text.getvalue()}")
+                prev_query.get_text_value()
+            ) and fetch_pattern.search(user_activity_log.get_text_value()):
+                user_activity_log.clear_and_set_text(f"--{user_activity_log.get_text_value()}")
             logs[filename].append(user_activity_log)
     else:
         logs[filename].append(user_activity_log)
@@ -107,7 +107,7 @@ def _parse_start_node_log(file, logs, databases, start_time, end_time):
     if filename in logs:
         # Check if duplicate. This happens with JDBC connections.
         prev_query = logs[filename][-1]
-        if not is_duplicate(prev_query.text.getvalue(), start_node_log.text.getvalue()):
+        if not is_duplicate(prev_query.get_text_value(), start_node_log.get_text_value()):
             logs[filename].append(start_node_log)
     else:
         logs[filename] = [start_node_log]

src/SimpleReplay/extract/extractor/extractor.py (+10 -10)

@@ -200,33 +200,33 @@ def get_sql_connections_replacements(self, last_connections, log_items):
                 )
                 continue
 
-            query.clear_and_set_text(remove_line_comments(query.text.getvalue()).strip())
+            query.clear_and_set_text(remove_line_comments(query.get_text_value()).strip())
 
-            if "copy " in query.text.getvalue().lower() and "from 's3:" in query.text.getvalue().lower():
+            if "copy " in query.get_text_value().lower() and "from 's3:" in query.get_text_value().lower():
                 bucket = re.search(
-                    r"from 's3:\/\/[^']*", query.text.getvalue(), re.IGNORECASE
+                    r"from 's3:\/\/[^']*", query.get_text_value(), re.IGNORECASE
                 ).group()[6:]
                 replacements.add(bucket)
                 query.clear_and_set_text(re.sub(
                     r"IAM_ROLE 'arn:aws:iam::\d+:role/\S+'",
                     f" IAM_ROLE ''",
-                    query.text.getvalue(),
+                    query.get_text_value(),
                     flags=re.IGNORECASE,
                 ))
-            if "unload" in query.text.getvalue().lower() and "to 's3:" in query.text.getvalue().lower():
+            if "unload" in query.get_text_value().lower() and "to 's3:" in query.get_text_value().lower():
                 query.clear_and_set_text(re.sub(
                     r"IAM_ROLE 'arn:aws:iam::\d+:role/\S+'",
                     f" IAM_ROLE ''",
-                    query.text.getvalue(),
+                    query.get_text_value(),
                     flags=re.IGNORECASE,
                 ))
 
-            query.clear_and_set_text(f"{query.text.getvalue().strip()}")
-            if not len(query.text.getvalue()) == 0:
-                if not query.text.getvalue().endswith(";"):
+            query.clear_and_set_text(f"{query.get_text_value().strip()}")
+            if not len(query.get_text_value()) == 0:
+                if not query.get_text_value().endswith(";"):
                     query.append_text(";")
 
-            query_info["text"] = query.text.getvalue()
+            query_info["text"] = query.get_text_value()
             sql_json["transactions"][query.xid]["queries"].append(query_info)
 
             if not hash((query.database_name, query.username, query.pid)) in last_connections:
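
The bucket extraction and IAM_ROLE scrubbing above are ordinary regex passes over the buffered query text. A standalone illustration follows; the sample COPY statement and variable names are invented, while the two regexes are the ones from the hunk.

    import re

    # Invented sample; the patterns below are copied from extractor.py.
    sample = "COPY sales FROM 's3://my-bucket/prefix/data' IAM_ROLE 'arn:aws:iam::123456789012:role/MyRole' CSV;"

    # Bucket/prefix extraction: match "from 's3://..." and drop the leading "FROM '" (6 chars).
    bucket = re.search(r"from 's3:\/\/[^']*", sample, re.IGNORECASE).group()[6:]
    print(bucket)    # s3://my-bucket/prefix/data

    # Credential scrubbing: blank out the IAM role ARN before the text is emitted.
    scrubbed = re.sub(
        r"IAM_ROLE 'arn:aws:iam::\d+:role/\S+'",
        " IAM_ROLE ''",
        sample,
        flags=re.IGNORECASE,
    )
    print(scrubbed)  # COPY sales FROM 's3://my-bucket/prefix/data'  IAM_ROLE '' CSV;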

src/SimpleReplay/log_validation.py (+5 -5)

@@ -44,18 +44,18 @@ def is_valid_log(log, start_time, end_time):
     if end_time and log.record_time > end_time:
         return False
 
-    if any(word in log.text.getvalue() for word in problem_keywords):
+    if any(word in log.get_text_value() for word in problem_keywords):
         return False
 
-    if any(word in log.text.getvalue() for word in potential_problem_keywords) and not any(word in log.text.getvalue() for word in not_problem_keywords):
+    if any(word in log.get_text_value() for word in potential_problem_keywords) and not any(word in log.get_text_value() for word in not_problem_keywords):
         return False
 
     # filter internal statement rewrites with parameter markers
-    if re.search('\$\d',log.text.getvalue()):
+    if re.search('\$\d',log.get_text_value()):
         # remove \$\d in string literals ( select '$1' ) or comment blocks ( */ $1 */ )
-        text_without_valid_parameter_markers = re.sub("""'.*\\$\\d.*'|\\/\\*.*\\$\\d.*\\*\\/""",'',log.text.getvalue(),flags=re.DOTALL)
+        text_without_valid_parameter_markers = re.sub("""'.*\\$\\d.*'|\\/\\*.*\\$\\d.*\\*\\/""",'',log.get_text_value(),flags=re.DOTALL)
         # remove \$\d in single line quotes ( -- $1 )
-        if '--' in log.text.getvalue():
+        if '--' in log.get_text_value():
             text_without_valid_parameter_markers = re.sub('^\s*--.*\$\d','',text_without_valid_parameter_markers)
         # if there are still parameter markers remaining after removing them from valid cases, the query text is invalid
         if re.search('\$\d',text_without_valid_parameter_markers):
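
To make the parameter-marker check above concrete, here is the same filtering run standalone on two invented log texts. The helper name has_bare_parameter_marker is hypothetical, but its regexes mirror is_valid_log: a $N that survives the substitutions marks the text as an internal statement rewrite, and is_valid_log rejects it.

    import re

    def has_bare_parameter_marker(text):
        # Mirrors is_valid_log: ignore markers inside '...' literals or /* ... */
        # blocks (and a leading -- line comment), then check what remains.
        if not re.search(r'\$\d', text):
            return False
        stripped = re.sub(r"'.*\$\d.*'|/\*.*\$\d.*\*/", '', text, flags=re.DOTALL)
        if '--' in text:
            stripped = re.sub(r'^\s*--.*\$\d', '', stripped)
        return bool(re.search(r'\$\d', stripped))

    print(has_bare_parameter_marker("select '$1' as literal;"))         # False: marker is quoted
    print(has_bare_parameter_marker("select * from t where id = $1;"))  # True: bare marker, log rejected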
