Skip to content

Commit 03088c7

Browse files
committed
refactor: simplify regex patterns to prevent catastrophic backtracking
- Use bounded quantifiers in regex patterns to prevent ReDoS
- Simplify leader name extraction logic for better maintainability
- Add explicit whitespace and length limits to prevent excessive backtracking
- Improve regex readability with pattern variables
1 parent ad3edb8 commit 03088c7

File tree

2 files changed

+44
-26
lines changed

2 files changed

+44
-26
lines changed

backend/apps/owasp/models/common.py

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from __future__ import annotations
44

5-
import itertools
65
import logging
76
import re
87
from urllib.parse import urlparse
@@ -198,17 +197,28 @@ def get_leaders(self):
198197

199198
leaders = []
200199
for line in content.split("\n"):
201-
leaders.extend(
202-
[
203-
name
204-
for name in itertools.chain(
205-
*re.findall(
206-
r"[-*]\s*\[\s*([^(]+?)\s*(?:\([^)]*\))?\]|\*\s*([\w\s]+)", line.strip()
207-
)
208-
)
209-
if name.strip()
210-
]
200+
stripped_line = line.strip()
201+
names = []
202+
203+
# Match [name] or [name (optional_info)] patterns
204+
# All repetitions are bounded ({0,3} for whitespace, {1,200} / {0,100} for text) to limit backtracking
205+
bracketed_pattern = (
206+
r"[-*]\s{0,3}\[\s{0,3}([^\]\(]{1,200})"
207+
r"(?:\s{0,3}\([^)]{0,100}\))?\s{0,3}\]"
211208
)
209+
names.extend(re.findall(bracketed_pattern, stripped_line))
210+
211+
# Match * name patterns
212+
names.extend(re.findall(r"\*\s{0,3}([\w\s]{1,200})", stripped_line))
213+
214+
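# Strip trailing parenthetical info from each name; the result is stored in a new
# variable instead of reassigning the loop variable (ruff PLW2901, redefined-loop-name)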
215+
cleaned_names = []
216+
for raw_name in names:
217+
if raw_name.strip():
218+
cleaned = re.sub(r"\s*\([^)]{0,100}\)\s*$", "", raw_name).strip()
219+
cleaned_names.append(cleaned)
220+
221+
leaders.extend(cleaned_names)
212222

213223
return leaders
214224

@@ -220,23 +230,31 @@ def get_leaders_emails(self):
220230

221231
leaders = {}
222232
for line in content.split("\n"):
223-
matches = re.findall(
224-
r"^[-*]\s*\[([^\]]+)\]\((?:mailto:)?([^)]+)(\)|([^[<\n]))", line.strip()
233+
stripped_line = line.strip()
234+
# Use bounded quantifiers to prevent backtracking on malformed input
235+
# Match [name](email) or [name](mailto:email) patterns
236+
email_pattern = (
237+
r"^[-*]\s{0,3}\[([^\]]{1,200})\]"
238+
r"\((?:mailto:)?([^)]{1,300})\)"
225239
)
240+
matches = re.findall(email_pattern, stripped_line)
226241

227-
for match in matches:
228-
if match[0] and match[1]: # Name with email
229-
leaders[match[0].strip()] = match[1].strip()
230-
elif match[2]: # Name without email
231-
leaders[match[2].strip()] = None
242+
for raw_name, raw_email in matches:
243+
name = raw_name.strip()
244+
email = raw_email.strip()
245+
if name and email:
246+
leaders[name] = email
247+
elif name:
248+
leaders[name] = None
232249

233250
return leaders
234251

235252
def get_metadata(self):
236253
"""Get entity metadata."""
237254
try:
255+
metadata_pattern = r"^---\s{0,3}((?:.|\n){0,10000}?)\s{0,3}---"
238256
yaml_content = re.search(
239-
r"^---\s*(.*?)\s*---",
257+
metadata_pattern,
240258
get_repository_file_content(self.index_md_url),
241259
re.DOTALL,
242260
)
@@ -282,7 +300,8 @@ def get_urls(self, domain=None):
282300

283301
urls = set()
284302

285-
markdown_links = re.findall(r"\[([^\]]*)\]\((https?://[^\s\)]+)\)", content)
303+
markdown_pattern = r"\[([^\]]*)\]\((https?://[^\s\)]+)\)"
304+
markdown_links = re.findall(markdown_pattern, content)
286305
for _text, url in markdown_links:
287306
cleaned_url = clean_url(url)
288307
if cleaned_url and validate_url(cleaned_url):
@@ -311,11 +330,10 @@ def parse_tags(self, tags) -> list[str]:
311330
if not tags:
312331
return []
313332

314-
return (
315-
[tag.strip(", ") for tag in tags.split("," if "," in tags else " ")]
316-
if isinstance(tags, str)
317-
else tags
318-
)
333+
if isinstance(tags, str):
334+
separator = "," if "," in tags else " "
335+
return [tag.strip(", ") for tag in tags.split(separator)]
336+
return tags
319337

320338
def sync_leaders(self, leaders_emails):
321339
"""Sync Leaders data.

frontend/__tests__/unit/pages/CreateModule.test.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,5 +121,5 @@ describe('CreateModulePage', () => {
121121
expect(mockCreateModule).toHaveBeenCalled()
122122
expect(mockPush).toHaveBeenCalledWith('/my/mentorship/programs/test-program')
123123
})
124-
})
124+
}, 10000)
125125
})

0 commit comments

Comments
 (0)