You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I have searched the PaddleOCR Docs and found no similar bug report.
I have searched the PaddleOCR Issues and found no similar bug report.
I have searched the PaddleOCR Discussions and found no similar bug report.
🐛 Bug (问题描述)
### ImageTextFilter
class ImageTextFilter(MessageFilter):
"""
Filters images containing specific text using PaddleOCR.
Args:
keywords (list[str]): List of keywords to look for in the extracted text.
regex (bool): Whether to treat keywords as regex patterns (default: False).
"""
def __init__(self, keywords: List[str], regex: bool = False) -> None:
self.regex = regex
if regex:
self.keyword_pattern = re.compile("|".join(keywords), re.IGNORECASE)
else:
self.keywords = [keyword.lower() for keyword in keywords]
# Initialize PaddleOCR (optimized for English, lightweight model)
self.ocr = PaddleOCR(use_angle_cls=True, lang="en", show_log=False)
async def _extract_text(self, message) -> str:
"""Extract text from an image using PaddleOCR."""
try:
# Download image from the message
image_data = await message.download_media(bytes)
if not image_data:
print("ImageTextFilter: Failed to download image.")
return ""
# Open the image
try:
image = Image.open(BytesIO(image_data))
except Exception as e:
print(f"ImageTextFilter: Error opening image - {e}")
return ""
# Perform OCR
try:
results = self.ocr.ocr(image, cls=True)
except Exception as e:
print(f"ImageTextFilter: PaddleOCR processing error - {e}")
return ""
# Extract detected text
extracted_text = " ".join([line[1][0] for result in results for line in result])
return extracted_text.strip()
except Exception as e:
print(f"ImageTextFilter: Unexpected error - {e}")
return ""
async def _process_message(
self, message: EventMessage, event_type: Type[EventLike]
) -> Tuple[Union[bool, None], EventMessage]:
"""
Processes the message and determines if it should be forwarded.
If an image contains the specified text, it bypasses all other filters.
"""
# Ignore messages without media (text-only messages are not processed)
if not message.media or not isinstance(message.media, types.MessageMediaPhoto):
return True, message # Let other filters handle text messages
# Extract text from the image
extracted_text = await self._extract_text(message)
print(f"ImageTextFilter: Extracted OCR Text -> {extracted_text}")
# Check if the extracted text matches the configured keywords
if self.regex:
if self.keyword_pattern.search(extracted_text):
print("ImageTextFilter: Match found (Regex) - Bypassing other filters!")
message.bypass_filters = True # Bypass all other filters
return None, message
else:
if any(keyword in extracted_text.lower() for keyword in self.keywords):
print("ImageTextFilter: Match found (Plain Text) - Bypassing other filters!")
message.bypass_filters = True # Bypass all other filters
return None, message
# No match found - Let other filters process the message
print("ImageTextFilter: No match found, passing to other filters.")
return True, message
### CompositeMessageFilter
class CompositeMessageFilter(MessageFilter):
"""Composite message filter that sequentially applies the filters
Args:
*arg (MessageFilter):
Message filters
"""
def __init__(self, *arg: MessageFilter) -> None:
self._filters = list(arg)
self._is_restricted_content_allowed = any(
f.restricted_content_allowed for f in self._filters
)
@property
def restricted_content_allowed(self) -> bool:
return self._is_restricted_content_allowed
async def process(
self, message: EventEntity, event_type: Type[EventLike]
) -> Tuple[bool, EventEntity]:
# **Check if ImageTextFilter has already marked this message to bypass other filters**
if hasattr(message, "bypass_filters") and message.bypass_filters:
print(f"CompositeMessageFilter: Bypassing all filters due to ImageTextFilter match")
return True, message # **Immediately forward the message**
# **Otherwise, apply other filters sequentially**
for f in self._filters:
proceed, message = await f.process(message, event_type)
if proceed is False:
return False, message
return True, message
async def _process_message(
self, message: EventMessage, event_type: Type[EventLike]
) -> Tuple[bool, EventMessage]:
raise NotImplementedError
def __repr__(self) -> str:
return f"{self.__class__.__name__}: {self._filters}"
Error
🏃♂️ Environment (运行环境)
Windows 10 pro
Paddle 2.6.2
Paddle Ocr 2.9.1
Python 3.11
venv
This discussion was converted from issue #14592 on January 25, 2025 11:11.
Heading
Bold
Italic
Quote
Code
Link
Numbered list
Unordered list
Task list
Attach files
Mention
Reference
Menu
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
🔎 Search before asking
🐛 Bug (问题描述)
### ImageTextFilter
class ImageTextFilter(MessageFilter):
"""
Filters images containing specific text using PaddleOCR.
### CompositeMessageFilter
class CompositeMessageFilter(MessageFilter):
"""Composite message filter that sequentially applies the filters
Error
🏃♂️ Environment (运行环境)
Windows 10 pro
Paddle 2.6.2
Paddle Ocr 2.9.1
Python 3.11
venv
🌰 Minimal Reproducible Example (最小可复现问题的Demo)
Beta Was this translation helpful? Give feedback.
All reactions