Skip to content

Commit 62f73d8

Browse files
authored
Entry icon url (#91)
* add support for favicon from html
* add Entry.icon_url
* parse favicon for standalone entries
* fix favicon parsing from html
* use entry favicon as fallback
* more lenient parsing of article title
1 parent e7982c4 commit 62f73d8

File tree

5 files changed

+52
-8
lines changed

5 files changed

+52
-8
lines changed

feedi/models.py

+3
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,9 @@ class Entry(db.Model):
518518
header = sa.Column(
519519
sa.String, doc="an html line to put above the title, such as 'user reblogged'.")
520520

521+
icon_url = sa.Column(
522+
sa.String, doc="To be used for standalone entry avatars or as a fallback when the feed has no icon.")
523+
521524
__table_args__ = (sa.UniqueConstraint("feed_id", "remote_id"),
522525
sa.Index("entry_sort_ts", sort_date.desc()))
523526

feedi/parsers/html.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def fetch(url):
2323
soup = BeautifulSoup(response.content, 'lxml')
2424
metadata = scraping.all_meta(soup)
2525

26-
title = metadata.get('og:title', metadata.get('twitter:title'))
26+
# Fall back to the document's <title> text. getattr needs an explicit default:
# when the page has no <title> tag, soup.title is None and a two-argument
# getattr would raise AttributeError before the missing-metadata check below.
title = metadata.get('og:title', metadata.get('twitter:title', getattr(soup.title, 'text', None)))
2727

2828
if not title or (metadata.get('og:type') and metadata['og:type'] != 'article'):
2929
raise ValueError(f"{url} is missing article metadata")
@@ -35,6 +35,8 @@ def fetch(url):
3535

3636
username = metadata.get('author', '').split(',')[0]
3737

38+
icon_url = scraping.get_favicon(url, html=response.content)
39+
3840
entry = {
3941
'remote_id': url,
4042
'title': title,
@@ -45,7 +47,7 @@ def fetch(url):
4547
'media_url': metadata.get('og:image', metadata.get('twitter:image')),
4648
'target_url': url,
4749
'content_url': url,
48-
'raw_data': json.dumps(metadata)
49-
}
50+
'raw_data': json.dumps(metadata),
51+
'icon_url': icon_url}
5052

5153
return entry

feedi/scraping.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,26 @@
55
import urllib
66
import zipfile
77

8-
import favicon
8+
# use internal module to access unexported .tags function
9+
import favicon.favicon as favicon
910
from bs4 import BeautifulSoup
1011

11-
from feedi.requests import requests
12+
from feedi.requests import USER_AGENT, requests
1213

1314
logger = logging.getLogger(__name__)
1415

1516

16-
def get_favicon(url):
17+
def get_favicon(url, html=None):
1718
"Return the best favicon from the given url, or None."
1819
url_parts = urllib.parse.urlparse(url)
1920
url = f'{url_parts.scheme}://{url_parts.netloc}'
2021

2122
try:
22-
favicons = favicon.get(url)
23+
if not html:
24+
favicons = favicon.get(url, headers={'User-Agent': USER_AGENT}, timeout=2)
25+
else:
26+
favicons = sorted(favicon.tags(url, html),
27+
key=lambda i: i.width + i.height, reverse=True)
2328
except Exception:
2429
logger.exception("error fetching favicon: %s", url)
2530
return

feedi/templates/entry_avatar.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@
88
</a>
99
{% else %}
1010
{% set domain = entry.target_url | url_domain %}
11-
<img class="is-rounded feed-avatar" alt="{{ domain.0.upper() }}" title="{{ domain }}">
11+
<img class="is-rounded feed-avatar" {% if entry.icon_url %}src="{{ entry.icon_url }}"{% endif %} alt="{{ domain.0.upper() }}" title="{{ domain }}">
1212
{% endif %}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""entry icon_url
2+
3+
Revision ID: 29fa12e6895d
4+
Revises: aed00250a7ad
5+
Create Date: 2024-01-08 15:15:50.478699
6+
7+
"""
8+
from typing import Sequence, Union
9+
10+
from alembic import op
11+
import sqlalchemy as sa
12+
13+
14+
# revision identifiers, used by Alembic.
15+
revision: str = '29fa12e6895d'
16+
down_revision: Union[str, None] = 'aed00250a7ad'
17+
branch_labels: Union[str, Sequence[str], None] = None
18+
depends_on: Union[str, Sequence[str], None] = None
19+
20+
21+
def upgrade() -> None:
    """Add the nullable ``icon_url`` string column to the ``entries`` table."""
    icon_url_column = sa.Column('icon_url', sa.String(), nullable=True)
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('entries', schema=None) as entries_table:
        entries_table.add_column(icon_url_column)

    # ### end Alembic commands ###
27+
28+
29+
def downgrade() -> None:
    """Drop the ``icon_url`` column from the ``entries`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('entries', schema=None) as entries_table:
        entries_table.drop_column('icon_url')

    # ### end Alembic commands ###

0 commit comments

Comments
 (0)