Skip to content

Commit

Permalink
Overhaul entry urls (#84)
Browse files Browse the repository at this point in the history
* rename entry_url to target_url

* add comments url

* rename fields in code

* remove obsolete helpers

* more usage corrections

* add logic to decide feed discoverability

* fix title

* fix linter

* remove header
  • Loading branch information
facundoolano authored Jan 2, 2024
1 parent f677806 commit 990d269
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 77 deletions.
4 changes: 2 additions & 2 deletions feedi/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ def entry_excerpt(entry):
if not entry.body:
return ''

if entry.has_content():
if entry.content_url and entry.title:
title = entry.title
elif entry.has_distinct_user():
elif entry.has_distinct_user:
title = entry.display_name or entry.username
else:
title = entry.feed.name
Expand Down
46 changes: 29 additions & 17 deletions feedi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime
import json
import urllib

import sqlalchemy as sa
import sqlalchemy.dialects.sqlite as sqlite
Expand Down Expand Up @@ -472,12 +473,19 @@ class Entry(db.Model):

body = sa.Column(sa.String, doc="The content to be displayed in the feed preview. HTML is supported. \
For article entries, it would be an excerpt of the full article content.")
entry_url = sa.Column(
sa.String, doc="The URL of this entry in the source. For link aggregators this would be the comments page.")

target_url = sa.Column(
sa.String, doc="The URL to open when accessing the entry at its source. \
NULL is interpreted as the entry cannot be open at the source.")

content_url = sa.Column(
sa.String, doc="The URL where the full content can be fetched or read. \
For link aggregators this would be the article redirect url. \
An empty content URL implies that the entry can't be read locally.")
sa.String, doc="The URL to fetch the full entry content from, for reading locally. \
NULL is interpreted as the entry cannot be read locally.")

# Link to the discussion page of the entry; rendered as the "Go to comments" action when set.
comments_url = sa.Column(
    sa.String,
    doc="The URL of the comments or discussion page of this entry, "
        "e.g. the comments page of a link aggregator submission. "
        "NULL is interpreted as the entry not having a distinct comments page.")

media_url = sa.Column(sa.String, doc="URL of a media attachement or preview.")

created = sa.Column(sa.TIMESTAMP, nullable=False, default=datetime.datetime.utcnow)
Expand All @@ -502,28 +510,32 @@ class Entry(db.Model):
def __repr__(self):
return f'<Entry {self.feed_id}/{self.remote_id}>'

def has_content(self):
    """
    Returns True if this entry has associated content (with a title and a remote url).
    This would be the case for blogs, news sites, etc., but not for mastodon toots or
    notification streams.
    """
    return self.title and self.content_url

@property
def is_external_link(self):
    """
    Return True if the target url seems to be external to the source, e.g. a link submitted to a link aggregator,
    or a preview url. This is handy to decide whether a new RSS feed may be discoverable from an entry. This will
    incorrectly return True if the rss feed is hosted at a different domain than the actual source site it exposes.
    """
    # Imported locally: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import urlparse

    if not self.target_url:
        # nothing to open at the source, so nothing to discover
        return False

    if not self.feed:
        # entry stubs without a feed (e.g. content previews) always point to an external site
        return True

    if not self.feed.url:
        # no feed url to compare against
        return False

    # Compare host against host. Comparing the netloc string with the whole
    # ParseResult is always unequal and would flag every entry as external.
    return urlparse(self.target_url).netloc != urlparse(self.feed.url).netloc

@property
def has_distinct_user(self):
    """
    Returns True if this entry has a recognizable author, particularly that
    it has an avatar and a name that can be displayed instead of a generic feed icon.
    """
    # Normalise to a real boolean so the property returns True/False as documented,
    # instead of leaking the avatar url / name string (or None) to callers.
    return bool(self.avatar_url and (self.display_name or self.username))

def has_comments_url(self):
"""
Returns True if this entry has a distinct comments/discussion endpoint,
separate from the content site. (E.g. link agreggators and mastodon toots).
"""
return self.entry_url and self.content_url != self.entry_url

@classmethod
def _filtered_query(cls, user_id, hide_seen=False, favorited=None,
feed_name=None, username=None, folder=None,
Expand Down
15 changes: 5 additions & 10 deletions feedi/parsers/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def fetch(self):
'body': item['synopsis'],
'media_url': item['image']['url'],
'content_url': content_url,
'entry_url': content_url,
'target_url': content_url,
'raw_data': json.dumps(item)
})

Expand Down Expand Up @@ -89,14 +89,9 @@ def fetch(self):
'remote_updated': datetime.datetime.fromisoformat(item['dateModified']),
'body': item['description'],
'media_url': item['image'],
'entry_url': item['url'],
# FIXME this website does very funky things with the html
# that can't be parsed in the reader.
# can't really set content url as null since that's currently interpreted
# as entry_url being a comments url.
# we should probably make comments_url more explicit and add a "skip reader"
# flag or something like that.
'content_url': item['url'],
'target_url': item['url'],
# this website does very funky things with the html that can't be parsed in the reader.
'content_url': None,
})

return entry_values
Expand Down Expand Up @@ -128,7 +123,7 @@ def fetch(self):
'remote_updated': date,
'body': article.find(class_='newsSummary').text,
'media_url': article.find('img')['src'],
'entry_url': content_url,
'target_url': content_url,
'content_url': content_url,
})

Expand Down
8 changes: 4 additions & 4 deletions feedi/parsers/mastodon.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def fetch_toots(server_url, access_token, newer_than=None, limit=None):
# result['content_url'] = toot['url']

# use server-local urls
entry['entry_url'] = status_url(server_url, toot)
entry['target_url'] = status_url(server_url, toot)

# for media we only support images for now and will take just the first one
media = [m['preview_url'] for m in toot['media_attachments'] if m['type'] == 'image']
Expand Down Expand Up @@ -142,11 +142,11 @@ def fetch_notifications(server_url, access_token, newer_than=None, limit=None):

# NOTE: we could attempt to render the source toot in the body as the mastodon web ui does,
# but I'm guessing that more often than not that would result in useless messages spamming the feed.
# leaving it empty and relying on the entry_url / title link to get to the source status
# leaving it empty and relying on the target_url / title link to get to the source status
if notification['type'] in ['follow', 'follow_request']:
entry['entry_url'] = user_url(server_url, notification)
entry['target_url'] = user_url(server_url, notification)
else:
entry['entry_url'] = status_url(server_url, notification['status'])
entry['target_url'] = status_url(server_url, notification['status'])

entries.append(entry)

Expand Down
25 changes: 15 additions & 10 deletions feedi/parsers/rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class RSSParser(CachingRequestsMixin):
"""

FIELDS = ['title', 'avatar_url', 'username', 'body', 'media_url', 'remote_id',
'remote_created', 'remote_updated', 'entry_url', 'content_url', 'header']
'remote_created', 'remote_updated', 'comments_url', 'target_url', 'content_url', 'header']

@staticmethod
def is_compatible(_feed_url):
Expand Down Expand Up @@ -170,8 +170,12 @@ def parse_title(self, entry):
def parse_content_url(self, entry):
return entry['link']

def parse_entry_url(self, entry):
return entry.get('comments') or self.parse_content_url(entry)
def parse_target_url(self, entry):
# assume that whatever is identified as content url is the safe default for target
return self.parse_content_url(entry)

def parse_comments_url(self, entry):
return entry.get('comments')

def parse_username(self, entry):
# TODO if missing try to get from meta?
Expand Down Expand Up @@ -377,7 +381,7 @@ def parse_content_url(self, entry):
soup = BeautifulSoup(entry['summary'], 'lxml')
return soup.find("a", string="[link]")['href']

def parse_entry_url(self, entry):
def parse_comments_url(self, entry):
# this particular feed puts the reddit comments page in the link
return entry['link']

Expand All @@ -396,7 +400,7 @@ def is_compatible(feed_url):
return 'lobste.rs' in feed_url

def parse_body(self, entry):
# skip link-only posts
# fill summary from source for link-only posts
if 'Comments' in entry['summary']:
url = self.parse_content_url(entry)
return self.fetch_meta(url, 'og:description', 'description')
Expand All @@ -413,7 +417,7 @@ def is_compatible(feed_url):
return 'news.ycombinator.com' in feed_url or 'hnrss.org' in feed_url

def parse_body(self, entry):
# skip link-only posts
# fill summary from source for link-only posts
if 'Article URL' in entry['summary']:
url = self.parse_content_url(entry)
return self.fetch_meta(url, 'og:description', 'description')
Expand Down Expand Up @@ -443,13 +447,14 @@ def parse_avatar_url(self, entry):
def parse_media_url(self, _entry):
return None

def parse_entry_url(self, _entry):
return None

def parse_content_url(self, _entry):
# don't open this in the local reader
return None

def parse_target_url(self, _entry):
# don't open github
return None


class GoodreadsFeedParser(RSSParser):
"""
Expand Down Expand Up @@ -480,7 +485,7 @@ def parse_title(self, _entry):
def parse_media_url(self, _entry):
return None

def parse_entry_url(self, entry):
def parse_target_url(self, entry):
return entry['link']

def parse_content_url(self, _entry):
Expand Down
12 changes: 6 additions & 6 deletions feedi/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,23 +433,22 @@ def entry_view(id):
# if ajax/htmx just load the empty UI and load content asynchronously
return flask.render_template("entry_content.html", entry=entry, content=None)
else:
dest_url = entry.content_url or entry.entry_url
if not dest_url:
if not entry.content_url and not entry.target_url:
# this view can't work if the entry has neither a content url nor a target url
return "Entry not readable", 400

# if it's a video site, just redirect. TODO add more sites
if 'youtube.com' in dest_url or 'vimeo.com' in dest_url:
return redirect_response(dest_url)
if 'youtube.com' in entry.content_url or 'vimeo.com' in entry.content_url:
return redirect_response(entry.target_url)

# if full browser load or explicit content request, fetch the article synchronously
try:
content = extract_article(dest_url, local_links=True)['content']
content = extract_article(entry.content_url, local_links=True)['content']
return flask.render_template("entry_content.html", entry=entry, content=content)
except Exception:
pass

return redirect_response(dest_url)
return redirect_response(entry.target_url)


def redirect_response(url):
Expand Down Expand Up @@ -481,6 +480,7 @@ def preview_content():

# put together entry stub for the template
entry = models.Entry(content_url=url,
target_url=url,
title=article['title'],
username=article['byline'])
return flask.render_template("content_preview.html", content=article['content'], entry=entry)
Expand Down
4 changes: 2 additions & 2 deletions feedi/templates/content_preview.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
</figure>

<div class="media-content">
<a href="{{ entry.content_url }}" target="_blank"><strong>{{ entry.title }}</strong></a>
<a href="{{ entry.target_url }}" target="_blank"><strong>{{ entry.title }}</strong></a>
<br/>
<small class="has-text-grey-light">{{ entry.username }} {%if entry.content_url %}· {{ entry.content_url | url_domain}}{% endif %}</small>
<small class="has-text-grey-light">{{ entry.username }} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>

</div>
<div class="media-right">
Expand Down
10 changes: 5 additions & 5 deletions feedi/templates/entry_commands.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
<a hx-put="{{ url_for('mastodon_favorite', id=entry.id )}}" class="dropdown-item"><span class="icon"><i class="fas fa-star"></i></span>Masto favorite</a>
<hr class="dropdown-divider">
{% endif %}
{% if entry.has_content() %}
<a href="{{ entry.content_url }}" target="_blank" class="dropdown-item"><span class="icon"><i class="fas fa-external-link-alt"></i></span> Go to source</a>
{% if entry.target_url %}
<a href="{{ entry.target_url }}" target="_blank" class="dropdown-item"><span class="icon"><i class="fas fa-external-link-alt"></i></span> Go to source</a>
{% endif %}
{% if entry.has_comments_url() %}
<a href="{{ entry.entry_url }}" target="_blank" class="dropdown-item"><span class="icon"><i class="far fa-comment-alt"></i></span> Go to comments</a>
{% if entry.comments_url %}
<a href="{{ entry.comments_url }}" target="_blank" class="dropdown-item"><span class="icon"><i class="far fa-comment-alt"></i></span> Go to comments</a>
{% endif %}
{% if entry.content_url %}
{% if entry.id and request.path != url_for('entry_view', id=entry.id) %}
<a hx-boost="true" href="{{ url_for('entry_view', id=entry.id) }}" class="dropdown-item"><span class="icon"><i class="fas fa-book-reader"></i></span> View in reader</a>
{% endif %}
<a class="dropdown-item" _="on click writeText('{{ entry.content_url }}') into the navigator's clipboard"><span class="icon"><i class="fas fa-link"></i></span> Copy URL</a>

{% if not entry.entry_url or entry.content_url != entry.entry_url %}
{% if entry.is_external_link %}
<a href="{{ url_for('feed_add', url=entry.content_url) }}" class="dropdown-item"><span class="icon"><i class="fas fa-rss"></i></span> Discover feed</a>
{% endif %}
{% if current_user.has_kindle %}
Expand Down
10 changes: 5 additions & 5 deletions feedi/templates/entry_content.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@
><i class="fas fa-star"></i></a>
{% endif %}

{% if entry.has_comments_url() %}
{% if entry.comments_url %}
<a class="is-white icon is-rounded level-item" title="Comment"
href="{{ entry.entry_url}}" target="_blank">
href="{{ entry.comments_url}}" target="_blank">
<i class="fas fa-comment-alt"></i>
</a>
{% else %}
<a class="is-white icon is-rounded level-item" title="Go to source"
href="{{ entry.content_url}}" target="_blank">
href="{{ entry.target_url}}" target="_blank">
<i class="fas fa-external-link-alt"></i>
</a>
{% endif %}
Expand Down Expand Up @@ -78,9 +78,9 @@
</p>
</figure>
<div class="media-content">
<a href="{{ entry.content_url }}" target="_blank"><strong>{{ entry.title }}</strong></a>
<a href="{{ entry.target_url }}" target="_blank"><strong>{{ entry.title }}</strong></a>
<br/>
<small class="has-text-grey-light"><span title="{{ entry.remote_updated.isoformat() }}">{{ entry.remote_created | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.content_url %}· {{ entry.content_url | url_domain}}{% endif %}</small>
<small class="has-text-grey-light"><span title="{{ entry.remote_updated.isoformat() }}">{{ entry.remote_created | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>
</div>
<div class="media-right">
<div class="level entry-quick-actions">
Expand Down
28 changes: 16 additions & 12 deletions feedi/templates/entry_header.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
</div>
<p>
<b class="entry-title">
{% if entry.has_content() %}
<a _="on click[shiftKey and not metaKey] or keydown[key is 'Enter' and not metaKey and shiftKey ] from the closest .feed-entry go to url '{{ url_for("entry_view", id=entry.id) }}' then halt
{% if entry.title %}
<a _="{% if entry.content_url %}on click[shiftKey and not metaKey] or keydown[key is 'Enter' and not metaKey and shiftKey ] from the closest .feed-entry go to url '{{ url_for("entry_view", id=entry.id) }}' then halt
then on click[shiftKey and metaKey] or keydown[key is 'Enter' and metaKey and shiftKey ] from the closest .feed-entry go to url '{{ url_for("entry_view", id=entry.id) }}' in new window then halt
then on keydown[key is 'Enter' and not metaKey and not shiftKey] from the closest .feed-entry go to url '{{ entry.content_url }}' in new window then halt"
href="{{ entry.content_url }}" target="_blank">{{ entry.title | safe }}</a>
{% elif entry.has_distinct_user() %}
{% endif %}
{% if entry.target_url %}
then on keydown[key is 'Enter' and not metaKey and not shiftKey] from the closest .feed-entry go to url '{{ entry.target_url }}' in new window then halt
{% endif %}"
{% if entry.target_url %}href="{{ entry.target_url }}" target="_blank"{% endif %}
>{{ entry.title | safe }}</a>
{% elif entry.has_distinct_user %}
<a href="{{ url_for('entry_list', username=entry.username ) }}">
{{ entry.display_name or entry.username }}
</a>
Expand All @@ -22,20 +26,20 @@
</a>
{% endif %}
</b>
<span class="entry-excerpt-metadata is-hidden {% if entry.has_content() or entry.has_comments_url() %}is-clickable{% endif %}"
{% if entry.has_content() %}
<span class="entry-excerpt-metadata is-hidden {% if entry.content_url or entry.target_url %}is-clickable{% endif %}"
{% if entry.content_url %}
hx-get="{{ url_for('entry_view', id=entry.id) }}"
hx-trigger="click"
hx-target="body"
hx-push-url="true"
{% elif entry.has_comments_url() %}
_="on click go to url {{ entry.entry_url }} in new window"
{% elif entry.target_url %}
_="on click go to url {{ entry.target_url }} in new window"
{% endif %}>
<small>{{ entry | entry_excerpt }}</small>
</span>
<span class="entry-metadata">
<br/>
<small class="has-text-grey-light"><span title="{{ entry.remote_updated.isoformat() }}">{{ entry.remote_created | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.content_url %}· {{ entry.content_url | url_domain}}{% endif %}</small>
<small class="has-text-grey-light"><span title="{{ entry.remote_updated.isoformat() }}">{{ entry.remote_created | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>
</span>
</p>
</div>
Expand Down Expand Up @@ -74,9 +78,9 @@
_="on click toggle .toggled
then on keyup[key is 'f'] from the closest .feed-entry trigger click on me"
><i class="fas fa-star"></i></a>
{% if entry.has_comments_url() %}
{% if entry.comments_url %}
<a tabindex="-1" class="level-item icon hover-icon is-white is-rounded" title="Comment"
href="{{ entry.entry_url}}" target="_blank"
href="{{ entry.comments_url}}" target="_blank"
><i class="fas fa-comment-alt"></i></a>
{% endif %}
<div class="dropdown is-right"
Expand Down
Loading

0 comments on commit 990d269

Please sign in to comment.