Rename entry remote timestamp columns (#85)
* rename entry_url to target_url

* add comments url

* rename fields in code

* remove obsolete helpers

* more usage corrections

* add logic to decide feed discoverability

* fix title

* fix linter

* remove header

* rename entry sort fields

* rename fields in code usage

* lint
facundoolano authored Jan 2, 2024
1 parent 990d269 commit 86d9d09
Showing 10 changed files with 107 additions and 33 deletions.
21 changes: 13 additions & 8 deletions feedi/models.py
@@ -332,7 +332,7 @@ def frequency_rank_query(cls):

return db.select(cls.id, rank_func.label('rank'))\
.join(Entry)\
.filter(Entry.remote_updated >= retention_date)\
.filter(Entry.sort_date >= retention_date)\
.group_by(cls)\
.subquery()

@@ -404,7 +404,7 @@ def to_valuelist(self):

def _api_args(self):
from flask import current_app as app
latest_entry = self.entries.order_by(Entry.remote_updated.desc()).first()
latest_entry = self.entries.order_by(Entry.sort_date.desc()).first()

args = dict(server_url=self.account.app.api_base_url,
access_token=self.account.access_token)
@@ -491,8 +491,13 @@ class Entry(db.Model):
created = sa.Column(sa.TIMESTAMP, nullable=False, default=datetime.datetime.utcnow)
updated = sa.Column(sa.TIMESTAMP, nullable=False,
default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow)
remote_created = sa.Column(sa.TIMESTAMP, nullable=False)
remote_updated = sa.Column(sa.TIMESTAMP, nullable=False)
display_date = sa.Column(sa.TIMESTAMP, nullable=False,
doc="The date that will displayed as the publication date of the entry. \
Typically the publication or creation date informed at the source.")

sort_date = sa.Column(sa.TIMESTAMP, nullable=False,
doc="The date that determines an entry's chronological order. \
Typically the updated date informed at the source.")

viewed = sa.Column(sa.TIMESTAMP, index=True)
favorited = sa.Column(sa.TIMESTAMP, index=True)
@@ -505,7 +510,7 @@ class Entry(db.Model):
sa.String, doc="an html line to put above the title, such as 'user reblogged'.")

__table_args__ = (sa.UniqueConstraint("feed_id", "remote_id"),
sa.Index("entry_updated_ts", remote_updated.desc()))
sa.Index("entry_sort_ts", sort_date.desc()))

def __repr__(self):
return f'<Entry {self.feed_id}/{self.remote_id}>'
@@ -595,7 +600,7 @@ def sorted_by(cls, user_id, ordering, start_at, **filters):

if ordering == cls.ORDER_RECENCY:
# reverse chronological order
return query.order_by(cls.remote_updated.desc())
return query.order_by(cls.sort_date.desc())

elif ordering == cls.ORDER_FREQUENCY:
# Order entries by least frequent feeds first then reverse-chronologically for entries in the same
@@ -612,8 +617,8 @@ def sorted_by(cls, user_id, ordering, start_at, **filters):
return query.join(Feed)\
.join(subquery, Feed.id == subquery.c.id, isouter=True)\
.order_by(
(cls.remote_updated >= recency_bucket_date).desc(),
(cls.sort_date >= recency_bucket_date).desc(),
subquery.c.rank,
cls.remote_updated.desc())
cls.sort_date.desc())
else:
raise ValueError('unknown ordering %s' % ordering)
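
The new doc strings spell out the split introduced here: display_date is the date rendered for an entry, while sort_date drives its chronological position in the timeline. A minimal usage sketch of the renamed columns (not from the repository; it assumes `Entry` and the Flask-SQLAlchemy `db` handle can be imported from feedi.models as the diff suggests):

```python
# Minimal sketch, assuming feedi.models exposes `db` and `Entry` as used above.
from feedi.models import Entry, db

# sort_date determines timeline order; display_date is what gets shown to the user
latest = db.session.execute(
    db.select(Entry).order_by(Entry.sort_date.desc()).limit(10)
).scalars()

for entry in latest:
    print(entry.display_date.isoformat(), entry.title)
```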
16 changes: 8 additions & 8 deletions feedi/parsers/custom.py
@@ -52,8 +52,8 @@ def fetch(self):
'remote_id': item['id'],
'title': item['name'],
'username': item['additions'].split(';')[0].split('Por ')[-1],
'remote_created': created,
'remote_updated': created,
'display_date': created,
'sort_date': created,
'body': item['synopsis'],
'media_url': item['image']['url'],
'content_url': content_url,
@@ -85,8 +85,8 @@ def fetch(self):
'remote_id': item['url'],
'title': item['headline'],
'username': item['editor'],
'remote_created': datetime.datetime.fromisoformat(item['dateCreated']),
'remote_updated': datetime.datetime.fromisoformat(item['dateModified']),
'display_date': datetime.datetime.fromisoformat(item['dateCreated']),
'sort_date': datetime.datetime.fromisoformat(item['dateModified']),
'body': item['description'],
'media_url': item['image'],
'target_url': item['url'],
@@ -119,8 +119,8 @@ def fetch(self):
'remote_id': content_url.split('/')[-1],
'title': article.find(class_='newsTitle').text,
'username': author,
'remote_created': date,
'remote_updated': date,
'display_date': date,
'sort_date': date,
'body': article.find(class_='newsSummary').text,
'media_url': article.find('img')['src'],
'target_url': content_url,
@@ -156,8 +156,8 @@ def fetch(self):
'remote_id': article['_id'],
'title': article['title'],
'username': article['byline'],
'remote_created': pub_date,
'remote_updated': pub_date,
'display_date': pub_date,
'sort_date': pub_date,
'body': self.fetch_meta(article_url, 'og:description', 'description'),
'media_url': self.fetch_meta(article_url, 'og:image', 'twitter:image'),
'content_url': article_url,
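
A pattern visible across these custom parsers: when a source only reports one timestamp, the same value is used for both fields. A minimal sketch of that fallback (an illustration, not repository code; the helper name is made up):

```python
def entry_dates(created, modified=None):
    """Collapse to a single timestamp when the source only reports one date,
    mirroring what the custom parsers above do for their entry dicts."""
    return {
        'display_date': created,           # what the reader shows
        'sort_date': modified or created,  # what the timeline sorts by
    }
```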
8 changes: 4 additions & 4 deletions feedi/parsers/mastodon.py
@@ -62,7 +62,7 @@ def fetch_toots(server_url, access_token, newer_than=None, limit=None):
# the updated date is taken from the base toot, so if it's a reblog it will be the time
# it was reblogged. This will be used for sorting entries in the timeline.
# in that case the created date, the one displayed, will be taken from the reblogged toot
entry['remote_updated'] = toot['edited_at'] or toot['created_at']
entry['sort_date'] = toot['edited_at'] or toot['created_at']

if toot.get('in_reply_to_id') and not toot.get('reblog'):
# we don't want to show replies as standalone toots in the timeline, unless they are reblogs
@@ -78,7 +78,7 @@ def fetch_toots(server_url, access_token, newer_than=None, limit=None):
entry['display_name'] = display_name(toot)
entry['body'] = toot['content']
entry['remote_id'] = toot['id']
entry['remote_created'] = toot['created_at']
entry['display_date'] = toot['created_at']

# we don't want toots to be expanded on the local reader, so we exclude content_url
# this could change if we started to add stuff like displaying (or adding) comments
@@ -132,8 +132,8 @@ def fetch_notifications(server_url, access_token, newer_than=None, limit=None):

entry = {
'remote_id': notification['id'],
'remote_updated': notification['created_at'],
'remote_created': notification['created_at'],
'sort_date': notification['created_at'],
'display_date': notification['created_at'],
'raw_data': json.dumps(notification, default=str),
'avatar_url': notification['account']['avatar'],
'username': notification['account']['acct'],
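
The comment in fetch_toots describes the one case where the two dates intentionally diverge: for a reblog, the timeline position comes from the boost/edit time of the outer status, while the displayed date is the original toot's creation time. A simplified sketch of that mapping (an assumption based on the comment above; the real parser builds the full entry dict):

```python
def toot_dates(toot):
    # sort_date comes from the base (outer) status, so a reblog is ordered by
    # the time it was boosted or edited rather than by the original post time
    sort_date = toot['edited_at'] or toot['created_at']

    # the displayed date is taken from the reblogged toot when there is one
    source = toot.get('reblog') or toot
    display_date = source['created_at']
    return {'display_date': display_date, 'sort_date': sort_date}
```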
6 changes: 3 additions & 3 deletions feedi/parsers/rss.py
@@ -56,7 +56,7 @@ class RSSParser(CachingRequestsMixin):
"""

FIELDS = ['title', 'avatar_url', 'username', 'body', 'media_url', 'remote_id',
'remote_created', 'remote_updated', 'comments_url', 'target_url', 'content_url', 'header']
'display_date', 'sort_date', 'comments_url', 'target_url', 'content_url', 'header']

@staticmethod
def is_compatible(_feed_url):
@@ -238,13 +238,13 @@ def parse_media_url(self, entry):
def parse_remote_id(self, entry):
return entry.get('id', entry['link'])

def parse_remote_created(self, entry):
def parse_display_date(self, entry):
dt = to_datetime(entry.get('published_parsed', entry.get('updated_parsed')))
if dt > datetime.datetime.utcnow():
raise ValueError(f"publication date is in the future {dt}")
return dt

def parse_remote_updated(self, entry):
def parse_sort_date(self, entry):
dt = to_datetime(entry['updated_parsed'])
if dt > datetime.datetime.utcnow():
raise ValueError("publication date is in the future")
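
Renaming the entries in FIELDS goes hand in hand with renaming parse_remote_created/parse_remote_updated to parse_display_date/parse_sort_date, which suggests the base parser resolves one parse_<field> method per declared field name. A hedged sketch of that dispatch (an assumption about the base class, not the repository's actual code):

```python
def parse_fields(parser, raw_entry, fields):
    # look up a parse_<field> method for each declared field name;
    # fields without a matching method are simply skipped in this sketch
    values = {}
    for field in fields:
        parse = getattr(parser, f'parse_{field}', None)
        if callable(parse):
            values[field] = parse(raw_entry)
    return values
```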
2 changes: 1 addition & 1 deletion feedi/routes.py
@@ -260,7 +260,7 @@ def mastodon_boost(id):
def feed_list():
subquery = models.Feed.frequency_rank_query()
feeds = db.session.execute(db.select(models.Feed, subquery.c.rank, sa.func.count(1),
sa.func.max(models.Entry.remote_updated).label('updated'))
sa.func.max(models.Entry.sort_date).label('updated'))
.filter(models.Feed.user_id == current_user.id)
.join(subquery, models.Feed.id == subquery.c.id, isouter=True)
.join(models.Entry, models.Feed.id == models.Entry.feed_id, isouter=True)
14 changes: 7 additions & 7 deletions feedi/tasks.py
@@ -101,7 +101,7 @@ def delete_old_entries():
# filter feeds that have old entries
feeds_q = db.select(models.Feed.id, models.Feed.name)\
.join(models.Feed.entries)\
.filter(models.Entry.remote_updated < older_than_date,
.filter(models.Entry.sort_date < older_than_date,
models.Entry.favorited.is_(None),
models.Entry.pinned.is_(None)
)\
@@ -110,14 +110,14 @@ def delete_old_entries():

for (feed_id, feed_name) in db.session.execute(feeds_q).all():
# of the ones that have old entries, get the date of the nth entry (overall, not just within the old ones)
min_remote_updated = db.session.scalar(
db.select(models.Entry.remote_updated)
min_sort_date = db.session.scalar(
db.select(models.Entry.sort_date)
.filter_by(feed_id=feed_id)
.order_by(models.Entry.remote_updated.desc())
.order_by(models.Entry.sort_date.desc())
.limit(1)
.offset(minimum - 1))

if not min_remote_updated:
if not min_sort_date:
continue

# delete all entries from that feed that are older than RSS_SKIP_OLDER_THAN_DAYS
@@ -127,8 +127,8 @@ def delete_old_entries():
models.Entry.favorited.is_(None),
models.Entry.pinned.is_(None),
models.Entry.feed_id == feed_id,
models.Entry.remote_updated < min_remote_updated,
models.Entry.remote_updated < older_than_date)
models.Entry.sort_date < min_sort_date,
models.Entry.sort_date < older_than_date)

res = db.session.execute(q)
db.session.commit()
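
delete_old_entries combines two conditions: an entry is only deleted when it is older than the cutoff date and older than the feed's minimum-th most recent sort_date, so every feed keeps at least that many entries (favorited and pinned entries are additionally excluded in the real query). A pure-Python restatement of that rule (a sketch, not the repository's code):

```python
def entries_to_delete(sort_dates, minimum, older_than_date):
    # keep at least `minimum` entries per feed: nothing is deleted unless the
    # feed already has that many, and only entries older than both thresholds go
    newest_first = sorted(sort_dates, reverse=True)
    if len(newest_first) < minimum:
        return []
    min_sort_date = newest_first[minimum - 1]
    return [d for d in sort_dates if d < min_sort_date and d < older_than_date]
```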
2 changes: 1 addition & 1 deletion feedi/templates/entry_content.html
@@ -80,7 +80,7 @@
<div class="media-content">
<a href="{{ entry.target_url }}" target="_blank"><strong>{{ entry.title }}</strong></a>
<br/>
<small class="has-text-grey-light"><span title="{{ entry.remote_updated.isoformat() }}">{{ entry.remote_created | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>
<small class="has-text-grey-light"><span title="{{ entry.display_date.isoformat() }}">{{ entry.display_date | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>
</div>
<div class="media-right">
<div class="level entry-quick-actions">
2 changes: 1 addition & 1 deletion feedi/templates/entry_header.html
@@ -39,7 +39,7 @@
</span>
<span class="entry-metadata">
<br/>
<small class="has-text-grey-light"><span title="{{ entry.remote_updated.isoformat() }}">{{ entry.remote_created | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>
<small class="has-text-grey-light"><span title="{{ entry.display_date.isoformat() }}">{{ entry.display_date | humanize }}</span> {% if entry.username %}· {{ entry.username}}{%endif%} {%if entry.target_url %}· {{ entry.target_url | url_domain}}{% endif %}</small>
</span>
</p>
</div>
33 changes: 33 additions & 0 deletions migrations/versions/4ab856aa1a08_recreate_date_sort_index.py
@@ -0,0 +1,33 @@
"""recreate date sort index
Revision ID: 4ab856aa1a08
Revises: de5bb7dd36db
Create Date: 2024-01-02 14:09:58.205490
"""
from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = '4ab856aa1a08'
down_revision: Union[str, None] = 'de5bb7dd36db'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('entries', schema=None) as batch_op:
batch_op.create_index('entry_sort_ts', [sa.text('sort_date DESC')], unique=False)

# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('entries', schema=None) as batch_op:
batch_op.drop_index('entry_sort_ts')

# ### end Alembic commands ###
36 changes: 36 additions & 0 deletions migrations/versions/de5bb7dd36db_rename_entry_date_columns.py
@@ -0,0 +1,36 @@
"""rename entry date columns
Revision ID: de5bb7dd36db
Revises: a7680d17c088
Create Date: 2024-01-02 13:52:37.962221
"""
from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = 'de5bb7dd36db'
down_revision: Union[str, None] = 'a7680d17c088'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('entries', schema=None) as batch_op:
batch_op.drop_index('entry_updated_ts')
batch_op.alter_column('remote_created', new_column_name='display_date')
batch_op.alter_column('remote_updated', new_column_name='sort_date')

# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('entries', schema=None) as batch_op:
batch_op.alter_column('display_date', new_column_name='remote_created')
batch_op.alter_column('sort_date', new_column_name='remote_updated')

# ### end Alembic commands ###
