-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathsuperfeedr.py
137 lines (107 loc) · 4.12 KB
/
superfeedr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""Superfeedr.
* https://superfeedr.com/users/snarfed
* http://documentation.superfeedr.com/subscribers.html
* http://documentation.superfeedr.com/schema.html
"""
import logging
from flask import request
from flask.views import View
from google.cloud.ndb.key import _MAX_KEYPART_BYTES
from google.cloud.ndb._datastore_types import _MAX_STRING_LENGTH
from oauth_dropins.webutil import appengine_info
from requests.auth import HTTPBasicAuth
import models
import util
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Superfeedr credentials, loaded once at import time via util.read(). subscribe()
# sends them as the HTTP basic auth password and username respectively.
SUPERFEEDR_TOKEN = util.read('superfeedr_token')
SUPERFEEDR_USERNAME = util.read('superfeedr_username')
# Endpoint that subscribe() POSTs subscription requests to.
PUSH_API_URL = 'https://push.superfeedr.com'
# Maximum number of outbound links handle_feed() keeps for a single feed item.
MAX_BLOGPOST_LINKS = 10
# NOTE(review): not referenced in the visible part of this file; presumably HTTP
# status codes (as strings) that callers elsewhere treat as transient - confirm.
TRANSIENT_ERROR_HTTP_CODES = ('500', '501', '502', '503', '429')
def subscribe(source):
    """Registers a Superfeedr subscription for a source's feed.

    The request asks Superfeedr to include already-published entries
    (``retrieve=true``) as JSON; the response body is handed straight to
    :func:`handle_feed` so those past posts are processed like notifications.

    Does nothing except log a message when running on the local dev server.

    http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub

    Args:
      source (Blogger, Tumblr, or WordPress)

    Raises:
      whatever ``raise_for_status()`` raises when Superfeedr returns an HTTP
      error response
    """
    if appengine_info.LOCAL_SERVER:
        logger.info('Running locally, not subscribing to Superfeedr')
        return

    topic = source.feed_url()
    # Superfeedr will deliver notifications to this per-source callback URL.
    callback = util.host_url(f'/{source.SHORT_NAME}/notify/{source.key_id()}')
    params = {
        'hub.mode': 'subscribe',
        'hub.topic': topic,
        'hub.callback': callback,
        # TODO
        # 'hub.secret': 'xxx',
        'format': 'json',
        'retrieve': 'true',
    }
    logger.info(f'Adding Superfeedr subscription: {params}')

    credentials = HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN)
    response = util.requests_post(PUSH_API_URL, data=params, auth=credentials)
    response.raise_for_status()
    handle_feed(response.json(), source)
def handle_feed(feed, source):
    """Handles a Superfeedr JSON feed.

    Creates a :class:`models.BlogPost` entity for each feed item and stores it
    with ``get_or_save()`` (which presumably also adds the propagate-blogpost
    task for new items - confirm in models).

    Items without a ``permalinkUrl`` or ``id`` are dropped. Links are extracted
    from each item's ``content`` (falling back to ``summary``), links to the
    source's own domains are discarded, and at most ``MAX_BLOGPOST_LINKS``
    links are kept. If the item URL is too long to be used as a datastore key,
    it is truncated and the links are recorded as ``failed`` instead of
    ``unsent``.

    * http://documentation.superfeedr.com/schema.html#json
    * http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications

    Args:
      feed (dict): parsed Superfeedr JSON feed; may be empty or None
      source (Blogger, Tumblr, or WordPress)
    """
    logger.info(f'Source: {source.label()} {source.key_id()}')
    logger.info(f'Raw feed: {feed}')

    if not feed:
        return

    if source.status != 'enabled':
        logger.info(f'Dropping because source is {source.status}')
        return
    elif 'webmention' not in source.features:
        logger.info("Dropping because source doesn't have webmention feature")
        return

    # `or []` also covers an explicit null "items" value.
    for item in feed.get('items') or []:
        url = item.get('permalinkUrl') or item.get('id')
        if not url:
            logger.error('Dropping feed item without permalinkUrl or id!')
            continue

        # Extract links from content, discarding self links.
        #
        # We don't use get_webmention_target[s]() here because they follow
        # redirects and fetch link contents, and this handler should be small
        # and fast and try to return a response to Superfeedr successfully.
        #
        # `or ''` also covers a "summary" key that is present but null.
        content = item.get('content') or item.get('summary') or ''
        links = [util.clean_url(util.unwrap_t_umblr_com(link))
                 for link in util.extract_links(content)
                 if util.domain_from_link(link) not in source.domains]

        unique = []
        for link in util.dedupe_urls(links):
            # Measure UTF-8 bytes rather than characters: non-ASCII URLs take
            # more than one byte per character, and the datastore limit for
            # indexed strings is enforced on the encoded size.
            if len(link.encode('utf-8')) <= _MAX_STRING_LENGTH:
                unique.append(link)
            else:
                logger.info(f'Giving up on link over {_MAX_STRING_LENGTH} chars! {link}')
            if len(unique) >= MAX_BLOGPOST_LINKS:
                logger.info(f'Stopping at {MAX_BLOGPOST_LINKS} links! Skipping the rest.')
                break

        logger.info(f'Found links: {unique}')

        # The key part limit is in bytes, so compare and truncate the encoded
        # URL, not the character string.
        url_bytes = url.encode('utf-8')
        if len(url_bytes) > _MAX_KEYPART_BYTES:
            logger.warning(
                f'Blog post URL is too long (over {_MAX_KEYPART_BYTES} bytes)! Giving up.')
            # errors='ignore' drops a multi-byte character cut in half by the
            # slice, so the result is valid text within the byte limit.
            truncated = url_bytes[:_MAX_KEYPART_BYTES].decode('utf-8', errors='ignore')
            bp = models.BlogPost(id=truncated, source=source.key,
                                 feed_item=item, failed=unique)
        else:
            bp = models.BlogPost(id=url, source=source.key, feed_item=item,
                                 unsent=unique)

        bp.get_or_save()
class Notify(View):
    """Flask view that receives a Superfeedr notification for one source.

    Abstract; subclasses must set the :attr:`SOURCE_CLS` attr.

    http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
    """
    # Class used to look up the notified source by id; None here, so concrete
    # subclasses have to override it.
    SOURCE_CLS = None

    def dispatch_request(self, id):
        """Looks up the source and passes the request's JSON body to handle_feed.

        Args:
          id: id of the source, looked up with ``SOURCE_CLS.get_by_id``

        Returns:
          str: always the empty string, whether or not the source was found
        """
        source = self.SOURCE_CLS.get_by_id(id)
        if not source:
            # Unknown source: nothing to process, but still respond normally.
            return ''

        handle_feed(request.json, source)
        return ''