-
Notifications
You must be signed in to change notification settings - Fork 1
/
regulated_reader.py
63 lines (53 loc) · 1.75 KB
/
regulated_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from bs4 import BeautifulSoup
import requests, PyRSS2Gen
import json, time, datetime, sys, os
def _print_usage():
sys.stderr.write('Usage: \n')
sys.stderr.write(sys.argv[0]+" init [URL] - set up the RSS feed\n")
sys.stderr.write(sys.argv[0]+" - Generate an RSS feed with the next item\n")
sys.exit(1)
class RegulatedReader():
    """Base class for an RSS feed that grows by one scraped page per run.

    Command-line workflow (driven by :meth:`build`):

    * ``prog init URL`` seeds the feed with the page at ``URL``.
    * ``prog`` (no arguments) loads the stored feed, asks :meth:`getnext`
      for the page that follows the last stored link, and appends it.

    State is kept in ``<name>.json``; the published feed is ``<name>.rss``.

    This class does not define the following members; it reads or calls
    them, so a subclass (or the instance) has to supply them:

    * ``name`` -- path prefix used for the ``.json`` and ``.rss`` files.
    * ``title``, ``link``, ``description`` -- channel-level RSS fields.
    * ``getinfo(soup)`` -- returns a JSON-serialisable dict with at least
      ``'title'`` and ``'description'`` for the parsed page.
    * ``getnext(soup)`` -- returns the URL of the page after the parsed one.
    """

    # Seconds to wait on the remote server. Without a timeout ``requests``
    # blocks indefinitely, which would hang an unattended (e.g. cron) run.
    request_timeout = 30

    # Parser name handed to BeautifulSoup. Naming one explicitly keeps the
    # parse tree identical across machines and avoids bs4's "no parser was
    # explicitly specified" warning. Override in a subclass (e.g. 'lxml')
    # if the scraping code depends on another parser.
    soup_parser = 'html.parser'

    # Encoding used both for the .rss file and for its XML declaration, so
    # the bytes on disk always match what the declaration promises.
    _RSS_ENCODING = 'utf-8'

    @staticmethod
    def _utc_datetime(timestamp=None):
        """Return an aware UTC datetime for *timestamp* (epoch seconds).

        With ``timestamp=None`` the current time is returned. PyRSS2Gen
        prints the datetime fields verbatim followed by "GMT", so the values
        handed to it must already be expressed in UTC.
        """
        utc = datetime.timezone.utc
        if timestamp is None:
            return datetime.datetime.now(utc)
        return datetime.datetime.fromtimestamp(timestamp, utc)

    def add_item(self, feed, next_url):
        """Scrape *next_url*, append it to *feed*, and rewrite both files.

        Args:
            feed: list of item dicts (mutated in place by the append).
            next_url: URL of the page to add as the newest item.

        Raises:
            requests.RequestException: if the page cannot be fetched.
        """
        feed_item = self.getinfo(self.get_soup(next_url))
        feed_item['link'] = next_url
        # Stored as epoch seconds so the item stays JSON-serialisable.
        feed_item['pubDate'] = time.time()
        feed.append(feed_item)

        # Build the tree before opening the file so a failure while composing
        # does not leave a truncated feed behind.
        rss_tree = self.compose_pyrss2_tree(feed)
        with open(self.name + '.rss', 'w', encoding=self._RSS_ENCODING) as f:
            rss_tree.write_xml(f, self._RSS_ENCODING)
        with open(self.name + '.json', 'w') as f:
            json.dump(feed, f)

    def build(self):
        """Command-line entry point: initialise the feed or add the next item.

        Reads ``sys.argv`` directly. Exits with status 2 when ``init`` is
        requested for a feed that already exists, or when the next item is
        requested for a feed that was never initialised. Any other argument
        shape is handed to ``_print_usage()``, which exits the process.
        """
        args = sys.argv
        state_path = self.name + '.json'

        if len(args) == 1:  # Build the RSS feed
            try:
                with open(state_path) as f:
                    feed = json.load(f)
            except FileNotFoundError:
                sys.stderr.write(
                    'That feed has not been initialized yet ('
                    + state_path + ' is missing). Run "'
                    + args[0] + ' init [URL]" first.\n'
                )
                sys.exit(2)
            next_url = self.getnext(self.get_soup(feed[-1]['link']))
        elif len(args) == 3 and args[1] == 'init':
            if os.path.exists(state_path):
                sys.stderr.write('That feed has already been initialized. Please delete '+self.name+'.json if you wish to start over.\n')
                sys.exit(2)
            feed = []
            next_url = args[2]
        else:
            # Terminates the process, so the code below never runs with
            # ``feed`` / ``next_url`` unbound.
            _print_usage()

        self.add_item(feed, next_url)

    def get_soup(self, url):
        """Download *url* and return it parsed as a BeautifulSoup tree.

        Raises:
            requests.HTTPError: for 4xx/5xx responses.
            requests.Timeout: if the server takes longer than
                ``request_timeout`` seconds.
        """
        r = requests.get(url, timeout=self.request_timeout)
        r.raise_for_status()
        return BeautifulSoup(r.text, self.soup_parser)

    def compose_pyrss2_tree(self, feed):
        """Return a ``PyRSS2Gen.RSS2`` document for the stored *feed* items.

        Each item dict must carry ``'title'``, ``'link'``, ``'description'``
        and ``'pubDate'`` (epoch seconds). The link doubles as the GUID.
        """
        items = [
            PyRSS2Gen.RSSItem(
                title=item['title'],
                link=item['link'],
                description=item['description'],
                guid=PyRSS2Gen.Guid(item['link']),
                pubDate=self._utc_datetime(item['pubDate']),
            )
            for item in feed
        ]
        return PyRSS2Gen.RSS2(
            title=self.title,
            link=self.link,
            description=self.description,
            lastBuildDate=self._utc_datetime(),
            items=items,
        )