-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebsite-stalker.yaml
54 lines (49 loc) · 2.54 KB
/
website-stalker.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
---
# website-stalker configuration.
# Every entry under `sites` pairs one URL (or a list of URLs) with the
# `editors` listed for it.
sites:
  # HAW Hamburg website pages
  - url:
      - https://www.haw-hamburg.de/cyberangriff/
      - https://www.haw-hamburg.de/cyberangriff/datenleak/
      - https://www.haw-hamburg.de/hochschule/technik-und-informatik/studium-und-lehre/fakultaetsservicebuero/
      # Sources of the calendar bot
      - https://www.haw-hamburg.de/hochschule/technik-und-informatik/departments/informatik/studium/aktuelle-veranstaltungen/
      - https://www.haw-hamburg.de/en/study/degree-courses-a-z/study-courses-in-detail/course/courses/show/information-engineering/Studierende/
      - https://www.haw-hamburg.de/hochschule/technik-und-informatik/departments/informations-und-elektrotechnik/studium/studienorganisation/studienplaene/
      - https://www.haw-hamburg.de/studium/studiengaenge-a-z/studiengaenge-detail/course/courses/show/elektrotechnik-und-informationstechnik/Studierende/
    editors:
      # Quoted because a plain scalar must not start with `#`.
      - css_select: '#main, .last_change'
      - css_remove: img, .side-bar, .dots
      - html_markdownify

  # HAW Hamburg news article
  - url:
      - https://www.haw-hamburg.de/detail/news/news/show/informationen-aus-dem-department-informatik-zum-angriff-auf-die-it/
    editors:
      - css_select: .article
      - css_remove: img, .dots
      - html_markdownify

  # Informatik userdoc wiki start page
  - url: https://userdoc.informatik.haw-hamburg.de/doku.php?id=start
    editors:
      - css_select: .page
      - css_remove: script, img, .dw__toc
      - html_markdownify

  # Informatik userdoc timetable (ICS) pages; same selection as the start
  # page, but finished with html_prettify instead of html_markdownify.
  - url:
      # Sources of the calendar bot
      - https://userdoc.informatik.haw-hamburg.de/doku.php?id=stundenplan:ics_public&do=media&ns=stundenplan
      - https://userdoc.informatik.haw-hamburg.de/doku.php?id=stundenplan:ics_public
    editors:
      - css_select: .page
      - css_remove: script, img, .dw__toc
      - html_prettify
  # Way too noisy because of duplicated mensa entries on the page (they should finally get their data in check)
  # - url: https://www.stwhh.de/gastronomie/mensen-cafes-weiteres
  #   editors:
  #     - css_select: "#page-content .tx-epwerkbuilding-element-wrapper"
  #     - css_remove: svg, img, .tx-eptemplate-werk-button
  #     - css_sort: # Sort the contents of the wrapper. This assumes that the headline stuff ends up at the top
  #         selector: .tx-epwerkbuilding-element-wrapper > div
  #     - css_sort: # Sort all the wrappers ideally by their headline and then whatever follows
  #         selector: .tx-epwerkbuilding-element-wrapper
  #         sort_by:
  #           - html_textify
  #     - regex_replace:
  #         pattern: <div.+tx-epwerkbuilding-list-headline[^>]+>[\s\n]*(.+)[\s\n]*</div>
  #         replace: <h3>$1</h3>
  #     - html_markdownify