forked from laktak/extrakto
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extrakto.conf
57 lines (50 loc) · 1.74 KB
/
extrakto.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# extrakto filter definitions
# Define your own filters in ~/.config/extrakto/extrakto.conf
# To override an existing filter just specify the new values.
# For example, if you prefer to split words on comma as well you can define:
# [word]
# regex: ([^][(){}=$\u2500-\u27BF\uE000-\uF8FF, \t\n\r]+)
# define a section per filter
# each filter must have at least a regex containing one or more capture groups
# regex: a python regex expression
# enabled: is filter active (default True)
# in_all: is included in --all (default True)
# lstrip: characters to strip from the left end of the result
# rstrip: characters to strip from the right end of the result
# exclude: a regex; a result that matches it is discarded
# alt2-9: alternate result (see url)
[word]
# "words" consist of anything but the following characters:
# [](){}=$
# unicode range 2500-27BF which includes:
# - Box Drawing
# - Block Elements
# - Geometric Shapes
# - Miscellaneous Symbols
# - Dingbats
# unicode range E000-F8FF (private use/Powerline)
# and whitespace ( \t\n\r)
#
# Both unicode ranges are written as \uXXXX escapes, which Python's re module
# resolves itself (just like the \t\n\r already used here). Do not replace them
# with the literal characters: the private-use endpoints are invisible in most
# editors and are easily lost when the file is copied or re-encoded, which
# would leave a stray literal "-" in the class and silently split words at
# every hyphen.
regex: ([^][(){}=$\u2500-\u27BF\uE000-\uF8FF \t\n\r]+)
# punctuation/brackets trimmed from the left end of each result
lstrip: ,:;()[]{}<>'"|
# same set trimmed from the right end, plus a trailing "."
rstrip: ,:;()[]{}<>'"|.
# this filter is not part of --all
in_all: False
[path]
# Matches path-like text, i.e. text that contains at least one "/".
# The regex is built from three parts:
#   1. a non-capturing lead-in: either ^ or one of space, tab, newline, " ( [ < ' :
#   2. capture group 1: an optional leading "~" or "/"
#   3. capture group 2: one or more of - ~ a-z A-Z 0-9 _ + , . followed by "/",
#      then any run of characters other than space, tab, newline, CR and
#      | : " ' $ % & ) > ]
# Note: "+-," in the first character class of group 2 is parsed as the range
# "+" to ",". Those two characters are adjacent in ASCII, so the range covers
# exactly "+" and "," and behaves as if both were listed literally.
# separator: (?=[ \t\n]|"|\(|\[|<|\')?
# optionally starts with: (~|/)?
regex: (?:[ \t\n\"([<':]|^)(~|/)?([-~a-zA-Z0-9_+-,.]+/[^ \t\n\r|:"'$%&)>\]]*)
# exclude transfer speeds like 5k/s or m/s, and page 1/2
exclude: [kmgKMG]/s$|^\d+/\d+$
# remove invalid end characters (like punctuation or markdown syntax)
# NOTE(review): presumably the parser keeps the quote characters as part of the
# value, making the stripped set: " , ) :  -- confirm against the loader.
rstrip: ",):"
[url]
# Matches URLs. Capture group 1 is the scheme/prefix (http://, https://, git@,
# git://, ssh://, ftp://, sftp:// or file:///); capture group 2 is the run of
# URL characters that follows it (may be empty).
# NOTE(review): "s*ftp" accepts any number of leading "s" (e.g. "ssftp://");
# "s?ftp" was probably intended -- confirm before changing.
regex: (https?://|git@|git://|ssh://|s*ftp://|file:///)([a-zA-Z0-9?=%/_.:,;~@!#$&()*+-]*)
# alternate result: the text after "://" up to (not including) the next
# "/", "?" or space
alt2: ://([^/? ]+)
# remove invalid end characters (like punctuation or markdown syntax)
# NOTE(review): presumably the parser keeps the quote characters as part of the
# value, making the stripped set: " , ) :  -- confirm against the loader.
rstrip: ",):"
[quote]
# Double-quoted text on a single line, with at least one character between
# the quotes (an empty "" does not match).
# regex captures the text including its surrounding double quotes.
regex: ("[^"\n\r]+")
# alternate result: only the text between the quotes
alt2: "([^"\n\r]+)"
[s-quote]
# Single-quoted text on a single line, with at least one character between
# the quotes (an empty '' does not match).
# regex captures the text including its surrounding single quotes.
regex: ('[^'\n\r]+')
# alternate result: only the text between the quotes
alt2: '([^'\n\r]+)'