diff --git a/.ci_config/prospector.yaml b/.ci_config/prospector.yaml index ad9e5ef..8d31dcf 100644 --- a/.ci_config/prospector.yaml +++ b/.ci_config/prospector.yaml @@ -13,3 +13,10 @@ bandit: mypy: run: true + +pydocstyle: + disable: + # conflicts with D211 + - D203 + # conflicts with D211 + - D212 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 61d9081..0000000 --- a/.flake8 +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -max-line-length = 99 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f6db1e5..7f039ef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,6 +22,12 @@ repos: rev: v3.7.0.1 hooks: - id: shfmt + args: + - "--binary-next-line" + - "--case-indent" + - "--indent" + - "4" + - "--space-redirects" - repo: https://github.com/AleksaC/hadolint-py rev: v2.12.0.3 hooks: diff --git a/README.md b/README.md index c9bf04f..b6861ec 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ The following table shows features of AdBlock Plus filters and there status with | Feature | Type | Status | Test | | ------- | ---- | ------ | ---- | -| `#$#` | CSS selector - Snippet filter | :question: | :question: | | `:-abp-contains()` | extended CSS selector | :question: | :question: | | `:-abp-has()` | extended CSS selector | :question: | :question: | | `:-abp-properties()` | extended CSS selector | :question: | :question: | @@ -33,10 +32,22 @@ The following table shows features of AdBlock Plus filters and there status with | `\|…\|` | block exact domain matching including scheme | :question: | :question: | | `!…` | comments | :white_check_mark: | | | `csp=` | filter options | :question: | :question: | -| `##…[…]` | CSS attribute selector | :question: | :question: | -| `##` | CSS selector - Element hiding | :white_check_mark: | | -| `#?#` | CSS selector - Element hiding emulation | :question: | :question: | -| `#@#` | CSS selector - Element hiding exception | :question: | :question: | +| `##.class` | global CSS attribute 
selector with matching for class | :white_check_mark: | :white_check_mark: | +| `###id` | global CSS attribute selector with matching for id | :white_check_mark: | :white_check_mark: | +| `##[attribute]` | global CSS attribute selector with matching for attribute-name | :white_check_mark: | :white_check_mark: | +| `##[attribute=value]` | global CSS attribute selector with matching for attribute-value pair | :white_check_mark: | :white_check_mark: | +| `##[attribute^=value]` | global CSS attribute selector with matching for attribute with value starting with | :white_check_mark: | :white_check_mark: | +| `##[attribute$=value]` | global CSS attribute selector with matching for attribute with value ending with | :white_check_mark: | :white_check_mark: | +| `##[attribute*=value]` | global CSS attribute selector with matching for attribute with value containing | :white_check_mark: | :white_check_mark: | +| `##html-tag[attribute]` | global CSS attribute selector for html-tag with matching for attribute-name | :construction: | :construction: | +| `##html-tag[attribute=value]` | global CSS attribute selector for html-tag with matching for attribute-value pair | :construction: | :construction: | +| `##html-tag[attribute^=value]` | global CSS attribute selector for html-tag with matching for attribute with value starting with | :construction: | :construction: | +| `##html-tag[attribute$=value]` | global CSS attribute selector for html-tag with matching for attribute with value ending with | :construction: | :construction: | +| `##html-tag[attribute*=value]` | global CSS attribute selector for html-tag with matching for attribute with value containing | :construction: | :construction: | +| `[…]#$#` | domain based CSS selector - Snippet filter | :question: | :question: | +| `[…]##` | domain based CSS selector - Element hiding | :white_check_mark: | | +| `[…]#?#` | domain based CSS selector - Element hiding emulation | :question: | :question: | +| `[…]#@#` | domain based CSS 
selector - Element hiding exception | :question: | :question: | | `document` | filter options | :question: | :question: | | `~domain=` | filter options | :question: | :question: | | `domain=` | filter options | :question: | :question: | diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh index 8fd2b7b..6842fba 100755 --- a/privoxy-blocklist.sh +++ b/privoxy-blocklist.sh @@ -30,6 +30,18 @@ set -euo pipefail # dependencies DEPENDS=('privoxy' 'sed' 'grep' 'bash' 'wget') +# types of content filters +# used in conftest.py, thus keep structure +FILTERTYPES=( + "attribute_global_name" + "attribute_global_exact" + "attribute_global_contain" + "attribute_global_startswith" + "attribute_global_endswith" + "class_global" + "id_global" +) + ###################################################################### # # No changes needed after this line. @@ -41,13 +53,16 @@ function usage() { echo "${TMPNAME:-This} is a script to convert AdBlockPlus-lists into Privoxy-lists and install them." echo " " echo "Options:" - echo " -h: Show this help." - echo " -c: Path to script configuration file. (default = ${SCRIPTCONF} - OS specific)" - echo " -q: Don't give any output." - echo " -v 1: Enable verbosity 1. Show a little bit more output." - echo " -v 2: Enable verbosity 2. Show a lot more output." - echo " -v 3: Enable verbosity 3. Show all possible output and don't delete temporary files.(For debugging only!!)" - echo " -r: Remove all lists build by this script." + echo " -h: Show this help." + echo " -c: Path to script configuration file. (default = ${SCRIPTCONF} - OS specific)" + echo " -f filter: only activate given content filter, can be used multiple times. (default: empty, content-filter disabled)" + echo " Supported values: ${FILTERTYPES[*]}" + echo " -q: Don't give any output." + echo " -v 1: Enable verbosity 1. Show a little bit more output." + echo " -v 2: Enable verbosity 2. Show a lot more output." + echo " -v 3: Enable verbosity 3. 
Show all possible output and don't delete temporary files.(For debugging only!!)" + echo " -V: Show version." + echo " -r: Remove all lists build by this script." } function get_config_path() { @@ -69,7 +84,7 @@ function get_config_path() { function prepare() { if [ ${UID} -ne 0 ]; then - error -e "Root privileges needed. Exit.\n" + error "Root privileges needed. Exit." usage exit 1 fi @@ -82,8 +97,6 @@ function prepare() { fi done - OS="$(uname)" - if [ -z "${SCRIPTCONF:-}" ]; then get_config_path fi @@ -100,7 +113,15 @@ function prepare() { # array of URL for AdblockPlus lists # for more sources just add it within the round brackets -URLS=("https://easylist-downloads.adblockplus.org/easylistgermany.txt" "https://easylist-downloads.adblockplus.org/easylist.txt") +URLS=( + "https://easylist-downloads.adblockplus.org/easylistgermany.txt" + "https://easylist-downloads.adblockplus.org/easylist.txt" +) + +# array of content filters to convert +# for supported values check: $0 -h +# empty by default to deactivate as content filters slowdown privoxy a lot +FILTERS=() # config for privoxy initscript providing PRIVOXY_CONF, PRIVOXY_USER and PRIVOXY_GROUP INIT_CONF="/etc/conf.d/privoxy" @@ -128,7 +149,7 @@ EOF fi if [[ ! -r "${SCRIPTCONF}" ]]; then - debug "Can't read ${SCRIPTCONF}. Permission denied." -1 + debug -1 "Can't read ${SCRIPTCONF}. Permission denied." 
fi # shellcheck disable=SC1090 @@ -136,6 +157,11 @@ EOF if [ -n "${OPT_DBG:-}" ]; then DBG="${OPT_DBG}" fi + if [ -n "${OPT_FILTERS[*]}" ]; then + FILTERS=("${OPT_FILTERS[@]}") + fi + debug 2 "Content filters: ${OPT_FILTERS[*]:-disabled}" + # load privoxy config # shellcheck disable=SC1090 if [[ -r "${INIT_CONF:-no-init-conf}" ]]; then @@ -157,8 +183,7 @@ EOF PRIVOXY_CONF="/etc/privoxy/config" ;; esac - PRIVOXY_CONF="/etc/privoxy/config" - info "\$PRIVOXY_CONF isn't set, falling back to '/etc/privoxy/config'" + info "\$PRIVOXY_CONF isn't set, falling back to '${PRIVOXY_CONF}'" fi if [[ -z "${PRIVOXY_USER:-}" ]]; then PRIVOXY_USER="privoxy" @@ -174,8 +199,14 @@ EOF } function debug() { - if [ "${DBG}" -ge "${2}" ]; then - echo -e "${1}" + local expected_level="${1}" + shift 1 + if [ "${DBG}" -ge "${expected_level}" ]; then + if [ "${expected_level}" -eq 0 ]; then + info "${@}" + else + printf '%s\n' "${@}" + fi fi } @@ -187,28 +218,35 @@ function info() { printf '\e[1;33m%s\e[0m\n' "$@" } +# shellcheck disable=SC2317 # function is called in case of FILTERS not empty +function filter_active() { + grep -qxF "$1" <(printf '%s\n' "${FILTERS[@]}") +} + # shellcheck disable=SC2317 function main() { for url in "${URLS[@]}"; do - debug "Processing ${url} ...\n" 0 + debug 0 "Processing ${url} ..." 
file="${TMPDIR}/$(basename "${url}")" - address_file="${TMPDIR}/$(basename "${url}").address" - address_except_file="${TMPDIR}/$(basename "${url}").address_except" - url_file="${TMPDIR}/$(basename "${url}").url" - url_except_file="${TMPDIR}/$(basename "${url}").url_except" - domain_name_file="${TMPDIR}/$(basename "${url}").domain" - domain_name_except_file="${TMPDIR}/$(basename "${url}").domain_except" - regex_file="${TMPDIR}/$(basename "${url}").regex" - regex_except_file="${TMPDIR}/$(basename "${url}").regex_except" + address_file="${file}.address" + address_except_file="${file}.address_except" + url_file="${file}.url" + url_except_file="${file}.url_except" + domain_name_file="${file}.domain" + domain_name_except_file="${file}.domain_except" + regex_file="${file}.regex" + regex_except_file="${file}.regex_except" + html_file="${file}.html" + html_except_file="${file}.html_except" actionfile=${file%\.*}.script.action filterfile=${file%\.*}.script.filter list="$(basename "${file%\.*}")" # download list - debug "Downloading ${url} ..." 0 + debug 0 "Downloading ${url} ..." wget -t 3 --no-check-certificate -O "${file}" "${url}" > "${TMPDIR}/wget-${url//\//#}.log" 2>&1 - debug "$(cat "${TMPDIR}/wget-${url//\//#}.log")" 2 - debug ".. downloading done." 0 + debug 2 "$(cat "${TMPDIR}/wget-${url//\//#}.log")" + debug 0 ".. downloading done." if ! grep -qE '^.*\[Adblock.*\].*$' "${file}"; then info "The list recieved from ${url} does not contain AdblockPlus list header. Try to process anyway." fi @@ -229,80 +267,446 @@ function main() { ## regex block grep '^/^' "${file}" > "${regex_file}" grep '^@@/^' "${file}" > "${regex_except_file}" + ## html element block + grep -E '^.*##.+' "${file}" > "${html_file}" + grep -E '^.*#@#.+' "${file}" > "${html_except_file}" set -e # convert AdblockPlus list to Privoxy list # blacklist of urls - debug "Creating actionfile for ${list} ..." 
1 - echo -e "{ +block{${list}} }" > "${actionfile}" - sed '/\$.*/d;/#/d;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^$//g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${domain_name_file}" >> "${actionfile}" - - debug "... creating filterfile for ${list} ..." 1 - echo "FILTER: ${list} Tag filter of ${list}" > "${filterfile}" - # set filter for html elements - sed '/^#/!d;s/^##//g;s/^#\(.*\)\[.*\]\[.*\]*/s@<([a-zA-Z0-9]+)\\s+.*id=.?\1.*>.*<\/\\1>@@g/g;s/^#\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*id=.?\1.*>.*<\/\\1>@@g/g;s/^\.\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*class=.?\1.*>.*<\/\\1>@@g/g;s/^a\[\(.*\)\]/s@.*<\/a>@@g/g;s/^\([a-zA-Z0-9]*\)\.\(.*\)\[.*\]\[.*\]*/s@<\1.*class=.?\2.*>.*<\/\1>@@g/g;s/^\([a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g;s/^\([a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g;s/^\[\([a-zA-Z]*\).=\(.*\)\]/s@\1^=\2>@@g/g;s/\^/[\/\&:\?=_]/g;s/\.\([a-zA-Z0-9]\)/\\.\1/g' "${file}" >> "${filterfile}" - debug "... filterfile created - adding filterfile to actionfile ..." 1 - echo "{ +filter{${list}} }" >> "${actionfile}" - echo "*" >> "${actionfile}" - debug "... filterfile added ..." 1 + debug 1 "Creating actionfile for ${list} ..." 
+ echo "{ +block{${list}} }" > "${actionfile}" + sed ' + # skip domains with additional filter definition + /\$.*/d + # skip domains with HTML filter + /#/d + # replace characters to match Privoxy domain syntax + s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g + # replace marking seperator of Adblock + s/\^$//g + # replace domain matcher + s/^||/\./g + ' "${domain_name_file}" >> "${actionfile}" + sed ' + # skip domains with additional filter definition + /\$.*/d + # skip domains with HTML filter + /#/d + # replace characters to match Privoxy domain syntax + s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g + # replace marking seperator of Adblock + s/\^$//g + # handle exact domain matching + s/^|\([^|][^|]*\)|/^\1\$/g;s/|$/\$/g + ' "${address_file}" >> "${actionfile}" + + echo > "${filterfile}" + if [ -n "${FILTERS[*]}" ]; then + debug 1 "... creating filterfile for ${list} ..." + if filter_active "class_global"; then + debug 1 "... processing global 'class'-matches ..." 
+ ( + # allow handling of left-over lines from last while-loop-run + shopt -s lastpipe + echo "FILTER: ${list}_class_global Tag filter of ${list}" + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl class matches + /^##\..*/!d + # remove all combinations with attribute matching + /^##\..*\[.*/d + # remove all matches with combinators + /^##\..*[>+~ ].*/d + # cleanup + s/^##\.//g + # prepare regex merging + s/$/|/ + ' "${html_file}" | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'" + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both quoting characters " and ' + printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'" + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'" + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'" + fi + shopt -u lastpipe + ) >> "${filterfile}" + + debug 1 "... registering ${list}_class_global in actionfile ..." + ( + echo "{ +filter{${list}_class_global} }" + echo "/" + ) >> "${actionfile}" + debug 1 "... registered ..." + # FIXME: add class handling with domains + # FIXME: add class handling with combinators + # FIXME: add class with defined HTML tag ? 
+ # FIXME: add class with cascading + fi + + if filter_active "id_global"; then + debug 1 "... processing global 'id'-matches ..." + echo "FILTER: ${list}_id_global Tag filter of ${list}" >> "${filterfile}" + ( + # allow handling of left-over lines from last while-loop-run + shopt -s lastpipe + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl id-only matches + /^###.*/!d + # remove all matches with combinators + /^###.*[>+~ ].*/d + # cleanup + s/^###//g + # prepare regex merging + s/$/|/ + ' "${html_file}" | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'" + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both quoting characters " and ' + printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'" + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" + fi + shopt -u lastpipe + ) >> "${filterfile}" + + debug 1 "... registering ${list}_id_global in actionfile ..." + ( + echo "{ +filter{${list}_id_global} }" + echo "/" + ) >> "${actionfile}" + debug 1 "... registered ..." + # FIXME: add id handling with domains + # FIXME: add id handling with combinators + # FIXME: add id with cascading + fi + + debug 1 "... 
processing 'attribute'-matches with no HTML tag ..." + ( + shopt -s lastpipe + + if filter_active "attribute_global_name"; then + # allow handling of left-over lines from last while-loop-run + echo "FILTER: ${list}_attribute_global_name Tag filter of ${list}" + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl classes + /^##\[[^=][^=]*$/!d + # remove all matches with combinators + /^##.*[>+~ ].*/d + # cleanup + s/^##//g + # convert attribute name-only matches + s/^\[\([^=][^=]*\)\]/\1/g + # convert dots + s/\.\([^\.]\)/\\.\1/g + s/$/|/ + ' "${html_file}" | sort -u | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both quoting characters " and ' + printf ').*>.*<\/\\1[^>]*>@@g\n' + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ').*>.*<\/\\1[^>]*>@@g\n' + fi + fi + + if filter_active "attribute_global_exact"; then + echo "FILTER: ${list}_attribute_global_exact Tag filter of ${list}" + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl classes + /^##\[[^=^*][^=^*]*=.*$/!d + # remove all matches with combinators + /^##.*[>+~ ].*/d + 
# cleanup + s/^##//g + # convert attribute name-only matches + s/^\[\([^=][^=]*\)=\(.*\)\]/\1=\2/g + # convert dots + s/\.\([^\.]\)/\\.\1/g + s/$/|/ + ' "${html_file}" | sort -u | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both quoting characters " and ' + printf ').*>.*<\/\\1[^>]*>@@g\n' + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ').*>.*<\/\\1[^>]*>@@g\n' + fi + fi + + if filter_active "attribute_global_contain"; then + echo "FILTER: ${list}_attribute_global_contain Tag filter of ${list}" + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl classes + /^##\[[^*][^*]*\*=.*$/!d + # remove all matches with combinators + /^##.*[>+~ ].*/d + # cleanup + s/^##//g + # convert dots + s/\.\([^\.]\)/\\.\1/g + # convert attribute based filter with contain match + s/^\[\([^*][^*]*\)\*=\(["'"'"']*\)\([^"][^"]*\)"*\(["'"'"']*\)\]/\1=\2.*\3.*\4/g + s/$/|/ + ' "${html_file}" | sort -u | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request 
to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both quoting characters " and ' + printf ').*>.*<\/\\1[^>]*>@@g\n' + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ').*>.*<\/\\1[^>]*>@@g\n' + fi + fi + + if filter_active "attribute_global_startswith"; then + echo "FILTER: ${list}_attribute_global_startswith Tag filter of ${list}" + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl classes + /^##\[[^=^][^=^]*\^=.*$/!d + # remove all matches with combinators + /^##.*[>+~ ].*/d + # cleanup + s/^##//g + # convert dots + s/\.\([^\.]\)/\\.\1/g + # convert attribute based filter with startwith match + s/^\[\([^^][^^]*\)^=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2\3.*\4/g + s/$/|/ + ' "${html_file}" | sort -u | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both 
quoting characters " and ' + printf ').*>.*<\/\\1[^>]*>@@g\n' + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ').*>.*<\/\\1[^>]*>@@g\n' + fi + fi + + if filter_active "attribute_global_endswith"; then + echo "FILTER: ${list}_attribute_global_endswith Tag filter of ${list}" + lines=() + # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex + sed -e ' + # only process gloabl classes + /^##\[[^$][^=$]*\$=.*$/!d + # remove all matches with combinators + /^##.*[>+~ ].*/d + # cleanup + s/^##//g + # convert dots + s/\.\([^\.]\)/\\.\1/g + # convert attribute based filter with endswith match + s/^\[\([^\$][^\$]*\)\$=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2.*\3\4/g + s/$/|/ + ' "${html_file}" | sort -u | while read -r line; do + # number of matches within one rule impacts runtime of each request to modify the content + if [ "${#lines[@]}" -lt 1000 ]; then + lines+=("$line") + continue + fi + # complexity of regex impacts runtime of each request to modify the content + # using removal of whole HTML tag as multiple matches with different classes in same element are not possible + # printf to inject both quoting characters " and ' + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + # printf to inject both quoting characters " and ' + printf ').*>.*<\/\\1[^>]*>@@g\n' + lines=() + done + # process last chunk with less than 1000 entries + if [ "${#lines[@]}" -gt 0 ]; then + printf 's@<([a-zA-Z0-9]+)\\s+.*(' + printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n' + printf ').*>.*<\/\\1[^>]*>@@g\n' + fi + fi + shopt -u lastpipe + ) >> "${filterfile}" + + debug 1 "... 
registering ${list}_attribute filters in actionfile ..." + ( + if filter_active "attribute_global_name"; then + echo "{ +filter{${list}_attribute_global_name} }" + echo "/" + fi + if filter_active "attribute_global_exact"; then + echo "{ +filter{${list}_attribute_global_exact} }" + echo "/" + fi + if filter_active "attribute_global_contain"; then + echo "{ +filter{${list}_attribute_global_contain} }" + echo "/" + fi + if filter_active "attribute_global_startswith"; then + echo "{ +filter{${list}_attribute_global_startswith} }" + echo "/" + fi + if filter_active "attribute_global_endswith"; then + echo "{ +filter{${list}_attribute_global_endswith} }" + echo "/" + fi + ) >> "${actionfile}" + debug 1 "... registered ..." + + # FIXME: add attribute handling with domains + # FIXME: add attribute handling with combinators + # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"] + fi # create domain based whitelist # create domain based blacklist # domains=$(sed '/^#/d;/#/!d;s/,~/,\*/g;s/~/;:\*/g;s/^\([a-zA-Z]\)/;:\1/g' ${file}) - # [ -n "${domains}" ] && debug "... creating domainbased filterfiles ..." 1 - # debug "Found Domains: ${domains}." 2 + # [ -n "${domains}" ] && debug 1 "... creating domainbased filterfiles ..." + # debug 2 "Found Domains: ${domains}." # ifs=$IFS # IFS=";:" # for domain in ${domains} # do # dns=$(echo ${domain} | awk -F ',' '{print $1}' | awk -F '#' '{print $1}') - # debug "Modifying line: ${domain}" 2 - # debug " ... creating filterfile for ${dns} ..." 1 + # debug 2 "Modifying line: ${domain}" + # debug 1 " ... creating filterfile for ${dns} ..." # sed '' ${file} > ${file%\.*}-${dns%~}.script.filter - # debug " ... filterfile created ..." 1 - # debug " ... adding filterfile for ${dns} to actionfile ..." 1 + # debug 1 " ... filterfile created ..." + # debug 1 " ... adding filterfile for ${dns} to actionfile ..." 
# echo "{ +filter{${list}-${dns}} }" >> ${actionfile} # echo "${dns}" >> ${actionfile} - # debug " ... filterfile added ..." 1 + # debug 1 " ... filterfile added ..." # done # IFS=${ifs} - # debug "... all domainbased filterfiles created ..." 1 + # debug 1 "... all domainbased filterfiles created ..." - debug "... creating and adding whitlist for urls ..." 1 + debug 1 "... creating and adding whitlist for urls ..." # whitelist of urls echo "{ -block }" >> "${actionfile}" sed 's/^@@//g;/\$.*/d;/#/d;s/\./\\./g;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^/[\/\&:\?=_]/g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${domain_name_except_file}" >> "${actionfile}" - debug "... created and added whitelist - creating and adding image handler ..." 1 + debug 1 "... created and added whitelist - creating and adding image handler ..." # whitelist of image urls echo "{ -block +handle-as-image }" >> "${actionfile}" sed '/^@@.*/!d;s/^@@//g;/\$.*image.*/!d;s/\$.*image.*//g;/#/d;s/\./\\./g;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^/[\/\&:\?=_]/g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${file}" >> "${actionfile}" - debug "... created and added image handler ..." 1 - debug "... created actionfile for ${list}." 1 + debug 1 "... created and added image handler ..." + debug 1 "... created actionfile for ${list}." # install Privoxy actionsfile install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${actionfile}" "${PRIVOXY_DIR}" if ! grep -q "$(basename "${actionfile}")" "${PRIVOXY_CONF}"; then - debug "\nModifying ${PRIVOXY_CONF} ..." 0 + debug 0 "Modifying ${PRIVOXY_CONF} ..." sed "s/^actionsfile user\.action/actionsfile $(basename "${actionfile}")\nactionsfile user.action/" "${PRIVOXY_CONF}" > "${TMPDIR}/config" - debug "... modification done.\n" 0 - debug "Installing new config ..." 0 + debug 0 "... modification done." + debug 0 "Installing new config ..." 
install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${TMPDIR}/config" "${PRIVOXY_CONF}" - debug "... installation done\n" 0 + debug 0 "... installation done" fi # install Privoxy filterfile install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${filterfile}" "${PRIVOXY_DIR}" if ! grep -q "$(basename "${filterfile}")" "${PRIVOXY_CONF}"; then - debug "\nModifying ${PRIVOXY_CONF} ..." 0 + debug 0 "Modifying ${PRIVOXY_CONF} ..." sed "s/^\(#*\)filterfile user\.filter/filterfile $(basename "${filterfile}")\n\1filterfile user.filter/" "${PRIVOXY_CONF}" > "${TMPDIR}/config" - debug "... modification done.\n" 0 - debug "Installing new config ..." 0 + debug 0 "... modification done." + debug 0 "Installing new config ..." install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${TMPDIR}/config" "${PRIVOXY_CONF}" - debug "... installation done\n" 0 + debug 0 "... installation done" fi - debug "... ${url} installed successfully.\n" 0 + debug 0 "... ${url} installed successfully." done } @@ -319,9 +723,9 @@ function lock() { echo "An instance of ${TMPNAME} is already running. Exit" exit 1 fi - debug "Found dead lock file." 0 + debug 0 "Found dead lock file." rm -f "${PID_FILE}" - debug "File removed." 0 + debug 0 "File removed." fi # safe PID in lock-file @@ -330,32 +734,33 @@ function lock() { # shellcheck disable=SC2317 function remove() { - read -rp "Do you really want to remove all build lists?(y/N) " choice - if [ "${choice}" != "y" ]; then - exit 0 + read -rp "Do you really want to remove all build lists?(y/N) " choice + if [ "${choice}" != "y" ]; then + exit 0 fi - if rm -rf "${PRIVOXY_DIR}/"*.script.{action,filter} \ - && sed '/^actionsfile .*\.script\.action$/d;/^filterfile .*\.script\.filter$/d' -i "${PRIVOXY_CONF}"; then - echo "Lists removed." 
- exit 0 + if rm -rf "${PRIVOXY_DIR}/"*.script.{action,filter} \ + && sed '/^actionsfile .*\.script\.action$/d;/^filterfile .*\.script\.filter$/d' -i "${PRIVOXY_CONF}"; then + echo "Lists removed." + exit 0 fi - error "An error occured while removing the lists." - error "Please have a look into ${PRIVOXY_DIR} whether there are .script.* files and search for *.script.* in ${PRIVOXY_CONF}." - exit 1 + error "An error occured while removing the lists." + error "Please have a look into ${PRIVOXY_DIR} whether there are .script.* files and search for *.script.* in ${PRIVOXY_CONF}." + exit 1 } VERBOSE=() method="main" +OS="$(uname)" +OPT_FILTERS=() # loop for options -while getopts ":c:hrqv:V" opt; do +while getopts ":c:f:hrqv:V" opt; do case "${opt}" in "c") SCRIPTCONF="${OPTARG}" ;; - "v") - OPT_DBG="${OPTARG}" - VERBOSE=("-v") + "f") + OPT_FILTERS+=("${OPTARG,,}") ;; "q") OPT_DBG=-1 @@ -363,6 +768,10 @@ while getopts ":c:hrqv:V" opt; do "r") method="remove" ;; + "v") + OPT_DBG="${OPTARG}" + VERBOSE=("-v") + ;; "V") #
is replaced by release process echo "Version:
" @@ -379,12 +788,21 @@ while getopts ":c:hrqv:V" opt; do esac done +if [ -n "${OPT_FILTERS[*]}" ]; then + if unknown="$(grep -vxFf <(printf '%s\n' "${FILTERTYPES[@]}") <(printf '%s\n' "${OPT_FILTERS[@]}"))"; then + error "Unknown filters: ${unknown}" + exit 1 + fi +fi + prepare trap 'rm -fr "${TMPDIR}";exit' INT TERM EXIT lock -debug "URL-List: ${URLS}\nPrivoxy-Configdir: ${PRIVOXY_DIR}\nTemporary directory: ${TMPDIR}" 2 +debug 2 "URL-List: ${URLS[*]}" +debug 2 "Privoxy-Configdir: ${PRIVOXY_DIR}" +debug 2 "Temporary directory: ${TMPDIR}" "${method}" # restore default exit command diff --git a/tests/Dockerfile_ubuntu b/tests/Dockerfile_ubuntu index d5366d5..08b58c0 100644 --- a/tests/Dockerfile_ubuntu +++ b/tests/Dockerfile_ubuntu @@ -5,10 +5,12 @@ COPY helper/install_deps.sh /install_deps.sh ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update \ && apt-get install --no-install-recommends -q --yes \ + curl \ build-essential \ python3-pip \ python3-dev \ sudo \ + vim \ && pip install --no-cache-dir -qr /requirements.txt \ && rm -f /requirements.txt \ && install -d -o root -g root /pytest_cache \ diff --git a/tests/config.py b/tests/config.py new file mode 100644 index 0000000..df40ece --- /dev/null +++ b/tests/config.py @@ -0,0 +1,75 @@ +"""Configuration of test suite to configure tests.""" + +from conftest import check_in, check_not_in + +content_removed = [ + "ad_970x250", # class match: https://www.iphoneitalia.com/ + "MyAdsId3", # id match + "AdRight2", # class match with element having multiple classes + "data-ad-manager-id", # attribute match + 'data-role="tile-ads-module"', # attribute exact match + 'onclick="content.ad/"', # attribute contain match + 'class="adDisplay-module_foobar"', # attribute startswith match + "onclick=\"location.href='http://www.reimageplus.com/foobar'", # attribute startswith match +] +content_exists = [ + "ajlkl", # should exist, although one element is removed by privoxy + '"adDisplay-modul"', # should exist +] + +# FIXME: see 
https://github.com/Andrwe/privoxy-blocklist/issues/35 +urls_allowed = ["duckduckgo.com/", "hs-exp.jp/ads/"] +urls_allowed = ["duckduckgo.com/"] + +# FIXME: implement regex-filter for domains, e.g. +# /^https?:\/\/s3\.*.*\.amazonaws\.com\/[a-f0-9]{45,}\/[a-f,0-9]{8,10}$/$script, +# third-party,xmlhttprequest,domain=~amazon.com +urls_blocked = [ + "andrwe.org/ads/", + "andrwe.jp/ads/", + "pubfeed.linkby.com", + f"s3.{'a'*6}.amazonaws.com/{'0123abcd'*6}/{'ab,12'*2}/", +] +urls_blocked = ["andrwe.org/ads/", "andrwe.jp/ads/", "pubfeed.linkby.com"] + +config_checks = { + "url_extended_config.conf": [ + ( + check_in, + "Processing https://raw.githubusercontent.com/easylist/easylist/master/" + "easylist/easylist_allowlist_general_hide.txt", + ), + ( + check_in, + "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt", + ), + ( + check_in, + "The list recieved from https://raw.githubusercontent.com/easylist/easylist/master" + "/easylist/easylist_allowlist_general_hide.txt does not contain AdblockPlus list " + "header. 
Try to process anyway.", + ), + ( + check_not_in, + "created and added image handler", + ), + ], + "debugging.conf": [ + ( + check_in, + "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt", + ), + ( + check_not_in, + "does not contain AdblockPlus list header.", + ), + ( + check_in, + "‘/tmp/privoxy-blocklist.sh/easylist.txt’ saved", + ), + ( + check_in, + "created and added image handler", + ), + ], +} diff --git a/tests/configs/debugging.conf b/tests/configs/debugging.conf index 05f13ef..ac9b183 100644 --- a/tests/configs/debugging.conf +++ b/tests/configs/debugging.conf @@ -7,6 +7,11 @@ URLS=( "https://easylist-downloads.adblockplus.org/easylist.txt" ) +# array of content filters to convert +# for supported values check: $0 -h +# empty by default to deactivate as content filters slowdown privoxy a lot +FILTERS=() + # config for privoxy initscript providing PRIVOXY_CONF, PRIVOXY_USER and PRIVOXY_GROUP #INIT_CONF="/etc/conf.d/privoxy" diff --git a/tests/configs/url_extended_config.conf b/tests/configs/url_extended_config.conf index 236b1f8..65f9c9b 100644 --- a/tests/configs/url_extended_config.conf +++ b/tests/configs/url_extended_config.conf @@ -8,6 +8,11 @@ URLS=( "https://raw.githubusercontent.com/easylist/easylist/master/easylist/easylist_allowlist_general_hide.txt" ) +# array of content filters to convert +# for supported values check: $0 -h +# empty by default to deactivate as content filters slowdown privoxy a lot +FILTERS=() + # config for privoxy initscript providing PRIVOXY_CONF, PRIVOXY_USER and PRIVOXY_GROUP #INIT_CONF="/etc/conf.d/privoxy" diff --git a/tests/conftest.py b/tests/conftest.py index e6d037f..901636b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,14 +4,34 @@ import os from pathlib import Path from re import search -from typing import Dict, Generator, Optional, cast +from typing import Generator, Optional import pytest import requests -from pytest import CollectReport, StashKey +from pytest import 
StashKey from pytestshellutils.shell import Daemon +from urllib3.util import Url, parse_url -phase_report_key = StashKey[Dict[str, CollectReport]]() +phase_report_key = StashKey[int]() + + +class UrlParsed: + """Class to parse and store URL.""" + + origin_url: str + parsed_url: Url + scheme: str + scheme_less_url: str + + def __init__(self, url: str): + """Initialize object by parsing given URL.""" + self.origin_url = url + self.parsed_url = parse_url(self.origin_url) + self.scheme = self.parsed_url.scheme or "http" + parsed_port = f":{self.parsed_url.port}" if self.parsed_url.port else "" + self.scheme_less_url = ( + f"{self.parsed_url.host}{parsed_port}{self.parsed_url.request_uri}" + ) def debug_enabled() -> bool: @@ -26,6 +46,16 @@ def debug_enabled() -> bool: ) +def check_in(needle: str, haystack: str) -> bool: + """Check given haystack for given string.""" + return needle in haystack + + +def check_not_in(needle: str, haystack: str) -> bool: + """Check that given string is not in given text.""" + return needle not in haystack + + # based on # https://docs.pytest.org/en/latest/example/simple.html#making-test-result-information-available-in-fixtures @pytest.hookimpl(wrapper=True, tryfirst=True) @@ -35,16 +65,49 @@ def pytest_runtest_makereport(item: pytest.Item): report = yield if item.parent: + # store test results for each phase ("setup", "call", "teardown") of each test + # within module-scope + if phase_report_key not in item.parent.stash: + item.parent.stash.setdefault(phase_report_key, 0) if report.failed: - # store test results for each phase ("setup", "call", "teardown") of each test - # within module-scope - item.parent.stash.setdefault( - phase_report_key, cast(Dict[str, CollectReport], {}) - )[f"{report.nodeid}_{report.when}"] = report + item.parent.stash[phase_report_key] += 1 return report +@pytest.fixture +def webserver(httpserver) -> UrlParsed: + """Start HTTP server and return parsed URL object.""" + with 
Path(__file__).parent.joinpath("response.html").open( + "r", encoding="UTF-8" + ) as f_h: + response_html = f_h.read() + httpserver.expect_request("/").respond_with_data( + response_data=response_html, content_type="text/html" + ) + return UrlParsed(httpserver.url_for("/")) + + +@pytest.fixture(scope="module") +def filtertypes() -> list[str]: + """Return filtertypes supported by privoxy-blocklist.""" + filter_types = [] + with Path(__file__).parent.parent.joinpath("privoxy-blocklist.sh").open( + "r", encoding="UTF-8" + ) as f_h: + found_line = False + for line in f_h.readlines(): + if not found_line and not line.startswith("FILTERTYPES"): + continue + if line.startswith("FILTERTYPES"): + found_line = True + continue + if line.endswith(")\n"): + break + filter_types.append(line.strip().strip('"')) + return filter_types + + @pytest.fixture(scope="module") def privoxy_blocklist() -> str: """Return the path to privoxy-blocklist.sh.""" @@ -95,12 +158,14 @@ def start_privoxy(request: pytest.FixtureRequest) -> Generator[bool, None, None] run.start() yield run.is_running() run_result = run.terminate() + logs = run_result.stdout + run_result.stderr # request.node is an "module" because we use the "module" scope node = request.node - if (phase_report_key in node.stash) and len(node.stash[phase_report_key]) > 0: - print( - f"\n\nprivoxy-results\n stdout:\n{run_result.stdout}\n stderr:\n{run_result.stderr}" - ) + if ( + (phase_report_key in node.stash) and node.stash[phase_report_key] > 0 + ) or " Error: " in logs: + print(f"\n\nprivoxy-logs\n{logs}") + assert " Error: " not in logs @pytest.fixture(scope="module") diff --git a/tests/requirements.txt b/tests/requirements.txt index 295a104..d9def25 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ pytest pytest-durations +pytest-httpserver pytest-shell-utilities requests diff --git a/tests/response.html b/tests/response.html new file mode 100644 index 0000000..8882c68 --- /dev/null +++ 
b/tests/response.html @@ -0,0 +1,16 @@ + + +
just-some-test-string-always-present
+
single class should be removed
+
multiple classes that should be removed
+
multiple classes that should exist
+
id should be removed
+
name-only attribute should be removed
+
exact match attribute should be removed
+
1. contain match attribute should be removed
+
2. contain match attribute should be removed
+
1. startswith match attribute should be removed
+
startswith match attribute should exist
+
2. startswith match attribute should be removed
+ + diff --git a/tests/setup.cfg b/tests/setup.cfg new file mode 100644 index 0000000..035f93b --- /dev/null +++ b/tests/setup.cfg @@ -0,0 +1,5 @@ +[pycodestyle] +max-line-length = 99 + +[flake8] +max-line-length = 99 diff --git a/tests/test_00_minimal.py b/tests/test_00_minimal.py index e86afff..0402a8a 100644 --- a/tests/test_00_minimal.py +++ b/tests/test_00_minimal.py @@ -11,7 +11,6 @@ def test_permissions() -> None: ".ci_config/bandit.yml", ".ci_config/prospector.yaml", ".editorconfig", - ".flake8", ".github/release.yml", ".github/workflows/pytest.yml", ".github/workflows/release.yml", @@ -22,6 +21,7 @@ def test_permissions() -> None: "tests/Dockerfile_alpine", "tests/Dockerfile_ubuntu", "tests/requirements.txt", + "tests/setup.cfg", "tests/test_00_minimal.py", "tests/test_01_root_execute.py", ] diff --git a/tests/test_01_root_execute.py b/tests/test_01_root_execute.py index fa722fe..0d59fd7 100644 --- a/tests/test_01_root_execute.py +++ b/tests/test_01_root_execute.py @@ -4,29 +4,31 @@ from pathlib import Path from shutil import copyfile, copymode, which +import config import requests +from conftest import check_in, check_not_in def test_config_generator(shell, privoxy_blocklist) -> None: """Test config generator with default path.""" - config = Path("/etc/privoxy-blocklist.conf") - if config.exists(): - config.unlink() + config_file = Path("/etc/privoxy-blocklist.conf") + if config_file.exists(): + config_file.unlink() ret = shell.run(privoxy_blocklist) assert ret.returncode == 2 assert "Creating default one and exiting" in ret.stdout - assert config.exists() + assert config_file.exists() def test_custom_config_generator(shell, tmp_path, privoxy_blocklist) -> None: """Test config generator with custom path.""" - config = Path(f"{tmp_path}/privoxy-blocklist") - if config.exists(): - config.unlink() - ret = shell.run(privoxy_blocklist, "-c", str(config)) + config_file = Path(f"{tmp_path}/privoxy-blocklist") + if config_file.exists(): + config_file.unlink() + 
ret = shell.run(privoxy_blocklist, "-c", str(config_file)) assert ret.returncode == 2 assert "Creating default one and exiting" in ret.stdout - assert config.exists() + assert config_file.exists() def test_version_option(shell, tmp_path, privoxy_blocklist) -> None: @@ -46,9 +48,21 @@ def test_version_option(shell, tmp_path, privoxy_blocklist) -> None: assert ret.stdout == "Version: 0.0.1\n" -def test_next_run(shell, privoxy_blocklist) -> None: +def test_filter_check(shell, privoxy_blocklist) -> None: + """Test filtertype check.""" + cmd = [privoxy_blocklist, "-f", "bla"] + ret_script = shell.run(*cmd) + assert ret_script.returncode == 1 + assert "" == ret_script.stdout + assert "Unknown filters: bla" in ret_script.stderr.strip() + + +def test_next_run(shell, privoxy_blocklist, filtertypes) -> None: """Test followup runs.""" - ret_script = shell.run(privoxy_blocklist) + cmd = [privoxy_blocklist] + for filtertype in filtertypes: + cmd.extend(["-f", filtertype]) + ret_script = shell.run(*cmd) assert ret_script.returncode == 0 ret_privo = shell.run( "/usr/sbin/privoxy", "--no-daemon", "--config-test", "/etc/privoxy/config" @@ -58,77 +72,61 @@ def test_next_run(shell, privoxy_blocklist) -> None: def test_request_success(start_privoxy, supported_schemes) -> None: """Test URLs not blocked by privoxy.""" - # FIXME: see https://github.com/Andrwe/privoxy-blocklist/issues/35 - urls = ["duckduckgo.com/", "hs-exp.jp/ads/"] - urls = ["duckduckgo.com/"] - run_requests(start_privoxy, supported_schemes, urls, [200, 301, 302]) + run_requests(start_privoxy, supported_schemes, config.urls_allowed, [200, 301, 302]) def test_request_block_url(start_privoxy, supported_schemes) -> None: """Test URLs blocked by privoxy due to easylist.""" - urls = [ - "andrwe.org/ads/", - "andrwe.jp/ads/", - "pubfeed.linkby.com", - f"s3.{'a'*6}.amazonaws.com/{'0123abcd'*6}/{'ab,12'*2}/", - ] - urls = ["andrwe.org/ads/", "andrwe.jp/ads/", "pubfeed.linkby.com"] - run_requests(start_privoxy, 
supported_schemes, urls, [403]) + run_requests(start_privoxy, supported_schemes, config.urls_blocked, [403]) + + +def test_content_removed(start_privoxy, webserver) -> None: + """Test filters for removing content.""" + response = run_request( + start_privoxy, + scheme=webserver.scheme, + url=webserver.scheme_less_url, + expected_code=[200], + ) + # expected response + assert check_in("just-some-test-string-always-present", response.text) + for needle in config.content_removed: + # check presence of needle without privoxy + assert check_in(needle, requests.get(webserver.origin_url, timeout=10).text) + # check presence of needle with privoxy + assert check_not_in(needle, response.text) + + +def test_content_exists(start_privoxy, webserver) -> None: + """Test filters for removing content.""" + response = run_request( + start_privoxy, + scheme=webserver.scheme, + url=webserver.scheme_less_url, + expected_code=[200], + ) + # expected response + assert check_in("just-some-test-string-always-present", response.text) + for needle in config.content_exists: + # check presence of needle without privoxy + assert check_in(needle, requests.get(webserver.origin_url, timeout=10).text) + # check presence of needle with privoxy + assert check_in(needle, response.text) +# must be second last test as it will generate unpredictable privoxy configurations def test_predefined_custom_config_generator(shell, privoxy_blocklist) -> None: """Run tests for all pre-defined configs.""" - checks = { - "url_extended_config.conf": [ - ( - check_in, - "Processing https://raw.githubusercontent.com/easylist/easylist/master/" - "easylist/easylist_allowlist_general_hide.txt", - ), - ( - check_in, - "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt", - ), - ( - check_in, - "The list recieved from https://raw.githubusercontent.com/easylist/easylist/master" - "/easylist/easylist_allowlist_general_hide.txt does not contain AdblockPlus list " - "header. 
Try to process anyway.", - ), - ( - check_not_in, - "created and added image handler", - ), - ], - "debugging.conf": [ - ( - check_in, - "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt", - ), - ( - check_not_in, - "does not contain AdblockPlus list header.", - ), - ( - check_in, - "‘/tmp/privoxy-blocklist.sh/easylist.txt’ saved", - ), - ( - check_in, - "created and added image handler", - ), - ], - } test_config_dir = Path(__file__).parent / "configs" - for config in test_config_dir.iterdir(): - if not config.is_file(): + for config_file in test_config_dir.iterdir(): + if not config_file.is_file(): continue - ret = shell.run(privoxy_blocklist, "-c", str(config)) + ret = shell.run(privoxy_blocklist, "-c", str(config_file)) assert ret.returncode == 0 assert check_not_in("Creating default one and exiting", ret.stdout) - for check in checks.get(config.name, []): + for check in config.config_checks.get(config_file.name, []): assert check[0](check[1], ret.stdout) - assert config.exists() + assert config_file.exists() # must be last test as it will uninstall dependencies and check error handling @@ -150,17 +148,11 @@ def test_missing_deps(shell, privoxy_blocklist) -> None: assert "Please install the package providing" in ret_script.stderr -# Heloer functions - - -def check_in(needle: str, haystack: str) -> bool: - """Check given haystack for given string.""" - return needle in haystack +def test_privoxy_runtime_log() -> None: + """NOOP function to support checking privoxy logs during tear-down.""" -def check_not_in(needle: str, haystack: str) -> bool: - """Check that given string is not in given text.""" - return needle not in haystack +# Heloer functions def run_requests(