Support multiple phishing configurations (#7079)

Gudahtt · trn1ty · 409H · web-flow · commit fb540b0f42be · 2022-04-19T12:19:37.000-02:30
* Support multiple phishing configurations

The phishing detector has been updated to support multiple phishing
configurations. Both the configuration object and the result object
have been updated to accommodate the need to identify the name of the
config that the checked domain matched. Since the config and return
value were already being changed, the nomenclature has been updated to
replace `black/white` with `block/allow` as well, which is a change we
have been meaning to make for some time.

This change to both the configuration and result object applies only
when the new configuration format is used. The old format preserves the
old config and result value, making this a non-breaking change.

The old configuration accepted three lists (`blacklist`, `whitelist`,
and `fuzzylist`), and a `tolerance` value for the fuzzylist match.

The new configuration is an array of objects rather than an object, to
accommodate multiple configurations. Each configuration object accepts
three lists (`blocklist`, `allowlist`, and `fuzzylist`), `tolerance`
for the fuzzylist match, and two new properties: `name` and `version`.

The `version` parameter was already used by the old configuration, but
it was not required or used by the detector itself. It is now required
with the new configuration, and it is returned with each match.

The new `name` parameter describes which configuration matched the
origin being checked (if any). This was critical for us because it
allows us to direct the user to the appropriate place when they want to
dispute a blocked site.

The return value was updated to include the `name` and `version`
parameters. The `type` was updated from `blacklist` to `blocklist` and
from `whitelist` to `allowlist` as well.

* v1.2.0

This release adds support for multiple phishing configurations, and
includes changes to the configuration object and return value if an
array of configuration values is passed to the phishing detector
constructor.

This is a non-breaking change because the old configuration format
is still supported, and the return values remain the same if the old
configuration format is used.

Co-authored-by: Deven Blake <trinity@trinity.moe>
Co-authored-by: H <409H@users.noreply.github.com>
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "eth-phishing-detect",
-  "version": "1.1.16",
+  "version": "1.2.0",
   "description": "Utility for detecting phishing domains targeting Ethereum users",
   "main": "src/index.js",
   "scripts": {
diff --git a/src/detector.js b/src/detector.js
@@ -3,47 +3,112 @@ const DEFAULT_TOLERANCE = 3
 
 class PhishingDetector {
 
+  /**
+   * Legacy phishing detector configuration.
+   *
+   * @typedef {object} LegacyPhishingDetectorConfiguration
+   * @property {string[]} [whitelist] - Origins that should not be blocked.
+   * @property {string[]} [blacklist] - Origins to block.
+   * @property {string[]} [fuzzylist] - Origins of common phishing targets.
+   * @property {number} [tolerance] - Tolerance to use for the fuzzylist levenshtein match.
+   */
+
+  /**
+   * A configuration object for phishing detection.
+   *
+   * @typedef {object} PhishingDetectorConfiguration
+   * @property {string[]} [allowlist] - Origins that should not be blocked.
+   * @property {string[]} [blocklist] - Origins to block.
+   * @property {string[]} [fuzzylist] - Origins of common phishing targets.
+   * @property {string} name - The name of this configuration. Used to explain to users why a site is being blocked.
+   * @property {number} [tolerance] - Tolerance to use for the fuzzylist levenshtein match.
+   * @property {number} version - The current version of the configuration.
+   */
+
+  /**
+   * Construct a phishing detector, which can check whether origins are known
+   * to be malicious or similar to common phishing targets.
+   *
+   * A list of configurations is accepted. Each origin checked is processed
+   * using each configuration in sequence, so the order defines which
+   * configurations take precedence.
+   *
+   * @param {LegacyPhishingDetectorConfiguration | PhishingDetectorConfiguration[]} opts - Phishing detection options
+   */
   constructor (opts) {
-    this.whitelist = processDomainList(opts.whitelist || [])
-    this.blacklist = processDomainList(opts.blacklist || [])
-    this.fuzzylist = processDomainList(opts.fuzzylist || [])
-    this.tolerance = ('tolerance' in opts) ? opts.tolerance : DEFAULT_TOLERANCE
+    // recommended configuration
+    if (Array.isArray(opts)) {
+      this.configs = processConfigs(opts)
+      this.legacyConfig = false
+    // legacy configuration
+    } else {
+      this.configs = [{
+        allowlist: processDomainList(opts.whitelist || []),
+        blocklist: processDomainList(opts.blacklist || []),
+        fuzzylist: processDomainList(opts.fuzzylist || []),
+        tolerance: ('tolerance' in opts) ? opts.tolerance : DEFAULT_TOLERANCE
+      }]
+      this.legacyConfig = true
+    }
   }
 
-  check (domain) {
-    let fqdn = domain.substring(domain.length - 1) === "." 
+  check(domain) {
+    const result = this._check(domain)
+
+    if (this.legacyConfig) {
+      let legacyType = result.type;
+      if (legacyType === 'allowlist') {
+        legacyType = 'whitelist'
+      } else if (legacyType === 'blocklist') {
+        legacyType = 'blacklist'
+      }
+      return {
+        match: result.match,
+        result: result.result,
+        type: legacyType,
+      }
+    }
+    return result
+  }
+
+  _check (domain) {
+    let fqdn = domain.substring(domain.length - 1) === "."
       ? domain.slice(0, -1)
       : domain;
 
     const source = domainToParts(fqdn)
 
-    // if source matches whitelist domain (or subdomain thereof), PASS
-    const whitelistMatch = matchPartsAgainstList(source, this.whitelist)
-    if (whitelistMatch) return { type: 'whitelist', result: false }
-
-    // if source matches blacklist domain (or subdomain thereof), FAIL
-    const blacklistMatch = matchPartsAgainstList(source, this.blacklist)
-    if (blacklistMatch) return { type: 'blacklist', result: true }
-
-    if (this.tolerance > 0) {
-      // check if near-match of whitelist domain, FAIL
-      let fuzzyForm = domainPartsToFuzzyForm(source)
-      // strip www
-      fuzzyForm = fuzzyForm.replace('www.', '')
-      // check against fuzzylist
-      const levenshteinMatched = this.fuzzylist.find((targetParts) => {
-        const fuzzyTarget = domainPartsToFuzzyForm(targetParts)
-        const distance = levenshtein.get(fuzzyForm, fuzzyTarget)
-        return distance <= this.tolerance
-      })
-      if (levenshteinMatched) {
-        const match = domainPartsToDomain(levenshteinMatched)
-        return { type: 'fuzzy', result: true, match }
+    for (const { allowlist, name, version } of this.configs) {
+      // if source matches whitelist domain (or subdomain thereof), PASS
+      const whitelistMatch = matchPartsAgainstList(source, allowlist)
+      if (whitelistMatch) return { name, result: false, type: 'allowlist', version }
+    }
+
+    for (const { blocklist, fuzzylist, name, tolerance, version } of this.configs) {
+      // if source matches blacklist domain (or subdomain thereof), FAIL
+      const blacklistMatch = matchPartsAgainstList(source, blocklist)
+      if (blacklistMatch) return { name, result: true, type: 'blocklist', version }
+
+      if (tolerance > 0) {
+        // check if near-match of whitelist domain, FAIL
+        let fuzzyForm = domainPartsToFuzzyForm(source)
+        // strip www
+        fuzzyForm = fuzzyForm.replace('www.', '')
+        // check against fuzzylist
+        const levenshteinMatched = fuzzylist.find((targetParts) => {
+          const fuzzyTarget = domainPartsToFuzzyForm(targetParts)
+          const distance = levenshtein.get(fuzzyForm, fuzzyTarget)
+          return distance <= tolerance
+        })
+        if (levenshteinMatched) {
+          const match = domainPartsToDomain(levenshteinMatched)
+          return { name, match, result: true, type: 'fuzzy', version }
+        }
       }
     }
 
     // matched nothing, PASS
-    return { type: 'all', result: false }
+    return { result: false, type: 'all' }
   }
 
 }
@@ -52,12 +117,52 @@ module.exports = PhishingDetector
 
 // util
 
+function processConfigs(configs = []) {
+  return configs.map((config) => {
+    validateConfig(config)
+    return Object.assign({}, config, {
+      allowlist: processDomainList(config.allowlist || []),
+      blocklist: processDomainList(config.blocklist || []),
+      fuzzylist: processDomainList(config.fuzzylist || []),
+      tolerance: ('tolerance' in config) ? config.tolerance : DEFAULT_TOLERANCE
+    })
+  });
+}
+
+function validateConfig(config) {
+  if (config === null || typeof config !== 'object') {
+    throw new Error('Invalid config')
+  }
+
+  if (config.tolerance && !config.fuzzylist) {
+    throw new Error('Fuzzylist tolerance provided without fuzzylist')
+  }
+
+  if (
+    typeof config.name !== 'string' ||
+    config.name === ''
+  ) {
+    throw new Error("Invalid config parameter: 'name'")
+  }
+
+  if (
+    !['number', 'string'].includes(typeof config.version) ||
+    config.version === ''
+  ) {
+    throw new Error("Invalid config parameter: 'version'")
+  }
+}
+
 function processDomainList (list) {
   return list.map(domainToParts)
 }
 
 function domainToParts (domain) {
+  try {
   return domain.split('.').reverse()
+  } catch (e) {
+    throw new Error(JSON.stringify(domain))
+  }
 }
 
 function domainPartsToDomain(domainParts) {
@@ -80,4 +185,4 @@ function matchPartsAgainstList(source, list) {
     // source matches target or (is deeper subdomain)
     return target.every((part, index) => source[index] === part)
   })
-}
+}
diff --git a/test/index.js b/test/index.js

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "eth-phishing-detect",`
`3`		`- "version": "1.1.16",`
	`3`	`+ "version": "1.2.0",`
`4`	`4`	`"description": "Utility for detecting phishing domains targeting Ethereum users",`
`5`	`5`	`"main": "src/index.js",`
`6`	`6`	`"scripts": {`