diff --git a/.esformatter b/.esformatter deleted file mode 100644 index a53a9ecb8..000000000 --- a/.esformatter +++ /dev/null @@ -1,33 +0,0 @@ -{ - "plugins": [ - "esformatter-quotes", - "esformatter-braces", - "esformatter-semicolons" - ], - "quotes": { - "type": "single", - "avoidEscape": false - }, - "whiteSpace": { - "before": { - "ParameterList": -1, - "ParameterComma": -1, - "FunctionDeclarationOpeningBrace": -1, - "FunctionDeclarationClosingBrace": -1, - "ForStatementExpressionOpening": -1 - }, - "after": { - "FunctionName": -1, - "ParameterComma": 1, - "FunctionReservedWord": -1, - "ParameterList": -1, - "FunctionDeclarationOpeningBrace": -1, - "PropertyName": -1 - } - }, - "lineBreak": { - "before": { - "EndOfFile": 1 - } - } -} diff --git a/.eslintrc b/.eslintrc index edd2566d8..a618f3642 100644 --- a/.eslintrc +++ b/.eslintrc @@ -1,6 +1,7 @@ { "env": { - "es6": true + "node": true, + "browser": true }, "parserOptions": { "ecmaVersion": 6, @@ -11,7 +12,7 @@ }, "rules": { "no-cond-assign": 2, - "no-var": 0, + "no-var": 1, "prefer-const": 0, "no-extra-parens": 0, "no-dupe-keys": 2, @@ -34,6 +35,7 @@ "no-octal-escape": 2, "no-constant-condition": 1, "no-unused-expressions": 2, - "no-undefined": 0 + "no-undefined": 0, + "no-undef": 2 } } diff --git a/.gitignore b/.gitignore index 383db7e1f..8b9247097 100644 --- a/.gitignore +++ b/.gitignore @@ -1,24 +1,6 @@ node_modules/ coverage/ -scripts/bower/bower-* -bower_components/ -tmp/ -viz/ - .DS_Store -.jshintrc .env .nyc_output/ - -*.iml -*.log -*.stackdump -*.swp -*~ -compromise.js.tmproj -compromise.xcodeproj -npm-debug.log -pennTreebank.js -compiled_tests.js -package-lock.json -coverage.lcov +.vscode \ No newline at end of file diff --git a/.npmignore b/.npmignore index 7eed469bb..ed6ed6eea 100644 --- a/.npmignore +++ b/.npmignore @@ -1,14 +1,12 @@ data demo +plugins scripts src -test -.esformatter +tests .eslintrc -.babelrc .gitignore -TODO.MD -viz changelog.md -fixme.md +hmm.md +rollup.config.js scratch.js 
diff --git a/LICENSE b/LICENSE index 324efbb63..6e3ff28f5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ -MIT License +The MIT License (MIT) -Copyright (c) 2017 compromise +Copyright (c) 2019 Spencer Kelly Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index ecfb92de7..c9a905674 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,23 @@
npm install compromise
-
- ![]() |
-
-
- ![]() |
-
+ Welcome to v12! - Release Notes here 👍
+
+- - <script src> - - - | -
- 🙏
-
- npm install compromise
-
- |
-
-
-
-
- 86%
-
-
-
- on the Penn treebank
-
- |
-
- IE9+
-
- caniuse, youbetcha
-
- |
-
-
-
- Tutorial #1
-
-
-
- Input → output
-
- |
-
-
-
- Tutorial #2
-
-
-
- Match & transform
-
- |
-
-
-
- Tutorial #3
-
-
-
- Making a bot
-
- |
-
-
-
- API
-
-
- |
-
-
-
- Full Tagset
-
-
- |
-
-
-
- Plugins
-
-
- |
-
-
-
- Outputs
-
-
- |
-
-
-
- Match Syntax
-
-
- |
-
-
-
- nouns! verbs! adjectives!
-
- |
-
-
-
- people, places, organizations
-
- |
-
-
-
- seven hundred and fifty == 750
-
- |
-
-
-
- like a regex for a sentence
-
- |
-
-
-
- all your base are belong
-
- |
-
-
-
- case, whitespace, contractions..
-
- |
-
-
- ![]() Twitter
-
- |
-
-
- ![]() Gitter chat
-
- |
-
-
- ![]() Stackoverflow
-
- |
-
-
- ![]() Projects
-
- |
-
-
- ![]() Pull-requests
-
- |
-
nlp('the koala eats/shoots/leaves').has('koala leaves') //false
+
+- **inter-sentence match:**
+ By default, sentences are the top-level abstraction.
+ Inter-sentence, or multi-sentence matches aren't supported:
+ nlp("that's it. Back to Winnipeg!").has('it back')//false
+
+- **nested match syntax:**
+ the doc.match('(modern (major|minor))? general')
+ complex matches must be achieved with successive **.match()** statements.
+
+- **dependency parsing:**
+ Proper sentence transformation requires understanding the [syntax tree](https://en.wikipedia.org/wiki/Parse_tree) of a sentence, which we don't currently do.
+ We should! Help wanted with this.
+
+##### FAQ
+
☂️ Isn't javascript too...
- yeah!
+ yeah it is!
- it wasn't built to compete with the stanford tagger, and may not fit every project.
+ it wasn't built to compete with NLTK, and may not fit every project.
- string stuff is synchronous too, and parallelizing is weird.
+ string processing is synchronous too, and parallelizing node processes is weird.
- See here for information about speed & performance, and
- here> for project motivations
+ See here for information about speed & performance, and
+ here for project motivations
🌎 Other Languages?
+ 🌎 Compromise in other Languages?
npm install compromise-adjectives
+npm install compromise-dates
+npm install compromise-numbers
+npm install compromise-ngrams
+npm install compromise-numbers
+npm install compromise-output
++ The Children are right to laugh at you, Ralph ++*/ +``` + +### .hash() + +this hash function incorporates the term pos-tags, and whitespace, so that tagging or normalizing the document will change the hash. + +Md5 is not considered a very-secure hash, so heads-up if you're doing some top-secret work. + +It can though, be used successfully to compare two documents, without looping through tags: + +```js +let docA = nlp('hello there') +let docB = nlp('hello there') +console.log(docA.hash() === docB.hash()) +// true + +docB.match('hello').tag('Greeting') +console.log(docA.hash() === docB.hash()) +// false +``` + +if you're looking for insensitivity to punctuation, or case, you can normalize or transform your document before making the hash. + +```js +let doc = nlp(`He isn't... working `) +doc.normalize({ + case: true, + punctuation: true, + contractions: true, +}) + +nlp('he is not working').hash() === doc.hash() +// true +``` + +### .html({segments}, {options}) + +this turns the document into easily-to-display html output. + +Special html characters within the document get escaped, in a simple way. Be extra careful when rendering untrusted input, against XSS and other forms of sneaky-html. This library is not considered a battle-tested guard against these security vulnerabilities. + +```js +let doc = nlp('i <3 you') +doc.html() +//
made by Spencer Kelly+``` + +The library uses `.segment()` method, which is [documented here](https://observablehq.com/@spencermountain/compromise-split). + +by default, whitespace and punctuation are _outside_ the html tag. This is sometimes awkward, and not-ideal. + +the method returns html-strings by default, but the library uses [Jason Miller's htm library](https://github.com/developit/htm) so you can return React Components, or anything: + +```js +doc.html( + {}, + { + bind: React.createElement, + } +) +``` + +MIT diff --git a/plugins/output/builds/compromise-output.js b/plugins/output/builds/compromise-output.js new file mode 100644 index 000000000..2036d685d --- /dev/null +++ b/plugins/output/builds/compromise-output.js @@ -0,0 +1,401 @@ +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() : + typeof define === 'function' && define.amd ? define(factory) : + (global = global || self, global.compromiseOutput = factory()); +}(this, (function () { 'use strict'; + + function _typeof(obj) { + if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { + _typeof = function (obj) { + return typeof obj; + }; + } else { + _typeof = function (obj) { + return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; + }; + } + + return _typeof(obj); + } + + function _taggedTemplateLiteral(strings, raw) { + if (!raw) { + raw = strings.slice(0); + } + + return Object.freeze(Object.defineProperties(strings, { + raw: { + value: Object.freeze(raw) + } + })); + } + + var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; + + function unwrapExports (x) { + return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? 
x['default'] : x; + } + + function createCommonjsModule(fn, module) { + return module = { exports: {} }, fn(module, module.exports), module.exports; + } + + function getCjsExportFromNamespace (n) { + return n && n['default'] || n; + } + + var lib = createCommonjsModule(function (module, exports) { + !function (r, n) { + module.exports = n(); + }("undefined" != typeof self ? self : commonjsGlobal, function () { + return function (r) { + var n = {}; + + function e(t) { + if (n[t]) return n[t].exports; + var o = n[t] = { + i: t, + l: !1, + exports: {} + }; + return r[t].call(o.exports, o, o.exports, e), o.l = !0, o.exports; + } + + return e.m = r, e.c = n, e.d = function (r, n, t) { + e.o(r, n) || Object.defineProperty(r, n, { + enumerable: !0, + get: t + }); + }, e.r = function (r) { + "undefined" != typeof Symbol && Symbol.toStringTag && Object.defineProperty(r, Symbol.toStringTag, { + value: "Module" + }), Object.defineProperty(r, "__esModule", { + value: !0 + }); + }, e.t = function (r, n) { + if (1 & n && (r = e(r)), 8 & n) return r; + if (4 & n && "object" == _typeof(r) && r && r.__esModule) return r; + var t = Object.create(null); + if (e.r(t), Object.defineProperty(t, "default", { + enumerable: !0, + value: r + }), 2 & n && "string" != typeof r) for (var o in r) { + e.d(t, o, function (n) { + return r[n]; + }.bind(null, o)); + } + return t; + }, e.n = function (r) { + var n = r && r.__esModule ? 
function () { + return r["default"]; + } : function () { + return r; + }; + return e.d(n, "a", n), n; + }, e.o = function (r, n) { + return Object.prototype.hasOwnProperty.call(r, n); + }, e.p = "", e(e.s = 0); + }([function (r, n, e) { + + e.r(n); + var t = "0123456789abcdef".split(""); + + var o = function o(r) { + for (var n = "", e = 0; e < 4; e++) { + n += t[r >> 8 * e + 4 & 15] + t[r >> 8 * e & 15]; + } + + return n; + }; + + var u = function u(r) { + for (var n = r.length, e = 0; e < n; e++) { + r[e] = o(r[e]); + } + + return r.join(""); + }; + + var f = function f(r, n) { + return r + n & 4294967295; + }; + + var i = function i(r, n, e, t, o, u, _i) { + return function (r, n, e) { + return f(r << n | r >>> 32 - n, e); + }(n = function (r, n, e, t) { + return n = f(f(n, r), f(e, t)); + }(r, n, t, u), o, e); + }; + + var a = function a(r, n, e, t, o, u, f, _a) { + return i(e & t | ~e & o, n, e, u, f, _a); + }; + + var c = function c(r, n, e, t, o, u, f, a) { + return i(e & o | t & ~o, n, e, u, f, a); + }; + + var l = function l(r, n, e, t, o, u, f, a) { + return i(e ^ t ^ o, n, e, u, f, a); + }; + + var d = function d(r, n, e, t, o, u, f, a) { + return i(t ^ (e | ~o), n, e, u, f, a); + }; + + var v = function v(r, n, e) { + void 0 === e && (e = f); + var t = r[0], + o = r[1], + u = r[2], + i = r[3], + v = a.bind(null, e); + t = v(t, o, u, i, n[0], 7, -680876936), i = v(i, t, o, u, n[1], 12, -389564586), u = v(u, i, t, o, n[2], 17, 606105819), o = v(o, u, i, t, n[3], 22, -1044525330), t = v(t, o, u, i, n[4], 7, -176418897), i = v(i, t, o, u, n[5], 12, 1200080426), u = v(u, i, t, o, n[6], 17, -1473231341), o = v(o, u, i, t, n[7], 22, -45705983), t = v(t, o, u, i, n[8], 7, 1770035416), i = v(i, t, o, u, n[9], 12, -1958414417), u = v(u, i, t, o, n[10], 17, -42063), o = v(o, u, i, t, n[11], 22, -1990404162), t = v(t, o, u, i, n[12], 7, 1804603682), i = v(i, t, o, u, n[13], 12, -40341101), u = v(u, i, t, o, n[14], 17, -1502002290), o = v(o, u, i, t, n[15], 22, 
1236535329); + var s = c.bind(null, e); + t = s(t, o, u, i, n[1], 5, -165796510), i = s(i, t, o, u, n[6], 9, -1069501632), u = s(u, i, t, o, n[11], 14, 643717713), o = s(o, u, i, t, n[0], 20, -373897302), t = s(t, o, u, i, n[5], 5, -701558691), i = s(i, t, o, u, n[10], 9, 38016083), u = s(u, i, t, o, n[15], 14, -660478335), o = s(o, u, i, t, n[4], 20, -405537848), t = s(t, o, u, i, n[9], 5, 568446438), i = s(i, t, o, u, n[14], 9, -1019803690), u = s(u, i, t, o, n[3], 14, -187363961), o = s(o, u, i, t, n[8], 20, 1163531501), t = s(t, o, u, i, n[13], 5, -1444681467), i = s(i, t, o, u, n[2], 9, -51403784), u = s(u, i, t, o, n[7], 14, 1735328473), o = s(o, u, i, t, n[12], 20, -1926607734); + var b = l.bind(null, e); + t = b(t, o, u, i, n[5], 4, -378558), i = b(i, t, o, u, n[8], 11, -2022574463), u = b(u, i, t, o, n[11], 16, 1839030562), o = b(o, u, i, t, n[14], 23, -35309556), t = b(t, o, u, i, n[1], 4, -1530992060), i = b(i, t, o, u, n[4], 11, 1272893353), u = b(u, i, t, o, n[7], 16, -155497632), o = b(o, u, i, t, n[10], 23, -1094730640), t = b(t, o, u, i, n[13], 4, 681279174), i = b(i, t, o, u, n[0], 11, -358537222), u = b(u, i, t, o, n[3], 16, -722521979), o = b(o, u, i, t, n[6], 23, 76029189), t = b(t, o, u, i, n[9], 4, -640364487), i = b(i, t, o, u, n[12], 11, -421815835), u = b(u, i, t, o, n[15], 16, 530742520), o = b(o, u, i, t, n[2], 23, -995338651); + var p = d.bind(null, e); + t = p(t, o, u, i, n[0], 6, -198630844), i = p(i, t, o, u, n[7], 10, 1126891415), u = p(u, i, t, o, n[14], 15, -1416354905), o = p(o, u, i, t, n[5], 21, -57434055), t = p(t, o, u, i, n[12], 6, 1700485571), i = p(i, t, o, u, n[3], 10, -1894986606), u = p(u, i, t, o, n[10], 15, -1051523), o = p(o, u, i, t, n[1], 21, -2054922799), t = p(t, o, u, i, n[8], 6, 1873313359), i = p(i, t, o, u, n[15], 10, -30611744), u = p(u, i, t, o, n[6], 15, -1560198380), o = p(o, u, i, t, n[13], 21, 1309151649), t = p(t, o, u, i, n[4], 6, -145523070), i = p(i, t, o, u, n[11], 10, -1120210379), u = p(u, i, t, 
o, n[2], 15, 718787259), o = p(o, u, i, t, n[9], 21, -343485551), r[0] = e(t, r[0]), r[1] = e(o, r[1]), r[2] = e(u, r[2]), r[3] = e(i, r[3]); + }; + + var s = function s(r) { + for (var n = [], e = 0; e < 64; e += 4) { + n[e >> 2] = r.charCodeAt(e) + (r.charCodeAt(e + 1) << 8) + (r.charCodeAt(e + 2) << 16) + (r.charCodeAt(e + 3) << 24); + } + + return n; + }; + + var b = function b(r, n) { + var e, + t = r.length, + o = [1732584193, -271733879, -1732584194, 271733878]; + + for (e = 64; e <= t; e += 64) { + v(o, s(r.substring(e - 64, e)), n); + } + + var u = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + f = (r = r.substring(e - 64)).length; + + for (e = 0; e < f; e++) { + u[e >> 2] |= r.charCodeAt(e) << (e % 4 << 3); + } + + if (u[e >> 2] |= 128 << (e % 4 << 3), e > 55) for (v(o, u, n), e = 16; e--;) { + u[e] = 0; + } + return u[14] = 8 * t, v(o, u, n), o; + }; + + function p(r) { + var n; + return "5d41402abc4b2a76b9719d911017c592" !== u(b("hello")) && (n = function n(r, _n) { + var e = (65535 & r) + (65535 & _n); + return (r >> 16) + (_n >> 16) + (e >> 16) << 16 | 65535 & e; + }), u(b(r, n)); + } + + e.d(n, "md5", function () { + return p; + }); + }]); + }); + }); + unwrapExports(lib); + + var md5 = lib.md5; + + var makeHash = function makeHash(doc) { + var str = doc.text(); + doc.list.forEach(function (p) { + p.terms().forEach(function (t) { + str += t.pre + (t.implicit || t.text) + t.post; + str += Object.keys(t.tags).join(''); + }); + }); + return md5(str); + }; + + var hash = makeHash; + + var n = function n(t, r, u, e) { + for (var p = 1; p < r.length; p++) { + var s = r[p], + h = "number" == typeof s ? u[s] : s, + a = r[++p]; + 1 === a ? e[0] = h : 3 === a ? e[1] = Object.assign(e[1] || {}, h) : 5 === a ? (e[1] = e[1] || {})[r[++p]] = h : 6 === a ? e[1][r[++p]] += h + "" : e.push(a ? 
t.apply(null, n(t, h, u, ["", null])) : h); + } + + return e; + }, + t = function t(n) { + for (var t, r, u = 1, e = "", p = "", s = [0], h = function h(n) { + 1 === u && (n || (e = e.replace(/^\s*\n\s*|\s*\n\s*$/g, ""))) ? s.push(n || e, 0) : 3 === u && (n || e) ? (s.push(n || e, 1), u = 2) : 2 === u && "..." === e && n ? s.push(n, 3) : 2 === u && e && !n ? s.push(!0, 5, e) : u >= 5 && ((e || !n && 5 === u) && (s.push(e, u, r), u = 6), n && (s.push(n, u, r), u = 6)), e = ""; + }, a = 0; a < n.length; a++) { + a && (1 === u && h(), h(a)); + + for (var f = 0; f < n[a].length; f++) { + t = n[a][f], 1 === u ? "<" === t ? (h(), s = [s], u = 3) : e += t : 4 === u ? "--" === e && ">" === t ? (u = 1, e = "") : e = t + e[0] : p ? t === p ? p = "" : e += t : '"' === t || "'" === t ? p = t : ">" === t ? (h(), u = 1) : u && ("=" === t ? (u = 5, r = e, e = "") : "/" === t && (u < 5 || ">" === n[a][f + 1]) ? (h(), 3 === u && (s = s[0]), u = s, (s = s[0]).push(u, 2), u = 0) : " " === t || "\t" === t || "\n" === t || "\r" === t ? (h(), u = 2) : e += t), 3 === u && "!--" === e && (u = 4, s = s[0]); + } + } + + return h(), s; + }, + r = "function" == typeof Map, + u = r ? new Map() : {}, + e = r ? function (n) { + var r = u.get(n); + return r || u.set(n, r = t(n)), r; + } : function (n) { + for (var r = "", e = 0; e < n.length; e++) { + r += n[e].length + "-" + n[e]; + } + + return u[r] || (u[r] = t(n)); + }; + + function htm_module (t) { + var r = n(this, e(t), arguments, []); + return r.length > 1 ? 
r : r[0]; + } + + var htm_module$1 = /*#__PURE__*/Object.freeze({ + __proto__: null, + 'default': htm_module + }); + + var vhtml = createCommonjsModule(function (module, exports) { + (function (global, factory) { + module.exports = factory() ; + })(commonjsGlobal, function () { + + var emptyTags = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']; + + var esc = function esc(str) { + return String(str).replace(/[&<>"']/g, function (s) { + return '&' + map[s] + ';'; + }); + }; + + var map = { + '&': 'amp', + '<': 'lt', + '>': 'gt', + '"': 'quot', + "'": 'apos' + }; + var sanitized = {}; + + function h(name, attrs) { + var stack = []; + + for (var i = arguments.length; i-- > 2;) { + stack.push(arguments[i]); + } + + if (typeof name === 'function') { + (attrs || (attrs = {})).children = stack.reverse(); + return name(attrs); + } + + var s = '<' + name; + if (attrs) for (var _i in attrs) { + if (attrs[_i] !== false && attrs[_i] != null) { + s += ' ' + esc(_i) + '="' + esc(attrs[_i]) + '"'; + } + } + + if (emptyTags.indexOf(name) === -1) { + s += '>'; + + while (stack.length) { + var child = stack.pop(); + + if (child) { + if (child.pop) { + for (var _i2 = child.length; _i2--;) { + stack.push(child[_i2]); + } + } else { + s += sanitized[child] === true ? child : esc(child); + } + } + } + + s += '' + name + '>'; + } else { + s += '>'; + } + + sanitized[s] = true; + return s; + } + + return h; + }); + }); + + var htm = getCjsExportFromNamespace(htm_module$1); + + function _templateObject2() { + var data = _taggedTemplateLiteral(["
", ""]); + + _templateObject2 = function _templateObject2() { + return data; + }; + + return data; + } + + function _templateObject() { + var data = _taggedTemplateLiteral(["", ""]); + + _templateObject = function _templateObject() { + return data; + }; + + return data; + } + + var toHtml = function toHtml(doc, segments, options) { + var h = htm.bind(vhtml); + + if (options.bind) { + h = htm.bind(options.bind); + } + + var html = []; + var arr = doc.segment(segments); + arr.forEach(function (o) { + var str = h(_templateObject(), o.segment, o.text); + html.push(str); + }); + return h(_templateObject2(), html); + }; + + var html = toHtml; + + var addMethods = function addMethods(Doc) { + /** generate an md5 hash from the document */ + Doc.prototype.hash = function () { + return hash(this); + }; + /** generate sanitized html from the document */ + + + Doc.prototype.html = function () { + var segments = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; + var options = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : {}; + return html(this, segments, options); + }; + }; + + var src = addMethods; + + return src; + +}))); +//# sourceMappingURL=compromise-output.js.map diff --git a/plugins/output/builds/compromise-output.js.map b/plugins/output/builds/compromise-output.js.map new file mode 100644 index 000000000..30622b6d3 --- /dev/null +++ b/plugins/output/builds/compromise-output.js.map @@ -0,0 +1 @@ +{"version":3,"file":"compromise-output.js","sources":["../node_modules/pure-md5/lib/index.js","../src/hash.js","../node_modules/htm/dist/htm.module.js","../src/html.js","../src/index.js"],"sourcesContent":["!function(r,n){if(\"object\"==typeof exports&&\"object\"==typeof module)module.exports=n();else if(\"function\"==typeof define&&define.amd)define([],n);else{var e=n();for(var t in e)(\"object\"==typeof exports?exports:r)[t]=e[t]}}(\"undefined\"!=typeof self?self:this,function(){return function(r){var n={};function e(t){if(n[t])return n[t].exports;var o=n[t]={i:t,l:!1,exports:{}};return r[t].call(o.exports,o,o.exports,e),o.l=!0,o.exports}return e.m=r,e.c=n,e.d=function(r,n,t){e.o(r,n)||Object.defineProperty(r,n,{enumerable:!0,get:t})},e.r=function(r){\"undefined\"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(r,Symbol.toStringTag,{value:\"Module\"}),Object.defineProperty(r,\"__esModule\",{value:!0})},e.t=function(r,n){if(1&n&&(r=e(r)),8&n)return r;if(4&n&&\"object\"==typeof r&&r&&r.__esModule)return r;var t=Object.create(null);if(e.r(t),Object.defineProperty(t,\"default\",{enumerable:!0,value:r}),2&n&&\"string\"!=typeof r)for(var o in r)e.d(t,o,function(n){return r[n]}.bind(null,o));return t},e.n=function(r){var n=r&&r.__esModule?function(){return r.default}:function(){return r};return e.d(n,\"a\",n),n},e.o=function(r,n){return Object.prototype.hasOwnProperty.call(r,n)},e.p=\"\",e(e.s=0)}([function(r,n,e){\"use strict\";e.r(n);var t=\"0123456789abcdef\".split(\"\");var o=function(r){for(var n=\"\",e=0;e<4;e++)n+=t[r>>8*e+4&15]+t[r>>8*e&15];return n};var 
u=function(r){for(var n=r.length,e=0;e
${html}`\n}\nmodule.exports = toHtml\n","const makeHash = require('./hash')\nconst toHtml = require('./html')\n\nconst addMethods = function(Doc) {\n /** generate an md5 hash from the document */\n Doc.prototype.hash = function() {\n return makeHash(this)\n }\n\n /** generate sanitized html from the document */\n Doc.prototype.html = function(segments = {}, options = {}) {\n return toHtml(this, segments, options)\n }\n}\nmodule.exports = addMethods\n"],"names":["r","n","module","self","this","e","t","exports","o","i","l","call","m","c","d","Object","defineProperty","enumerable","get","Symbol","toStringTag","value","__esModule","create","bind","prototype","hasOwnProperty","p","s","split","u","length","join","f","a","v","b","charCodeAt","substring","md5","require$$0","makeHash","doc","str","text","list","forEach","terms","pre","implicit","post","keys","tags","h","assign","push","apply","replace","Map","set","arguments","toHtml","segments","options","htm","vhtml","html","arr","segment","addMethods","Doc","hash"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAAA,GAAC,UAASA,CAAT,EAAWC,CAAX,EAAa;EAAC,IAAqDC,cAAA,GAAeD,CAAC,EAAhB,CAArD;EAA+M,GAA7N,CAA8N,eAAa,OAAOE,IAApB,GAAyBA,IAAzB,GAA8BC,cAA5P,EAAiQ,YAAU;EAAC,WAAO,UAASJ,CAAT,EAAW;EAAC,UAAIC,CAAC,GAAC,EAAN;;EAAS,eAASI,CAAT,CAAWC,CAAX,EAAa;EAAC,YAAGL,CAAC,CAACK,CAAD,CAAJ,EAAQ,OAAOL,CAAC,CAACK,CAAD,CAAD,CAAKC,OAAZ;EAAoB,YAAIC,CAAC,GAACP,CAAC,CAACK,CAAD,CAAD,GAAK;EAACG,UAAAA,CAAC,EAACH,CAAH;EAAKI,UAAAA,CAAC,EAAC,CAAC,CAAR;EAAUH,UAAAA,OAAO,EAAC;EAAlB,SAAX;EAAiC,eAAOP,CAAC,CAACM,CAAD,CAAD,CAAKK,IAAL,CAAUH,CAAC,CAACD,OAAZ,EAAoBC,CAApB,EAAsBA,CAAC,CAACD,OAAxB,EAAgCF,CAAhC,GAAmCG,CAAC,CAACE,CAAF,GAAI,CAAC,CAAxC,EAA0CF,CAAC,CAACD,OAAnD;EAA2D;;EAAA,aAAOF,CAAC,CAACO,CAAF,GAAIZ,CAAJ,EAAMK,CAAC,CAACQ,CAAF,GAAIZ,CAAV,EAAYI,CAAC,CAACS,CAAF,GAAI,UAASd,CAAT,EAAWC,CAAX,EAAaK,CAAb,EAAe;EAACD,QAAAA,CAAC,CAACG,CAAF,CAAIR,CAAJ,EAAMC,CAAN,KAAUc,MAAM,CAACC,cAAP,CAAsBhB,CAAtB,EAAwBC,CAAxB,EAA0B;EAACgB,UAAAA,UAAU,EAAC,CAAC,CAAb;EAAeC
,UAAAA,GAAG,EAACZ;EAAnB,SAA1B,CAAV;EAA2D,OAA3F,EAA4FD,CAAC,CAACL,CAAF,GAAI,UAASA,CAAT,EAAW;EAAC,uBAAa,OAAOmB,MAApB,IAA4BA,MAAM,CAACC,WAAnC,IAAgDL,MAAM,CAACC,cAAP,CAAsBhB,CAAtB,EAAwBmB,MAAM,CAACC,WAA/B,EAA2C;EAACC,UAAAA,KAAK,EAAC;EAAP,SAA3C,CAAhD,EAA6GN,MAAM,CAACC,cAAP,CAAsBhB,CAAtB,EAAwB,YAAxB,EAAqC;EAACqB,UAAAA,KAAK,EAAC,CAAC;EAAR,SAArC,CAA7G;EAA8J,OAA1Q,EAA2QhB,CAAC,CAACC,CAAF,GAAI,UAASN,CAAT,EAAWC,CAAX,EAAa;EAAC,YAAG,IAAEA,CAAF,KAAMD,CAAC,GAACK,CAAC,CAACL,CAAD,CAAT,GAAc,IAAEC,CAAnB,EAAqB,OAAOD,CAAP;EAAS,YAAG,IAAEC,CAAF,IAAK,oBAAiBD,CAAjB,CAAL,IAAyBA,CAAzB,IAA4BA,CAAC,CAACsB,UAAjC,EAA4C,OAAOtB,CAAP;EAAS,YAAIM,CAAC,GAACS,MAAM,CAACQ,MAAP,CAAc,IAAd,CAAN;EAA0B,YAAGlB,CAAC,CAACL,CAAF,CAAIM,CAAJ,GAAOS,MAAM,CAACC,cAAP,CAAsBV,CAAtB,EAAwB,SAAxB,EAAkC;EAACW,UAAAA,UAAU,EAAC,CAAC,CAAb;EAAeI,UAAAA,KAAK,EAACrB;EAArB,SAAlC,CAAP,EAAkE,IAAEC,CAAF,IAAK,YAAU,OAAOD,CAA3F,EAA6F,KAAI,IAAIQ,CAAR,IAAaR,CAAb;EAAeK,UAAAA,CAAC,CAACS,CAAF,CAAIR,CAAJ,EAAME,CAAN,EAAQ,UAASP,CAAT,EAAW;EAAC,mBAAOD,CAAC,CAACC,CAAD,CAAR;EAAY,WAAxB,CAAyBuB,IAAzB,CAA8B,IAA9B,EAAmChB,CAAnC,CAAR;EAAf;EAA8D,eAAOF,CAAP;EAAS,OAA9iB,EAA+iBD,CAAC,CAACJ,CAAF,GAAI,UAASD,CAAT,EAAW;EAAC,YAAIC,CAAC,GAACD,CAAC,IAAEA,CAAC,CAACsB,UAAL,GAAgB,YAAU;EAAC,iBAAOtB,CAAC,WAAR;EAAiB,SAA5C,GAA6C,YAAU;EAAC,iBAAOA,CAAP;EAAS,SAAvE;EAAwE,eAAOK,CAAC,CAACS,CAAF,CAAIb,CAAJ,EAAM,GAAN,EAAUA,CAAV,GAAaA,CAApB;EAAsB,OAA7pB,EAA8pBI,CAAC,CAACG,CAAF,GAAI,UAASR,CAAT,EAAWC,CAAX,EAAa;EAAC,eAAOc,MAAM,CAACU,SAAP,CAAiBC,cAAjB,CAAgCf,IAAhC,CAAqCX,CAArC,EAAuCC,CAAvC,CAAP;EAAiD,OAAjuB,EAAkuBI,CAAC,CAACsB,CAAF,GAAI,EAAtuB,EAAyuBtB,CAAC,CAACA,CAAC,CAACuB,CAAF,GAAI,CAAL,CAAjvB;EAAyvB,KAAp5B,CAAq5B,CAAC,UAAS5B,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAe;AAAC;EAAaA,MAAAA,CAAC,CAACL,CAAF,CAAIC,CAAJ;EAAO,UAAIK,CAAC,GAAC,mBAAmBuB,KAAnB,CAAyB,EAAzB,CAAN;;EAAmC,UAAIrB,CAAC,GAAC,SAAFA,CAAE,CAASR,CAAT,EAAW;EAAC,aAAI,IAAIC,CAAC,GAAC,EAAN,EAASI,CAAC,GAAC,CAAf,EAAiBA,CAAC,GAAC,CAAnB,EAAqBA,CAAC,EAAtB;EAAyBJ,UAAAA,CAAC,IAAEK,CAAC,CAACN,CAAC,IAAE,IAAEK,CAAF,GAAI,CAAP,GAAS,EAAV,CAAD,GAAeC,CAAC,
CAACN,CAAC,IAAE,IAAEK,CAAL,GAAO,EAAR,CAAnB;EAAzB;;EAAwD,eAAOJ,CAAP;EAAS,OAAnF;;EAAoF,UAAI6B,CAAC,GAAC,SAAFA,CAAE,CAAS9B,CAAT,EAAW;EAAC,aAAI,IAAIC,CAAC,GAACD,CAAC,CAAC+B,MAAR,EAAe1B,CAAC,GAAC,CAArB,EAAuBA,CAAC,GAACJ,CAAzB,EAA2BI,CAAC,EAA5B;EAA+BL,UAAAA,CAAC,CAACK,CAAD,CAAD,GAAKG,CAAC,CAACR,CAAC,CAACK,CAAD,CAAF,CAAN;EAA/B;;EAA4C,eAAOL,CAAC,CAACgC,IAAF,CAAO,EAAP,CAAP;EAAkB,OAAhF;;EAAiF,UAAIC,CAAC,GAAC,SAAFA,CAAE,CAASjC,CAAT,EAAWC,CAAX,EAAa;EAAC,eAAOD,CAAC,GAACC,CAAF,GAAI,UAAX;EAAsB,OAA1C;;EAA2C,UAAIQ,CAAC,GAAC,WAAST,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAeC,CAAf,EAAiBE,CAAjB,EAAmBsB,CAAnB,EAAqBrB,EAArB,EAAuB;EAAC,eAAO,UAAST,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAe;EAAC,iBAAO4B,CAAC,CAACjC,CAAC,IAAEC,CAAH,GAAKD,CAAC,KAAG,KAAGC,CAAb,EAAeI,CAAf,CAAR;EAA0B,SAA1C,CAA2CJ,CAAC,GAAC,UAASD,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAeC,CAAf,EAAiB;EAAC,iBAAOL,CAAC,GAACgC,CAAC,CAACA,CAAC,CAAChC,CAAD,EAAGD,CAAH,CAAF,EAAQiC,CAAC,CAAC5B,CAAD,EAAGC,CAAH,CAAT,CAAV;EAA0B,SAA5C,CAA6CN,CAA7C,EAA+CC,CAA/C,EAAiDK,CAAjD,EAAmDwB,CAAnD,CAA7C,EAAmGtB,CAAnG,EAAqGH,CAArG,CAAP;EAA+G,OAA7I;;EAA8I,UAAI6B,CAAC,GAAC,WAASlC,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAeC,CAAf,EAAiBE,CAAjB,EAAmBsB,CAAnB,EAAqBG,CAArB,EAAuBC,EAAvB,EAAyB;EAAC,eAAOzB,CAAC,CAACJ,CAAC,GAACC,CAAF,GAAI,CAACD,CAAD,GAAGG,CAAR,EAAUP,CAAV,EAAYI,CAAZ,EAAcyB,CAAd,EAAgBG,CAAhB,EAAkBC,EAAlB,AAAA,CAAR;EAA+B,OAA/D;;EAAgE,UAAIrB,CAAC,GAAC,SAAFA,CAAE,CAASb,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAeC,CAAf,EAAiBE,CAAjB,EAAmBsB,CAAnB,EAAqBG,CAArB,EAAuBC,CAAvB,EAAyB;EAAC,eAAOzB,CAAC,CAACJ,CAAC,GAACG,CAAF,GAAIF,CAAC,GAAC,CAACE,CAAR,EAAUP,CAAV,EAAYI,CAAZ,EAAcyB,CAAd,EAAgBG,CAAhB,EAAkBC,CAAlB,AAAA,CAAR;EAA+B,OAA/D;;EAAgE,UAAIxB,CAAC,GAAC,SAAFA,CAAE,CAASV,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAeC,CAAf,EAAiBE,CAAjB,EAAmBsB,CAAnB,EAAqBG,CAArB,EAAuBC,CAAvB,EAAyB;EAAC,eAAOzB,CAAC,CAACJ,CAAC,GAACC,CAAF,GAAIE,CAAL,EAAOP,CAAP,EAASI,CAAT,EAAWyB,CAAX,EAAaG,CAAb,EAAeC,CAAf,AAAA,CAAR;EAA4B,OAA5D;;EAA6D,UAAIpB,CAAC,GAAC,SAAFA,CAAE,CAASd,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAeC,CAAf,EAAiBE,CAAjB,EAAmBsB,CAAnB,EAAqBG,CAArB,EAAuBC,CAA
vB,EAAyB;EAAC,eAAOzB,CAAC,CAACH,CAAC,IAAED,CAAC,GAAC,CAACG,CAAL,CAAF,EAAUP,CAAV,EAAYI,CAAZ,EAAcyB,CAAd,EAAgBG,CAAhB,EAAkBC,CAAlB,AAAA,CAAR;EAA+B,OAA/D;;EAAgE,UAAIC,CAAC,GAAC,WAASnC,CAAT,EAAWC,CAAX,EAAaI,CAAb,EAAe;EAAC,aAAK,CAAL,KAASA,CAAT,KAAaA,CAAC,GAAC4B,CAAf;EAAkB,YAAI3B,CAAC,GAACN,CAAC,CAAC,CAAD,CAAP;EAAA,YAAWQ,CAAC,GAACR,CAAC,CAAC,CAAD,CAAd;EAAA,YAAkB8B,CAAC,GAAC9B,CAAC,CAAC,CAAD,CAArB;EAAA,YAAyBS,CAAC,GAACT,CAAC,CAAC,CAAD,CAA5B;EAAA,YAAgCmC,CAAC,GAACD,CAAC,CAACV,IAAF,CAAO,IAAP,EAAYnB,CAAZ,CAAlC;EAAiDC,QAAAA,CAAC,GAAC6B,CAAC,CAAC7B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAAH,EAA+BQ,CAAC,GAAC0B,CAAC,CAAC1B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAlC,EAA+D6B,CAAC,GAACK,CAAC,CAACL,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,SAAjB,CAAlE,EAA8FO,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAAjG,EAA+HK,CAAC,GAAC6B,CAAC,CAAC7B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAAlI,EAA8JQ,CAAC,GAAC0B,CAAC,CAAC1B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,UAAjB,CAAjK,EAA8L6B,CAAC,GAACK,CAAC,CAACL,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAAjM,EAA+NO,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,QAAlB,CAAlO,EAA8PK,CAAC,GAAC6B,CAAC,CAAC7B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,UAAhB,CAAjQ,EAA6RQ,CAAC,GAAC0B,CAAC,CAAC1B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAAhS,EAA8T6B,CAAC,GAACK,CAAC,CAACL,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,KAAnB,CAAjU,EAA2VO,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAA
C,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,UAAnB,CAA9V,EAA6XK,CAAC,GAAC6B,CAAC,CAAC7B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,EAAD,CAAV,EAAe,CAAf,EAAiB,UAAjB,CAAhY,EAA6ZQ,CAAC,GAAC0B,CAAC,CAAC1B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,QAAnB,CAAha,EAA6b6B,CAAC,GAACK,CAAC,CAACL,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,UAAnB,CAAhc,EAA+dO,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,UAAlB,CAAle;EAAggB,YAAI2B,CAAC,GAACf,CAAC,CAACW,IAAF,CAAO,IAAP,EAAYnB,CAAZ,CAAN;EAAqBC,QAAAA,CAAC,GAACsB,CAAC,CAACtB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAAH,EAA+BQ,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,UAAjB,CAAlC,EAA+D6B,CAAC,GAACF,CAAC,CAACE,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,SAAlB,CAAlE,EAA+FO,CAAC,GAACoB,CAAC,CAACpB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAlG,EAA+HK,CAAC,GAACsB,CAAC,CAACtB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAAlI,EAA8JQ,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,EAAD,CAAV,EAAe,CAAf,EAAiB,QAAjB,CAAjK,EAA4L6B,CAAC,GAACF,CAAC,CAACE,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,SAAnB,CAA/L,EAA6NO,CAAC,GAACoB,CAAC,CAACpB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAhO,EAA6PK,CAAC,GAACsB,CAAC,CAACtB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,SAAhB,CAAhQ,EAA2RQ,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,EAAD,CAAV,EAAe,CAAf,EAAiB,CAAC,UAAlB,CAA9R,EAA4T6B,CAAC,GAACF,CAAC,CAACE,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAA
D,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAA/T,EAA4VO,CAAC,GAACoB,CAAC,CAACpB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,UAAjB,CAA/V,EAA4XK,CAAC,GAACsB,CAAC,CAACtB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,EAAD,CAAV,EAAe,CAAf,EAAiB,CAAC,UAAlB,CAA/X,EAA6ZQ,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,QAAjB,CAAha,EAA2b6B,CAAC,GAACF,CAAC,CAACE,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,UAAjB,CAA9b,EAA2dO,CAAC,GAACoB,CAAC,CAACpB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,UAAnB,CAA9d;EAA6f,YAAImC,CAAC,GAAC1B,CAAC,CAACc,IAAF,CAAO,IAAP,EAAYnB,CAAZ,CAAN;EAAqBC,QAAAA,CAAC,GAAC8B,CAAC,CAAC9B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,MAAjB,CAAH,EAA4BQ,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAA/B,EAA6D6B,CAAC,GAACM,CAAC,CAACN,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,UAAlB,CAAhE,EAA8FO,CAAC,GAAC4B,CAAC,CAAC5B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,QAAnB,CAAjG,EAA8HK,CAAC,GAAC8B,CAAC,CAAC9B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,UAAjB,CAAjI,EAA8JQ,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,UAAjB,CAAjK,EAA8L6B,CAAC,GAACM,CAAC,CAACN,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAjM,EAA8NO,CAAC,GAAC4B,CAAC,CAAC5B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,UAAnB,CAAjO,EAAgQK,CAAC,GAAC8B,CAAC,CAAC9B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,EAAD,CAAV,EAAe,CAAf,EAAiB,SAAjB,CAAnQ,EAA+RQ,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,E
AAc,EAAd,EAAiB,CAAC,SAAlB,CAAlS,EAA+T6B,CAAC,GAACM,CAAC,CAACN,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAlU,EAA+VO,CAAC,GAAC4B,CAAC,CAAC5B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,QAAjB,CAAlW,EAA6XK,CAAC,GAAC8B,CAAC,CAAC9B,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAAhY,EAA4ZQ,CAAC,GAAC2B,CAAC,CAAC3B,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,SAAnB,CAA/Z,EAA6b6B,CAAC,GAACM,CAAC,CAACN,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,SAAlB,CAAhc,EAA6dO,CAAC,GAAC4B,CAAC,CAAC5B,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAhe;EAA6f,YAAI0B,CAAC,GAACb,CAAC,CAACU,IAAF,CAAO,IAAP,EAAYnB,CAAZ,CAAN;EAAqBC,QAAAA,CAAC,GAACqB,CAAC,CAACrB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAAH,EAA+BQ,CAAC,GAACkB,CAAC,CAAClB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,UAAjB,CAAlC,EAA+D6B,CAAC,GAACH,CAAC,CAACG,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,UAAnB,CAAlE,EAAiGO,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,QAAlB,CAApG,EAAgIK,CAAC,GAACqB,CAAC,CAACrB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,EAAD,CAAV,EAAe,CAAf,EAAiB,UAAjB,CAAnI,EAAgKQ,CAAC,GAACkB,CAAC,CAAClB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAAnK,EAAiM6B,CAAC,GAACH,CAAC,CAACG,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,OAAnB,CAApM,EAAgOO,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAAnO,EAAiQK,CAAC,GAACqB,CAAC,CAACrB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CA
Ad,EAAgB,UAAhB,CAApQ,EAAgSQ,CAAC,GAACkB,CAAC,CAAClB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,QAAnB,CAAnS,EAAgU6B,CAAC,GAACH,CAAC,CAACG,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,UAAlB,CAAnU,EAAiWO,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,UAAlB,CAApW,EAAkYK,CAAC,GAACqB,CAAC,CAACrB,CAAD,EAAGE,CAAH,EAAKsB,CAAL,EAAOrB,CAAP,EAASR,CAAC,CAAC,CAAD,CAAV,EAAc,CAAd,EAAgB,CAAC,SAAjB,CAArY,EAAiaQ,CAAC,GAACkB,CAAC,CAAClB,CAAD,EAAGH,CAAH,EAAKE,CAAL,EAAOsB,CAAP,EAAS7B,CAAC,CAAC,EAAD,CAAV,EAAe,EAAf,EAAkB,CAAC,UAAnB,CAApa,EAAmc6B,CAAC,GAACH,CAAC,CAACG,CAAD,EAAGrB,CAAH,EAAKH,CAAL,EAAOE,CAAP,EAASP,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,SAAjB,CAAtc,EAAkeO,CAAC,GAACmB,CAAC,CAACnB,CAAD,EAAGsB,CAAH,EAAKrB,CAAL,EAAOH,CAAP,EAASL,CAAC,CAAC,CAAD,CAAV,EAAc,EAAd,EAAiB,CAAC,SAAlB,CAAre,EAAkgBD,CAAC,CAAC,CAAD,CAAD,GAAKK,CAAC,CAACC,CAAD,EAAGN,CAAC,CAAC,CAAD,CAAJ,CAAxgB,EAAihBA,CAAC,CAAC,CAAD,CAAD,GAAKK,CAAC,CAACG,CAAD,EAAGR,CAAC,CAAC,CAAD,CAAJ,CAAvhB,EAAgiBA,CAAC,CAAC,CAAD,CAAD,GAAKK,CAAC,CAACyB,CAAD,EAAG9B,CAAC,CAAC,CAAD,CAAJ,CAAtiB,EAA+iBA,CAAC,CAAC,CAAD,CAAD,GAAKK,CAAC,CAACI,CAAD,EAAGT,CAAC,CAAC,CAAD,CAAJ,CAArjB;EAA8jB,OAAhtE;;EAAitE,UAAI4B,CAAC,GAAC,SAAFA,CAAE,CAAS5B,CAAT,EAAW;EAAC,aAAI,IAAIC,CAAC,GAAC,EAAN,EAASI,CAAC,GAAC,CAAf,EAAiBA,CAAC,GAAC,EAAnB,EAAsBA,CAAC,IAAE,CAAzB;EAA2BJ,UAAAA,CAAC,CAACI,CAAC,IAAE,CAAJ,CAAD,GAAQL,CAAC,CAACqC,UAAF,CAAahC,CAAb,KAAiBL,CAAC,CAACqC,UAAF,CAAahC,CAAC,GAAC,CAAf,KAAmB,CAApC,KAAwCL,CAAC,CAACqC,UAAF,CAAahC,CAAC,GAAC,CAAf,KAAmB,EAA3D,KAAgEL,CAAC,CAACqC,UAAF,CAAahC,CAAC,GAAC,CAAf,KAAmB,EAAnF,CAAR;EAA3B;;EAA0H,eAAOJ,CAAP;EAAS,OAArJ;;EAAsJ,UAAImC,CAAC,GAAC,SAAFA,CAAE,CAASpC,CAAT,EAAWC,CAAX,EAAa;EAAC,YAAII,CAAJ;EAAA,YAAMC,CAAC,GAACN,CAAC,CAAC+B,MAAV;EAAA,YAAiBvB,CAAC,GAAC,CAAC,UAAD,EAAY,CAAC,SAAb,EAAuB,CAAC,UAAxB,EAAmC,SAAnC,CAAnB;;EAAiE,aAAIH,CAAC,GAAC,EAAN,EAASA,CAAC,IAAEC,CAAZ,EAAcD,CAAC,IAAE,EAAjB;EAAoB8B,UAAAA,CAAC
,CAAC3B,CAAD,EAAGoB,CAAC,CAAC5B,CAAC,CAACsC,SAAF,CAAYjC,CAAC,GAAC,EAAd,EAAiBA,CAAjB,CAAD,CAAJ,EAA0BJ,CAA1B,CAAD;EAApB;;EAAkD,YAAI6B,CAAC,GAAC,CAAC,CAAD,EAAG,CAAH,EAAK,CAAL,EAAO,CAAP,EAAS,CAAT,EAAW,CAAX,EAAa,CAAb,EAAe,CAAf,EAAiB,CAAjB,EAAmB,CAAnB,EAAqB,CAArB,EAAuB,CAAvB,EAAyB,CAAzB,EAA2B,CAA3B,EAA6B,CAA7B,EAA+B,CAA/B,CAAN;EAAA,YAAwCG,CAAC,GAAC,CAACjC,CAAC,GAACA,CAAC,CAACsC,SAAF,CAAYjC,CAAC,GAAC,EAAd,CAAH,EAAsB0B,MAAhE;;EAAuE,aAAI1B,CAAC,GAAC,CAAN,EAAQA,CAAC,GAAC4B,CAAV,EAAY5B,CAAC,EAAb;EAAgByB,UAAAA,CAAC,CAACzB,CAAC,IAAE,CAAJ,CAAD,IAASL,CAAC,CAACqC,UAAF,CAAahC,CAAb,MAAkBA,CAAC,GAAC,CAAF,IAAK,CAAvB,CAAT;EAAhB;;EAAmD,YAAGyB,CAAC,CAACzB,CAAC,IAAE,CAAJ,CAAD,IAAS,QAAMA,CAAC,GAAC,CAAF,IAAK,CAAX,CAAT,EAAuBA,CAAC,GAAC,EAA5B,EAA+B,KAAI8B,CAAC,CAAC3B,CAAD,EAAGsB,CAAH,EAAK7B,CAAL,CAAD,EAASI,CAAC,GAAC,EAAf,EAAkBA,CAAC,EAAnB;EAAuByB,UAAAA,CAAC,CAACzB,CAAD,CAAD,GAAK,CAAL;EAAvB;EAA8B,eAAOyB,CAAC,CAAC,EAAD,CAAD,GAAM,IAAExB,CAAR,EAAU6B,CAAC,CAAC3B,CAAD,EAAGsB,CAAH,EAAK7B,CAAL,CAAX,EAAmBO,CAA1B;EAA4B,OAA1V;;EAA2V,eAASmB,CAAT,CAAW3B,CAAX,EAAa;EAAC,YAAIC,CAAJ;EAAM,eAAM,uCAAqC6B,CAAC,CAACM,CAAC,CAAC,OAAD,CAAF,CAAtC,KAAqDnC,CAAC,GAAC,WAASD,CAAT,EAAWC,EAAX,EAAa;EAAC,cAAII,CAAC,GAAC,CAAC,QAAML,CAAP,KAAW,QAAMC,EAAjB,CAAN;EAA0B,iBAAM,CAACD,CAAC,IAAE,EAAJ,KAASC,EAAC,IAAE,EAAZ,KAAiBI,CAAC,IAAE,EAApB,KAAyB,EAAzB,GAA4B,QAAMA,CAAxC;EAA0C,SAAzI,GAA2IyB,CAAC,CAACM,CAAC,CAACpC,CAAD,EAAGC,CAAH,CAAF,CAAlJ;EAA2J;;EAAAI,MAAAA,CAAC,CAACS,CAAF,CAAIb,CAAJ,EAAM,KAAN,EAAY,YAAU;EAAC,eAAO0B,CAAP;EAAS,OAAhC;EAAkC,KAAtjH,CAAr5B,CAAP;EAAq9I,GAAjuJ,CAAD;;;;ECAA,IAAMY,GAAG,GAAGC,GAAmB,CAACD,GAAhC;;EAEA,IAAME,QAAQ,GAAG,SAAXA,QAAW,CAASC,GAAT,EAAc;EAC7B,MAAIC,GAAG,GAAGD,GAAG,CAACE,IAAJ,EAAV;EACAF,EAAAA,GAAG,CAACG,IAAJ,CAASC,OAAT,CAAiB,UAAAnB,CAAC,EAAI;EACpBA,IAAAA,CAAC,CAACoB,KAAF,GAAUD,OAAV,CAAkB,UAAAxC,CAAC,EAAI;EACrBqC,MAAAA,GAAG,IAAIrC,CAAC,CAAC0C,GAAF,IAAS1C,CAAC,CAAC2C,QAAF,IAAc3C,CAAC,CAACsC,IAAzB,IAAiCtC,CAAC,CAAC4C,IAA1C;EACAP,MAAAA,GAAG,IAAI5B,MAAM,CAACoC,IAAP,CAAY7C,CAAC,CAAC8C,IAAd,EAAoBpB,IAApB,CAAyB,EAAzB
,CAAP;EACD,KAHD;EAID,GALD;EAMA,SAAOO,GAAG,CAACI,GAAD,CAAV;EACD,CATD;;EAUA,QAAc,GAAGF,QAAjB;;ECZA,IAAIxC,CAAC,GAAC,SAAFA,CAAE,CAASK,CAAT,EAAWN,CAAX,EAAa8B,CAAb,EAAezB,CAAf,EAAiB;EAAC,OAAI,IAAIsB,CAAC,GAAC,CAAV,EAAYA,CAAC,GAAC3B,CAAC,CAAC+B,MAAhB,EAAuBJ,CAAC,EAAxB,EAA2B;EAAC,QAAIC,CAAC,GAAC5B,CAAC,CAAC2B,CAAD,CAAP;EAAA,QAAW0B,CAAC,GAAC,YAAU,OAAOzB,CAAjB,GAAmBE,CAAC,CAACF,CAAD,CAApB,GAAwBA,CAArC;EAAA,QAAuCM,CAAC,GAAClC,CAAC,CAAC,EAAE2B,CAAH,CAA1C;EAAgD,UAAIO,CAAJ,GAAM7B,CAAC,CAAC,CAAD,CAAD,GAAKgD,CAAX,GAAa,MAAInB,CAAJ,GAAM7B,CAAC,CAAC,CAAD,CAAD,GAAKU,MAAM,CAACuC,MAAP,CAAcjD,CAAC,CAAC,CAAD,CAAD,IAAM,EAApB,EAAuBgD,CAAvB,CAAX,GAAqC,MAAInB,CAAJ,GAAM,CAAC7B,CAAC,CAAC,CAAD,CAAD,GAAKA,CAAC,CAAC,CAAD,CAAD,IAAM,EAAZ,EAAgBL,CAAC,CAAC,EAAE2B,CAAH,CAAjB,IAAwB0B,CAA9B,GAAgC,MAAInB,CAAJ,GAAM7B,CAAC,CAAC,CAAD,CAAD,CAAKL,CAAC,CAAC,EAAE2B,CAAH,CAAN,KAAc0B,CAAC,GAAC,EAAtB,GAAyBhD,CAAC,CAACkD,IAAF,CAAOrB,CAAC,GAAC5B,CAAC,CAACkD,KAAF,CAAQ,IAAR,EAAavD,CAAC,CAACK,CAAD,EAAG+C,CAAH,EAAKvB,CAAL,EAAO,CAAC,EAAD,EAAI,IAAJ,CAAP,CAAd,CAAD,GAAkCuB,CAA1C,CAA3G;EAAwJ;;EAAA,SAAOhD,CAAP;EAAS,CAArQ;EAAA,IAAsQC,CAAC,GAAC,WAASL,CAAT,EAAW;EAAC,OAAI,IAAIK,CAAJ,EAAMN,CAAN,EAAQ8B,CAAC,GAAC,CAAV,EAAYzB,CAAC,GAAC,EAAd,EAAiBsB,CAAC,GAAC,EAAnB,EAAsBC,CAAC,GAAC,CAAC,CAAD,CAAxB,EAA4ByB,CAAC,GAAC,SAAFA,CAAE,CAASpD,CAAT,EAAW;EAAC,UAAI6B,CAAJ,KAAQ7B,CAAC,KAAGI,CAAC,GAACA,CAAC,CAACoD,OAAF,CAAU,sBAAV,EAAiC,EAAjC,CAAL,CAAT,IAAqD7B,CAAC,CAAC2B,IAAF,CAAOtD,CAAC,IAAEI,CAAV,EAAY,CAAZ,CAArD,GAAoE,MAAIyB,CAAJ,KAAQ7B,CAAC,IAAEI,CAAX,KAAeuB,CAAC,CAAC2B,IAAF,CAAOtD,CAAC,IAAEI,CAAV,EAAY,CAAZ,GAAeyB,CAAC,GAAC,CAAhC,IAAmC,MAAIA,CAAJ,IAAO,UAAQzB,CAAf,IAAkBJ,CAAlB,GAAoB2B,CAAC,CAAC2B,IAAF,CAAOtD,CAAP,EAAS,CAAT,CAApB,GAAgC,MAAI6B,CAAJ,IAAOzB,CAAP,IAAU,CAACJ,CAAX,GAAa2B,CAAC,CAAC2B,IAAF,CAAO,CAAC,CAAR,EAAU,CAAV,EAAYlD,CAAZ,CAAb,GAA4ByB,CAAC,IAAE,CAAH,KAAO,CAACzB,CAAC,IAAE,CAACJ,CAAD,IAAI,MAAI6B,CAAZ,MAAiBF,CAAC,CAAC2B,IAAF,CAAOlD,CAAP,EAASyB,CAAT,EAAW9B,CAAX,GAAc8B,CAAC,GAAC,CAAjC,GAAoC7B,CAAC,KAAG2B,CAAC,CAAC2B,IAAF,CAAOtD,CAAP,EAAS6B,C
AAT,EAAW9B,CAAX,GAAc8B,CAAC,GAAC,CAAnB,CAA5C,CAAnK,EAAsOzB,CAAC,GAAC,EAAxO;EAA2O,GAArR,EAAsR6B,CAAC,GAAC,CAA5R,EAA8RA,CAAC,GAACjC,CAAC,CAAC8B,MAAlS,EAAySG,CAAC,EAA1S,EAA6S;EAACA,IAAAA,CAAC,KAAG,MAAIJ,CAAJ,IAAOuB,CAAC,EAAR,EAAWA,CAAC,CAACnB,CAAD,CAAf,CAAD;;EAAqB,SAAI,IAAID,CAAC,GAAC,CAAV,EAAYA,CAAC,GAAChC,CAAC,CAACiC,CAAD,CAAD,CAAKH,MAAnB,EAA0BE,CAAC,EAA3B;EAA8B3B,MAAAA,CAAC,GAACL,CAAC,CAACiC,CAAD,CAAD,CAAKD,CAAL,CAAF,EAAU,MAAIH,CAAJ,GAAM,QAAMxB,CAAN,IAAS+C,CAAC,IAAGzB,CAAC,GAAC,CAACA,CAAD,CAAL,EAASE,CAAC,GAAC,CAArB,IAAwBzB,CAAC,IAAEC,CAAjC,GAAmC,MAAIwB,CAAJ,GAAM,SAAOzB,CAAP,IAAU,QAAMC,CAAhB,IAAmBwB,CAAC,GAAC,CAAF,EAAIzB,CAAC,GAAC,EAAzB,IAA6BA,CAAC,GAACC,CAAC,GAACD,CAAC,CAAC,CAAD,CAAxC,GAA4CsB,CAAC,GAACrB,CAAC,KAAGqB,CAAJ,GAAMA,CAAC,GAAC,EAAR,GAAWtB,CAAC,IAAEC,CAAf,GAAiB,QAAMA,CAAN,IAAS,QAAMA,CAAf,GAAiBqB,CAAC,GAACrB,CAAnB,GAAqB,QAAMA,CAAN,IAAS+C,CAAC,IAAGvB,CAAC,GAAC,CAAf,IAAkBA,CAAC,KAAG,QAAMxB,CAAN,IAASwB,CAAC,GAAC,CAAF,EAAI9B,CAAC,GAACK,CAAN,EAAQA,CAAC,GAAC,EAAnB,IAAuB,QAAMC,CAAN,KAAUwB,CAAC,GAAC,CAAF,IAAK,QAAM7B,CAAC,CAACiC,CAAD,CAAD,CAAKD,CAAC,GAAC,CAAP,CAArB,KAAiCoB,CAAC,IAAG,MAAIvB,CAAJ,KAAQF,CAAC,GAACA,CAAC,CAAC,CAAD,CAAX,CAAH,EAAmBE,CAAC,GAACF,CAArB,EAAuB,CAACA,CAAC,GAACA,CAAC,CAAC,CAAD,CAAJ,EAAS2B,IAAT,CAAczB,CAAd,EAAgB,CAAhB,CAAvB,EAA0CA,CAAC,GAAC,CAA9E,IAAiF,QAAMxB,CAAN,IAAS,SAAOA,CAAhB,IAAmB,SAAOA,CAA1B,IAA6B,SAAOA,CAApC,IAAuC+C,CAAC,IAAGvB,CAAC,GAAC,CAA7C,IAAgDzB,CAAC,IAAEC,CAA9J,CAAnJ,EAAoT,MAAIwB,CAAJ,IAAO,UAAQzB,CAAf,KAAmByB,CAAC,GAAC,CAAF,EAAIF,CAAC,GAACA,CAAC,CAAC,CAAD,CAA1B,CAApT;EAA9B;EAAiX;;EAAA,SAAOyB,CAAC,IAAGzB,CAAX;EAAa,CAAr9B;EAAA,IAAs9B5B,CAAC,GAAC,cAAY,OAAO0D,GAA3+B;EAAA,IAA++B5B,CAAC,GAAC9B,CAAC,GAAC,IAAI0D,GAAJ,EAAD,GAAS,EAA3/B;EAAA,IAA8/BrD,CAAC,GAACL,CAAC,GAAC,UAASC,CAAT,EAAW;EAAC,MAAID,CAAC,GAAC8B,CAAC,CAACZ,GAAF,CAAMjB,CAAN,CAAN;EAAe,SAAOD,CAAC,IAAE8B,CAAC,CAAC6B,GAAF,CAAM1D,CAAN,EAAQD,CAAC,GAACM,CAAC,CAACL,CAAD,CAAX,CAAH,EAAmBD,CAA1B;EAA4B,CAAxD,GAAyD,UAASC,CAAT,EAAW;EAAC,OAAI,IAAID,CAAC,GAAC,EAAN,EAASK,CAAC,GAAC,CAAf,EAAiBA,CAAC,
GAACJ,CAAC,CAAC8B,MAArB,EAA4B1B,CAAC,EAA7B;EAAgCL,IAAAA,CAAC,IAAEC,CAAC,CAACI,CAAD,CAAD,CAAK0B,MAAL,GAAY,GAAZ,GAAgB9B,CAAC,CAACI,CAAD,CAApB;EAAhC;;EAAwD,SAAOyB,CAAC,CAAC9B,CAAD,CAAD,KAAO8B,CAAC,CAAC9B,CAAD,CAAD,GAAKM,CAAC,CAACL,CAAD,CAAb,CAAP;EAAyB,CAAvpC;;AAAwpC,EAAe,qBAASK,CAAT,EAAW;EAAC,MAAIN,CAAC,GAACC,CAAC,CAAC,IAAD,EAAMI,CAAC,CAACC,CAAD,CAAP,EAAWsD,SAAX,EAAqB,EAArB,CAAP;EAAgC,SAAO5D,CAAC,CAAC+B,MAAF,GAAS,CAAT,GAAW/B,CAAX,GAAaA,CAAC,CAAC,CAAD,CAArB;EAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ECG5uC,IAAM6D,MAAM,GAAG,SAATA,MAAS,CAASnB,GAAT,EAAcoB,QAAd,EAAwBC,OAAxB,EAAiC;EAC9C,MAAIV,CAAC,GAAGW,GAAG,CAACxC,IAAJ,CAASyC,KAAT,CAAR;;EACA,MAAIF,OAAO,CAACvC,IAAZ,EAAkB;EAChB6B,IAAAA,CAAC,GAAGW,GAAG,CAACxC,IAAJ,CAASuC,OAAO,CAACvC,IAAjB,CAAJ;EACD;;EACD,MAAI0C,IAAI,GAAG,EAAX;EACA,MAAIC,GAAG,GAAGzB,GAAG,CAAC0B,OAAJ,CAAYN,QAAZ,CAAV;EACAK,EAAAA,GAAG,CAACrB,OAAJ,CAAY,UAAAtC,CAAC,EAAI;EACf,QAAImC,GAAG,GAAGU,CAAH,oBAAmB7C,CAAC,CAAC4D,OAArB,EAAgC5D,CAAC,CAACoC,IAAlC,CAAP;EACAsB,IAAAA,IAAI,CAACX,IAAL,CAAUZ,GAAV;EACD,GAHD;EAIA,SAAOU,CAAP,qBAAgBa,IAAhB;EACD,CAZD;;EAaA,QAAc,GAAGL,MAAjB;;ECbA,IAAMQ,UAAU,GAAG,SAAbA,UAAa,CAASC,GAAT,EAAc;;EAE/BA,EAAAA,GAAG,CAAC7C,SAAJ,CAAc8C,IAAd,GAAqB,YAAW;EAC9B,WAAO9B,IAAQ,CAAC,IAAD,CAAf;EACD,GAFD;;;;EAKA6B,EAAAA,GAAG,CAAC7C,SAAJ,CAAcyC,IAAd,GAAqB,YAAsC;EAAA,QAA7BJ,QAA6B,uEAAlB,EAAkB;EAAA,QAAdC,OAAc,uEAAJ,EAAI;EACzD,WAAOF,IAAM,CAAC,IAAD,EAAOC,QAAP,EAAiBC,OAAjB,CAAb;EACD,GAFD;EAGD,CAVD;;EAWA,OAAc,GAAGM,UAAjB;;;;;;;;"} \ No newline at end of file diff --git a/plugins/output/builds/compromise-output.min.js b/plugins/output/builds/compromise-output.min.js new file mode 100644 index 000000000..92113892f --- /dev/null +++ b/plugins/output/builds/compromise-output.min.js @@ -0,0 +1 @@ +!function(n,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(n=n||self).compromiseOutput=t()}(this,(function(){"use strict";function 
n(t){return(n="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(n){return typeof n}:function(n){return n&&"function"==typeof Symbol&&n.constructor===Symbol&&n!==Symbol.prototype?"symbol":typeof n})(t)}function t(n,t){return t||(t=n.slice(0)),Object.freeze(Object.defineProperties(n,{raw:{value:Object.freeze(t)}}))}"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self&&self;function e(n,t){return n(t={exports:{}},t.exports),t.exports}var r,o=e((function(t,e){"undefined"!=typeof self&&self,t.exports=function(t){var e={};function r(n){if(e[n])return e[n].exports;var o=e[n]={i:n,l:!1,exports:{}};return t[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}return r.m=t,r.c=e,r.d=function(n,t,e){r.o(n,t)||Object.defineProperty(n,t,{enumerable:!0,get:e})},r.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},r.t=function(t,e){if(1&e&&(t=r(t)),8&e)return t;if(4&e&&"object"==n(t)&&t&&t.__esModule)return t;var o=Object.create(null);if(r.r(o),Object.defineProperty(o,"default",{enumerable:!0,value:t}),2&e&&"string"!=typeof t)for(var u in t)r.d(o,u,function(n){return t[n]}.bind(null,u));return o},r.n=function(n){var t=n&&n.__esModule?function(){return n.default}:function(){return n};return r.d(t,"a",t),t},r.o=function(n,t){return Object.prototype.hasOwnProperty.call(n,t)},r.p="",r(r.s=0)}([function(n,t,e){e.r(t);var r="0123456789abcdef".split(""),o=function(n){for(var t="",e=0;e<4;e++)t+=r[n>>8*e+4&15]+r[n>>8*e&15];return t},u=function(n){for(var t=n.length,e=0;e
",""]);return v=function(){return n},n}function h(){var n=t(["",""]);return h=function(){return n},n}var y=function(n,t,e){var r=b.bind(d);e.bind&&(r=b.bind(e.bind));var o=[];return n.segment(t).forEach((function(n){var t=r(h(),n.segment,n.text);o.push(t)})),r(v(),o)};return function(n){n.prototype.hash=function(){return f(this)},n.prototype.html=function(){var n=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return y(this,n,t)}}})); diff --git a/plugins/output/builds/compromise-output.mjs b/plugins/output/builds/compromise-output.mjs new file mode 100644 index 000000000..70de05b62 --- /dev/null +++ b/plugins/output/builds/compromise-output.mjs @@ -0,0 +1,381 @@ +function _typeof(obj) { + if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { + _typeof = function (obj) { + return typeof obj; + }; + } else { + _typeof = function (obj) { + return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; + }; + } + + return _typeof(obj); +} + +function _taggedTemplateLiteral(strings, raw) { + if (!raw) { + raw = strings.slice(0); + } + + return Object.freeze(Object.defineProperties(strings, { + raw: { + value: Object.freeze(raw) + } + })); +} + +var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; + +function unwrapExports (x) { + return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x; +} + +function createCommonjsModule(fn, module) { + return module = { exports: {} }, fn(module, module.exports), module.exports; +} + +var lib = createCommonjsModule(function (module, exports) { + !function (r, n) { + module.exports = n(); + }("undefined" != typeof self ? 
self : commonjsGlobal, function () { + return function (r) { + var n = {}; + + function e(t) { + if (n[t]) return n[t].exports; + var o = n[t] = { + i: t, + l: !1, + exports: {} + }; + return r[t].call(o.exports, o, o.exports, e), o.l = !0, o.exports; + } + + return e.m = r, e.c = n, e.d = function (r, n, t) { + e.o(r, n) || Object.defineProperty(r, n, { + enumerable: !0, + get: t + }); + }, e.r = function (r) { + "undefined" != typeof Symbol && Symbol.toStringTag && Object.defineProperty(r, Symbol.toStringTag, { + value: "Module" + }), Object.defineProperty(r, "__esModule", { + value: !0 + }); + }, e.t = function (r, n) { + if (1 & n && (r = e(r)), 8 & n) return r; + if (4 & n && "object" == _typeof(r) && r && r.__esModule) return r; + var t = Object.create(null); + if (e.r(t), Object.defineProperty(t, "default", { + enumerable: !0, + value: r + }), 2 & n && "string" != typeof r) for (var o in r) { + e.d(t, o, function (n) { + return r[n]; + }.bind(null, o)); + } + return t; + }, e.n = function (r) { + var n = r && r.__esModule ? 
function () { + return r["default"]; + } : function () { + return r; + }; + return e.d(n, "a", n), n; + }, e.o = function (r, n) { + return Object.prototype.hasOwnProperty.call(r, n); + }, e.p = "", e(e.s = 0); + }([function (r, n, e) { + + e.r(n); + var t = "0123456789abcdef".split(""); + + var o = function o(r) { + for (var n = "", e = 0; e < 4; e++) { + n += t[r >> 8 * e + 4 & 15] + t[r >> 8 * e & 15]; + } + + return n; + }; + + var u = function u(r) { + for (var n = r.length, e = 0; e < n; e++) { + r[e] = o(r[e]); + } + + return r.join(""); + }; + + var f = function f(r, n) { + return r + n & 4294967295; + }; + + var i = function i(r, n, e, t, o, u, _i) { + return function (r, n, e) { + return f(r << n | r >>> 32 - n, e); + }(n = function (r, n, e, t) { + return n = f(f(n, r), f(e, t)); + }(r, n, t, u), o, e); + }; + + var a = function a(r, n, e, t, o, u, f, _a) { + return i(e & t | ~e & o, n, e, u, f, _a); + }; + + var c = function c(r, n, e, t, o, u, f, a) { + return i(e & o | t & ~o, n, e, u, f, a); + }; + + var l = function l(r, n, e, t, o, u, f, a) { + return i(e ^ t ^ o, n, e, u, f, a); + }; + + var d = function d(r, n, e, t, o, u, f, a) { + return i(t ^ (e | ~o), n, e, u, f, a); + }; + + var v = function v(r, n, e) { + void 0 === e && (e = f); + var t = r[0], + o = r[1], + u = r[2], + i = r[3], + v = a.bind(null, e); + t = v(t, o, u, i, n[0], 7, -680876936), i = v(i, t, o, u, n[1], 12, -389564586), u = v(u, i, t, o, n[2], 17, 606105819), o = v(o, u, i, t, n[3], 22, -1044525330), t = v(t, o, u, i, n[4], 7, -176418897), i = v(i, t, o, u, n[5], 12, 1200080426), u = v(u, i, t, o, n[6], 17, -1473231341), o = v(o, u, i, t, n[7], 22, -45705983), t = v(t, o, u, i, n[8], 7, 1770035416), i = v(i, t, o, u, n[9], 12, -1958414417), u = v(u, i, t, o, n[10], 17, -42063), o = v(o, u, i, t, n[11], 22, -1990404162), t = v(t, o, u, i, n[12], 7, 1804603682), i = v(i, t, o, u, n[13], 12, -40341101), u = v(u, i, t, o, n[14], 17, -1502002290), o = v(o, u, i, t, n[15], 22, 
1236535329); + var s = c.bind(null, e); + t = s(t, o, u, i, n[1], 5, -165796510), i = s(i, t, o, u, n[6], 9, -1069501632), u = s(u, i, t, o, n[11], 14, 643717713), o = s(o, u, i, t, n[0], 20, -373897302), t = s(t, o, u, i, n[5], 5, -701558691), i = s(i, t, o, u, n[10], 9, 38016083), u = s(u, i, t, o, n[15], 14, -660478335), o = s(o, u, i, t, n[4], 20, -405537848), t = s(t, o, u, i, n[9], 5, 568446438), i = s(i, t, o, u, n[14], 9, -1019803690), u = s(u, i, t, o, n[3], 14, -187363961), o = s(o, u, i, t, n[8], 20, 1163531501), t = s(t, o, u, i, n[13], 5, -1444681467), i = s(i, t, o, u, n[2], 9, -51403784), u = s(u, i, t, o, n[7], 14, 1735328473), o = s(o, u, i, t, n[12], 20, -1926607734); + var b = l.bind(null, e); + t = b(t, o, u, i, n[5], 4, -378558), i = b(i, t, o, u, n[8], 11, -2022574463), u = b(u, i, t, o, n[11], 16, 1839030562), o = b(o, u, i, t, n[14], 23, -35309556), t = b(t, o, u, i, n[1], 4, -1530992060), i = b(i, t, o, u, n[4], 11, 1272893353), u = b(u, i, t, o, n[7], 16, -155497632), o = b(o, u, i, t, n[10], 23, -1094730640), t = b(t, o, u, i, n[13], 4, 681279174), i = b(i, t, o, u, n[0], 11, -358537222), u = b(u, i, t, o, n[3], 16, -722521979), o = b(o, u, i, t, n[6], 23, 76029189), t = b(t, o, u, i, n[9], 4, -640364487), i = b(i, t, o, u, n[12], 11, -421815835), u = b(u, i, t, o, n[15], 16, 530742520), o = b(o, u, i, t, n[2], 23, -995338651); + var p = d.bind(null, e); + t = p(t, o, u, i, n[0], 6, -198630844), i = p(i, t, o, u, n[7], 10, 1126891415), u = p(u, i, t, o, n[14], 15, -1416354905), o = p(o, u, i, t, n[5], 21, -57434055), t = p(t, o, u, i, n[12], 6, 1700485571), i = p(i, t, o, u, n[3], 10, -1894986606), u = p(u, i, t, o, n[10], 15, -1051523), o = p(o, u, i, t, n[1], 21, -2054922799), t = p(t, o, u, i, n[8], 6, 1873313359), i = p(i, t, o, u, n[15], 10, -30611744), u = p(u, i, t, o, n[6], 15, -1560198380), o = p(o, u, i, t, n[13], 21, 1309151649), t = p(t, o, u, i, n[4], 6, -145523070), i = p(i, t, o, u, n[11], 10, -1120210379), u = p(u, i, t, 
o, n[2], 15, 718787259), o = p(o, u, i, t, n[9], 21, -343485551), r[0] = e(t, r[0]), r[1] = e(o, r[1]), r[2] = e(u, r[2]), r[3] = e(i, r[3]); + }; + + var s = function s(r) { + for (var n = [], e = 0; e < 64; e += 4) { + n[e >> 2] = r.charCodeAt(e) + (r.charCodeAt(e + 1) << 8) + (r.charCodeAt(e + 2) << 16) + (r.charCodeAt(e + 3) << 24); + } + + return n; + }; + + var b = function b(r, n) { + var e, + t = r.length, + o = [1732584193, -271733879, -1732584194, 271733878]; + + for (e = 64; e <= t; e += 64) { + v(o, s(r.substring(e - 64, e)), n); + } + + var u = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + f = (r = r.substring(e - 64)).length; + + for (e = 0; e < f; e++) { + u[e >> 2] |= r.charCodeAt(e) << (e % 4 << 3); + } + + if (u[e >> 2] |= 128 << (e % 4 << 3), e > 55) for (v(o, u, n), e = 16; e--;) { + u[e] = 0; + } + return u[14] = 8 * t, v(o, u, n), o; + }; + + function p(r) { + var n; + return "5d41402abc4b2a76b9719d911017c592" !== u(b("hello")) && (n = function n(r, _n) { + var e = (65535 & r) + (65535 & _n); + return (r >> 16) + (_n >> 16) + (e >> 16) << 16 | 65535 & e; + }), u(b(r, n)); + } + + e.d(n, "md5", function () { + return p; + }); + }]); + }); +}); +unwrapExports(lib); + +var md5 = lib.md5; + +var makeHash = function makeHash(doc) { + var str = doc.text(); + doc.list.forEach(function (p) { + p.terms().forEach(function (t) { + str += t.pre + (t.implicit || t.text) + t.post; + str += Object.keys(t.tags).join(''); + }); + }); + return md5(str); +}; + +var hash = makeHash; + +var n = function n(t, r, u, e) { + for (var p = 1; p < r.length; p++) { + var s = r[p], + h = "number" == typeof s ? u[s] : s, + a = r[++p]; + 1 === a ? e[0] = h : 3 === a ? e[1] = Object.assign(e[1] || {}, h) : 5 === a ? (e[1] = e[1] || {})[r[++p]] = h : 6 === a ? e[1][r[++p]] += h + "" : e.push(a ? 
t.apply(null, n(t, h, u, ["", null])) : h); + } + + return e; +}, + t = function t(n) { + for (var t, r, u = 1, e = "", p = "", s = [0], h = function h(n) { + 1 === u && (n || (e = e.replace(/^\s*\n\s*|\s*\n\s*$/g, ""))) ? s.push(n || e, 0) : 3 === u && (n || e) ? (s.push(n || e, 1), u = 2) : 2 === u && "..." === e && n ? s.push(n, 3) : 2 === u && e && !n ? s.push(!0, 5, e) : u >= 5 && ((e || !n && 5 === u) && (s.push(e, u, r), u = 6), n && (s.push(n, u, r), u = 6)), e = ""; + }, a = 0; a < n.length; a++) { + a && (1 === u && h(), h(a)); + + for (var f = 0; f < n[a].length; f++) { + t = n[a][f], 1 === u ? "<" === t ? (h(), s = [s], u = 3) : e += t : 4 === u ? "--" === e && ">" === t ? (u = 1, e = "") : e = t + e[0] : p ? t === p ? p = "" : e += t : '"' === t || "'" === t ? p = t : ">" === t ? (h(), u = 1) : u && ("=" === t ? (u = 5, r = e, e = "") : "/" === t && (u < 5 || ">" === n[a][f + 1]) ? (h(), 3 === u && (s = s[0]), u = s, (s = s[0]).push(u, 2), u = 0) : " " === t || "\t" === t || "\n" === t || "\r" === t ? (h(), u = 2) : e += t), 3 === u && "!--" === e && (u = 4, s = s[0]); + } + } + + return h(), s; +}, + r = "function" == typeof Map, + u = r ? new Map() : {}, + e = r ? function (n) { + var r = u.get(n); + return r || u.set(n, r = t(n)), r; +} : function (n) { + for (var r = "", e = 0; e < n.length; e++) { + r += n[e].length + "-" + n[e]; + } + + return u[r] || (u[r] = t(n)); +}; + +function htm (t) { + var r = n(this, e(t), arguments, []); + return r.length > 1 ? 
r : r[0]; +} + +var vhtml = createCommonjsModule(function (module, exports) { + (function (global, factory) { + module.exports = factory() ; + })(commonjsGlobal, function () { + + var emptyTags = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']; + + var esc = function esc(str) { + return String(str).replace(/[&<>"']/g, function (s) { + return '&' + map[s] + ';'; + }); + }; + + var map = { + '&': 'amp', + '<': 'lt', + '>': 'gt', + '"': 'quot', + "'": 'apos' + }; + var sanitized = {}; + + function h(name, attrs) { + var stack = []; + + for (var i = arguments.length; i-- > 2;) { + stack.push(arguments[i]); + } + + if (typeof name === 'function') { + (attrs || (attrs = {})).children = stack.reverse(); + return name(attrs); + } + + var s = '<' + name; + if (attrs) for (var _i in attrs) { + if (attrs[_i] !== false && attrs[_i] != null) { + s += ' ' + esc(_i) + '="' + esc(attrs[_i]) + '"'; + } + } + + if (emptyTags.indexOf(name) === -1) { + s += '>'; + + while (stack.length) { + var child = stack.pop(); + + if (child) { + if (child.pop) { + for (var _i2 = child.length; _i2--;) { + stack.push(child[_i2]); + } + } else { + s += sanitized[child] === true ? child : esc(child); + } + } + } + + s += '' + name + '>'; + } else { + s += '>'; + } + + sanitized[s] = true; + return s; + } + + return h; + }); +}); + +function _templateObject2() { + var data = _taggedTemplateLiteral(["
", ""]); + + _templateObject2 = function _templateObject2() { + return data; + }; + + return data; +} + +function _templateObject() { + var data = _taggedTemplateLiteral(["", ""]); + + _templateObject = function _templateObject() { + return data; + }; + + return data; +} + +var toHtml = function toHtml(doc, segments, options) { + var h = htm.bind(vhtml); + + if (options.bind) { + h = htm.bind(options.bind); + } + + var html = []; + var arr = doc.segment(segments); + arr.forEach(function (o) { + var str = h(_templateObject(), o.segment, o.text); + html.push(str); + }); + return h(_templateObject2(), html); +}; + +var html = toHtml; + +var addMethods = function addMethods(Doc) { + /** generate an md5 hash from the document */ + Doc.prototype.hash = function () { + return hash(this); + }; + /** generate sanitized html from the document */ + + + Doc.prototype.html = function () { + var segments = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; + var options = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : {}; + return html(this, segments, options); + }; +}; + +var src = addMethods; + +export default src; diff --git a/plugins/output/package-lock.json b/plugins/output/package-lock.json new file mode 100644 index 000000000..f0f1fd10d --- /dev/null +++ b/plugins/output/package-lock.json @@ -0,0 +1,774 @@ +{ + "name": "compromise-output", + "version": "0.0.2", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "@babel/code-frame": { + "version": "7.5.5", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.5.5.tgz", + "integrity": "sha512-27d4lZoomVyo51VegxI20xZPuSHusqbQag/ztrBC7wegWoQ1nLREPVSKSW8byhTlzTKyNE4ifaTA6lCp7JjpFw==", + "dev": true, + "requires": { + "@babel/highlight": "^7.0.0" + } + }, + "@babel/helper-module-imports": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.0.0.tgz", + "integrity": "sha512-aP/hlLq01DWNEiDg4Jn23i+CXxW/owM4WpDLFUbpjxe4NS3BhLVZQ5i7E0ZrxuQ/vwekIeciyamgB1UIYxxM6A==", + "dev": true, + "requires": { + "@babel/types": "^7.0.0" + } + }, + "@babel/highlight": { + "version": "7.5.0", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.5.0.tgz", + "integrity": "sha512-7dV4eu9gBxoM0dAnj/BCFDW9LFU0zvTrkq0ugM7pnHEgguOEeOz1so2ZghEdzviYzQEED0r4EAgpsBChKy1TRQ==", + "dev": true, + "requires": { + "chalk": "^2.0.0", + "esutils": "^2.0.2", + "js-tokens": "^4.0.0" + } + }, + "@babel/types": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.6.3.tgz", + "integrity": "sha512-CqbcpTxMcpuQTMhjI37ZHVgjBkysg5icREQIEZ0eG1yCNwg3oy+5AaLiOKmjsCj6nqOsa6Hf0ObjRVwokb7srA==", + "dev": true, + "requires": { + "esutils": "^2.0.2", + "lodash": "^4.17.13", + "to-fast-properties": "^2.0.0" + } + }, + "@types/estree": { + "version": "0.0.39", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-0.0.39.tgz", + "integrity": 
"sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw==", + "dev": true + }, + "@types/node": { + "version": "12.7.12", + "resolved": "https://registry.npmjs.org/@types/node/-/node-12.7.12.tgz", + "integrity": "sha512-KPYGmfD0/b1eXurQ59fXD1GBzhSQfz6/lKBxkaHX9dKTzjXbK68Zt7yGUxUsCS1jeTy/8aL+d9JEr+S54mpkWQ==", + "dev": true + }, + "@types/resolve": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/@types/resolve/-/resolve-0.0.8.tgz", + "integrity": "sha512-auApPaJf3NPfe18hSoJkp8EbZzer2ISk7o8mCC3M9he/a04+gbMF97NkpD2S8riMGvm4BMRI59/SZQSaLTKpsQ==", + "dev": true, + "requires": { + "@types/node": "*" + } + }, + "acorn": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.1.0.tgz", + "integrity": "sha512-kL5CuoXA/dgxlBbVrflsflzQ3PAas7RYZB52NOm/6839iVYJgKMJ3cQJD+t2i5+qFa8h3MDpEOJiS64E8JLnSQ==", + "dev": true + }, + "ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "requires": { + "color-convert": "^1.9.0" + } + }, + "balanced-match": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", + "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "buffer-from": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", + "dev": true 
+ }, + "buffer-shims": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/buffer-shims/-/buffer-shims-1.0.0.tgz", + "integrity": "sha1-mXjOMXOIxkmth5MCjDR37wRKi1E=", + "dev": true + }, + "builtin-modules": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/builtin-modules/-/builtin-modules-3.1.0.tgz", + "integrity": "sha512-k0KL0aWZuBt2lrxrcASWDfwOLMnodeQjodT/1SxEQAXsHANgo6ZC/VEaSEHCXt7aSTZ4/4H5LKa+tBXmW7Vtvw==", + "dev": true + }, + "chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + } + }, + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "requires": { + "color-name": "1.1.3" + } + }, + "color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "commander": { + "version": "2.20.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.1.tgz", + "integrity": "sha512-cCuLsMhJeWQ/ZpsFTbE765kvVfoeSddc4nU3up4fV+fDBcfUXnbITJ+JzhkdjzOqhURjZgujxaioam4RM9yGUg==", + "dev": true + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "dev": true + }, + "deep-equal": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/deep-equal/-/deep-equal-1.0.1.tgz", + "integrity": "sha1-9dJgKStmDghO/0zbyfCK0yR0SLU=", + "dev": true + }, + "define-properties": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", + "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", + "dev": true, + "requires": { + "object-keys": "^1.0.12" + } + }, + "defined": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/defined/-/defined-1.0.0.tgz", + "integrity": "sha1-yY2bzvdWdBiOEQlpFRGZ45sfppM=", + "dev": true + }, + "es-abstract": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.15.0.tgz", + "integrity": "sha512-bhkEqWJ2t2lMeaJDuk7okMkJWI/yqgH/EoGwpcvv0XW9RWQsRspI4wt6xuyuvMvvQE3gg/D9HXppgk21w78GyQ==", + "dev": true, + "requires": { + "es-to-primitive": "^1.2.0", + "function-bind": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.0", + "is-callable": "^1.1.4", + "is-regex": "^1.0.4", + "object-inspect": "^1.6.0", + "object-keys": "^1.1.1", + "string.prototype.trimleft": "^2.1.0", + "string.prototype.trimright": "^2.1.0" + } + }, + "es-to-primitive": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.0.tgz", + "integrity": "sha512-qZryBOJjV//LaxLTV6UC//WewneB3LcXOL9NP++ozKVXsIIIpm/2c13UDiD9Jp2eThsecw9m3jPqDwTyobcdbg==", + "dev": true, + "requires": { + "is-callable": "^1.1.4", + "is-date-object": "^1.0.1", + "is-symbol": "^1.0.2" + } + }, + "escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true + }, + "estree-walker": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-0.6.1.tgz", + "integrity": 
"sha512-SqmZANLWS0mnatqbSfRP5g8OXZC12Fgg1IwNtLsyHDzJizORW4khDfjPqJZsemPWBB2uqykUah5YpQ6epsqC/w==", + "dev": true + }, + "esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, + "for-each": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz", + "integrity": "sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw==", + "dev": true, + "requires": { + "is-callable": "^1.1.3" + } + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + "glob": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.4.tgz", + "integrity": "sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "requires": { + "function-bind": "^1.1.1" + } + }, + "has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true + }, + "has-symbols": { + "version": 
"1.0.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.0.tgz", + "integrity": "sha1-uhqPGvKg/DllD1yFA2dwQSIGO0Q=", + "dev": true + }, + "htm": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/htm/-/htm-2.2.1.tgz", + "integrity": "sha512-nPWqBlSbdSHaT2d/cpFRpZLqH5BcdaRcXaicyAtXbkIsIiXLnstPPg0fEX3I8SrlY16FSzb1FLK44uk25xhMvw==" + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "is-callable": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.1.4.tgz", + "integrity": "sha512-r5p9sxJjYnArLjObpjA4xu5EKI3CuKHkJXMhT7kwbpUyIFD1n5PMAsoPvWnvtZiNz7LjkYDRZhd7FlI0eMijEA==", + "dev": true + }, + "is-date-object": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.1.tgz", + "integrity": "sha1-mqIOtq7rv/d/vTPnTKAbM1gdOhY=", + "dev": true + }, + "is-module": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-module/-/is-module-1.0.0.tgz", + "integrity": "sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE=", + "dev": true + }, + "is-reference": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-1.1.4.tgz", + "integrity": "sha512-uJA/CDPO3Tao3GTrxYn6AwkM4nUPJiGGYu5+cB8qbC7WGFlrKZbiRo7SFKxUAEpFUfiHofWCXBUNhvYJMh+6zw==", + "dev": true, + "requires": { + "@types/estree": "0.0.39" + } + }, + "is-regex": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.0.4.tgz", + "integrity": "sha1-VRdIm1RwkbCTDglWVM7SXul+lJE=", + "dev": true, + "requires": 
{ + "has": "^1.0.1" + } + }, + "is-symbol": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.2.tgz", + "integrity": "sha512-HS8bZ9ox60yCJLH9snBpIwv9pYUAkcuLhSA1oero1UB5y9aiQpRA8y2ex945AOtCZL1lJDeIk3G5LthswI46Lw==", + "dev": true, + "requires": { + "has-symbols": "^1.0.0" + } + }, + "isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=", + "dev": true + }, + "jest-worker": { + "version": "24.9.0", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-24.9.0.tgz", + "integrity": "sha512-51PE4haMSXcHohnSMdM42anbvZANYTqMrr52tVKPqqsPJMzoP6FYYDVqahX/HrAoKEKz3uUPzSvKs9A3qR4iVw==", + "dev": true, + "requires": { + "merge-stream": "^2.0.0", + "supports-color": "^6.1.0" + }, + "dependencies": { + "supports-color": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-6.1.0.tgz", + "integrity": "sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + } + } + }, + "js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "lodash": { + "version": "4.17.15", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==", + "dev": true + }, + "magic-string": { + "version": "0.25.4", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.25.4.tgz", + "integrity": "sha512-oycWO9nEVAP2RVPbIoDoA4Y7LFIJ3xRYov93gAyJhZkET1tNuB0u7uWkZS2LpBWTJUWnmau/To8ECWRC+jKNfw==", + "dev": true, + "requires": { + "sourcemap-codec": "^1.4.4" + } + }, + "merge-stream": { + 
"version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true + }, + "minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", + "dev": true + }, + "object-inspect": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.6.0.tgz", + "integrity": "sha512-GJzfBZ6DgDAmnuaM3104jR4s1Myxr3Y3zfIyN4z3UdqN69oSRacNK8UhnobDdC+7J2AHCjGwxQubNJfE70SXXQ==", + "dev": true + }, + "object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "dev": true + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true + }, + "path-parse": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", + "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", + "dev": true + }, + "process-nextick-args": { + "version": "1.0.7", + "resolved": 
"https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", + "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=", + "dev": true + }, + "pure-md5": { + "version": "0.1.9", + "resolved": "https://registry.npmjs.org/pure-md5/-/pure-md5-0.1.9.tgz", + "integrity": "sha512-SA/xeqGm/WSx9sup7801Xl8cs2mi5IB6UTCOapWcHc7IeAlMn4k3JBO9aLH+88+JrqqUANhVwX1+Tz3dlvRI+A==" + }, + "re-emitter": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/re-emitter/-/re-emitter-1.1.3.tgz", + "integrity": "sha1-+p4xn/3u6zWycpbvDz03TawvUqc=", + "dev": true + }, + "readable-stream": { + "version": "2.2.9", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.2.9.tgz", + "integrity": "sha1-z3jsb0ptHrQ9JkiMrJfwQudLf8g=", + "dev": true, + "requires": { + "buffer-shims": "~1.0.0", + "core-util-is": "~1.0.0", + "inherits": "~2.0.1", + "isarray": "~1.0.0", + "process-nextick-args": "~1.0.6", + "string_decoder": "~1.0.0", + "util-deprecate": "~1.0.1" + } + }, + "resolve": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.12.0.tgz", + "integrity": "sha512-B/dOmuoAik5bKcD6s6nXDCjzUKnaDvdkRyAk6rsmsKLipWj4797iothd7jmmUhWTfinVMU+wc56rYKsit2Qy4w==", + "dev": true, + "requires": { + "path-parse": "^1.0.6" + } + }, + "resumer": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/resumer/-/resumer-0.0.0.tgz", + "integrity": "sha1-8ej0YeQGS6Oegq883CqMiT0HZ1k=", + "dev": true, + "requires": { + "through": "~2.3.4" + } + }, + "rollup": { + "version": "1.27.5", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-1.27.5.tgz", + "integrity": "sha512-8rfVdzuTg2kt8ObD9LNJpEwUN7B6lsl3sHc5fddtgICpLjpYeSf4m2+RftBzcCaBTMi1iYX3Ez8zFT4Gj2nJjg==", + "dev": true, + "requires": { + "@types/estree": "*", + "@types/node": "*", + "acorn": "^7.1.0" + } + }, + "rollup-plugin-babel": { + "version": "4.3.3", + "resolved": "https://registry.npmjs.org/rollup-plugin-babel/-/rollup-plugin-babel-4.3.3.tgz", + "integrity": 
"sha512-tKzWOCmIJD/6aKNz0H1GMM+lW1q9KyFubbWzGiOG540zxPPifnEAHTZwjo0g991Y+DyOZcLqBgqOdqazYE5fkw==", + "dev": true, + "requires": { + "@babel/helper-module-imports": "^7.0.0", + "rollup-pluginutils": "^2.8.1" + } + }, + "rollup-plugin-commonjs": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/rollup-plugin-commonjs/-/rollup-plugin-commonjs-10.1.0.tgz", + "integrity": "sha512-jlXbjZSQg8EIeAAvepNwhJj++qJWNJw1Cl0YnOqKtP5Djx+fFGkp3WRh+W0ASCaFG5w1jhmzDxgu3SJuVxPF4Q==", + "dev": true, + "requires": { + "estree-walker": "^0.6.1", + "is-reference": "^1.1.2", + "magic-string": "^0.25.2", + "resolve": "^1.11.0", + "rollup-pluginutils": "^2.8.1" + } + }, + "rollup-plugin-json": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/rollup-plugin-json/-/rollup-plugin-json-4.0.0.tgz", + "integrity": "sha512-hgb8N7Cgfw5SZAkb3jf0QXii6QX/FOkiIq2M7BAQIEydjHvTyxXHQiIzZaTFgx1GK0cRCHOCBHIyEkkLdWKxow==", + "dev": true, + "requires": { + "rollup-pluginutils": "^2.5.0" + } + }, + "rollup-plugin-node-resolve": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/rollup-plugin-node-resolve/-/rollup-plugin-node-resolve-5.2.0.tgz", + "integrity": "sha512-jUlyaDXts7TW2CqQ4GaO5VJ4PwwaV8VUGA7+km3n6k6xtOEacf61u0VXwN80phY/evMcaS+9eIeJ9MOyDxt5Zw==", + "dev": true, + "requires": { + "@types/resolve": "0.0.8", + "builtin-modules": "^3.1.0", + "is-module": "^1.0.0", + "resolve": "^1.11.1", + "rollup-pluginutils": "^2.8.1" + } + }, + "rollup-plugin-terser": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/rollup-plugin-terser/-/rollup-plugin-terser-5.1.2.tgz", + "integrity": "sha512-sWKBCOS+vUkRtHtEiJPAf+WnBqk/C402fBD9AVHxSIXMqjsY7MnYWKYEUqGixtr0c8+1DjzUEPlNgOYQPVrS1g==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.0.0", + "jest-worker": "^24.6.0", + "rollup-pluginutils": "^2.8.1", + "serialize-javascript": "^1.7.0", + "terser": "^4.1.0" + } + }, + "rollup-pluginutils": { + "version": "2.8.2", + "resolved": 
"https://registry.npmjs.org/rollup-pluginutils/-/rollup-pluginutils-2.8.2.tgz", + "integrity": "sha512-EEp9NhnUkwY8aif6bxgovPHMoMoNr2FulJziTndpt5H9RdwC47GSGuII9XxpSdzVGM0GWrNPHV6ie1LTNJPaLQ==", + "dev": true, + "requires": { + "estree-walker": "^0.6.1" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true + }, + "serialize-javascript": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.9.1.tgz", + "integrity": "sha512-0Vb/54WJ6k5v8sSWN09S0ora+Hnr+cX40r9F170nT+mSkaxltoE/7R3OrIdBSUv1OoiobH1QoWQbCnAO+e8J1A==", + "dev": true + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + }, + "source-map-support": { + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", + "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", + "dev": true, + "requires": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "sourcemap-codec": { + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.6.tgz", + "integrity": "sha512-1ZooVLYFxC448piVLBbtOxFcXwnymH9oUF8nRd3CuYDVvkRBxRl6pB4Mtas5a4drtL+E8LDgFkQNcgIw6tc8Hg==", + "dev": true + }, + "split": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/split/-/split-1.0.0.tgz", + "integrity": "sha1-xDlc5oOrzSVLwo/h2rtuXCfc/64=", + "dev": true, + "requires": { + "through": "2" + } + }, + "string.prototype.trim": { + "version": "1.1.2", + "resolved": 
"https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.1.2.tgz", + "integrity": "sha1-0E3iyJ4Tf019IG8Ia17S+ua+jOo=", + "dev": true, + "requires": { + "define-properties": "^1.1.2", + "es-abstract": "^1.5.0", + "function-bind": "^1.0.2" + } + }, + "string.prototype.trimleft": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/string.prototype.trimleft/-/string.prototype.trimleft-2.1.0.tgz", + "integrity": "sha512-FJ6b7EgdKxxbDxc79cOlok6Afd++TTs5szo+zJTUyow3ycrRfJVE2pq3vcN53XexvKZu/DJMDfeI/qMiZTrjTw==", + "dev": true, + "requires": { + "define-properties": "^1.1.3", + "function-bind": "^1.1.1" + } + }, + "string.prototype.trimright": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/string.prototype.trimright/-/string.prototype.trimright-2.1.0.tgz", + "integrity": "sha512-fXZTSV55dNBwv16uw+hh5jkghxSnc5oHq+5K/gXgizHwAvMetdAJlHqqoFC1FSDVPYWLkAKl2cxpUT41sV7nSg==", + "dev": true, + "requires": { + "define-properties": "^1.1.3", + "function-bind": "^1.1.1" + } + }, + "string_decoder": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.3.tgz", + "integrity": "sha512-4AH6Z5fzNNBcH+6XDMfA/BTt87skxqJlO0lAh3Dker5zThcAxG6mKz+iGu308UKoPPQ8Dcqx/4JhujzltRa+hQ==", + "dev": true, + "requires": { + "safe-buffer": "~5.1.0" + } + }, + "supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + }, + "tap-dancer": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/tap-dancer/-/tap-dancer-0.2.0.tgz", + "integrity": "sha512-SKUl8jHmYf/7rugeFTwYGpguRi43zqOTVZOfeh3DKCAlazZerdXd11ER5kNVbsbdWc5FB8wwjlZhjTc0W69iGQ==", + "dev": true, + "requires": { + "chalk": "2.4.2", + "tap-out": "3.0.0" + } + }, + "tap-out": { + "version": "3.0.0", + 
"resolved": "https://registry.npmjs.org/tap-out/-/tap-out-3.0.0.tgz", + "integrity": "sha512-JzlrjCL3anqI9xHGPfYb6Mo+6nYs60m0tDy6i0sWhYDlrBDVerYPNWoUo1buK3YeWQFdm42KQv8wu9qLf1tL5A==", + "dev": true, + "requires": { + "re-emitter": "1.1.3", + "readable-stream": "2.2.9", + "split": "1.0.0", + "trim": "0.0.1" + } + }, + "tape": { + "version": "4.11.0", + "resolved": "https://registry.npmjs.org/tape/-/tape-4.11.0.tgz", + "integrity": "sha512-yixvDMX7q7JIs/omJSzSZrqulOV51EC9dK8dM0TzImTIkHWfe2/kFyL5v+d9C+SrCMaICk59ujsqFAVidDqDaA==", + "dev": true, + "requires": { + "deep-equal": "~1.0.1", + "defined": "~1.0.0", + "for-each": "~0.3.3", + "function-bind": "~1.1.1", + "glob": "~7.1.4", + "has": "~1.0.3", + "inherits": "~2.0.4", + "minimist": "~1.2.0", + "object-inspect": "~1.6.0", + "resolve": "~1.11.1", + "resumer": "~0.0.0", + "string.prototype.trim": "~1.1.2", + "through": "~2.3.8" + }, + "dependencies": { + "resolve": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.11.1.tgz", + "integrity": "sha512-vIpgF6wfuJOZI7KKKSP+HmiKggadPQAdsp5HiC1mvqnfp0gF1vdwgBWZIdrVft9pgqoMFQN+R7BSWZiBxx+BBw==", + "dev": true, + "requires": { + "path-parse": "^1.0.6" + } + } + } + }, + "terser": { + "version": "4.3.8", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.3.8.tgz", + "integrity": "sha512-otmIRlRVmLChAWsnSFNO0Bfk6YySuBp6G9qrHiJwlLDd4mxe2ta4sjI7TzIR+W1nBMjilzrMcPOz9pSusgx3hQ==", + "dev": true, + "requires": { + "commander": "^2.20.0", + "source-map": "~0.6.1", + "source-map-support": "~0.5.12" + } + }, + "through": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", + "integrity": "sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU=", + "dev": true + }, + "to-fast-properties": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", + "integrity": "sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=", + "dev": true + }, + "trim": { + "version": "0.0.1", + "resolved": 
"https://registry.npmjs.org/trim/-/trim-0.0.1.tgz", + "integrity": "sha1-WFhUf2spB1fulczMZm+1AITEYN0=", + "dev": true + }, + "util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", + "dev": true + }, + "vhtml": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/vhtml/-/vhtml-2.1.0.tgz", + "integrity": "sha1-qFjtLjKMWVOZphroig4A+3Vr4Bw=" + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + } + } +} diff --git a/plugins/output/package.json b/plugins/output/package.json new file mode 100644 index 000000000..7014a6d82 --- /dev/null +++ b/plugins/output/package.json @@ -0,0 +1,51 @@ +{ + "name": "compromise-output", + "description": "plugin for nlp-compromise", + "version": "0.0.2", + "author": "Spencer Kelly
${html}` +} +module.exports = toHtml diff --git a/plugins/output/src/index.js b/plugins/output/src/index.js new file mode 100644 index 000000000..cb02ae270 --- /dev/null +++ b/plugins/output/src/index.js @@ -0,0 +1,15 @@ +const makeHash = require('./hash') +const toHtml = require('./html') + +const addMethods = function(Doc) { + /** generate an md5 hash from the document */ + Doc.prototype.hash = function() { + return makeHash(this) + } + + /** generate sanitized html from the document */ + Doc.prototype.html = function(segments = {}, options = {}) { + return toHtml(this, segments, options) + } +} +module.exports = addMethods diff --git a/plugins/output/tests/_lib.js b/plugins/output/tests/_lib.js new file mode 100644 index 000000000..254633449 --- /dev/null +++ b/plugins/output/tests/_lib.js @@ -0,0 +1,12 @@ +if (typeof process !== undefined && typeof module !== undefined) { + let nlp + if (process.env.TESTENV === 'prod') { + nlp = require('../../../') + nlp.extend(require(`../`)) + } else { + nlp = require('../../../src') + nlp.extend(require(`../src`)) + } + + module.exports = nlp +} diff --git a/plugins/output/tests/misc.test.js b/plugins/output/tests/misc.test.js new file mode 100644 index 000000000..b198df6a3 --- /dev/null +++ b/plugins/output/tests/misc.test.js @@ -0,0 +1,30 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('hash has whitespace', function(t) { + let doc = nlp(`He isn't... 
working `) + doc.normalize({ + case: false, + punctuation: true, + contractions: false, + }) + t.equal(nlp('he is not working').hash() === doc.hash(), false, 'whitespace changes hash') + t.end() +}) + +test('hash output', function(t) { + let docA = nlp('hello there') + let docB = nlp('hello there') + t.equal(docA.hash() === docB.hash(), true, 'both are equal') + + docB.match('hello').tag('Greeting') + t.equal(docA.hash() === docB.hash(), false, 'hashes not equal after tag') + t.end() +}) + +test('html output', function(t) { + let doc = nlp('i <3 you') + t.equal(doc.html(), '
i <3 you
', 'html escaped')
+ doc = nlp()
+ t.end()
+})
diff --git a/plugins/paragraphs/README.md b/plugins/paragraphs/README.md
new file mode 100644
index 000000000..8dfd0b05b
--- /dev/null
+++ b/plugins/paragraphs/README.md
@@ -0,0 +1,105 @@
+
+
+npm install compromise-paragraphs
+npm install compromise-sentences
+npm install compromise-syllables
+- browser-based test of compromise -- - - - diff --git a/test/unit/docs/docs.test.js b/test/unit/docs/docs.test.js deleted file mode 100644 index 97744e5ee..000000000 --- a/test/unit/docs/docs.test.js +++ /dev/null @@ -1,83 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var docs = require('../../../docs/api'); -var freshPrince = require('../lib/freshPrince'); - -test('generic-methods-run:', function(t) { - var getters = { - found: true, - length: true - }; - var skip = { - whitespace: true, - insertAt: true, - debug: true, //too noisy - forEach: true, - filter: true, - map: true, - find: true, - reduce: true, - }; - var needString = { - insertBefore: true, - insertAfter: true, - match: true, - splitOn: true, - splitBefore: true, - splitAfter: true - }; - - var r = nlp(freshPrince); - Object.keys(docs.generic).forEach(function(type) { - Object.keys(docs.generic[type]).forEach(function(fn) { - //simply call this method to see if it throws an error - var func = function() { - if (getters[fn]) { - //getters dont have a '()' - return r[fn]; - } else if (needString[fn]) { - //give a dummy param - return r[fn]('fun'); - } else if (skip[fn]) { - //these are too fancy to call - return typeof r[fn] === 'function'; - } else { - //call this method - return r[fn](); - } - }; - - t.doesNotThrow(func, true, fn); - }); - }); - t.end(); -}); - -test('subsets-methods-exist:', function(t) { - var addParam = { - sentences: { - append: true, - prepend: true - } - }; - var r = nlp(freshPrince); - Object.keys(docs.subsets).forEach(function(subset) { - //each subset - t.doesNotThrow(function() { - return r[subset](), true, subset + '()'; - }); - //each method in that subset - Object.keys(docs.subsets[subset]).forEach(function(method) { - var func = function() { - if (addParam[subset] && addParam[subset][method]) { - r[subset]()[method]('fun'); - } else { - r[subset]()[method](); - } - }; - var msg = subset + '().' 
+ method; - t.doesNotThrow(func, true, msg); - }); - }); - t.end(); -}); diff --git a/test/unit/docs/eval.test.js b/test/unit/docs/eval.test.js deleted file mode 100644 index 2a738a4bd..000000000 --- a/test/unit/docs/eval.test.js +++ /dev/null @@ -1,41 +0,0 @@ -var test = require('tape'); -var docs = require('../../../docs/api'); -//gotta include this for the 'eval()' -var nlp = require('../lib/nlp'); // eslint-disable-line no-unused-vars - -var flat = []; -Object.keys(docs.generic).forEach(k => { - var obj = docs.generic[k]; - Object.keys(obj).forEach(k2 => { - obj[k2].title = k + '().' + k2 + '()'; - flat.push(obj[k2]); - }); -}); -Object.keys(docs.subsets).forEach(k => { - var obj = docs.subsets[k]; - Object.keys(obj).forEach(k2 => { - obj[k2].title = k + '().' + k2 + '()'; - flat.push(obj[k2]); - }); -}); - -test('docs-eval:', function(t) { - flat.forEach(o => { - var code = o.example; - try { - code = `(function(){ - ` + - code + - ` - })()`; - eval(code); - t.ok(true, o.title); - // t.doesNotThrow(eval(code)) - } catch (e) { - console.log(o.title); - console.log(e); - t.fail(o.title); - } - }); - t.end(); -}); diff --git a/test/unit/extend/addConjugations.test.js b/test/unit/extend/addConjugations.test.js deleted file mode 100644 index 537719e25..000000000 --- a/test/unit/extend/addConjugations.test.js +++ /dev/null @@ -1,23 +0,0 @@ - -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('addConjugations', function(t) { - var conjugations = { - woo: { - PastTense: 'wooed' - } - }; - nlp.addConjugations(conjugations); - var doc = nlp('woo the crush'); - t.equal(doc.verbs().length, 1, 'has inf in lexicon'); - - doc.verbs().toPastTense(); - t.equal(doc.out(), 'wooed the crush', 'conjugated from infinitive'); - t.equal(doc.verbs().length, 1, 'still has 1 verb'); - - doc.verbs().toInfinitive(); - t.equal(doc.out(), 'woo the crush', 'conjugated back tp infinitive'); - - t.end(); -}); diff --git a/test/unit/extend/addPatterns.test.js 
b/test/unit/extend/addPatterns.test.js deleted file mode 100644 index 62f9e6dee..000000000 --- a/test/unit/extend/addPatterns.test.js +++ /dev/null @@ -1,16 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('addpatterns-test', function(t) { - var patterns = { - 'master of #Noun': 'Person', - 'captain of the #Noun+': 'Person', - }; - nlp.addPatterns(patterns); - var doc = nlp('he is the master of ceremonies and captain of the Utah baseball team'); - var arr = doc.people().data(); - t.equal(arr.length, 2, 'found both'); - t.equal(arr[0].normal, 'master of ceremonies', 'first one'); - t.equal(arr[1].normal, 'captain of the utah baseball team', 'second-one'); - t.end(); -}); diff --git a/test/unit/extend/addPlurals.test.js b/test/unit/extend/addPlurals.test.js deleted file mode 100644 index d4e1101a9..000000000 --- a/test/unit/extend/addPlurals.test.js +++ /dev/null @@ -1,25 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('addPlurals-test', function(t) { - var plurals = { - mather: 'mathii', - algebra: 'algebri' - }; - nlp.addPlurals(plurals); - var doc = nlp('the mather did many algebri'); - var arr = doc.nouns().data(); - t.equal(arr.length, 2, 'found both'); - t.equal(arr[0].singular, 'mather', 'singular-form'); - t.equal(arr[0].plural, 'mathii', 'plural-form'); - - t.equal(arr[1].singular, 'algebra', 'singular-form2'); - t.equal(arr[1].plural, 'algebri', 'plural-form2'); - - doc.nouns().toPlural(); - t.equal(doc.out(), 'the mathii did many algebri', 'toPlural'); - doc.nouns().toSingular(); - t.equal(doc.out(), 'the mather did many algebra', 'toSingular'); - - t.end(); -}); diff --git a/test/unit/extend/addTags.test.js b/test/unit/extend/addTags.test.js deleted file mode 100644 index 641772e8b..000000000 --- a/test/unit/extend/addTags.test.js +++ /dev/null @@ -1,95 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('tagset-change-isA-basic', function(t) { - nlp.addTags({ - 
Doctor: { - isA: 'Person' - } - }); - nlp.addWords({ - surgeon: 'Doctor', - 'surgeon general': 'Doctor' - }); - var doc = nlp('the surgeon operated'); - - //basic isA - t.equal(doc.match('#Doctor').out('normal'), 'surgeon', 'surgeon is a doctor'); - t.equal(doc.people().length, 1, 'doctor is a person'); - - doc = nlp('lkjsdf').tag('#Person'); - t.equal(doc.match('#Doctor').length, 0, 'person isnt a doctor, necessarily'); - - doc = nlp('lkjsdf').tag('#Doctor'); - t.equal(doc.match('#Person').length, 1, 'post-hoc tags work, too'); - - //multi-word - doc = nlp('the surgeon general operated'); - t.equal(doc.match('#Doctor').out('normal'), 'surgeon general', 'multi-word'); - t.equal(doc.match('#Person').out('normal'), 'surgeon general', 'multi-word-isA'); - t.end(); -}); - -test('tagset-change-isA', function(t) { - nlp.addTags({ - Doctor: { - isA: 'Person', - notA: ['Foo'] - } - }); - nlp.addWords({ - lkjj: 'Foo' - }); - var doc = nlp('he is lkjj'); - t.equal(doc.match('#Foo').out('normal'), 'lkjj', 'init-there'); - doc.match('lkjj').tag('#Doctor'); - - t.equal(doc.match('#Doctor').out('normal'), 'lkjj', 'doctor-tag-there'); - t.equal(doc.match('#Foo').out('normal'), '', 'foo-is-gone'); - - t.end(); -}); - -test('tagset-remove-downward', function(t) { - nlp.addTags({ - Doctor: { - isA: 'Person' - }, - Surgeon: { - isA: 'Doctor' - } - }); - var doc = nlp('george is a person.'); - doc.match('george').tag('Surgeon'); - - t.ok(doc.has('#Surgeon'), 'Surgeon-tag-there'); - t.ok(doc.has('#Doctor'), 'doctor-tag-there'); - t.ok(doc.has('#Person'), 'person-tag-there'); - - //remove one in the middle.. 
- doc.match('george').unTag('Person'); - t.ok(doc.has('#Person') === false, 'person-tag-gone'); - t.ok(doc.has('#Doctor') === false, 'doctor-tag-gone'); - t.ok(doc.has('#Surgeon') === false, 'Surgeon-tag-gone'); - t.end(); -}); - -test('tagset-remove-half-downward', function(t) { - nlp.addTags({ - Doctor: { - isA: 'Person' - }, - Surgeon: { - isA: 'Doctor' - } - }); - var doc = nlp('george is a person.'); - doc.match('george').tag('Surgeon'); - - //remove one just under the top.. - doc.match('george').unTag('Doctor'); - t.ok(doc.has('#Person') === true, 'person-tag-there'); - t.ok(doc.has('#Doctor') === false, 'doctor-tag-gone'); - t.ok(doc.has('#Surgeon') === false, 'Surgeon-tag-gone'); - t.end(); -}); diff --git a/test/unit/extend/addWords.test.js b/test/unit/extend/addWords.test.js deleted file mode 100644 index b7a266828..000000000 --- a/test/unit/extend/addWords.test.js +++ /dev/null @@ -1,25 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('persistent-lexicon-change', function(t) { - nlp = nlp.clone(); - var doc = nlp('he is James'); - t.equal(doc.places().length, 0, 'default-no-place'); - t.equal(doc.people().length, 1, 'default-one-person'); - - nlp.addWords({ - james: 'Place' - }); - doc = nlp('he is James'); - t.equal(doc.places().length, 1, 'now-one-place'); - t.equal(doc.people().length, 0, 'now-no-person'); - - nlp.addWords({ - foo: 'Place' - }); - doc = nlp('he is James'); - t.equal(doc.places().length, 1, 'still-one-place'); - t.equal(doc.people().length, 0, 'still-no-person'); - - t.end(); -}); diff --git a/test/unit/extend/clone.test.js b/test/unit/extend/clone.test.js deleted file mode 100644 index e4712058f..000000000 --- a/test/unit/extend/clone.test.js +++ /dev/null @@ -1,30 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('nlp.clone() -change original', function(t) { - var nlp2 = nlp.clone(); - t.ok(nlp('bat').has('#Noun'), 'nlp1-init'); - t.ok(nlp2('bat').has('#Noun'), 'nlp2-init'); 
- - //change nlp1 - nlp.addWords({ - bat: 'Man' - }); - t.ok(nlp('bat').has('#Man'), 'nlp1-changed'); - t.ok(nlp2('bat').has('#Man') === false, 'nlp2-unchanged'); - - //change nlp2 - nlp2.addWords({ - bat: 'ManTwo' - }); - t.ok(nlp('bat').has('#ManTwo') === false, 'nlp1-changed'); - t.ok(nlp2('bat').has('#ManTwo') === true, 'nlp2-unchanged'); - - //try nlp3 - var nlp3 = nlp.clone(); - t.ok(nlp3('bat').has('#Noun'), 'nlp3-normal-default'); - t.ok(nlp3('bat').has('#Man') === false, 'nlp3-normal'); - t.ok(nlp3('bat').has('#ManTwo') === false, 'nlp3-normal-again'); - - t.end(); -}); diff --git a/test/unit/extend/lexicon.test.js b/test/unit/extend/lexicon.test.js deleted file mode 100644 index 532948afb..000000000 --- a/test/unit/extend/lexicon.test.js +++ /dev/null @@ -1,43 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -var lexicon = { - 'Jardas al Abid': 'Place', - 'Umm Ar Rizam': 'Place', - Tobruk: 'Place' -}; - -test('user-lex-with-hyphenation:', function(t) { - var sentence = - 'A suicide attack hit the centre of Jardas-al-Abid killing one person (and the attacker) and injuring more than twenty.'; - var found = nlp(sentence, lexicon).places().data()[0] || {}; - t.equal('jardas al abid', found.normal, 'found-place1'); - t.equal(lexicon, lexicon, 'lexicon-unchanged'); - t.end(); -}); - -test('user-lex-with-possessive form:', function(t) { - var sentence = - "A suicide attack hit Jardas al Abid's center killing one person (and the attacker) and injuring more than twenty."; - var found = nlp(sentence, lexicon).places().data()[0] || {}; - t.equal("jardas al abid's", found.normal, 'found-place2'); - t.equal(lexicon, lexicon, 'lexicon-unchanged'); - t.end(); -}); - -test('user-lex-with-proper name in front:', function(t) { - var sentence = - "A suicide attack hit Lybia's Jardas al Abid city killing one person (and the attacker) and injuring more than twenty."; - var found = nlp(sentence, lexicon).places().data()[0] || {}; - t.equal('jardas al 
abid', found.normal, 'found-place3'); - t.equal(lexicon, lexicon, 'lexicon-unchanged'); - t.end(); -}); - -test('user-lex-with-punctuation:', function(t) { - var sentence = 'A suicide attack hit Jardas al Abid, which killed one person (and the attacker) and injured more than twenty.'; - var found = nlp(sentence, lexicon).places().data()[0] || {}; - t.equal('jardas al abid', found.normal, 'found-place4'); - t.equal(lexicon, lexicon, 'lexicon-unchanged'); - t.end(); -}); diff --git a/test/unit/extend/plugin.test.js b/test/unit/extend/plugin.test.js deleted file mode 100644 index 582c6c80f..000000000 --- a/test/unit/extend/plugin.test.js +++ /dev/null @@ -1,29 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('basic-plugin', function(t) { - var plugin = { - words: { - trex: 'Dinosaur' - }, - tags: { - Dinosaur: { - isA: 'Animal' - }, - Animal: { - isA: 'Noun' - } - }, - regex: { - uuu: 'Exaggeration' - } - }; - nlp.plugin(plugin); - var doc = nlp('i saw a HUUUUGE trex'); - - t.equal(doc.match('#Exaggeration').out('normal'), 'huuuuge', 'regex-works'); - t.equal(doc.match('#Dinosaur').out('normal'), 'trex', 'lexicon-works'); - t.equal(doc.match('#Animal').out('normal'), 'trex', 'tagset-works'); - - t.end(); -}); diff --git a/test/unit/extend/tagset.test.js b/test/unit/extend/tagset.test.js deleted file mode 100644 index c549d588d..000000000 --- a/test/unit/extend/tagset.test.js +++ /dev/null @@ -1,34 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('extend-tagset-flat', function(t) { - var tagSet = { - Color: {} - }; - var lexicon = { - 'mother of pearl': 'Color' - }; - nlp.addTags(tagSet); - var m = nlp('it is mother of pearl', lexicon).match('#Color+'); - t.equal(m.out('normal'), 'mother of pearl', 'text found'); - t.ok(m.has('#Noun'), 'it does not get in the way of the tagger'); - t.end(); -}); - -test('extend-tagset-nested', function(t) { - var tagSet = { - Color: {}, - OffWhite: { - isA: 'Color' - }, - }; - 
nlp.addTags(tagSet); - var lexicon = { - 'mother of pearl': 'OffWhite' - }; - var m = nlp('it is mother of pearl', lexicon).match('#OffWhite'); - t.equal(m.out('normal'), 'mother of pearl', 'text found'); - t.ok(m.has('#Noun'), 'it does not get in the way of the tagger'); - t.ok(m.has('#Color'), 'has isA tag, too'); - t.end(); -}); diff --git a/test/unit/extend/tokenize.test.js b/test/unit/extend/tokenize.test.js deleted file mode 100644 index 7123969c0..000000000 --- a/test/unit/extend/tokenize.test.js +++ /dev/null @@ -1,43 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('tokenize() runs without pos-tagging', function(t) { - var str = 'Miss Hoover, I glued my head to my shoulder.'; - var r = nlp.tokenize(str); - t.equal(r.out('text'), str, 'tokenize output is same'); - - t.equal(r.list.length, 1, 'sentence-parser-working'); - - var found = r.match('#Noun').found; - t.equal(found, false, 'no sneaky-tagging'); - - t.end(); -}); -test('em-dash, en-dash', function(t) { - // '-': //dash - // '–': //en-dash - // '—': //em-dash - var doc = nlp('fun-time'); - t.equal(doc.terms().length, 2, 'dash'); - doc = nlp('fun–time'); - t.equal(doc.terms().length, 2, 'en-dash'); - doc = nlp('fun—time'); - t.equal(doc.terms().length, 2, 'em-dash'); - - //not a full word, either - doc = nlp('fun - time'); - t.equal(doc.terms().length, 2, 'dash-word'); - doc = nlp('fun – time'); - t.equal(doc.terms().length, 2, 'en-dash-word'); - doc = nlp('fun — time'); - t.equal(doc.terms().length, 2, 'em-dash-word'); - - //numeric forms are split, but contractions too - doc = nlp('20-20'); - t.equal(doc.terms().length, 3, 'dash-num'); - doc = nlp('20–20'); - t.equal(doc.terms().length, 3, 'en-dash-num'); - doc = nlp('20—20'); - t.equal(doc.terms().length, 3, 'em-dash-num'); - t.end(); -}); diff --git a/test/unit/lib/fns.js b/test/unit/lib/fns.js deleted file mode 100644 index bf1863450..000000000 --- a/test/unit/lib/fns.js +++ /dev/null @@ -1,79 +0,0 @@ -'use strict' - 
-var pad = function(str, width, char) { - char = char || '.' - str = str.toString() - while (str.length < width) { - str += char - } - return str -} - -//helpers to make test output messages nicer -var str_test = function(got, input, want, t) { - var msg = pad("'" + got + "'", 20) + "(want: '" + want + "' )" //'\'' + input + - t.equal(got, want, msg) - return -} - -var arr_test = function(got, input, want, t) { - got = JSON.stringify(got) - want = JSON.stringify(want) - var msg = pad("'" + got + "'") + " (want: '" + want + "' )" //'\'' + input + - t.equal(got, want, msg) - return -} - -var has_pos = function(r, tags) { - var terms = r.terms() - for (var i = 0; i < terms.list.length; i++) { - var t = terms.list[i].terms[0] - if (!t.tags[tags[i]]) { - return false - } - } - return true -} - -var pos_test = function(r, tags, t) { - var str = '' - var got = r - .terms() - .list.map(function(ts) { - var term = ts.terms[0] - str += ' ' + term.normal - return Object.keys(term.tags)[0] - }) - .join(', ') - var msg = pad('"' + str.trim() + '"', 30) + pad(tags.join(', '), 45) + got - t.equal(has_pos(r, tags), true, msg) - return -} - -var terms_test = function(terms, want, t, isText) { - var str = '' - var got = terms.map(function(term) { - str += ' ' + term.text - if (isText) { - return term.text - } - return term.normal - }) - var msg = pad('"' + str + '"', 38) + ' got: [' + got.join(',') + '] want: [' + want.join(',') + ']' - t.deepEqual(got, want, msg) -} - -var isArray = function(someVar) { - if (Object.prototype.toString.call(someVar) === '[object Array]') { - return true - } - return false -} - -module.exports = { - str_test: str_test, - pos_test: pos_test, - terms_test: terms_test, - arr_test: arr_test, - isArray: isArray -} diff --git a/test/unit/lib/freshPrince.js b/test/unit/lib/freshPrince.js deleted file mode 100644 index 1e9f6a858..000000000 --- a/test/unit/lib/freshPrince.js +++ /dev/null @@ -1,28 +0,0 @@ -module.exports = `Now this is a story all about how my 
life got flipped-turned upside down. -and I'd like to take a minute, just sit right there, I'll tell you how I became the prince of a town called Bel-Air. - -In west Philadelphia born and raised, on the playground was where I spent most of my days. -Chillin' out maxin' relaxin' all cool, and all shooting some b-ball outside of the school. -When a couple of guys who were up to no good started making trouble in my neighborhood, -I got in one little fight and my mom got scared, she said, "You're movin' with your auntie and uncle in Bel-Air". - -I begged and pleaded with her day after day but she packed my suitcase and sent me on my way. -She gave me a kiss and then she gave me my ticket. I put my Walkman on and said, "I might as well kick it". - -First class, yo, this is bad. Drinking orange juice out of a champagne glass. -Is this what the people of Bel-Air living like? Hmm, this might be alright. - -But wait I hear they're prissy, bourgeois, all that. Is this the type of place that they just send this cool cat? -I don't think so, I'll see when I get there. -I hope they're prepared for the prince of Bel-Air. - -Well, the plane landed and when I came out. There was a dude who looked like a cop standing there with my name out. -I ain't trying to get arrested yet, I just got here. -I sprang with the quickness like lightning, disappeared. - -I whistled for a cab and when it came near. The license plate said "Fresh" and it had dice in the mirror. -If anything I could say that this cab was rare, but I thought, "Nah, forget it" – "Yo, home to Bel-Air"! - -I pulled up to the house about 7 or 8 and I yelled to the cabbie, "Yo home smell ya later". -I looked at my kingdom, I was finally there. To sit on my throne as the Prince of Bel-Air. -`; diff --git a/test/unit/lib/friends.js b/test/unit/lib/friends.js deleted file mode 100644 index f598c2b26..000000000 --- a/test/unit/lib/friends.js +++ /dev/null @@ -1,277 +0,0 @@ -module.exports = `Hey you guys! -Hey! 
-Hey, what are you doing? -Oh, figuring out our wedding plans. -That's funny, we were doing the same thing! -Yeah! -It's really crazy! The hall, the dress, the food... I-I had no idea how expensive this stuff was! -Yeah it is really pricey. I mean, I freaked when I first heard the numbers. -So what did you two do about it? -It was pretty simple actually, I came up with a couple of cost-cutting solutions, wrote out a list and Monica told me to go to hell. -There's no way around it Pheebs, you just gonna have to accept the fact that this is gonna cost you a lot of money. -I heard that weddings are like a 40 billion dollar a year industry. -Yeah, and I'm responsible for just like half of that. -But really, it does seem like this money could be put to better use? -Are you serious? -Yeah! Now, how would you feel if we gave all the wedding money to charity and we just got married at City Hall? -I think it would make me wanna marry you even more. -I've got to say you guys, that's an incredible gesture! -Maybe you do that next time you get married! -No, no, no. The next time it's gonna be a Hawaii at sunset. But maybe the time after that! -Hey! -Hey! -What's going on? -Our adoption social worker is coming by today so we are cleaning the apartment. -We? -You know you don't want me to help. You can't have it both ways! -Hey, is this person who decides whether or not you... get a baby? -Kind of. She's coming by to interview us and see where we live. -And it has to go perfectly, because if she doesn't like something about us she can keep us off every adoption list in the state. -Hey, maybe I should stop by! She could be a soap opera fan! It's very impressive when the little people know a celebrity. -Little people? -Celebrity. -Ok, so I think I'm just about done here, unless you have any bad stuff hidden somewhere, like... porn or cigarettes? -What...? NO! -Chandler? -I don't, and I'm offended by the insinuation! 
-Ok, so there's not a magazine under the couch, or a pack of cigarettes taped to the back of the toilet tank, or a filthy video in the VCR? -I'll admit to the cigarettes and the magazine, but that tape is not mine. -It isn't mine! -Well, I guess we'll never know whose it is! -May I help you? -Yes. We're here to make a rather sizeable donation to the children. -Well, any contribution, large or small, is always appreciated. -Well, I think you're gonna appreciate it the crap out of this one -Well, this is very generous! -And we don't want any recognition. This is completely anonymous. -Completely anonymous. From two kind strangers. -Mr. X and Phoebe Buffay. -Well if you like, we can include your names in our newsletter. -Not necessary. -Buffay is spelled B-U-F-F-A-Y. -And "X" is spelled uhm... "Mike Hannigan". -Right. Well, on behalf of the children: thank you both very much. -Sure, I so glad we did this. It feels so good! -It does. It feels really good! -Oh, look! And we get these free t-shirts! -Oh, actually, that's the shirt I wore to the gym. -Mhm... it's moist. -Hi! -Hi! Emma will be up in a minute! -Oh, good! -Oh hey Ross... Listen, I heard about you and Charlie. I'm really sorry. -Oh, that's OK. I'm sure there are tons of other beautiful paleontologists out there. -Absolutely. -There was one! She's it! All the rest look like they should live under a bridge! -So, uhm... what are you gonna do today? -Well, I was thinking of taking Emma to the playground! -Oh my God, what!? -Like I said I was thinking of taking Emma to the museum of knives and fire! -Ok, look, Ross. I do not want Emma going to the playground. -Be-caaauuuse... -All right, well, if you must know... I had a traumatic... swing incident... when I was little. -Seriously? -Yes, I was 4 years old and I was on the swing and then all of a sudden my hair got tangled in the chain. And to get me out my mom had to-had to cut a big chunk of my hair! And it was uneven for weeks! -And you made it through that? 
I wonder who's gonna play you in the movie! -Ok, fine! You can make fun of me. I do not want Emma going there. And I was thinking Claire Danes. -Look, I'm sorry to hear about your tragedy, ok? But the swings are perfectly safe, and besides Emma loves them. You know what, you should come with us and you'll see! -Ross, those things go like 40 miles an hour! Ok? When you're... and there is that moment when you are at the top, when you just don't know if you're gonna return back to earth! -Space is filled with orbiting children. Look, please, just come on, you know, when you’ll see the look on Emma’s face, I swear you won’t regret it. -All right! -Good, you don’t want to be one of those mothers who pass on their irrational fears on their children, do you? -Irrational, huh? All right, well, I’ll remember that the next time you freak out about a spider in your apartment! -Oh, yeah, that’s the same, I am sure there are thirty different species of poisonous swings! -Oh my God, the adoption lady is early! -Ok, ok, here we go. -Ok. -Here we go. Stand up straight. -Hello, is this the creepy residence? -We’re waiting for the adoption lady, but, hey, I’m glad you’re here. I was cleaning this morning and I found this . I don’t know if you wanna use it, but… -Awe, this is so sweet of you! But you know what? I won’t be needing a veil, I actually won’t be wearing a dress at all! -I told you! I am not coming to a naked wedding! -No, no, no, we’re not having a big reception, we took the money we were gonna spend on a wedding and we donate them to the children charity. -That’s crazy! . I am sorry. I just can’t imagine giving up my one wedding day like that! -We, you know, we’re different! We don’t care about having a huge party. All right, well, who cares, I don’t need a pretty veil and a fancy dress. -That’s right. You’re making a commitment and that’s the same, whether you do that at the Plaza or, where are you gonna do it? -City Hall. -Ow! Oh, that sounds nice! 
I am just there for jury duty. They really spruce that place up! -It’s ok, it’s ok. I made my decision. What I really want is a great big wedding -Yay! -But you already gave all your money to charity! -Well, I’ll just ask for it back! -I don’t think you can do that! -Why not! This is her wedding day, this is way more important than some stupid kids! -That’s sweet, honey, but save something for the adoption Lady. -Ok, careful. -Ok. -Careful, watch her hair. WATCH HER HAIR! -Rach, she’s got like three hairs! -I know but they’re just so beautiful! Oh, my God, I just pulled one out. -I promise you she’s safe! No watch how much she loves this. -Ok. -Ready sweety? -Ok. -Here we go! -Ok, careful, ok. Oh, she’s smiling! Oh my God, she does like it! -See, I told you! -Awe! Oh my God! Looks, she’s a little dare-devil! Oh, let me push, can I push? -Oh, absolutely! -Ok. Oh God. Get the camera, it’s in the diaper bag. -Ok! Ow! -We’re seriously asking for our money back? -It’s for our wedding day! Right, now, is this guy gay or straight, because one of us gonna have to start flirting. -Wow! Are you here to make another donation the same day? I don’t think that that’s ever happened before. -Gay, go. -Oh my God, I love your shirt! -The donation we made earlier, we k…, we w…, we want it back. -Excuse me? -Yeah. See, that money was for a big wedding, that we thought we didn’t want, but it turns out we do. -So you’re asking us to refund your donation to the children? -Yeah! This feels really good. -I am sorry. I am, but this wedding is just really important to me. -Hey, it’s not my business, besides it’s probably a good thing. We really would have been spoiling the children, all those food, and warm clothing… -Hey, that’s not fair! A person’s wedding is important! And especially to me! Ok? I didn’t have a graduation party! And I didn’t go to Prom. 
And I spent my sweet sixteen being chased round a tire yard by an escaped mental patient who is his own words wanted to “kill me” or whatever. So I deserve a real celebration and I am not gonna let some sweaty little man make me feel badly about it. -She could have been talking about either one of us. -Hi, I am Laura, I am here for your adoption interview. -Hi, I am Monica and this is Chandler. Please come in. -Thank you! -Would you like something to drink? -Oh, water would be fine. -Ok. Great. I am so glad that you are here. We’re really excited about getting this process started. -Oh, because we love kids. Love ‘em to death.Well, not actually to death, that's just a figure of speech - we love kids the appropriate amount... as allowed by law. -Your place is just lovely. -Ah, thank you. This building does have a wholesome family feel to it. -You know, I... I feel like I've been here before. Are any other couples in the building adopting? -Is that that couple on the first floor? Because we should get a baby before them. Yeah! That guy tried to sell me drugs. -But other than that... wholesome, wholesome building. -Oh... -What? -I just realized why I remember this place. -Really? What is it? -Oh, it's nothing. I went on a date with a guy who lived in this building and it didn't end very well. -Ohh... that wouldn't by any chance be... Joey Tribbiani? -Yes! -Of course it was! -Yeah, we had a really great night and in the morning he promised he would call me and he didn't. -RAT BASTARD! -So you're not friends with him? -OH GOD NO! Nope, no, no, no. No! No, no. Nope! No, no, no, no, no, no, no, no, no. NO! No! -Well, I'm sorry I brought it up. So, are either one of you planning on staying at home with your child... -OW! -What was that? -Oh, it's just some crazy guy who roams the halls here. He's great with kids though. -Oh, oh Ross, oh my God, are you okay? -SON OF A BITCH! -Ross, see! I told you, those swings are evil! Alright, that is it. 
That is the last time Emma is getting on one of those things for her entire life. -No! No, no, no, no, okay, it wasn't the swing's fault. It was my fault and kind of that kids fault. Who is still laughing. Nice. -Ross, c'mon, please. Can we just get out of here, before somebody else gets hurt? -No wait, okay, okay, I have an idea. I want you to get on the swing, okay? And you'll see that there's nothing to be afraid of. -I know what this is all about... You've always been jealous of my hair. -Look, I just think you're an adult, okay? And you should get over your silly fears. -Alright fine. I'll do it. -Good. -If you hold a spider. -WHAT? WHERE? WHERE? -IF you hold a spider. -I know. -Guys? Everything ok? It's me, Joe... -AAAAAAAAAAAAAAAAAAAAA......AAAaaa-doption!! -What's going on? -Oh, just like I said. That crazy... Bert... roaming the halls. -Guys!? -Keep on roaming Bert! We don't want any crazy today! -What's going on? -WE'LL TALK TO YOU LATER, BERT. EVERYTHNG'S FINE!! -Everything doesn't sound fine! -Is he alright out there by himself? -Oh yeah! He has a caretaker. His older brother... Ernie. -Bert and Ernie! -You can't make this stuff up! -You never told me about that guy on your sweet sixteen. Oh, ugh. I'm sorry about that. -Oh! It ended okay. One of my friends shot him. -Well, hey, at least you're getting a proper wedding. I mean, you really deserve that. -Yeah, I really do. You know, I had nothing growing up. Just like the kids I took the money from. -No! No, no. I see where this is going. Don't make me go back there. -Look, I can't have a wedding with this money now. It's tainted. -Alright, fine. We'll give the money back. -And if that guy at the charity gives us a hard time, my friend hasn't shot anyone in a really long time. -Well, I must say, this seems like a lovely environment to raise a child in. -Oh, by the way, you are more than welcome to look under any of the furniture, because, believe me, you won't find any porn or cigarettes under there! -Oh! 
Well, actually, before we look around, let me make sure I have everything I need up to here... -Why don't I show you the baby's room? -What the hell are you doing? -Well, you wouldn't let me in, so I thought you were in trouble. -Well, we're not. -But you called me 'Bert'!? That's our code word for danger! -We don't have a code word. -We don't? We really should. From now on, 'Bert' will be our code word for danger. -So that was the baby's room. -What room should we see next? -Any room that isn't behind this couch! -. -I did not care for that! -You have to get out of here. You slept with our social worker and you never called her back and she is still pissed, so she can't see you. -Ok, ok! -Ok! -What? -I forgot my bat. -Oh my God! -And for the last time, we do not want to be friends with you! And we don't want to buy your bat! -What are you doing here? -Bert! Bert! Bert! Bert! -Are you friends with him? -I can explain... Joey... -Uhm... ok... uhm... Well, yeah... You have got some nerve, coming back here. I can't believe you never called me. -Excuse me? -Oh... yeah... Probably you don't even remember my name. It's Joey, by the way. And don't bother telling me yours, because I totally remember it... lady. Yeah! I waited weeks for you to call me. -I gave you my number, you never called me. -No, no! Don't try to turn this around on me, ok? I'm not some kind of... social work, ok, that you can just... do. -Well, I'm pretty sure I gave you my number. -Really? Think about it. Come on! You're a beautiful woman, smart, funny, we had a really good time, huh? If I had your number, why wouldn't I call you? -I don't know... Well, maybe I'm wrong... I'm sorry... -No, no, hey, no! Too late for apologies... ok? You broke my heart. You know how many women I had to sleep with to get over you? -Joey, wait! -I'm sorry that you had to see that. I'm so embarrassed... -Oh, that's really ok. -Yeah, that we totally understand. Dating is hard. -Boy, you people are nice... 
And I've got to say... I think you're going to make excellent parents. -LAURA! -We're back! -Are you here to take more money? Because, I think what you're looking for is an ATM. -No, no, we're here to give the money back. -Yeah, because you know what, it's... it's all about the children. -Although... it's also about the wedding... Ugh, alright... here. No... Oh God... Oh! -If I haven't said so already sir, congratulations! -Now... what do you think we should do? -You know what? It's not your decision anymore. -What? -On behalf of the Children of New York, I reject your money. -But... but... but we're giving you this! -Yeah... And I'm giving it back to you... Come on! Consider it a contribution. -Well, this is very generous! -Please, take the check, go have a great wedding and a wonderful life together. -Well, I mean... It sounds good to me. And that way we can save up, come back in a few years and make an even bigger donation. -Absolutely! And when you do, make sure you ask for Brian. -Oh, is that you? -No! -Hello...? Oh hi... Oh my God...! Really...? I can't wait to tell Chandler... Ok, goodbye. -Wrong number? -It was Laura... She gave us a great report and we are officially on the waiting list. -That's great! -Now we just have to wait for a call and... and someone tells us there's a baby waiting for us. Oh... -Hello...? Have you seen Joey's bat? - -Ok... I got a spider. There were two, I picked the bigger one. -Ok... -Ok... -This feels perfectly normal. Ok, get on the swing! -Ok... O-k... -Ok... -whoo... ok... wow... ok... OH! -See? -A-alright! I can do this. -There you go! Good for you! And you know what, I'm actually getting used to this little guy. I don't really even feel him in here anymore. -That's because he's on your neck. -Well... Whaa... aaah... aaahhh... -ROSS! 
-`; diff --git a/test/unit/lib/nlp.js b/test/unit/lib/nlp.js deleted file mode 100644 index 10db6c6a6..000000000 --- a/test/unit/lib/nlp.js +++ /dev/null @@ -1,9 +0,0 @@ -if (typeof process !== undefined && typeof module !== undefined) { - if (process.env.TESTENV === 'prod') { - console.warn('== production build test 🚀 =='); - // module.exports = require('../../builds/efrt'); - module.exports = require('../../../'); - } else { - module.exports = require('../../../src/'); - } -} diff --git a/test/unit/lib/pennSample.js b/test/unit/lib/pennSample.js deleted file mode 100644 index bcd9e256b..000000000 --- a/test/unit/lib/pennSample.js +++ /dev/null @@ -1,325 +0,0 @@ -//(very) small subset of the Penn-treebank that should always pass -module.exports = [ - { - text: - 'Dr. Talcott led a team of researchers from the National Cancer Institute and the medical schools of Harvard University and Boston University.', - pos: 'NNP, NNP, VBD, DT, NN, IN, NNS, IN, DT, NNP, NNP, NNP, CC, DT, JJ, NNS, IN, NNP, NNP, CC, NNP, NNP' - }, - { - text: 'The monthly sales have been setting records every month since March.', - pos: 'DT, JJ, NNS, VBP, VBN, VBG, NNS, DT, NN, IN, NNP' - }, - { - text: ' Cray Computer will be a concept stock, he said.', - pos: 'NNP, NNP, MD, VB, DT, NN, NN, PRP, VBD' - }, - { - text: 'Esso said the Whiting field started production Tuesday.', - pos: 'NNP, VBD, DT, NNP, NN, VBD, NN, NNP' - }, - { - text: 'Pick a country, any country.', - pos: 'VB, DT, NN, DT, NN' - }, - { - text: 'They fell into oblivion after the 1929 crash.', - pos: 'PRP, VBD, IN, NN, IN, DT, CD, NN' - }, - { - text: 'Political and currency gyrations can whipsaw the funds.', - pos: 'JJ, CC, NN, NNS, MD, VB, DT, NNS' - }, - { - text: 'They cite a lack of imbalances that provide early warning signals of a downturn.', - pos: 'PRP, VBP, DT, NN, IN, NNS, WDT, VBP, JJ, NN, NNS, IN, DT, NN' - }, - { - text: 'Characters drink Salty Dogs, whistle Johnny B. 
Goode and watch Bugs Bunny reruns.', - pos: 'NNS, VBP, NNP, NNP, VBP, NNP, NNP, NNP, CC, VBP, NNP, NNP, NNS' - }, - { - text: 'They read Mickey Spillane and talk about Groucho and Harpo.', - pos: 'PRP, VBP, NNP, NNP, CC, VBP, IN, NNP, CC, NNP' - }, - { - text: ' Consider Jim Courter.', - pos: 'VB, NNP, NNP' - }, - { - text: 'But it resists yielding political ground.', - pos: 'CC, PRP, VBZ, VBG, JJ, NN' - }, - { - text: ' In Asia, as in Europe, a new order is taking shape, Mr. Baker said.', - pos: 'IN, NNP, IN, IN, NNP, DT, JJ, NN, VBZ, VBG, NN, NNP, NNP, VBD' - }, - { - text: 'And South Carolina says it is getting results.', - pos: 'CC, NNP, NNP, VBZ, PRP, VBZ, VBG, NNS' - }, - { - text: " It was full of violence and gangs and kids cutting class, says Linda Ward, the school's principal.", - pos: 'PRP, VBD, JJ, IN, NN, CC, NNS, CC, NNS, VBG, NN, VBZ, NNP, NNP, DT, NN, NN' - }, - { - text: 'I believe in the system.', - pos: 'PRP, VBP, IN, DT, NN' - }, - { - text: 'Mrs. Yeargin declined.', - pos: 'NNP, NNP, VBD' - }, - { - text: 'Yeargin won widespread local support.', - pos: 'NNP, VBD, JJ, JJ, NN' - }, - { - text: 'But Learning Materials matched on 66.5 of 69 subskills.', - pos: 'CC, NNP, NNPS, VBD, IN, CD, IN, CD, NNS' - }, - { - text: 'The two banks merged in 1985.', - pos: 'DT, CD, NNS, VBD, IN, CD' - }, - { - text: "He said the company's core business remains strong.", - pos: 'PRP, VBD, DT, NN, NN, NN, VBZ, JJ' - }, - { - text: 'Estimated volume was a moderate 3.5 million ounces.', - pos: 'VBN, NN, VBD, DT, JJ, CD, CD, NNS' - }, - { - text: 'Mr. 
Gillespie at Viacom says the ratings are rising.', - pos: 'NNP, NNP, IN, NNP, VBZ, DT, NNS, VBP, VBG' - }, - { - text: 'Ad Notes....', - pos: 'NNP, NNPS' - }, - { - text: 'The business had been handled by VanSant Dugdale, Baltimore.', - pos: 'DT, NN, VBD, VBN, VBN, IN, NNP, NNP, NNP' - }, - { - text: ' The economy is clearly slowing, says Robert Black, president of the Richmond Federal Reserve Bank.', - pos: 'DT, NN, VBZ, RB, VBG, VBZ, NNP, NNP, NN, IN, DT, NNP, NNP, NNP, NNP' - }, - { - text: 'They will mature Dec. 21.', - pos: 'PRP, MD, VB, NNP, CD' - }, - { - text: 'Lancaster Colony Corp. said it acquired Reames Foods Inc. in a cash transaction.', - pos: 'NNP, NNP, NNP, VBD, PRP, VBD, NNP, NNP, NNP, IN, DT, NN, NN' - }, - { - text: 'NL is officially making the offer.', - pos: 'NNP, VBZ, RB, VBG, DT, NN' - }, - { - text: "The Japanese fret openly about the U.S. public's rancor.", - pos: 'DT, NNP, NN, RB, IN, DT, NNP, NN, NN' - }, - { - text: 'They operate ships and banks.', - pos: 'PRP, VBP, NNS, CC, NNS' - }, - { - text: - 'Adds Takeshi Kondo, senior vice president of C. 
Itoh America Inc.: We have a great interest in making investments, particularly in new ventures.', - pos: 'VBZ, NNP, NNP, JJ, NN, NN, IN, NNP, NNP, NNP, NNP, PRP, VBP, DT, JJ, NN, IN, VBG, NNS, RB, IN, JJ, NNS' - }, - { - text: 'But many banks are turning away from strict price competition.', - pos: 'CC, JJ, NNS, VBP, VBG, RB, IN, JJ, NN, NN' - }, - { - text: 'One big reason: thin margins.', - pos: 'CD, JJ, NN, JJ, NNS' - }, - { - text: 'Buy a new Chevrolet.', - pos: 'VB, DT, JJ, NNP' - }, - { - text: 'Buy a diamond necklace.', - pos: 'VB, DT, NN, NN' - }, - { - text: 'They are keeping a close watch on the yield on the S&P 500.', - pos: 'PRP, VBP, VBG, DT, JJ, NN, IN, DT, NN, IN, DT, NNP, CD' - }, - { - text: 'In fact, the market has always tanked.', - pos: 'IN, NN, DT, NN, VBZ, RB, VBN' - }, - { - text: 'Always.', - pos: 'RB' - }, - { - text: 'China pulled out of the program in July.', - pos: 'NNP, VBD, VB, IN, DT, NN, IN, NNP' - }, - { - text: 'But regulators are wary.', - pos: 'CC, NNS, VBP, JJ' - }, - { - text: 'He also is a consensus manager, insiders say.', - pos: 'PRP, RB, VBZ, DT, NN, NN, NNS, VBP' - }, - { - text: 'Compromises are possible.', - pos: 'NNS, VBP, JJ' - }, - { - text: 'The company acknowledges some problems.', - pos: 'DT, NN, VBZ, DT, NNS' - }, - { - text: - 'A number of cities including Minneapolis, Philadelphia and Houston have vacant grain elevators, Eggers says.', - pos: 'DT, NN, IN, NNS, VBG, NNP, NNP, CC, NNP, VBP, JJ, NN, NNS, NNP, VBZ' - }, - { - text: - 'They suffered from malnutrition, chest diseases, cardiovascular disorders, skin problems, infectious diseases and the aftereffects of assaults and rape.', - pos: 'PRP, VBD, IN, NN, NN, NNS, JJ, NNS, NN, NNS, JJ, NNS, CC, DT, NNS, IN, NNS, CC, NN' - }, - { - text: 'That was the law.', - pos: 'DT, VBD, DT, NN' - }, - { - text: 'It was censorship.', - pos: 'PRP, VBD, NN' - }, - { - text: 'It was outrageous.', - pos: 'PRP, VBD, JJ' - }, - { - text: 'But the court disagreed.', - pos: 'CC, 
DT, NN, VBD' - }, - { - text: 'The man was Charles Z. Wick.', - pos: 'DT, NN, VBD, NNP, NNP, NNP' - }, - { - text: ' Bob has handled the extraordinary growth of the company quite brilliantly, said Mr. Newhouse.', - pos: 'NNP, VBZ, VBN, DT, JJ, NN, IN, DT, NN, RB, RB, VBD, NNP, NNP' - }, - { - text: - 'This species of congressional action is predicated on an interpretation of the appropriations clause that is erroneous and unconstitutional.', - pos: 'DT, NN, IN, JJ, NN, VBZ, VBN, IN, DT, NN, IN, DT, NNS, NN, WDT, VBZ, JJ, CC, JJ' - }, - { - text: 'President Reagan learned that lesson.', - pos: 'NNP, NNP, VBD, DT, NN' - }, - { - text: 'Mr. Sidak served as an attorney in the Reagan administration.', - pos: 'NNP, NNP, VBD, IN, DT, NN, IN, DT, NNP, NN' - }, - { - text: 'The death of the Herald, a newsstand paper in a freeway town, was perhaps inevitable.', - pos: 'DT, NN, IN, DT, NNP, DT, NN, NN, IN, DT, NN, NN, VBD, RB, JJ' - }, - { - text: ' The Herald was a survivor from a bygone age, said J. 
Kendrick Noble, a media analyst with PaineWebber Inc.', - pos: 'DT, NNP, VBD, DT, NN, IN, DT, JJ, NN, VBD, NNP, NNP, NNP, DT, NNS, NN, IN, NNP, NNP' - }, - { - text: 'The reaction in the newsroom was emotional.', - pos: 'DT, NN, IN, DT, NN, VBD, JJ' - }, - { - text: - 'The program traders, on the other hand, portray old-fashioned stock pickers as the Neanderthals of the industry.', - pos: 'DT, NN, NNS, IN, DT, JJ, NN, VBP, JJ, JJ, NN, NNS, IN, DT, NNS, IN, DT, NN' - }, - { - text: 'Reducing volatility.', - pos: 'VBG, NN' - }, - { - text: 'Ballot watchers say attention already is focused on the 1990 elections.', - pos: 'NN, NNS, VBP, NN, RB, VBZ, VBN, IN, DT, CD, NNS' - }, - { - text: 'Colleges, she says, are eyeing registration through 900 service.', - pos: 'NNS, PRP, VBZ, VBP, VBG, NN, IN, CD, NN' - }, - { - text: 'FAMILY PETS are improving recovery rates of patients at Columbia Hospital, Milwaukee.', - pos: 'NN, NNS, VBP, VBG, NN, NNS, IN, NNS, IN, NNP, NNP, NNP' - }, - { - text: 'The appointment takes effect Nov. 
13.', - pos: 'DT, NN, VBZ, NN, NNP, CD' - }, - { - text: 'Heiwado Co.', - pos: 'NNP, NNP' - }, - { - text: 'Guaranteed by Svenska Handelsbanken.', - pos: 'VBN, IN, NNP, NNP' - }, - { - text: 'Mitsubishi Pencil Co.', - pos: 'NNP, NNP, NNP' - }, - { - text: 'Koizumi Sangyo Corp.', - pos: 'NNP, NNP, NNP' - }, - { - text: 'A stadium craze is sweeping the country.', - pos: 'DT, NN, NN, VBZ, VBG, DT, NN' - }, - { - text: 'Stock prices closed higher in Stockholm, Amsterdam and Frankfurt and lower in Zurich.', - pos: 'NN, NNS, VBD, JJR, IN, NNP, NNP, CC, NNP, CC, JJR, IN, NNP' - }, - { - text: 'A faster version, the SuperDot, was launched in 1984.', - pos: 'DT, JJR, NN, DT, NNP, VBD, VBN, IN, CD' - }, - { - text: 'Valley Federal is currently being examined by regulators.', - pos: 'NNP, NNP, VBZ, RB, VBG, VBN, IN, NNS' - }, - { - text: 'Columbia has only about 10 million common shares in public hands.', - pos: 'NNP, VBZ, RB, IN, CD, CD, JJ, NNS, IN, JJ, NNS' - }, - { - text: 'But the concept is workable.', - pos: 'CC, DT, NN, VBZ, JJ' - }, - { - text: "Mr. 
Spiegel's next career move is a subject of speculation on Wall Street.", - pos: 'NNP, NNP, JJ, NN, NN, VBZ, DT, NN, IN, NN, IN, NNP, NNP' - }, - { - text: 'Wedtech management used the merit system.', - pos: 'NNP, NN, VBD, DT, NN, NN' - }, - { - text: 'Numerous other scandals, among them the ones at HUD, have the same characteristics as Wedtech.', - pos: 'JJ, JJ, NNS, IN, PRP, DT, NNS, IN, NNP, VBP, DT, JJ, NNS, IN, NNP' - }, - { - text: 'Railroad companies and some ports are reaping a sudden windfall of business.', - pos: 'NN, NNS, CC, DT, NNS, VBP, VBG, DT, JJ, NN, IN, NN' - }, - { - text: ' The recent rally in precious metals was a result of uncertainty and volatility in equities, he said.', - pos: 'DT, JJ, NN, IN, JJ, NNS, VBD, DT, NN, IN, NN, CC, NN, IN, NNS, PRP, VBD' - } -]; diff --git a/test/unit/match/capture.test.js b/test/unit/match/capture.test.js deleted file mode 100644 index 4ae6c0e53..000000000 --- a/test/unit/match/capture.test.js +++ /dev/null @@ -1,52 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('match-capture-group', function(t) { - var m = nlp('John eats glue').match('[john]'); - t.equal(m.out('text'), 'John', 'capture-group-simple'); - - m = nlp('John Smith eats glue').match('[#Person+]'); - t.equal(m.out('text'), 'John Smith', 'capture-two'); - - m = nlp('ralf eats the glue').match('ralf [#Verb] the'); - t.equal(m.out('normal'), 'eats', 'simple subset'); - - m = nlp('ralf eats the glue').match('[ralf #Verb] the'); - t.equal(m.out('normal'), 'ralf eats', 'two-word capture'); - - m = nlp('i saw ralf eat the glue Mrs. 
Hoover').match('ralf [#Verb the glue] mrs'); - t.equal(m.out('normal'), 'eat the glue', 'three-word capture'); - - m = nlp('ralf eats the glue').match('* [#Verb]'); - t.equal(m.out('normal'), 'eats', 'capture after wildcard'); - - m = nlp('ralf eats the glue').match('ralf eats [*]'); - t.equal(m.out('normal'), 'the glue', 'wildcard capture at the end'); - - m = nlp('ralf eats the glue').match('ralf eats [*] glue'); - t.equal(m.out('normal'), 'the', 'wildcard capture in the middle'); - - m = nlp('saw the Toronto International Documentary Film Festival yesterday').match('saw the? [#Noun+] yesterday'); - t.equal(m.trim().out('text'), 'Toronto International Documentary Film Festival', 'greedy capture'); - - t.end(); -}); - -test('replace-capture-group', function(t) { - var m = nlp('John eats glue').replace('john [#Verb]', 'sniffs'); - t.equal(m.out('text'), 'John sniffs glue', 'capture-group-simple'); - // - // m = nlp('John eats glue. john is fun.').replace('[john]', '$1 smith'); - // t.equal(m.out('text'), 'John smith eats glue. john smith is fun.', 'capture-group-multiple'); - // - // m = nlp('John Smith eats glue').replace('[#Person+]', 'dr. $1'); - // t.equal(m.out('text'), 'dr. 
John Smith eats glue', 'capture-two'); - // - // m = nlp('ralf eats the glue').replace('ralf [#Verb]', 'he $1'); - // t.equal(m.out('text'), 'he eats the glue', 'simple subset'); - // - // m = nlp('John eats the glue').replace('the [#Noun]', 'the cyber-$1'); - // t.equal(m.out('text'), 'John eats the cyber-glue', 'capture-group as subset'); - // - t.end(); -}); diff --git a/test/unit/match/encoding.test.js b/test/unit/match/encoding.test.js deleted file mode 100644 index 3bfcdbc10..000000000 --- a/test/unit/match/encoding.test.js +++ /dev/null @@ -1,63 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('encoding-match:', function(t) { - var r = nlp('it is * nice'); - var str = r.match('is \\*').trim().out(); - t.equal(str, 'is *', 'encode asterix'); - - // r = nlp('it is + nice'); - // str = r.match('is \\+ nice').trim().out(); - // t.equal(str, 'is + nice', 'encode plus'); - - t.end(); -}); - -test('reserved-word-in-src:', function(t) { - var r = nlp('buy eggs constructor yeah prototype'); - t.equal(r.has(`backburner`), false, 'single'); - t.equal(r.has(`#Foo`), false, 'tag'); - t.equal(r.has(`(upcoming|backburner)`), false, 'anyOf'); - t.equal(r.has(`lala+`), false, 'manyOf'); - t.equal(r.has(`nword{2,4}`), false, 'someOf'); - t.end(); -}); - -test('reserved-word-in-match:', function(t) { - var r = nlp('fo foo fulala repeat'); - t.equal(r.has(`constructor`), false, 'single'); - t.equal(r.has(`#constructor`), false, 'tag'); - t.equal(r.has(`(upcoming|constructor)`), false, 'anyOf'); - t.equal(r.has(`constructor+`), false, 'manyOf'); - t.equal(r.has(`constructor{2,4}`), false, 'someOf'); - t.end(); -}); - -test('test-infinite-loop', function(t) { - var weirdDoc = nlp('^ ? * . + $'); - weirdDoc.match('is?'); - weirdDoc.match('.?'); - weirdDoc.match('*'); - weirdDoc.match('.+'); - weirdDoc.match('+'); - weirdDoc.match('?'); - weirdDoc.match('.'); - weirdDoc.match('? * . 
+'); - weirdDoc.not('?'); - weirdDoc.not('*'); - weirdDoc.not('^'); - weirdDoc.not('$'); - weirdDoc.not('+'); - weirdDoc.not('? * . +'); - t.ok(true, 'didnt regress'); - - var str = 'And you are?. Marshal'; - var have = nlp(str).sentences().out(); - t.equal(have, str, 'regression #1'); - - str = `- where is she.Oh. you guys don't know?`; - have = nlp(str).sentences().out(); - t.equal(have, str, 'regression #2'); - - t.end(); -}); diff --git a/test/unit/match/if.test.js b/test/unit/match/if.test.js deleted file mode 100644 index 6e43b8ff0..000000000 --- a/test/unit/match/if.test.js +++ /dev/null @@ -1,33 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('if-basic:', function(t) { - var r = nlp('spencer is here'); - var m = r.if('asdf'); - t.equal(m.out('text'), '', 'if-negative'); - - m = r.if('spencer'); - t.equal(m.out('text'), 'spencer is here', 'if-positive'); - - r = nlp('spencer is here. john was here.'); - m = r.if('is'); - t.equal(m.out('normal'), 'spencer is here.', 'if-partial'); - - t.end(); -}); - -test('ifNo:', function(t) { - var r = nlp('spencer is here'); - //ifNo - var m = r.ifNo('spencer'); - t.equal(m.out('text'), '', 'ifNo-positive'); - - m = r.ifNo('asdf'); - t.equal(m.out('text'), 'spencer is here', 'ifNo-negative'); - - r = nlp('spencer is here. 
john was here.'); - m = r.ifNo('is'); - t.equal(m.out('normal'), 'john was here.', 'if-no-partial'); - - t.end(); -}); diff --git a/test/unit/match/insert.test.js b/test/unit/match/insert.test.js deleted file mode 100644 index 2d85a346c..000000000 --- a/test/unit/match/insert.test.js +++ /dev/null @@ -1,44 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('insert-basic :', function(t) { - var m = nlp('the dog sat').insertBefore('and'); - t.equal(m.out('text'), 'and the dog sat', 'and-dog'); - - m = nlp('the dog sat').insertAfter('patiently'); - t.equal(m.out('text'), 'the dog sat patiently', 'sat-patiently'); - - m = nlp('the dog sat'); - m.match('dog').insertBefore('nice'); - t.equal(m.out('text'), 'the nice dog sat', 'nice-dog'); - - m = nlp('a dog sat'); - m.match('sat').insertAfter('quickly'); - t.equal(m.out('text'), 'a dog sat quickly', 'sat-quickly'); - - m = nlp('a dog sat'); - m.match('a dog sat').insertAfter('quickly'); - t.equal(m.out('text'), 'a dog sat quickly', 'multi-match-quickly'); - - m = nlp('a dog sat'); - m.match('asdf').insertAfter('no no no'); - t.equal(m.out('text'), 'a dog sat', 'no no no no'); - - t.end(); -}); - -test('insert-subset-include :', function(t) { - var m = nlp('the dog is nice'); - var sub = m.match('is'); - sub.insertAfter('really'); - t.equal(sub.out('normal'), 'is really', 'is-really'); - t.equal(m.out('normal'), 'the dog is really nice', 'dog-is-really-nice'); - - m = nlp('the dog climbed the fence'); - sub = m.match('climbed'); - sub.insertBefore('really'); - t.equal(sub.out('normal'), 'really climbed', 'really-quickly'); - t.equal(m.out('normal'), 'the dog really climbed the fence', 'dog-really-climbed'); - - t.end(); -}); diff --git a/test/unit/match/match.test.js b/test/unit/match/match.test.js deleted file mode 100644 index 3aa34b931..000000000 --- a/test/unit/match/match.test.js +++ /dev/null @@ -1,187 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('==Match 
==', function(T) { - T.test('term-match :', function(t) { - [ - ['quick', 'quick', true], - ['Quick', 'Quick', true], - ['quick', 's', false], - ['quick', '#Adjective', true], - ['quick', '#Noun', false], - ['quick', '(fun|nice|quick|cool)', true], - ['quick', '(fun|nice|good)', false] - ].forEach(function(a) { - var m = nlp(a[0]).match(a[1]); - var msg = a[0] + ' matches ' + a[1] + ' ' + a[2]; - t.equal(m.found, a[2], msg); - }); - t.end(); - }); - - T.test('sentence-match:', function(t) { - [ - ['the dog played', 'the dog', 'the dog'], - ['the dog played', 'the dog played', 'the dog played'], - ['the dog played', 'the #Noun', 'the dog'], - ['the dog played', 'the #Noun played', 'the dog played'], - ['the dog played', 'the cat played', ''], - ['the dog played', 'the #Adjective played', ''], - ['the dog played', 'the (cat|dog|piano) played', 'the dog played'], - ['the dog played', 'the (cat|piano) played', ''], - ['the dog played', 'the . played', 'the dog played'], - //optional - ['the dog played', 'the dog quickly? played', 'the dog played'], - ['the dog played', 'the dog #Adverb? played', 'the dog played'], - ['the dog quickly played', 'the dog #Adverb? played', 'the dog quickly played'], - ['the dog quickly played', 'the dog #Adverb played', 'the dog quickly played'], - ['the dog quickly played', 'the dog . played', 'the dog quickly played'], - ['the dog quickly played', 'the dog .? played', 'the dog quickly played'], - // ['the dog played', 'the dog .? 
played', 'the dog played'], - - //leading/trailing logic - ['the dog played', 'the dog played$', 'the dog played'], - ['the dog played', 'the dog', 'the dog'], - ['the dog played', 'the dog$', ''], - ['the dog played', 'the dog$ played', ''], - ['the dog played', '^the dog', 'the dog'], - ['the dog played', 'dog played', 'dog played'], - ['the dog played', '^dog played', ''], - ['the dog played', '^played', ''], - ['the dog played', '^the', 'the'], - - ['john eats glue', 'john eats glue', 'john eats glue'], - ['john eats glue', 'john eats', 'john eats'], - ['john eats glue', 'eats glue', 'eats glue'], - ['john eats glue', 'eats glue all day', ''], - - //test contractions - // [`if you don't mind`, `you don't mind`, `you don't mind`], - [`if you don't mind`, `you don't care`, ``], - // [`if you don't mind`, `you don't`, `you don't`], - // [`if you don't mind`, `don't mind`, `don't mind`], - [`if you didn't care`, `didn't`, `didn't`], - // [`if you wouldn't care, i'll eat here`, `i'll eat`, `i'll eat`], //omg hard one - - // [`don't go`, `do not`, `don't`], - [`do not go`, `do not`, `do not`] - // [`i dunno`, `do not`, `dunno`], - //bugs - // [`really remind me to buy`, '#Adverb? 
#Infinitive (me|us) (to|for)', `really remind me to`], - ].forEach(function(a) { - var m = nlp(a[0]).match(a[1]); - if (!m.found) { - t.equal(a[2], '', 'no-match: ' + a[0] + ' - -' + a[1]); - } else { - var msg = "'" + a[0] + "' - " + a[1] + " - - have : '" + m.out('normal') + "'"; - t.equal(m.out('normal'), a[2], msg); - } - }); - t.end(); - }); - - test('match-from-array :', function(t) { - var m = nlp('spencer is really cool').match(['spencer']); - t.equal(m.out('normal'), 'spencer', 'just-spencer'); - t.equal(m.length, 1, 'one-result'); - - m = nlp('spencer is really cool').match([]); - t.equal(m.out('normal'), '', 'empty match'); - t.equal(m.length, 0, 'zero-results'); - - m = nlp('spencer is really cool'); - var r = m.match(['spencer', 'really']).toUpperCase(); - t.equal(r.out('text'), 'SPENCER REALLY', 'match-spencer-really'); - t.equal(r.length, 2, 'two-results'); - - t.equal(m.out('text'), 'SPENCER is REALLY cool', 'match-spencer-really'); - t.equal(m.length, 1, 'still-one-result'); - t.end(); - }); - - test('match-from-object :', function(t) { - var m = nlp('spencer is really cool').match({ - spencer: true - }); - t.equal(m.out('normal'), 'spencer', 'just-spencer'); - t.equal(m.length, 1, 'one-result'); - t.end(); - }); - - test('tag-match-tag :', function(t) { - var m = nlp('apple is cool'); - m.match(['apple', 'susan']).tag('Person'); - var p = m.people(); - t.equal(p.out('normal'), 'apple', 'apple-tagged'); - t.equal(m.length, 1, 'one-result'); - t.end(); - }); - - test('lump-match:', function(t) { - var m = nlp('hello one two three hello'); - m.match('one two three').lump(); - - t.equal(m.has('hello'), true, 'has-unlumped'); - t.equal(m.has('one two three'), true, 'has-lumped'); - t.equal(m.has('hello one two three'), true, 'word+lumped'); - t.equal(m.has('one two three hello'), true, 'lumped+word'); - - t.equal(m.has('one'), false, 'no-partial1'); - t.equal(m.has('two'), false, 'no-partial2'); - t.equal(m.has('three'), false, 'no-partial3'); - 
t.equal(m.has('one two'), false, 'no-partial4'); - t.equal(m.has('two three'), false, 'no-partial5'); - t.equal(m.has('hello one two'), false, 'no-partial6'); - t.equal(m.has('three hello'), false, 'no-partial7'); - t.equal(m.has('two three hello'), false, 'no-partial8'); - t.end(); - }); - - test('before-match:', function(t) { - var r = nlp('one two three four five').before('two'); - t.equal(r.out('normal'), 'one', 'before-two'); - - r = nlp('one two three four five').before('three . five'); - t.equal(r.out('normal'), 'one two', 'before-several'); - - r = nlp('one two three four five').before('one two'); - t.equal(r.out('normal'), '', 'no-before-start'); - - // r = nlp('one two three four').before('.'); //tricky - // t.equal(r.out('normal'), '', 'before-any'); - - r = nlp('one two three four. No, not here. He said two days a week.').before('two'); - var arr = r.out('array'); - t.equal(arr[0], 'one', 'before-twice-1'); - t.equal(arr[1], 'he said', 'before-twice-2'); - - r = nlp('it was all the way over to two. It was the number two.').before('it'); - t.equal(r.found, false, 'no-empty-matches'); - - t.end(); - }); - - test('after-match:', function(t) { - var r = nlp('one two three four five').after('two'); - t.equal(r.out('normal'), 'three four five', 'after-one'); - - r = nlp('one two three four five').after('one . three'); - t.equal(r.out('normal'), 'four five', 'after-several'); - - r = nlp('one two three four five').after('four five'); - t.equal(r.out('normal'), '', 'no-afters-end'); - - r = nlp('one two three four').after('.'); - t.equal(r.out('normal'), 'two three four', 'after-any'); - - r = nlp('one two three four. No, not here. He said two days a week.').after('two'); - var arr = r.out('array'); - t.equal(arr[0], 'three four', 'after-twice-1'); - t.equal(arr[1], 'days a week', 'after-twice-2'); - - r = nlp('all the way over to two. 
It was the number two.').after('two'); - t.equal(r.found, false, 'no-empty-matches'); - - t.end(); - }); -}); diff --git a/test/unit/match/match_tricky.test.js b/test/unit/match/match_tricky.test.js deleted file mode 100644 index 3a5ca3e3f..000000000 --- a/test/unit/match/match_tricky.test.js +++ /dev/null @@ -1,123 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('fancy match', function(t) { - [ - //misc - ['doug is good', '', 0], - ['doug is good', '.', 3], - ['doug is good', '.?', 3], - ['doug is good', '.+', 3], - - //contractions - ['he\'s nice', 'he is', 2], - ['he\'s nice', 'is nice', 2], - ['he\'s nice', 'he\'s', 1], - ['he\'s nice', 'he\'s nice', 3], - ['he\'s nice', 'nice', 1], - - //over/under - ['he is nice', 'is nice and good', 0], - ['is nice', 'he is nice', 0], - - //dot - ['doug is good', 'doug is good', 3], - ['doug is good', 'doug . good', 3], - ['doug is good', 'doug is .', 3], - ['doug is good', '. is .', 3], - ['doug is good', '. . .', 3], - ['doug is good', '. . . .', 0], - - //optional miss - ['doug is good', 'doug is really? good', 3], - ['doug is good', 'doug is .? good', 3], //tricky 'greedy optional' bug - ['doug is good', 'doug is #Adverb? good', 3], - //optional has - ['doug is really good', 'doug is really? good', 4], - ['doug is really good', 'doug is .? good', 4], - ['doug is really good', 'doug is #Adverb? 
good', 4], - //asterix empty - ['doug is good', 'doug *', 3], - ['doug is good', 'doug is *', 3], - ['doug is good', '*', 3], - //asterix positive - ['doug is good', 'doug * good', 3], - ['doug is really good', 'doug * good', 4], - ['doug is really so very good', 'doug * good', 6], - ['doug is really so very good at stuff', 'doug * good', 6], - ['we think doug is really so very good at stuff', 'doug * good', 6], - //asterix negative - ['doug is good', 'doug * bad', 0], - ['doug is good', 'spencer * bad', 0], - ['doug is good', 'spencer *', 0], - ['doug is good', 'doug * is', 2], //another tricky 'greedy optional' bug - ['cool, fun, great, nice', '#Adjective+ great', 3], - - // - ['Dr. Spencer Smith says hi', 'dr', 1], - ['Dr. Spencer Smith says hi', 'dr spencer', 2], - ['Dr. Spencer Smith says hi', 'dr spencer smith', 3], - ['Dr. Spencer Smith says hi', 'dr spencer smith says', 4], - ['Lately, Dr. Spencer Smith says hi', 'lately dr spencer smith', 4], - //start ^ - ['in toronto', '^toronto', 0], - ['toronto', '^toronto', 1], - ['in toronto', '^in toronto', 2], - ['in toronto', 'in ^toronto', 0], - //end $ - ['it snows', 'it snows', 2], - ['it snows', 'snows$', 1], - ['it snows', 'it snows$', 2], - ['it snows', 'it$ snows', 0], - ['it snows', 'foo$', 0], - //negative ! - ['it really snows', 'it #adverb snows', 3], - ['it really snows', 'it !#adverb snows', 0], - ['it really snows. 
it goes.', 'it !#adverb', 2], - ['it is nice.', '!#adverb', 3], - //max/min {} - ['if it goes really well', 'if .{1,2} well', 0], - ['if it goes really well', 'if .{1,6} well', 5], - ['so i said that spencer is nice', '^.{1,3} spencer', 0], - ['so i said that spencer is nice', '^.{1,6} spencer', 5], - ['one two three four five', 'one two{1,2}', 2], - ['one two three four five', 'one two{1,3}', 2], - ['one two three four five', 'one two{0,3}', 2], - ['one two three four five', 'one .{0,3} two', 2], - ['one two three four five', 'one .{0,3} three', 3], - ['one two three four five', 'one .{1,3} two', 0], - ['one two three four five six seven', 'one .{0,4} six seven', 7], - //optional/consecutive - ['is really walking', 'is #Adverb+? walking', 3], - ['is walking', 'is #Adverb+? walking', 2], - ['is really really walking', 'is #Adverb+? walking', 4], - ['is really not walking', 'is (#Adverb|not)+? walking', 4], - ['is really not quickly walking', 'is (#Adverb|not)+? walking', 5], - ['is walking', 'is (#Adverb|not)+? 
walking', 2], - ['Phoenix AZ', '#City #Region', 2], - //this isn't working - ['the canadian senate', 'the (united states|canadian) senate', 3], - ['the canadian senate', '(canadian|united states|british)', 1], - ].forEach(function(a) { - var r = nlp(a[0]).match(a[1]).terms() || []; - var msg = '\'' + a[0] + '\' - - - \'' + a[1] + '\' - - got:' + r.length + ' want:' + a[2]; - t.equal(r.length, a[2], msg); - }); - t.end(); -}); - -test('tricky-case', function(t) { - t.equal(nlp('Number II').has('Number II'), true, 'uppercase-match'); - t.equal(nlp('Number I').has('Number I'), true, 'uppercase-match'); - t.end(); -}); - -test('text-as-input', function(t) { - var doc = nlp('he is from Phoenix AZ'); - var m = doc.match('#City'); - var matchWith = doc.match(m).out('normal'); - var without = doc.not(m).out('text'); - t.equal(matchWith, 'phoenix', 'text-as-match'); - t.equal(without, 'he is from AZ', 'text-as-not'); - t.end(); -}); diff --git a/test/unit/match/not.test.js b/test/unit/match/not.test.js deleted file mode 100644 index 591a28238..000000000 --- a/test/unit/match/not.test.js +++ /dev/null @@ -1,79 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('not-basic :', function(t) { - var m = nlp('spencer is really cool').not('brown'); - t.equal(m.out('text'), 'spencer is really cool', 'missing-not'); - t.equal(m.length, 1, 'one-result'); - - m = nlp('spencer is really cool').not('#Adverb'); - t.equal(m.out('text'), 'spencer is cool', 'one-not'); - t.equal(m.length, 2, 'two-result'); - - m = nlp('spencer is really cool').not('#Adverb+'); - t.equal(m.out('text'), 'spencer is cool', 'still-one-not'); - t.equal(m.length, 2, 'two-results'); - - m = nlp('spencer is really cool').not('#Adverb+'); - t.equal(m.out('text'), 'spencer is cool', 'two-not'); - t.equal(m.length, 2, 'two-results'); - - m = nlp('spencer is really cool').not('is #Adverb+'); - t.equal(m.out('text'), 'spencer cool', 'three-not'); - t.equal(m.length, 2, 'two-results'); - - m = 
nlp('spencer is really cool. John is really nice.').not('#Adverb'); - t.equal(m.out('text'), 'spencer is cool. John is nice.', 'two-terms-matches'); - t.equal(m.length, 4, 'four-results'); - - m = nlp('spencer is really cool. John is really nice.').not('pardon me, #Adverb'); - t.equal(m.out('text'), 'spencer is really cool. John is really nice.', 'tricky-no-match'); - t.equal(m.length, 2, 'two-original-results'); - - t.end(); -}); - -test('not-from-array :', function(t) { - var m = nlp('spencer is really cool').not(['spencer']); - t.equal(m.out('normal'), 'is really cool', 'not-spencer'); - t.equal(m.length, 1, 'one-results'); - - m = nlp('spencer is really cool').not(['']); - t.equal(m.out('normal'), 'spencer is really cool', 'not-spencer'); - t.equal(m.length, 1, 'one-results'); - - m = nlp('spencer is really cool').not(['spencer', 'really']); - t.equal(m.out('normal'), 'is cool', 'not-spencer-really'); - t.equal(m.length, 2, 'two-results'); - t.end(); -}); - -//test object-form -test('not-from-object :', function(t) { - var m = nlp('spencer is not really cool.'); - var r = m.not({ - not: true, - really: true - }); - t.equal(m.out('normal'), 'spencer is not really cool.', 'double-obj-remains'); - t.equal(r.out('normal'), 'spencer is cool.', 'spencer-double-obj'); - - m = nlp('everyone is cool. I said hi to everyone.').not({ - everyone: true, - totally: true - }); - t.equal(m.out('normal'), 'is cool. 
i said hi to', 'not-everyone'); - - m = nlp('spencer is really, secretly, very cool.'); - var adv = m.adverbs().not({ - really: true - }); - t.equal(adv.out('normal'), 'secretly very', 'not-subset'); - t.equal(adv.length, 2, 'one-result'); - - var adv2 = m.adverbs().not('secretly'); - t.equal(adv2.out('normal'), 'really very', 'not-subset2'); - t.equal(adv2.length, 2, 'two-results'); - - t.end(); -}); diff --git a/test/unit/match/pluck.test.js b/test/unit/match/pluck.test.js deleted file mode 100644 index 6a963314d..000000000 --- a/test/unit/match/pluck.test.js +++ /dev/null @@ -1,119 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var terms_test = function(r, arr, t) { - var have = r.out('array').join(' - '); - arr = arr.join(' - '); - var msg = have + ' == ' + arr; - t.equal(have, arr, msg); -}; - -test('pluck-people :', function(t) { - [ - ['Sally Daniels went to the park with Don Douglas', ['sally daniels', 'don douglas']], - ['Then Sally went to the park with all her friends.', ['sally']], - ['Oh say can you see? By the dawn\'s early rise.', []], - ['All the base are belong to us.', []] - ].forEach(function(a) { - var terms = nlp(a[0]).people(); - terms_test(terms, a[1], t); - }); - t.end(); -}); - -test('pluck-places :', function(t) { - [ - ['Toronto is the biggest city in Canada', ['toronto', 'canada']], - ['Beijing China grows each year. It is usually sunny.', ['beijing china']], - ['How long is the flight from SFO to LAX? Both in the USA!', ['sfo', 'lax', 'usa']], - ['Oh say can you see? 
By the dawn\'s early rise.', []] - ].forEach(function(a) { - var terms = nlp(a[0]).places(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck nouns :', function(t) { - [ - ['Cat eats meat.', ['cat', 'meat']], - ['Running, swimming, jumping.', []], - ['John Doe ran the race', ['john doe', 'race']] - ].forEach(function(a) { - var terms = nlp(a[0]).nouns(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck-adjectives ', function(t) { - [['Nice dog is eating', ['nice']], ['Beautiful, dirty, rich.', ['beautiful', 'dirty', 'rich']]].forEach(function(a) { - var terms = nlp(a[0]).adjectives(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck verbs :', function(t) { - [ - ['Cat eats meat.', ['eats']], - ['Beijing China grows each year. It is usually sunny.', ['grows', 'is usually']], - ['Running, swimming, jumping.', ['running', 'swimming', 'jumping']] - ].forEach(function(a) { - var terms = nlp(a[0]).verbs(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck adverbs :', function(t) { - [ - ['Eat gently, slowly.', ['gently', 'slowly']], - ['John quickly ate the food', ['quickly']], - ['all spectators immediately started cheering hard', ['immediately', 'hard']], - ['walk softly and carry a big stick', ['softly']] - ].forEach(function(a) { - var terms = nlp(a[0]).adverbs(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck dates :', function(t) { - [ - ['Toronto is best in January', ['january']], - ['My birthday is June 5th', ['june 5th']], - ['Oh say can you see? 
By the dawn\'s early rise.', []] - ].forEach(function(a) { - var terms = nlp(a[0]).dates(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck values :', function(t) { - [ - ['The 5 books in Toronto are best in January', ['5']], - ['My harddrive is 5 Gb', ['5 gb']], - ['he is seven', ['seven']], - ['add eight and five', ['eight', 'five']], - ['My birthday is June 5th 1999', ['5th', '1999']], - ['Oh say can you see? By the dawn\'s early rise.', []] - ].forEach(function(a) { - var terms = nlp(a[0]).values(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); - -test('pluck organizations :', function(t) { - [ - ['The 5 books in Toronto are best in January', []], - ['My birthday is June 5th', []], - ['Oh say can you see? By the dawn\'s early rise.', []], - ['Google may purchase Cannabis Inc', ['google', 'cannabis inc']] - ].forEach(function(a) { - var terms = nlp(a[0]).organizations(); - terms_test(terms, a[1], t, true); - }); - t.end(); -}); diff --git a/test/unit/match/prefix.test.js b/test/unit/match/prefix.test.js deleted file mode 100644 index 7a411538e..000000000 --- a/test/unit/match/prefix.test.js +++ /dev/null @@ -1,37 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('prefix/infix/suffix basic', function(t) { - var r = nlp('it is funny and weird'); - var m = r.match('_nny', true); - t.equal(m.out('normal'), 'funny', 'suffix-match'); - m = r.match('fu_', true); - t.equal(m.out('normal'), 'funny', 'prefix_match'); - m = r.match('_nn_', true); - t.equal(m.out('normal'), 'funny', 'infix-match'); - - m = r.match('_ff', true); - t.equal(m.out('normal'), '', 'no-false-suffix'); - m = r.match('ff_', true); - t.equal(m.out('normal'), '', 'no-false-prefix'); - m = r.match('_ff_', true); - t.equal(m.out('normal'), '', 'no-false-infix'); - - m = r.match('_', true); - t.equal(m.out('normal'), '', 'no-throw1'); - m = r.match(' _ ', true); - t.equal(m.out('normal'), '', 'no-throw2'); - m = r.match(' __ ', true); - 
t.equal(m.out('normal'), '', 'no-throw3'); - m = r.match(' _ _ ', true); - t.equal(m.out('normal'), '', 'no-throw4'); - - m = r.match('w_', true); - t.equal(m.out('normal'), 'weird', 'one-char-one-word'); - m = r.match('_r_', true); - t.equal(m.out('normal'), 'weird', 'one-char-one-word2'); - m = r.match('_y', true); - t.equal(m.out('normal'), 'funny', 'one-char-one-word3'); - - t.end(); -}); diff --git a/test/unit/match/regex.test.js b/test/unit/match/regex.test.js deleted file mode 100644 index 491e90ca7..000000000 --- a/test/unit/match/regex.test.js +++ /dev/null @@ -1,19 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('regex-match:', function(t) { - var doc = nlp('it is waaaay cool'); - var m = doc.match('/aaa/'); - t.equal(m.out('normal'), 'waaaay', 'basic-match'); - - m = doc.match('/[ao]{2}/'); - t.equal(m.out('array').length, 2, 'trickier-match'); - - m = doc.match('is /aaam?/ .'); - t.equal(m.out('normal'), 'is waaaay cool', 'trickier-match'); - - m = doc.match('#Copula /a+/ /ool$/'); - t.equal(m.out('normal'), 'is waaaay cool', 'even-trickier-match'); - - t.end(); -}); diff --git a/test/unit/match/remove.test.js b/test/unit/match/remove.test.js deleted file mode 100644 index c7cdb7478..000000000 --- a/test/unit/match/remove.test.js +++ /dev/null @@ -1,49 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('remove-basic :', function(t) { - var m = nlp('the brown cat played').match('brown').delete().all(); - t.equal(m.out('text'), 'the cat played', 'brown-cat'); - - m = nlp('the nice brown cat played').match('nice brown').delete().all(); - t.equal(m.out('text'), 'the cat played', 'nice-brown'); - - m = nlp('the nice brown cat played').match('#Adjective').delete().all(); - t.equal(m.out('text'), 'the cat played', 'adj-each'); - - m = nlp('the nice brown cat played').match('#Adjective+').delete().all(); - t.equal(m.out('text'), 'the cat played', 'adj-consecutive'); - - t.end(); -}); - 
-test('remove-match :', function(t) { - var m = nlp('the brown cat played').delete('brown'); - t.equal(m.out('text'), 'the cat played', 'brown-cat'); - - m = nlp('the brown cat played. The brown dog sat down.').delete('brown'); - t.equal(m.out('text'), 'the cat played. The dog sat down.', 'brown-cat'); - - m = nlp('the nice brown cat played. The nice dog waited.').delete('nice brown'); - t.equal(m.out('text'), 'the cat played. The nice dog waited.', 'nice-brown'); - - m = nlp('the nice brown cat played. The cute dogs ate.').delete('#Adjective'); - t.equal(m.out('text'), 'the cat played. The dogs ate.', 'adj-each'); - - m = nlp('the nice brown cat played. The cute dogs ate.').delete('#Adjective+'); - t.equal(m.out('text'), 'the cat played. The dogs ate.', 'adj-consecutive'); - - t.end(); -}); - -test('remove-logic :', function(t) { - var m = nlp('spencer kelly is here').match('spencer kelly').delete('spencer'); - t.equal(m.out('normal'), 'kelly', 'remove(reg) returns this'); - - m = nlp('spencer kelly is here').match('spencer kelly').delete().all(); - t.equal(m.out('normal'), 'is here', 'remove() returns parent'); - - m = nlp('spencer kelly is here').match('spencer kelly').delete('notfound'); - t.equal(m.out('normal'), 'spencer kelly', 'remove(notfound) returns this'); - t.end(); -}); diff --git a/test/unit/match/replace.test.js b/test/unit/match/replace.test.js deleted file mode 100644 index 7927d76b5..000000000 --- a/test/unit/match/replace.test.js +++ /dev/null @@ -1,57 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('replace-basic :', function(t) { - var m = nlp('the dog played').match('dog').replace('cat').all(); - t.equal(m.out('text'), 'the cat played', 'dog-cat'); - - m = nlp('the dog played').match('the dog').replace('a cat').all(); - t.equal(m.out('text'), 'a cat played', 'a-cat'); - - m = nlp('the dog played').match('#Noun').replace('snake').all(); - t.equal(m.out('text'), 'the snake played', 'snake'); - - m = nlp('the pit 
bull played').match('#Noun+').replace('snake').all(); - t.equal(m.out('text'), 'the snake played', 'pit bull'); - - m = nlp('the pit bull dog played').match('#Noun+').replace('grey snake').all(); - t.equal(m.out('text'), 'the grey snake played', 'pit bull dog'); - - t.end(); -}); - -test('match-replace :', function(t) { - [ - ['the dog played', 'the dog', 'the cat', 'the cat played'], - ['the dog played', 'the #Noun', 'the cat', 'the cat played'], - ['the dog played', 'the (dog|hamster|pet-snake)', 'the cat', 'the cat played'], - ['the boy and the girl', 'the #Noun', 'the house', 'the house and the house'], - ['the boy and the girl', 'the cat', 'the house', 'the boy and the girl'] - ].forEach(function(a) { - var str = nlp(a[0]).replace(a[1], a[2]).out('text'); - var msg = str + ' -- ' + a[3]; - t.equal(str, a[3], msg); - }); - - t.end(); -}); - -test('replace-with-punctuation', function(t) { - var doc = nlp('Simon, how is Pamela and Jason?'); - var str = doc.match('#Person').replace('PERSON').all().out(); - t.equal(str, 'PERSON, how is PERSON and PERSON?', 'replace-with-punctuation'); - t.end(); -}); - -test('structured-object-replace :', function(t) { - var r = nlp('fun times in cool town'); - var term = r.match('times'); - r.replace(term, 'day'); - t.equal(r.out(), 'fun day in cool town', 'structured-replace'); - - r = nlp('fun times in cool town'); - var terms = r.match('cool town'); - r.replace(terms, 'shitsville'); - t.equal(r.out(), 'fun times in shitsville', 'structured-replace-multi'); - t.end(); -}); diff --git a/test/unit/misc.test.js b/test/unit/misc.test.js deleted file mode 100644 index fd6b5c463..000000000 --- a/test/unit/misc.test.js +++ /dev/null @@ -1,61 +0,0 @@ -var test = require('tape') -var nlp = require('./lib/nlp') - -//make sure it can handle garbage inputs -test('garbage:', function(t) { - var garbage = ['', ' ', null, '\n\n', [], {}] - garbage.forEach(function(g, i) { - var num = nlp(g).list.length - var msg = typeof g + ' text input #' + 
i - t.equal(num, 0, msg) - }) - var str = nlp(2).out() - t.equal(str, '2', 'integer-casted') - str = nlp(2.2).out() - t.equal(str, '2.2', 'float-casted') - - //garbage in lexicon too - str = nlp('hello', null).out() - t.equal(str, 'hello', 'null-lexicon') - - str = nlp('hello', 2).out() - t.equal(str, 'hello', 'int-lexicon') - t.end() -}) - -test('extra exports:', function(t) { - t.ok(nlp.version, 'version number exported') - - t.doesNotThrow(function() { - nlp.verbose(true) - nlp.verbose(false) - }, 'can set verbosity') - - t.end() -}) - -test('misc:', function(t) { - var str = '2 million five hundred thousand and fifty nine is bigger than 2882' - var m = nlp(str) - m.values().toNumber() - t.equal(m.out('normal'), '2500059 is bigger than 2882', str) - - str = '2 million five hundred thousand and fifty nine is bigger than 2882' - m = nlp(str) - m.values().toNice() - t.equal(m.out('text'), '2,500,059 is bigger than 2,882', str) - - str = 'doug is 5 years old' - m = nlp(str) - m.values().toText() - t.equal(m.out('normal'), 'doug is five years old', str) - - var r = nlp('Homer, have you been eating that sandwich again?').terms().slice(0, 3) - t.equal(r.out('text'), 'Homer, have you', 'result.slice') - - // str = 'men go'; - // m = nlp(str).sentences().toPastTense().nouns().toSingular(); - // t.equal(m.out('normal'), 'a man went', str); - - t.end() -}) diff --git a/test/unit/out/offset.test.js b/test/unit/out/offset.test.js deleted file mode 100644 index af2a332bf..000000000 --- a/test/unit/out/offset.test.js +++ /dev/null @@ -1,48 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var freshPrince = require('../lib/freshPrince'); - -test('offsets-equals-substr:', function(t) { - var r = nlp(freshPrince); - var arr = r.verbs().out('offsets'); - arr.forEach(function(obj) { - var substr = freshPrince.substr(obj.offset, obj.length); - t.equal(obj.text, substr, "'" + obj.text + "' offset " + obj.offset); - }); - t.end(); -}); - -test('index-output:', 
function(t) { - var str = `I am the very model of a modern Major-General. I've information vegetable, animal, and mineral`; - var arr = nlp(str).match('model').out('index'); - t.equal(arr[0].term, 4, 'which term'); - t.equal(arr[0].sentence, 0, 'which sentence'); - t.equal(arr[0].sentenceTerm, 4, 'which sentence-term'); - - arr = nlp(str).match('vegetable').out('index'); - t.equal(arr[0].term, 13, 'which term'); - t.equal(arr[0].sentence, 1, 'which sentence'); - t.equal(arr[0].sentenceTerm, 3, 'which sentence-term'); - t.end(); -}); - -test('offset-with-whitespace:', function(t) { - var str = `I am the very model of a modern Major-General. I've information vegetable, animal, and mineral`; - var place = nlp(str).match('animal').first().out('offset')[0]; - - //full term offset - var substr = str.substring(place.offset, place.offset + place.length); - t.equal(substr, ' animal,', 'offset+length'); - - //no-punctuation or whitespace offset - substr = str.substring(place.wordStart, place.wordEnd); - t.equal(substr, 'animal', 'wordStart-wordEnd'); - - str = 'hello there. I work for the F.B.I. in ft. Mede. hello there!'; - var r = nlp(str); - var o = r.sentences(1).out('offsets')[0]; - substr = str.substring(o.wordStart, o.wordEnd); - t.equal(substr, 'I work for the F.B.I. in ft. Mede', 'keeps-internal-punctuation'); - - t.end(); -}); diff --git a/test/unit/out/out.test.js b/test/unit/out/out.test.js deleted file mode 100644 index 2885dd8d6..000000000 --- a/test/unit/out/out.test.js +++ /dev/null @@ -1,86 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('topk:', function(t) { - var str = 'it is good. it was nice. she is cool.'; - var r = nlp(str); - var arr = r.verbs().out('topk'); - t.equal(arr[0].normal, 'is', '#1 is'); - t.equal(arr[0].count, 2, 'two is count'); - - t.equal(arr[1].normal, 'was', 'one was count'); - t.equal(arr[1].count, 1, 'one was count'); - - arr = nlp('we\'re cool. 
they are fun').terms().out('freq'); - t.equal(arr[0].normal, 'are', 'contraction- are'); - t.equal(arr[0].count, 2, 'are combined'); - - t.end(); -}); - -test('out-tags:', function(t) { - var str = 'texas rangers are a baseball team'; - var r = nlp(str); - var arr = r.out('tags'); - t.equal(arr.length, 6, '6 terms'); - t.equal(arr[0].normal, 'texas', 'texas #1'); - t.equal(arr[1].normal, 'rangers', 'rangers #2'); - t.equal(arr[2].normal, 'are', 'are #2'); - t.ok(arr[0].tags.indexOf('SportsTeam') !== -1, 'they are a sportsteam'); - t.end(); -}); - -test('out-array:', function(t) { - var str = 'texas rangers are a baseball team. They do not play in houston.'; - var r = nlp(str).verbs(); - var arr = r.out('array'); - t.equal(arr.length, 2, '2 verbs'); - t.equal(arr[0], 'are', 'are #1'); - t.equal(arr[1], 'do not play', 'do not play #2'); - t.end(); -}); - -test('out-csv:', function(t) { - var str = 'John, Jill McGraw, and Moe were swimming'; - var have = nlp(str).people().out('csv'); - var want = 'john\njill,mcgraw\nmoe'; - t.equal(have, want, str + ' -> ' + have); - t.end(); -}); - -test('out-newlines:', function(t) { - var str = 'John, Jill McGraw, and Moe were swimming'; - var have = nlp(str).people().out('newlines'); - var want = 'John,\nJill McGraw,\nMoe'; - t.equal(have, want, want + ' -> ' + have); - t.end(); -}); - -test('out-custom:', function(t) { - var doc = nlp('The competent drum work of Don Brewer?'); - var arr = doc.out({ - text: true, - normal: false, - tags: true, - sdf: true, - root: true - }); - arr = arr[0]; - t.equal(arr[0].text, 'The', 'has text'); - t.equal(arr[5].root, 'don', 'has root'); - t.equal(arr[5].sdf, undefined, 'has no sdf'); - t.equal(arr[0].tags.Determiner, true, 'has tags'); - t.end(); -}); - -test('out-others:', function(t) { - var str = 'texas rangers are a baseball team. 
They do not play in houston.'; - var r = nlp(str).verbs(); - var txt = r.out('text'); - t.notEqual(r.out('html'), txt, 'html-out'); - t.notEqual(r.out('grid'), txt, 'grid-out'); - t.notEqual(r.out('root'), txt, 'grid-out'); - t.notEqual(r.out('color'), txt, 'color-out'); - t.notEqual(r.out('tags'), txt, 'tags-out'); - t.end(); -}); diff --git a/test/unit/out/sort.test.js b/test/unit/out/sort.test.js deleted file mode 100644 index 997fb8dd1..000000000 --- a/test/unit/out/sort.test.js +++ /dev/null @@ -1,76 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var fns = require('../lib/fns'); - -test('sortAlpha:', function(t) { - var str = 'John xoo, John fredman, John davis, John fredman,'; - var r = nlp(str); - r = r.people(); - r.sort('alpha'); - var want = ['john davis', 'john fredman', 'john fredman', 'john xoo']; - fns.arr_test(r.out('array'), str, want, t); - t.end(); -}); - -test('sortChronological:', function(t) { - var str = 'John xoo, John fredman, John davis'; - var r = nlp(str); - r = r.people(); - r.sort('alphabetical'); - r.sort('chronological'); - var want = ['john xoo', 'john fredman', 'john davis']; - fns.arr_test(r.out('array'), str, want, t); - t.end(); -}); - -test('reverse:', function(t) { - var str = 'John xoo, John fredman, John davis'; - var r = nlp(str); - r = r.people(); - r.sort('alphabetical'); - r.reverse(); - var want = ['john xoo', 'john fredman', 'john davis']; - fns.arr_test(r.out('array'), str, want, t); - t.end(); -}); - -test('length:', function(t) { - var str = 'Amy, John Fredman, Dr. Bill, Alexis Smithsonian'; - var r = nlp(str); - r = r.people(); - r.sort('length'); - r.reverse(); - var want = ['amy', 'dr bill', 'john fredman', 'alexis smithsonian']; - fns.arr_test(r.out('array'), str, want, t); - t.end(); -}); - -test('wordCount:', function(t) { - var str = 'John Fredman, Amy, Dr. Bill G. 
Gates'; - var r = nlp(str); - r = r.people(); - r.sort('wordCount'); - r.reverse(); - var want = ['dr bill g gates', 'john fredman', 'amy']; - fns.arr_test(r.out('array'), str, want, t); - t.end(); -}); - -test('unique:', function(t) { - var str = 'John xoo, John fredman, john xoo, John davis'; - var r = nlp(str); - r = r.people(); - r.unique(); - var want = ['john xoo', 'john fredman', 'john davis']; - fns.arr_test(r.out('array'), str, want, t); - t.end(); -}); - -test('frequency:', function(t) { - var str = 'John xoo, John fredman, john xoo, John davis'; - var r = nlp(str).people(); - var a = r.out('frequency'); - t.equal(a[0].normal, 'john xoo', 'topk is sorted'); - t.equal(a[0].count, 2, 'topk finds two'); - t.end(); -}); diff --git a/test/unit/result/case.test.js b/test/unit/result/case.test.js deleted file mode 100644 index 6df0da1bd..000000000 --- a/test/unit/result/case.test.js +++ /dev/null @@ -1,31 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('sanity-check case:', function(t) { - var str = 'John xoo, John fredman'; - var r = nlp(str); - str = r.toUpperCase().out('text'); - t.equal(str, 'JOHN XOO, JOHN FREDMAN', 'uppercase'); - - str = r.toLowerCase().out('text'); - t.equal(str, 'john xoo, john fredman', 'lowercase'); - - str = r.toCamelCase().out('text'); - t.equal(str, 'JohnXoo,JohnFredman', 'camelcase'); - t.end(); -}); - -test('tricky case:', function(t) { - var str = 'i am spencer kelly here with Amy Adams.'; - var r = nlp(str); - r.people().toUpperCase(); - str = r.out('text'); - t.equal(str, 'i am SPENCER KELLY here with AMY ADAMS.', 'tricky-uppercase'); - - str = 'the Spencer Kelly Festival of Silly Walks'; - r = nlp(str); - r.match('#TitleCase+').toCamelCase(); - t.equal(r.out('text'), 'the SpencerKellyFestival of SillyWalks', 'tricky-camelcase'); - - t.end(); -}); diff --git a/test/unit/result/clone.test.js b/test/unit/result/clone.test.js deleted file mode 100644 index e6a907a8d..000000000 --- 
a/test/unit/result/clone.test.js +++ /dev/null @@ -1,33 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('clone:', function(t) { - var arr = [ - 'he eats the alligator', - 'Jumanji is the best move. He eats cheese.', - 'Uperman is wayyyy better than batman!' - ]; - arr.forEach(function(str) { - var m = nlp(str); - var neg = m.clone().sentences().toNegative(); - var past = m.clone().sentences().toPastTense(); - var fut = m.clone().sentences().toFutureTense(); - var adv = m.clone().verbs().insertBefore('really'); - var rm = m.clone().verbs().delete('#Verb'); - var out = m.out(); - t.equal(out, str, 'equals input - ' + out); - t.notEqual(str, neg.out(), 'neg not equal - ' + str); - t.notEqual(str, past.out(), 'past not equal - ' + str); - t.notEqual(str, fut.out(), 'future not equal - ' + str); - t.notEqual(str, adv.out(), 'adv not equal - ' + str); - t.notEqual(str, rm.out(), 'rm not equal - ' + str); - }); - t.end(); -}); - -// test('one-liner:', function (t) { -// var str = 'would somebody please think of the children'; -// var have = nlp(str).clone().toUpperCase().parent.out(); -// t.equal(str, have, 'parent-unchanged'); -// t.end(); -// }); diff --git a/test/unit/result/loops.test.js b/test/unit/result/loops.test.js deleted file mode 100644 index 68ddbe2e5..000000000 --- a/test/unit/result/loops.test.js +++ /dev/null @@ -1,79 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('js-loop-map', function(t) { - var text = 'oh hello. please turn on the lights and then take out the garbage too. After that, play some music.'; - var doc = nlp(text); - var arr = doc.map(m => { - return m.terms(0).out('normal'); - }); - t.equal(arr.length, 3, 'right-size'); - t.equal(arr[0], 'oh', 'oh-first'); - t.equal(arr[1], 'please', 'please-first'); - t.equal(arr[2], 'after', 'after-first'); - t.end(); -}); - -test('js-loop-reduce', function(t) { - var text = 'oh hello. 
please turn on the lights and then take out the garbage too. After that, play some music.'; - var doc = nlp(text); - var list = doc.reduce((arr, m) => { - arr.push(m.terms(0).out('normal')); - return arr; - }, []); - t.equal(list.length, 3, 'right-size'); - t.equal(list[0], 'oh', 'oh-first'); - t.equal(list[1], 'please', 'please-first'); - t.equal(list[2], 'after', 'after-first'); - - var txt = doc.reduce((str, m) => { - str += m.terms(0).out('normal'); - return str; - }, ''); - t.equal(txt, 'ohpleaseafter', 'reduce-to-a-string'); - t.end(); -}); - -test('js-loop-filter', function(t) { - var text = 'oh hello. please turn on the lights and then take out the garbage too. After that, play some music.'; - var doc = nlp(text); - t.equal(doc.list.length, 3, 'before-filter'); - var doc2 = doc.filter(m => { - return m.terms().out('array').length > 2; - }); - t.equal(doc.list.length, 3, 'same-after-filter'); - t.equal(doc2.list.length, 2, 'new array smaller'); - t.end(); -}); - -test('js-loop-forEach', function(t) { - var text = 'oh hello. please turn on the lights and then take out the garbage too. After that, play some music.'; - var doc = nlp(text); - var arr = []; - doc.forEach(m => { - arr.push(m.firstTerm().out('normal')); - }); - t.equal(arr.length, 3, 'right-size'); - t.equal(arr[0], 'oh', 'oh-first'); - t.equal(arr[1], 'please', 'please-first'); - t.equal(arr[2], 'after', 'after-first'); - t.end(); -}); - -test('js-loop-find', function(t) { - var text = 'oh hello. please turn on the lights and then take out the garbage too. 
After that, play some music.'; - var doc = nlp(text); - t.equal(doc.list.length, 3, 'before-filter'); - var doc2 = doc.find(m => { - return m.terms(0).out('normal') === 'after'; - }); - t.equal(doc.list.length, 3, 'same-after-filter'); - t.equal(doc2.list.length, 1, 'found one'); - t.equal(doc2.out(), 'After that, play some music.', 'found the right one'); - - var doc3 = doc.find(m => { - return m.terms(0).out('normal') === 'missing term'; - }); - t.equal(doc3, undefined, 'missing value returns undefined'); - t.end(); -}); diff --git a/test/unit/result/lump.test.js b/test/unit/result/lump.test.js deleted file mode 100644 index 2592ce15c..000000000 --- a/test/unit/result/lump.test.js +++ /dev/null @@ -1,53 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('lumper:', function(t) { - var str = 'we live in Toronto Canada and it is cold.'; - var r = nlp(str); - t.equal(r.wordCount(), 9, '9 words start'); - - r.match('#Place+').lump(); - t.equal(r.out('text'), str, 'output unchanged'); - t.equal(r.wordCount(), 8, '8 words now'); - - var term = r.list[0].terms[3]; - t.ok(term.tags.Country, 'has-country-tag'); - t.ok(term.tags.City, 'has-city-tag'); - t.ok(term.tags.Noun, 'has-shared-noun-tag'); - t.end(); -}); - -test('lumper-multiple-matches:', function(t) { - var str = 'imagine if Patrick Watson and Emma Stone got married'; - var r = nlp(str); - t.equal(r.wordCount(), 9, '9 words start'); - - r.match('#Person+').lump(); - t.equal(r.out('text'), str, 'output unchanged'); - t.equal(r.wordCount(), 7, '7 words now'); - - var term = r.list[0].terms[2]; - t.ok(term.tags.FirstName, 'has-firstname-tag'); - t.ok(term.tags.Person, 'has-person-tag'); - - term = r.list[0].terms[4]; - t.ok(term.tags.FirstName, 'has-firstname-tag2'); - t.ok(term.tags.Person, 'has-person-tag2'); - t.end(); -}); - -test('lumper-long:', function(t) { - var str = 'I think Sir Patrick James Watson is cool'; - var r = nlp(str); - t.equal(r.wordCount(), 8, '8 words start'); - - 
r.match('#Person+').lump(); - t.equal(r.out('text'), str, 'output unchanged'); - t.equal(r.wordCount(), 5, '5 words now'); - - var term = r.list[0].terms[2]; - t.ok(term.tags.FirstName, 'has-firstname-tag'); - t.ok(term.tags.Person, 'has-person-tag'); - - t.end(); -}); diff --git a/test/unit/result/normalize.test.js b/test/unit/result/normalize.test.js deleted file mode 100644 index 75aef9567..000000000 --- a/test/unit/result/normalize.test.js +++ /dev/null @@ -1,122 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var str_test = require('../lib/fns').str_test; - -test('sentence():', function(t) { - [ - ['he is good', 'he is good'], - ['Jack and Jill went up the hill.', 'jack and jill went up the hill.'], - ['Mr. Clinton did so.', 'mr clinton did so.'], - ['he is good', 'he is good'], - ['Jack and Jill went up the hill. She got water.', 'jack and jill went up the hill. she got water.'], - ['Joe', 'joe'], - ['just-right', 'just right'], - ['camel', 'camel'], - ['4', '4'], - ['four', 'four'], - ['john smith', 'john smith'], - ['Dr. John Smith-McDonald', 'dr john smith mcdonald'], - ['Contains no fruit juice. \n\n All rights reserved', 'contains no fruit juice. all rights reserved'] - ].forEach(function(a) { - var str = nlp(a[0]).out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('normalize():', function(t) { - [ - [' so... you like DONUTS? have all the donuts in the WORLD!!!', 'so you like donuts? have all the donuts in the world!'], - ['This is a test. 
.', 'this is a test.'], - ['Björk, the singer-songwriter...', 'bjork the singer songwriter'], - ['the so-called “fascist dictator”', 'the so called "fascist dictator"'], - // ['the so-called ❛singer-songwriter❜', 'the so called \'singer songwriter\''], - // ['the so-called ❛group of seven❜', 'the so called \'group of 7\''], - ['Director of the F.B.I.', 'director of the fbi'], - ].forEach(function(a) { - var str = nlp(a[0]).normalize().out('text'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('possessives', function(t) { - var doc = nlp(`Corey Hart's pudding and Google's advertising`); - doc = doc.normalize({ - possessives: true, - case: false - }); - t.equal(doc.out(), 'Corey Hart pudding and Google advertising', 'normalize possessives'); - t.end(); -}); - -test('optional params', function(t) { - var doc = nlp(`John Smith bought automobiles (for us)`).normalize({ - case: true, - possessives: true, - parentheses: true, - // plurals: true, - verbs: true, - }); - t.equal(doc.out(), 'john smith buy automobiles', 'many-on'); - t.end(); -}); - -test('honorifics', function(t) { - var tests = [ - ['rear admiral Smith', 'smith'], - ['Lieutenant John Smith', 'john smith'], - // ['Admiral Davis Jr', 'davis jr'], - ['Field marshal Herring', 'herring'], - ['General Lou Gobbells of the US air force', 'lou gobbells of the us air force'], - ['Rear admiral John', 'john'], - ['Lieutenant general James Baker', 'james baker'], - ['Lieutenant colonel Bing Crosby', 'bing crosby'], - ['Major Tom', 'tom'], - ['major effort by President Xi', 'major effort by xi'], - ['Corporal John Herring', 'john herring'], - ['sergeant major Harold', 'harold'], - ['Second lieutenant Semore Hirthman', 'semore hirthman'], - ['first lady Michelle obama', 'michelle obama'], - ['prime minister Stephen Hawking', 'stephen hawking'], - //no names - // ['first lieutenant', '1st lieutenant'], - // ['Sergeant', 'sergeant'], - ]; - tests.forEach((a) => { - var doc = nlp(a[0]); - doc = 
doc.normalize({ - honorifics: true, - case: true - }); - t.equal(doc.out('normal'), a[1], a[0]); - }); - t.end(); -}); - -test('elipses-whitespace:', function(t) { - var doc = nlp('about this ...').normalize(); - t.equal(doc.out('text'), 'about this', 'normalize seperate elipses'); - - doc = nlp('about this ...').toLowerCase(); - t.equal(doc.out('text'), 'about this ...', 'lowercase elipses'); - - doc = nlp('about this...').normalize(); - t.equal(doc.out('text'), 'about this', 'normalize attatched elipses'); - t.end(); -}); - -test('more-normalize:', function(t) { - var doc = nlp(`i saw first lady michelle obama`); - doc.normalize({ - honorifics: true - }); - t.equal(doc.out('text'), 'i saw michelle obama', 'normalize honorifics'); - - doc = nlp(`google's tax return`); - doc.normalize({ - possessives: true - }); - t.equal(doc.out('text'), 'google tax return', 'normalize possessives'); - t.end(); -}); diff --git a/test/unit/result/punctuation.test.js b/test/unit/result/punctuation.test.js deleted file mode 100644 index 98242f942..000000000 --- a/test/unit/result/punctuation.test.js +++ /dev/null @@ -1,34 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('getPunctuation', function(t) { - var doc = nlp('he is green and cool'); - var arr = doc.match('#Adjective').getPunctuation(); - t.deepEqual(arr, ['', ''], 'both-empty'); - - doc = nlp('he is green, and cool!'); - arr = doc.match('#Adjective').getPunctuation(); - t.deepEqual(arr, [',', '!'], 'both-punctuations'); - - doc = nlp('he is green, and cool! 
He is Kermit, the frog.'); - arr = doc.match('#Comma').getPunctuation(); - t.deepEqual(arr, [',', ','], 'both-sentences'); - - doc = nlp('he is Kermit, the frog.'); - var char = doc.terms().getPunctuation(2); - t.equal(char, ',', 'support-num-param'); - t.end(); -}); - -test('setPunctuation', function(t) { - var doc = nlp('he is green and cool'); - doc.match('#Adjective and').firstTerm().setPunctuation(','); - t.equal(doc.text(), 'he is green, and cool', 'oxford-comma'); - - doc = nlp('he is green, and cool'); - t.equal(doc.has('#Comma'), true, 'has-comma-tag'); - doc.match('green').firstTerm().setPunctuation('!'); - t.equal(doc.text(), 'he is green! and cool', 'exclaim'); - t.equal(doc.has('#Comma'), false, 'no-more-comma-tag'); - t.end(); -}); diff --git a/test/unit/result/result_fns.test.js b/test/unit/result/result_fns.test.js deleted file mode 100644 index 500753a10..000000000 --- a/test/unit/result/result_fns.test.js +++ /dev/null @@ -1,21 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('result methods', function(t) { - var text = 'this :cookie: <3 💯 so good. It is really nice. Yes it is <3'; - - //has method - var m = nlp(text); - t.equal(m.match('#Emoji').found, true, 'nlp.has positive'); - t.equal(m.match('#SportsTeam').found, false, 'nlp.has neg'); - - //filter string - var small = m.if('#Emoji'); - t.equal(small.out('normal'), 'this :cookie: <3 💯 so good. 
yes it is <3', 'nlp.filter string'); - - //filter method - small = m.ifNo('#Emoji'); - t.equal(small.out('normal'), 'it is really nice.', 'nlp.filter method'); - - t.end(); -}); diff --git a/test/unit/result/setTag.test.js b/test/unit/result/setTag.test.js deleted file mode 100644 index 8b3b387ac..000000000 --- a/test/unit/result/setTag.test.js +++ /dev/null @@ -1,39 +0,0 @@ -var test = require('tape') -var nlp = require('../lib/nlp') - -test('custom-tags-persist', function(t) { - var r = nlp('i am two years older now') - var two = r.match('#Value').tag('#FunTag') - two.replaceWith('never') - t.equal(two.has('#FunTag'), false, 'custom tag is forgotten') - - r = nlp('i am two years older now') - two = r.match('#Value').tag('#FunTag') - two.replaceWith('three', true) - t.equal(two.has('#FunTag'), true, 'custom tag is kept') - - r = nlp('i am two years older now') - two = r.match('#Value').tag('#FunTag') - two.toUpperCase() - two.values().toNumber() - t.equal(two.has('#FunTag'), true, 'custom tag stays over transformations') - - r = nlp('june 1999') - r.values().toNumber() - var year = r.match('#Year') - t.equal(year.out('normal'), '1999', 'year-stays-a-year') - - //not sure if these should pass.. 
- // r = nlp('i am two years older now') - // r.match('am').tag('#FunTag') - // r = r.sentences().toFutureTense().toPresentTense().toPastTense() - // var verb = r.match('#FunTag') - // t.equal(verb.out('normal'), 'was', 'tag stays over sentence-change') - - // r = nlp('walked').tag('#FunTag'); - // r = r.verbs().toFutureTense().toPresentTense().toPastTense(); - // verb = r.match('#FunTag'); - // t.equal(verb.out('normal'), 'walked', 'tag stays over verb-change'); - - t.end() -}) diff --git a/test/unit/result/split.test.js b/test/unit/result/split.test.js deleted file mode 100644 index 090bd627c..000000000 --- a/test/unit/result/split.test.js +++ /dev/null @@ -1,70 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var arr_test = require('../lib/fns').arr_test; - -test('splitAfter', function(t) { - [ - ['doug and nancy', 'and', ['doug and', 'nancy']], - ['doug and also nancy', 'and also', ['doug and also', 'nancy']], - ['doug and definetly nancy', 'and #Adverb', ['doug and definetly', 'nancy']], - ['maybe doug but possibly nancy', 'but', ['maybe doug but', 'possibly nancy']], - - ['a x b x c', 'x', ['a x', 'b x', 'c']], - ['a b x c x', 'x', ['a b x', 'c x']], - ['x a b x c', 'x', ['x', 'a b x', 'c']], - ['x x a b c', 'x', ['x', 'x', 'a b c']], - ['a x b x', 'x', ['a x', 'b x']], - ['a x b c x', 'x', ['a x', 'b c x']], - ['x x a b c', 'x', ['x', 'x', 'a b c']], - - ['john, paul, george, ringo', '#Comma', ['john', 'paul', 'george', 'ringo']], - ['doug is really nice', 'is', ['doug is', 'really nice']] - ].forEach(function(a) { - var want = a[2]; - var got = nlp(a[0]).splitAfter(a[1]).out('array'); - arr_test(got, a[0], want, t); - }); - t.end(); -}); - -test('splitBefore', function(t) { - [ - ['doug and nancy', 'and', ['doug', 'and nancy']], - ['doug and also nancy', 'and also', ['doug', 'and also nancy']], - ['doug and definetly nancy', 'and #Adverb', ['doug', 'and definetly nancy']], - ['maybe doug but possibly nancy', 'but', ['maybe doug', 'but 
possibly nancy']], - ['doug is really nice', 'is', ['doug', 'is really nice']], - - ['a x b x c', 'x', ['a', 'x b', 'x c']], - ['a b x x c', 'x', ['a b', 'x', 'x c']], - ['x a b x c', 'x', ['x a b', 'x c']], - ['x x a b c', 'x', ['x', 'x a b c']], - ['a x b x', 'x', ['a', 'x b', 'x']] - ].forEach(function(a) { - var want = a[2]; - var got = nlp(a[0]).splitBefore(a[1]).out('array'); - arr_test(got, a[0], want, t); - }); - t.end(); -}); - -test('splitOn', function(t) { - [ - ['doug and nancy', 'and', ['doug', 'and', 'nancy']], - ['doug and also nancy', 'and also', ['doug', 'and also', 'nancy']], - ['doug and definetly nancy', 'and #Adverb', ['doug', 'and definetly', 'nancy']], - ['maybe doug but possibly nancy', 'but', ['maybe doug', 'but', 'possibly nancy']], - ['doug is really nice', 'is', ['doug', 'is', 'really nice']], - - ['a x b x c', 'x', ['a', 'x', 'b', 'x', 'c']], - ['a b x x c', 'x', ['a b', 'x', 'x', 'c']], - ['x a b x c', 'x', ['x', 'a b', 'x', 'c']], - ['x x a b c', 'x', ['x', 'x', 'a b c']], - ['a x b x', 'x', ['a', 'x', 'b', 'x']] - ].forEach(function(a) { - var want = a[2]; - var got = nlp(a[0]).splitOn(a[1]).out('array'); - arr_test(got, a[0], want, t); - }); - t.end(); -}); diff --git a/test/unit/result/whitespace.test.js b/test/unit/result/whitespace.test.js deleted file mode 100644 index a8a79dda9..000000000 --- a/test/unit/result/whitespace.test.js +++ /dev/null @@ -1,49 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('sanity-check case:', function(t) { - var m = nlp('john is cool. he is nice'); - m.whitespace.before(' '); - t.equal(m.out('text'), ' john is cool. he is nice'); - - m = nlp('john is cool. he is nice'); - m.whitespace.after(' '); - t.equal(m.out('text'), 'john is cool. 
he is nice '); - - m = nlp('so john smith is cool.'); - m.people().whitespace.before(' '); - m.people().whitespace.after(' '); - t.equal(m.out('text'), 'so john smith is cool.'); - - t.end(); -}); - -test('slashes-as-whitespace:', function(t) { - var doc = nlp('john is cool/fun'); - t.equal(doc.terms().length, 4, '4 terms'); - t.equal(doc.has('cool'), true, 'has cool'); - t.equal(doc.has('fun'), true, 'has fun'); - t.equal(doc.out('text'), 'john is cool/fun', 'slash in output'); - t.end(); -}); - -test('normalized whitespace', function(t) { - var doc = nlp(`It doesn't matter`); - doc.normalize({ - contractions: false - }); - t.equal(doc.text(), `it doesn't matter`, 'normalized contractionwhitespace'); - t.end(); -}); - -test('punctuation-whitespace-mixing', function(t) { - let doc = nlp(`we released, "Square Up".`); - let arr = doc.terms().map(obj => obj.text()); - t.deepEqual(arr, ['we', ' released,', ' "Square', ' Up".'], 'punctuation 1'); - - doc = nlp('you said ... ?'); - arr = doc.terms().map(obj => obj.text()); - t.deepEqual(arr, ['you', ' said ... ?'], 'punctuation 2'); - - t.end(); -}); diff --git a/test/unit/result/wordcount.test.js b/test/unit/result/wordcount.test.js deleted file mode 100644 index f0b22a6a0..000000000 --- a/test/unit/result/wordcount.test.js +++ /dev/null @@ -1,21 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var str_test = require('../lib/fns').str_test; - -test('==WordCount==', function(t) { - [ - ['he is good', 3], - ['jack and jill went up the hill.', 7], - ['Mr. Clinton did so.', 4], - ['Bill Clinton ate cheese.', 4], - ['5kb of data.', 3], - ['it was five hundred and seventy two.', 7], - ['jack and jill went up the hill. 
They got water.', 10], - ['Bill Clinton went walking', 4], - ['Bill Clinton will go walking', 5] - ].forEach(function(a) { - var num = nlp(a[0]).terms().length; - str_test(num, a[0], a[1], t); - }); - t.end(); -}); diff --git a/test/unit/subset/adjective/adjective.test.js b/test/unit/subset/adjective/adjective.test.js deleted file mode 100644 index 695ab43d4..000000000 --- a/test/unit/subset/adjective/adjective.test.js +++ /dev/null @@ -1,86 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('==Adjective==', function(T) { - T.test('to_adverb:', function(t) { - [ - ['quick', 'quickly'], - // ['idle', 'idly'], - ['dirty', null], - ['fun', null], - ['full', null], - ['quixotic', 'quixotically'], - ['cute', 'cutely'] - // ['good', 'well'], - // ['low', 'low'] - ].forEach(function(a) { - var arr = nlp(a[0]).adjectives().data(); - var obj = arr[0] || {}; - str_test(obj.adverbForm, a[0], a[1], t); - }); - t.end(); - }); - - T.test(' to_superlative', function(t) { - [ - ['quick', 'quickest'], - ['friendly', 'friendliest'], - // ['caring', 'most caring'], - ['fun', 'most fun'], - ['full', 'fullest'], - // ['quixotic', 'most quixotic'], - ['cute', 'cutest'], - ['large', 'largest'] - ].forEach(function(a) { - var arr = nlp(a[0]).adjectives().data(); - var obj = arr[0] || {}; - str_test(obj.superlative, a[0], a[1], t); - }); - t.end(); - }); - // - T.test(' to_comparative', function(t) { - [ - ['quick', 'quicker'], - ['friendly', 'friendlier'], - // ['caring', 'more caring'], - ['fun', 'more fun'], - ['full', 'fuller'], - // ['quixotic', 'more quixotic'], - ['cute', 'cuter'] - ].forEach(function(a) { - var arr = nlp(a[0]).adjectives().data(); - var obj = arr[0] || {}; - str_test(obj.comparative, a[0], a[1], t); - }); - t.end(); - }); - // - T.test(' to_noun', function(t) { - [ - ['quick', 'quickness'], - ['fancy', 'fanciness'], - // ['ferocious', 'ferociousness'], - // ['', ''], - // [' ', ''], - 
['clean', 'cleanliness'] - ].forEach(function(a) { - var arr = nlp(a[0]).adjectives().data(); - var obj = arr[0] || {}; - str_test(obj.nounForm, a[0], a[1], t); - }); - t.end(); - }); - // - T.test(' conjugate', function(t) { - var o = nlp('nice').adjectives().data()[0] || {}; - str_test(o.comparative, 'nice', 'nicer', t); - str_test(o.superlative, 'nice', 'nicest', t); - str_test(o.adverbForm, 'nice', 'nicely', t); - str_test(o.nounForm, 'nice', 'niceness', t); - t.end(); - }); - - T.end(); -}); diff --git a/test/unit/subset/adverb/adverb.test.js b/test/unit/subset/adverb/adverb.test.js deleted file mode 100644 index bfb341c73..000000000 --- a/test/unit/subset/adverb/adverb.test.js +++ /dev/null @@ -1,66 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('==Adverb==', function(T) { - T.test('to_adjective:', function(t) { - [ - ['quickly', 'quick'], - ['garishly', 'garish'], - ['tediously', 'tedious'], - ['frightfully', 'frightful'], - ['tortuously', 'tortuous'], - ['privately', 'private'], - ['unambiguously', 'unambiguous'], - ['cortically', 'cortic'], - ['biradially', 'biradial'], - ['meanly', 'mean'], - ['raspingly', 'rasping'], - ['comprehensively', 'comprehensive'], - ['fervently', 'fervent'], - ['nationally', 'national'], - ['maternally', 'maternal'], - ['flashily', 'flashy'], - ['only', 'only'], - ['narrowly', 'narrow'], - ['blasphemously', 'blasphemous'], - ['abortively', 'abortive'], - ['inoffensively', 'inoffensive'], - ['truly', 'true'], - ['gently', 'gent'], - ['tolerantly', 'tolerant'], - ['enchantingly', 'enchanting'], - ['unswervingly', 'unswerving'], - ['grubbily', 'grubby'], - ['longitudinally', 'longitudinal'], - ['thermodynamically', 'thermodynamic'], - ['mirthfully', 'mirthful'], - ['salaciously', 'salacious'], - ['dourly', 'dour'], - ['credulously', 'credulous'], - ['carefully', 'careful'], - ['knowingly', 'knowing'], - ['geometrically', 'geometrical'], - ['unassailably', 'unassailable'], - ['antecedently', 
'antecedent'], - ['adjectively', 'adjective'], - ['hebdomadally', 'hebdomadal'], - ['dizzily', 'dizzy'], - ['obnoxiously', 'obnoxious'], - ['thirstily', 'thirsty'], - ['biennially', 'biennial'], - ['roguishly', 'roguish'], - ['mentally', 'mental'], - ['incessantly', 'incessant'], - ['intelligently', 'intelligent'], - ['perseveringly', 'persevering'], - ['namely', 'name'], - ['formidably', 'formidable'], - ['vertically', 'vertical'] - ].forEach(function(a) { - var o = nlp(a[0]).tag('Adverb').adverbs().data()[0]; - var msg = a[0] + ' -> ' + o.adjectiveForm; - t.equal(o.adjectiveForm, a[1], msg); - }); - t.end(); - }); -}); diff --git a/test/unit/subset/allSubset.test.js b/test/unit/subset/allSubset.test.js deleted file mode 100644 index 341d67050..000000000 --- a/test/unit/subset/allSubset.test.js +++ /dev/null @@ -1,69 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var fns = require('../lib/fns'); -var freshPrince = require('../lib/freshPrince'); - -var subsets = [ - 'acronyms', - 'adjectives', - 'adverbs', - 'contractions', - 'dates', - 'hashTags', - 'organizations', - 'people', - 'phoneNumbers', - 'places', - 'sentences', - 'questions', - 'statements', - 'nouns', - 'urls', - 'values', - 'verbs' -]; - -test('all combined subsets empty:', function(t) { - var r = nlp(freshPrince); - var small = r.all(); - for (var i = 0; i < subsets.length; i++) { - var sub = subsets[i]; - small = small[sub](); - } - t.equal(small.out('text'), '', 'no-uber subset'); - t.end(); -}); - -test('all subsets have a data method:', function(t) { - var r = nlp(freshPrince); - subsets.forEach(function(s) { - var sub = r[s](); - var arr = sub.data(); - t.ok(fns.isArray(arr), s + '.data() is an array'); - }); - t.end(); -}); - -test('all subsets support .all():', function(t) { - var txt = freshPrince; - var r = nlp(txt); - subsets.forEach(function(s) { - var sub = r[s](); - var str = sub.all().out('text'); - var msg = s + '.all() works'; - t.equal(str, txt, msg); - }); - 
t.end(); -}); - -test('all subsets have an empty 100th element', function(t) { - var txt = freshPrince; - var r = nlp(txt); - subsets.forEach(function(s) { - var sub = r[s](9999); - var str = sub.out('text'); - var msg = s + ' is empty'; - t.equal(str, '', msg); - }); - t.end(); -}); diff --git a/test/unit/subset/contractions/basic.test.js b/test/unit/subset/contractions/basic.test.js deleted file mode 100644 index 0727ea898..000000000 --- a/test/unit/subset/contractions/basic.test.js +++ /dev/null @@ -1,137 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('basic is contractions', function(t) { - var r = nlp(`he is cool.`); - r.contractions().expand(); - t.equal(r.out('text'), `he is cool.`, 'expanded-expand'); - - r = nlp(`he's cool.`); - r.contractions().expand(); - t.equal(r.out('text'), `he is cool.`, 'contracted-expand'); - - r = nlp(`he is cool.`); - r.contractions().contract(); - t.equal(r.out('text'), `he's cool.`, 'expanded-contract'); - - r = nlp(`he's cool.`); - r.contractions().contract(); - t.equal(r.out('text'), `he's cool.`, 'contracted-contract'); - - r = nlp(`that's really great.`); - r.contractions().expand(); - t.equal(r.out('text'), `that is really great.`, 'contracted-expand'); - - r = nlp(`she'll, eat icecream`); - r.contractions().expand(); - t.equal(r.out('text'), `she will eat icecream`, 'with-punctuation'); - - r = nlp("we're not gonna take it, no we're not gonna take it"); - r.contractions().expand(); - t.equal(r.out('text'), `we are not going to take it, no we are not going to take it`, 'expand gonna twice'); - - r = nlp("let's let's we're gonna gonna"); - r.contractions().expand(); - t.equal(r.out('text'), `let us let us we are going to going to`, 'expand consecutive'); - t.end(); -}); - -test('do-not contractions', function(t) { - var r = nlp(`please do not eat the marshmellow`); - r.contractions().expand(); - t.equal(r.out('text'), `please do not eat the marshmellow`, 'expanded-expand'); - - r = 
nlp(`please don't eat the marshmellow`); - r.contractions().expand(); - t.equal(r.out('text'), `please do not eat the marshmellow`, 'contracted-expand'); - - r = nlp(`please do not eat the marshmellow`); - r.contractions().contract(); - t.equal(r.out('text'), `please don't eat the marshmellow`, 'expanded-contract'); - - r = nlp(`please don't eat the marshmellow`); - r.contractions().contract(); - t.equal(r.out('text'), `please don't eat the marshmellow`, 'contracted-contract'); - - t.end(); -}); - -test('have contractions', function(t) { - var r = nlp(`i have stood`); - r.contractions().expand(); - t.equal(r.out('text'), `i have stood`, 'expanded-expand'); - - r = nlp(`i've stood`); - r.contractions().expand(); - t.equal(r.out('text'), `i have stood`, 'contracted-expand'); - - r = nlp(`i have stood`); - r.contractions().contract(); - t.equal(r.out('text'), `i've stood`, 'expanded-contract'); - - r = nlp(`i've stood`); - r.contractions().contract(); - t.equal(r.out('text'), `i've stood`, 'contracted-contract'); - t.end(); -}); - -test('repeated contract-expand', function(t) { - var r = nlp(`i'm good`); - r.contractions().expand(); - t.equal(r.out('text'), `i am good`, 'expand-1'); - r.contractions().contract(); - t.equal(r.out('text'), `i'm good`, 'contract-1'); - r.contractions().expand(); - t.equal(r.out('text'), `i am good`, 'expand-2'); - r.contractions().contract(); - t.equal(r.out('text'), `i'm good`, 'contract-2'); - - r - .contractions() - .contract() - .contract() - .contract(); - t.equal(r.out('text'), `i'm good`, 'contract-n'); - - r - .contractions() - .expand() - .expand() - .expand(); - t.equal(r.out('text'), `i am good`, 'expand-n'); - t.end(); -}); - -test('contracted', function(t) { - var r = nlp(`I'll go to Toronto. 
I will see.`); - var str = r - .contractions() - .contracted() - .out('text'); - t.equal(str, `I'll`, 'contracted'); - str = r - .contractions() - .expanded() - .out('text'); - t.equal(str, `I will`, 'expanded'); - t.end(); -}); - -test('would-or-did', function(t) { - var r = nlp(`i'd contemplate`); - var str = r - .contractions() - .expand() - .all() - .out('text'); - t.equal(str, `i would contemplate`, 'i-would'); - - r = nlp(`i'd contemplated`); - str = r - .contractions() - .expand() - .all() - .out('text'); - t.equal(str, `i had contemplated`, 'i-had'); - t.end(); -}); diff --git a/test/unit/subset/contractions/contractions.test.js b/test/unit/subset/contractions/contractions.test.js deleted file mode 100644 index 1540ba061..000000000 --- a/test/unit/subset/contractions/contractions.test.js +++ /dev/null @@ -1,230 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('==contractions==', function(T) { - T.test('possessives-or-contractions:', function(t) { - [ - [`spencer's good`, `spencer is good`], - [`spencer's house`, `spencer's house`], - [`he's good`, `he is good`], - [`spencer's really good`, `spencer is really good`], - [`google's about to earn money`, `google is about to earn money`], - [`they're mark's 'question marks'`, `they are mark's question marks`], - - [`toronto's citizens`, `toronto's citizens`], - [`rocket's red glare`, `rocket's red glare`], - [`everyone's victories`, `everyone's victories`], - [`the tornado's power`, `the tornado's power`], - - [`somebody's walking`, `somebody is walking`], - // [`spencer's walking`, `spencer was walking`], //ambiguous - - [`spencer's walked`, `spencer has walked`], - [`spencer's had problems`, `spencer has had problems`], - [`spencer's got potatoes`, `spencer has got potatoes`], - [`spencer's gotten groceries`, `spencer has gotten groceries`], - [`he's become fat`, `he has become fat`], - [`she's earned money`, `she has earned 
money`], - [`he's not ever milked a cow`, `he has not ever milked a cow`] - ].forEach(function(a) { - var m = nlp(a[0]); - m.contractions().expand(); - var str = m.out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); - - T.test('contraction-pos:', function(t) { - [ - [`john's good`, `Person`], - [`ankara's good`, `Place`], - [`January's good`, `Date`], - [`john's cousin`, `Person`], - [`ankara's citizens`, `Place`], - [`January's weather`, `Date`] - ].forEach(function(a) { - var term = nlp(a[0]).list[0].terms[0]; - var msg = term.text + ' has tag ' + a[1]; - t.equal(term.tags[a[1]], true, msg); - }); - t.end(); - }); - - T.test('expand:', function(t) { - [ - [`he's a hero`, ['he', 'is']], - [`she's here`, ['she', 'is']], - [`it's a hero`, ['it', 'is']], - [`he'd win`, ['he', 'would']], - [`they'd win`, ['they', 'would']], - [`they've begun`, ['they', 'have']], - [`they'll begun`, ['they', 'will']], - [`we've begun`, ['we', 'have']], - [`don't go`, ['do', 'not']], - // dont expand leading 'nt contraction - [`mustn't go`, ['must', 'not']], - [`haven't gone`, ['have', 'not']], - [`isn't going`, ['is', 'not']], - ['can\'t go', ['can', 'not']], - ['ain\'t going', ['is', 'not']], - ['won\'t go', ['will', 'not']], - - ['i\'d go', ['i', 'would']], - ['she\'d go', ['she', 'would']], - ['he\'d go', ['he', 'would']], - ['they\'d go', ['they', 'would']], - ['we\'d go', ['we', 'would']], - - ['i\'ll go', ['i', 'will']], - ['she\'ll go', ['she', 'will']], - ['he\'ll go', ['he', 'will']], - ['they\'ll go', ['they', 'will']], - ['we\'ll go', ['we', 'will']], - - ['i\'ve go', ['i', 'have']], - ['they\'ve go', ['they', 'have']], - ['we\'ve go', ['we', 'have']], - ['should\'ve go', ['should', 'have']], - ['would\'ve go', ['would', 'have']], - ['could\'ve go', ['could', 'have']], - ['must\'ve go', ['must', 'have']], - - ['i\'m going', ['i', 'am']], - ['we\'re going', ['we', 'are']], - ['they\'re going', ['they', 'are']], - - [`don't`, ['do', 'not']], - [`do not`, 
['do', 'not']], - [`dunno`, ['do', 'not', 'know']], - - [`spencer's going`, ['spencer', 'is']], - [`he's going`, ['he', 'is']], - - [`how'd`, ['how', 'did']], - // [`why'd`, ['why', 'did']], - // [`who'd`, ['who', 'did']], - [`when'll`, ['when', 'will']], - [`how'll`, ['how', 'will']], - [`who'll`, ['who', 'will']], - [`who's`, ['who', 'is']], - [`how's`, ['how', 'is']] - ].forEach(function(a) { - var arr = nlp(a[0]).contractions().expand().out('terms'); - var got = arr.map(function(term) { - return term.normal; - }); - var msg = a[0] + ' - - [' + got.join(', ') + '] should be [' + a[1].join(', ') + ']'; - t.deepEqual(got, a[1], msg); - }); - t.end(); - }); - // - T.test('contract:', function(t) { - [ - [`he is a hero`, `he's`], - [`she is here`, `she's`], - [`it is a hero`, `it's`], - [`he would win`, `he'd`], - [`they would win`, `they'd`], - [`they have begun`, `they've`], - [`how will`, `how'll`], - [`when will`, `when'll`], - [`who did`, `who'd`], - [`who is`, `who's`] - ].forEach(function(a) { - var term = nlp(a[0]).contractions().contract().list[0].terms[0]; - str_test(term.normal, a[0], a[1], t); - }); - t.end(); - }); - - T.test('preserve-contractions:', function(t) { - [`he is a hero`, `she is here`, `it is a hero`, `he would win`, `they would win`].forEach(function(a) { - var str = nlp(a[0]).out('normal'); - str_test(str, a[0], a[0], t); - }); - t.end(); - }); - - T.test('reverse-is-consistent:', function(t) { - var str = `doesn't there's i'd i'll can't won't wasn't weren't wouldn't haven't`; - var doc = nlp(str); - doc.contractions().expand(); - doc.contractions().contract(); - doc.contractions().expand(); - doc.contractions().contract(); - t.equal(doc.out(), str, 'idempodent expand/contract'); - t.end(); - }); - - T.test('contraction-supports-whitespace:', function(t) { - [ - ['We\'ve only just begun', 'We have only just begun'], - ['We\'ve only just begun', 'We have only just begun'] - ].forEach(function(a) { - var m = nlp(a[0]); - 
m.contractions().expand(); - var str = m.out('text'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); - - T.test('numberRange-contraction:', function(t) { - var r = nlp('june 5-7 1998').match('5 to 7'); - t.equal(r.out('normal'), '5-7', 'june 5-7 numberRange'); - - r = nlp('rooms 99-102').match('99 to 102'); - t.equal(r.out('normal'), '99-102', 'rooms 99-102'); - - r = nlp('around 7.5-8').match('7.5 to 8'); - t.equal(r.out('normal'), '7.5-8', 'around 7.5-8'); - - r = nlp('june 5th-7th 1998').match('5th to 7th'); - t.equal(r.out('normal'), '5th-7th', 'june 5th-7th numberRange'); - - r = nlp('june 5th - 7th 1998').match('5th to 7th'); - t.equal(r.out('text'), ' 5th - 7th', 'june 5th - 7th numberRange'); - - t.end(); - }); - - T.test('numberRange-expand:', function(t) { - var r = nlp('june 5-7 1998'); - r.contractions().expand(); - var str = r.out('normal'); - t.equal(str, 'june 5 to 7 1998', 'june 5-7 numberRange'); - - r = nlp('rooms 99-102'); - r.contractions().expand(); - str = r.out('normal'); - t.equal(str, 'rooms 99 to 102', 'rooms 99-102'); - - r = nlp('june 5th-7th 1998'); - r.contractions().expand(); - str = r.out('normal'); - t.equal(str, 'june 5th to 7th 1998', 'june 5th-7th numberRange'); - - r = nlp('june 5th - 7th 1998'); - r.contractions().expand(); - str = r.out('normal'); - t.equal(str, 'june 5th to 7th 1998', 'june 5th - 7th numberRange'); - - var doc = nlp('measuring 7.5–11 micrometers'); - doc.contractions().expand(); - doc.values().toNice(); - t.equal('measuring 7.5 to 11 micrometers', doc.out(), 'numer-range-emdash'); - doc.values().toText(); - t.equal('measuring seven point five to eleven micrometers', doc.out(), 'numer-range-emdash2'); - - t.end(); - }); - - T.test('not-a-numberRange:', function(t) { - var doc = nlp('twenty-two'); - t.equal(doc.has('#NumberRange'), false, 'twenty-two not numberRange'); - t.end(); - }); -}); diff --git a/test/unit/subset/date/basic_date.test.js b/test/unit/subset/date/basic_date.test.js deleted file mode 
100644 index 338e45e84..000000000 --- a/test/unit/subset/date/basic_date.test.js +++ /dev/null @@ -1,64 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('date-parse :', function(t) { - [ - ['june 5th 1999', [5, 5, 1999]], - ['june 5th 1999', [5, 5, 1999]], - ['january 1st 1644', [0, 1, 1644]], - ['jan 1st 1644', [0, 1, 1644]], - ['June 4th 1993', [5, 4, 1993]], - ['March 1st 1987', [2, 1, 1987]], - ['June 22nd 2014', [5, 22, 2014]], - ['may 22nd 2014', [4, 22, 2014]], - ['sep 22nd 2014', [8, 22, 2014]], - ['apr 22nd 2014', [3, 22, 2014]], - ['June 22nd 1997', [5, 22, 1997]], - ['3rd of March 1969', [2, 3, 1969]], - ['2nd of April 1929', [3, 2, 1929]], - ['2nd of jul 1929', [6, 2, 1929]], - //incomplete dates - ['March 1969', [2, null, 1969]], - ['March 18th', [2, 18, null]], - ['August 28th', [7, 28, null]], - ['18th of March', [2, 18, null]], - ['27th of March', [2, 27, null]], - // ['2012-2014', [null, null, 2012]], - // ['1997-1998', [null, null, 1997]], - // ['1998', [null, null, 1998]], - // ['1672', [null, null, 1672]], - // ['2015', [null, null, 2015]], - ['january 5th 1998', [0, 5, 1998]], - ['february 10th', [1, 10, null]], - ['february 30th', [1, 30, null]], - ['jan 1921', [0, null, 1921]] - //invalid dates - // ['303rd of March 1969', [2, null, 1969]], - // ['4103', [null, null, null]], - - // ['January 5th 4032', [0, 5, null]], - ].forEach(function(a) { - var arr = nlp(a[0]).dates().data(); - var o = arr[0].date; - var got = [o.month, o.date, o.year]; - var msg = 'date "' + a[0] + '" got: [' + got.join(',') + '] want: [' + a[1].join(',') + ']'; - t.deepEqual(got, a[1], msg); - }); - t.end(); -}); - -// durations // - -// ['March 7th-11th 1987', [2, 7, 1987]], -// ['June 1st-11th 1999', [5, 1, 1999]], -// ['28th of September to 5th of October 2008', [8, 28, 2008]], -// ['2nd of January to 5th of October 2008', [9, 5, 2008]], -// ['March 7th to june 11th 1987', [2, 7, 1987]], -// ['April 17th to september 11th 1981', [3, 
17, 1981]], -// ['June 1st to June 11th 2014', [5, 1, 2014]], -// ['between 13 February and 15 February 1945', [1, 13, 1945]], -// ['between March 7th and june 11th 1987', [2, 7, 1987]], -// ['3rd - 5th of March 1969', [2, 3, 1969]], -// ["September 1939 to April 1945", ["month":null,"day":null,"year":1939]], -// ["June 1969 to April 1975", ["month":null,"day":null,"year":1969]], -// ["2014-1998", ["month":null,"day":null,"year":null]], diff --git a/test/unit/subset/date/misc_date.test.js b/test/unit/subset/date/misc_date.test.js deleted file mode 100644 index 053e18ca7..000000000 --- a/test/unit/subset/date/misc_date.test.js +++ /dev/null @@ -1,40 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('short+long form', function(t) { - var r = nlp('wednesday, january 2nd, 2016'); - var shorter = r.dates().toShortForm().out('normal'); - t.equal(shorter, 'wed jan 2nd 2016'); - - var r2 = nlp('Thurs, feb 2nd, 2016'); - var longer = r2.dates().toLongForm().out('normal'); - t.equal(longer, 'thursday february 2nd 2016'); - - - var doc = nlp('April, June, and Sept'); - shorter = doc.dates().toShortForm().all().out('normal'); - t.equal(shorter, 'apr jun and sept', 'months-short'); - longer = doc.dates().toLongForm().all().out('normal'); - t.equal(longer, 'april june and september', 'months-longer'); - - r2 = nlp('Thurs, feb 2nd, 2016'); - longer = r2.dates().toLongForm().out('normal'); - t.equal(longer, 'thursday february 2nd 2016'); - - - var str = nlp('April, June, and Sept').dates().toShortForm().all().out(); - t.equal('Apr, Jun, and Sept', str, 'toShortForm-comma'); - - str = nlp('Apr, June, and Sept').dates().toLongForm().all().out(); - t.equal('April, June, and September', str, 'toShortForm-comma'); - - doc = nlp('January 10, 2018 7:20 AM'); - var obj = doc.dates().data()[0].date; - t.equal(obj.month, 0, 'month'); - t.equal(obj.date, 10, 'date'); - t.equal(obj.year, 2018, 'year'); - t.equal(obj.time.hour, 7, 'hour'); - 
t.equal(obj.time.minute, 20, 'minute'); - - t.end(); -}); diff --git a/test/unit/subset/ngrams/edgegram.test.js b/test/unit/subset/ngrams/edgegram.test.js deleted file mode 100644 index 89a359c48..000000000 --- a/test/unit/subset/ngrams/edgegram.test.js +++ /dev/null @@ -1,26 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('edgegram-sizes:', function(t) { - var r = nlp(`he is cool. john was cool. He is really nice.`); - - var arr = r.startGrams(null, 5).data(); - t.equal(arr.length, 0, 'no-overgrams'); - - arr = r.startGrams(null, 4).data(); - t.equal(arr.length, 1, 'one-4-startgram'); - - arr = r.endGrams(null, 4).data(); - t.equal(arr.length, 1, 'one-4-endgram'); - - t.end(); -}); - -test('start-sizes:', function(t) { - var r = nlp(`he is cool. john was cool. He is really nice.`); - var arr = r.startGrams().data(); - t.equal(arr[0].normal, 'he is', 'sorted-by-freq'); - t.equal(arr[0].count, 2, 'normalized-counted'); - t.equal(arr[0].size, 2, 'normalized-counted'); - t.end(); -}); diff --git a/test/unit/subset/ngrams/ngram.test.js b/test/unit/subset/ngrams/ngram.test.js deleted file mode 100644 index fd2f31759..000000000 --- a/test/unit/subset/ngrams/ngram.test.js +++ /dev/null @@ -1,61 +0,0 @@ -'use strict'; -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('ngram-test:', function(t) { - var r = nlp('he is strong. he is cool'); - var arr = r.ngrams().data(); - - t.equal(arr[0].normal, 'he is', 'sorted-by-freq'); - t.equal(arr[0].count, 2, 'normalized-counted'); - t.equal(arr[0].size, 2, 'normalized-counted'); - - t.equal(arr.length, 9, 'ngram-length'); - t.end(); -}); - -test('sort-bigrams:', function(t) { - var r = nlp('he is strong. 
he is cool'); - var arr = r.ngrams({ - size: 2 - }).data(); - t.equal(arr[0].normal, 'he is', '#1-by-freq'); - t.equal(arr[1].normal, 'is strong', '#2-by-freq'); - t.equal(arr[2].normal, 'is cool', '#3-by-freq'); - t.equal(arr.length, 3, 'ngram-length'); - t.end(); -}); - -test('contractions-support:', function(t) { - var r = nlp('It\'s free for me and free for you'); - var arr = r.ngrams().data(); - var obj = arr.find((o) => o.normal === 'free for'); - t.equal(obj.count, 2, 'dont include empty contraction'); - t.end(); -}); - -test('ngrams-options:', function(t) { - var doc = nlp('one two three four five, one two three four five, one two three four five'); - var arr = doc.ngrams({ - max: 5 - }).data(); - t.equal(arr[0].size, 5, 'ngram-max-size-5'); - arr = doc.ngrams({ - max: 2 - }).data(); - t.equal(arr[0].size, 2, 'ngram-max-size-2'); - arr = doc.ngrams({ - max: 9 - }).data(); - t.equal(arr[0].size, 5, 'ngram-max-size-9'); - - arr = doc.ngrams({ - size: 2 - }).data(); - t.equal(arr[0].size, 2, 'ngram-size-2'); - arr = doc.ngrams({ - size: 4 - }).data(); - t.equal(arr[0].size, 4, 'ngram-size-4'); - t.end(); -}); diff --git a/test/unit/subset/noun/article.test.js b/test/unit/subset/noun/article.test.js deleted file mode 100644 index ad305e696..000000000 --- a/test/unit/subset/noun/article.test.js +++ /dev/null @@ -1,24 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('.article():', function(t) { - [ - ['duck', 'a'], - ['eavesdropper', 'an'], - ['alligator', 'an'], - // ['hour', 'an'], - ['NDA', 'an'], - ['F.B.I', 'an'], - ['N.D.A.', 'an'], - ['eulogy', 'a'], - ['ukalele', 'a'], - ['skateboards', 'the'], - ['John Smith', ''], - ['Tony Danza', ''] - ].forEach(function(a) { - var o = nlp(a[0]).tag('Noun').nouns().data()[0]; - var msg = a[0] + ' -> ' + o.article; - t.equal(o.article, a[1], msg); - }); - t.end(); -}); diff --git a/test/unit/subset/noun/inflect.test.js b/test/unit/subset/noun/inflect.test.js deleted file mode 100644 
index bb254aecd..000000000 --- a/test/unit/subset/noun/inflect.test.js +++ /dev/null @@ -1,206 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('==Plurals==', function(T) { - T.test('is_plural():', function(t) { - [ - ['octopus', false], - ['tree', false], - ['trees', true], - // ['i', false], - ['mayor of chicago', false], - ['mayors of chicago', true], - ['octopus', false], - ['octopi', true], - ['eyebrow', false], - ['eyebrows', true], - ['child', false], - ['children', true], - ['spencer\'s', false], - ['toronto\'s', false], - ['circus', false], - ['circuses', true] - // ['simpsons\'', false], - // ['she\'s', false], - ].forEach(function(a) { - var r = nlp(a[0]).nouns(); - var msg = a[0]; - t.equal(r.isPlural().found, a[1], msg); - }); - t.end(); - }); - - T.test('singularize:', function(t) { - [ - // ["Joneses", "Jones"], - ['children', 'child'], - ['women', 'woman'], - ['men', 'man'], - ['people', 'person'], - ['geese', 'goose'], - ['mice', 'mouse'], - ['barracks', 'barracks'], - ['deer', 'deer'], - ['nuclei', 'nucleus'], - ['syllabi', 'syllabus'], - ['fungi', 'fungus'], - ['cacti', 'cactus'], - ['theses', 'thesis'], - ['crises', 'crisis'], - ['phenomena', 'phenomenon'], - ['embryos', 'embryo'], - ['frescos', 'fresco'], - ['ghettos', 'ghetto'], - ['halos', 'halo'], - ['mangos', 'mango'], - ['mementos', 'memento'], - ['mottos', 'motto'], - ['tornados', 'tornado'], - ['tuxedos', 'tuxedo'], - ['volcanos', 'volcano'], - ['crises', 'crisis'], - ['analyses', 'analysis'], - ['aircraft', 'aircraft'], - ['bass', 'bass'], - ['bison', 'bison'], - ['fish', 'fish'], - ['fowl', 'fowl'], - ['kilos', 'kilo'], - ['kimonos', 'kimono'], - ['logos', 'logo'], - ['memos', 'memo'], - ['ponchos', 'poncho'], - ['photos', 'photo'], - ['pimentos', 'pimento'], - ['pros', 'pro'], - ['sombreros', 'sombrero'], - ['tacos', 'taco'], - ['memos', 'memo'], - ['torsos', 'torso'], - ['xylophones', 'xylophone'], - 
['quintuplets', 'quintuplet'], - ['worrywarts', 'worrywart'], - ['nerds', 'nerd'], - ['lollipops', 'lollipop'], - ['eyebrows', 'eyebrow'], - // ['mayors of chicago', 'mayor of chicago'], - //test that sungular.singularize()==singular.. - ['mango', 'mango'], - ['memento', 'memento'], - ['motto', 'motto'], - ['tornado', 'tornado'], - ['person', 'person'], - ['goose', 'goose'], - ['mouse', 'mouse'], - ['calves', 'calf'], - ['olives', 'olive'], - ['loaves', 'loaf'], - ['oafs', 'oaf'], - ['wives', 'wife'], - ['roofs', 'roof'], - ['hooves', 'hoof'], - ['buses', 'bus'], - ['tosses', 'toss'], - ['wishes', 'wish'], - ['geniouses', 'genious'], - ['prognoses', 'prognosis'], - ['analyses', 'analysis'], - ['synopses', 'synopsis'], - ['parentheses', 'parenthesis'], - ['theses', 'thesis'], - ['bases', 'base'] - ].forEach(function(a) { - var r = nlp(a[0]).tag('Noun').nouns(); - var str = r.toSingular().out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); - - T.test('pluralize:', function(t) { - [ - ['snake', 'snakes'], - ['ski', 'skis'], - // ["Barrymore", "Barrymores"], - ['witch', 'witches'], - ['box', 'boxes'], - ['gas', 'gases'], - ['kiss', 'kisses'], - ['index', 'indices'], - ['appendix', 'appendices'], - ['criterion', 'criteria'], - ['berry', 'berries'], - ['activity', 'activities'], - ['daisy', 'daisies'], - ['church', 'churches'], - ['fox', 'foxes'], - ['stomach', 'stomachs'], - ['epoch', 'epochs'], - ['knife', 'knives'], - ['half', 'halves'], - ['scarf', 'scarves'], - ['chief', 'chiefs'], - ['spoof', 'spoofs'], - ['cafe', 'cafes'], - ['gulf', 'gulfs'], - ['alternative', 'alternatives'], - ['solo', 'solos'], - ['zero', 'zeros'], - ['avocado', 'avocados'], - ['studio', 'studios'], - ['zoo', 'zoos'], - ['embryo', 'embryos'], - ['hero', 'heroes'], - ['banjo', 'banjos'], - ['cargo', 'cargos'], - ['flamingo', 'flamingos'], - ['fresco', 'frescos'], - ['ghetto', 'ghettos'], - ['halo', 'halos'], - ['mango', 'mangos'], - ['memento', 'mementos'], - ['motto', 
'mottos'], - ['tornado', 'tornados'], - ['tuxedo', 'tuxedos'], - ['volcano', 'volcanos'], - ['bus', 'buses'], - ['crisis', 'crises'], - ['analysis', 'analyses'], - ['neurosis', 'neuroses'], - ['aircraft', 'aircraft'], - ['halibut', 'halibut'], - ['moose', 'moose'], - ['salmon', 'salmon'], - ['sheep', 'sheep'], - ['spacecraft', 'spacecraft'], - ['tuna', 'tuna'], - ['trout', 'trout'], - ['armadillo', 'armadillos'], - ['auto', 'autos'], - ['bravo', 'bravos'], - ['bronco', 'broncos'], - ['casino', 'casinos'], - ['combo', 'combos'], - ['gazebo', 'gazebos'], - //test that plural.pluralize()==plural.. - ['snakes', 'snakes'], - ['skis', 'skis'], - // ['mayor of chicago', 'mayors of chicago'], - // ["Barrymores", "Barrymores"], - ['witches', 'witches'], - ['boxes', 'boxes'], - ['gases', 'gases'], - ['spoofs', 'spoofs'], - ['solos', 'solos'], - ['avocados', 'avocados'], - ['studios', 'studios'], - ['zoos', 'zoos'] - ].forEach(function(a) { - var r = nlp(a[0]).tag('Noun').nouns(); - var str = r.toPlural().out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); -}); diff --git a/test/unit/subset/noun/possessive.test.js b/test/unit/subset/noun/possessive.test.js deleted file mode 100644 index bc3ed148f..000000000 --- a/test/unit/subset/noun/possessive.test.js +++ /dev/null @@ -1,21 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('.toPossessive():', function(t) { - [ - ['duck', `duck's`], - ['eavesdropper', `eavesdropper's`], - ['John', `John's`], - ['hour', `hour's`], - ['F.B.I', `F.B.I's`], - ['John Smith', `John Smith's`], - ['skateboards', `skateboards'`], - ['Flanders', `Flanders'`], - // ['she', 'hers'], - ['peaches', `peaches'`] - ].forEach(function(a) { - var doc = nlp(a[0]).tag('Noun').nouns().toPossessive(); - t.equal(doc.out(), a[1], a[0]); - }); - t.end(); -}); diff --git a/test/unit/subset/noun/pronoun.test.js b/test/unit/subset/noun/pronoun.test.js deleted file mode 100644 index 6575f55cf..000000000 --- 
a/test/unit/subset/noun/pronoun.test.js +++ /dev/null @@ -1,27 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('pronoun:', function(t) { - [ - ['John', 'he'], - ['John Smith', 'he'], - ['Jane', 'she'], - // ['turtle', 'it'], - // ['turtles', 'they'], - // ['Toronto', 'it'], - // ['studying', 'it'], - // ['horses', 'they'], - // ['road bikes', 'they'], - // ['NHL goaltenders', 'they'], - ['Tony Danza', 'he'], - ['Tanya Danza', 'she'], - ['Mrs. Tanya Danza', 'she'] - // ['John G. Fishermore Institute', 'it'], - // ['John Fisher & sons', 'it'], - ].forEach(function(a) { - var str = nlp(a[0]).people().pronoun()[0]; - var msg = a[0] + ' -> ' + str; - t.equal(str, a[1], msg); - }); - t.end(); -}); diff --git a/test/unit/subset/person/person.test.js b/test/unit/subset/person/person.test.js deleted file mode 100644 index 2f744b85b..000000000 --- a/test/unit/subset/person/person.test.js +++ /dev/null @@ -1,159 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -//list of famous names taken from https://gate.ac.uk/gate/plugins/ANNIE/resources/gazetteer/ -var tests = { - 'john stewart': 'Male', - 'martha stewart': 'Female', - // 'Gaugin': 'Male', - // 'Gauguin': 'Male', - 'George Bush': 'Male', - 'Hillary Clinton': 'Female', - 'Hillary Rodham Clinton': 'Female', - // 'John the Baptist': 'Male', - 'Margaret Thatcher': 'Female', - Messiaen: 'Male', - Mozart: 'Male', - Nixon: 'Male', - 'Pope John Paul II': 'Male', - 'Richard Nixon': 'Male', - 'Ronald Reagan': 'Male', - 'Saddam Hussain': 'Male', - 'Saint - Saens': 'Male', - // 'Shostakovich': 'Male', - // 'Strauss': 'Male', - // 'Thatcher': 'Female', - // 'The Queen': 'Female', - // 'the Queen': 'Female', - 'Van Gogh': 'Male', - 'Virgin Mary': 'Female', - Vivaldi: 'Male', - 'van Gogh': 'Male', - 'Carl Marx': 'Male', - Lenin: 'Male', - Stalin: 'Male', - 'George W.Bush': 'Male', - 'Mitt Romney': 'Male', - 'Barack Obama': 'Male', - Obama: 'Male', - 'Lady Gaga': 'Female', - 
'Kanye West': 'Male', - 'Abu Hamza': 'Male', - 'Abu Hamza Al - Masri': 'Male', - 'Osama bin Laden': 'Male', - 'Osama Bin Laden': 'Male', - Mubarek: 'Male', - 'Muhammad Ali': 'Male', - 'Jennifer Aniston': 'Female', - 'Tyra Banks': 'Female', - 'Mario Batali': 'Male', - 'David Beckham': 'Male', - 'Halle Berry': 'Female', - 'Tom Brady': 'Male', - 'Matthew Broderick': 'Male', - 'Nathan Lane': 'Male', - 'Mel Brooks': 'Male', - 'Dan Brown': 'Male', - 'Jerry Bruckheimer': 'Male', - 'Kobe Bryant': 'Male', - 'Gisele Bundchen': 'Female', - 'Jim Carrey': 'Male', - 'Dave Chappelle': 'Male', - 'Sean Combs': 'Male', - 'Katie Couric': 'Female', - 'Simon Cowell': 'Male', - 'Tom Cruise': 'Male', - 'Johnny Depp': 'Male', - 'Cameron Diaz': 'Female', - 'Leonardo DiCaprio': 'Male', - 'Celine Dion': 'Female', - 'Jodie Foster': 'Female', - 'John Grisham': 'Male', - 'Tom Hanks': 'Male', - 'Paris Hilton': 'Female', - 'Eric Idle': 'Male', - 'Mike Nichols': 'Male', - 'Peter Jackson': 'Male', - 'LeBron James': 'Male', - 'Derek Jeter': 'Male', - 'Scarlett Johansson': 'Female', - 'Elton John': 'Male', - 'Angelina Jolie': 'Female', - 'Michael Jordan': 'Male', - 'Nicole Kidman': 'Female', - 'Heidi Klum': 'Female', - 'Emeril Lagasse': 'Male', - 'Jay Leno': 'Male', - 'David Letterman': 'Male', - 'Adriana Lima': 'Female', - 'Rush Limbaugh': 'Male', - 'George Lopez': 'Male', - 'Jennifer Lopez': 'Female', - 'George Lucas': 'Male', - 'Paul McCartney': 'Male', - 'Dr. Phil McGraw': 'Male', - 'Phil Mickelson': 'Male', - 'Kate Moss': 'Female', - 'Neil Diamond': 'Male', - "Bill O'Reilly": 'Male', - "Shaquille O'Neal": 'Male', - 'Carson Palmer': 'Male', - 'James Patterson': 'Male', - 'Ty Pennington': 'Male', - 'Regis Philbin': 'Male', - 'Brad Pitt': 'Male', - 'Wolfgang Puck': 'Male', - 'Rachael Ray': 'Female', - 'Nicole Richie': 'Female', - 'Alex Rodriguez': 'Male', - 'Ray Romano': 'Male', - Ronaldinho: 'Male', - 'Valentino Rossi': 'Male', - 'J.K. 
Rowling': 'Female', - 'Adam Sandler': 'Male', - 'Diane Sawyer': 'Female', - 'Michael Schumacher': 'Male', - 'Ryan Seacrest': 'Male', - 'Jerry Seinfeld': 'Male', - 'Maria Sharapova': 'Female', - 'Jessica Simpson': 'Female', - 'Will Smith': 'Male', - 'Annika Sorenstam': 'Female', - 'Steven Spielberg': 'Male', - 'Bruce Springsteen': 'Male', - 'Howard Stern': 'Male', - 'Rod Stewart': 'Male', - 'Kiefer Sutherland': 'Male', - 'Donald Trump': 'Male', - 'Rick Warren': 'Male', - 'Denzel Washington': 'Male', - 'Sardinia F Jones': 'Female', - 'Andrew Lloyd Webber': 'Male', - 'Michelle Wie': 'Female', - 'Serena Williams': 'Female', - 'Venus Williams': 'Female', - 'Oprah Winfrey': 'Female', - 'Reese Witherspoon': 'Female', - 'Dick Wolf': 'Male', - 'Tiger Woods': 'Male', - 'Renee Zellweger': 'Female', - 'Whitney Houston': 'Female', - 'Adolf Hitler': 'Male', - 'Shania Twain': 'Female', - 'Hulk Hogan': 'Male', - 'Michelle Obama': 'Female', - 'Ashton Kutcher': 'Male', - 'Cardinal Wolsey': 'Male', - 'Slobodan Milosevic': 'Male', - 'Renee Zellweger ': 'Female', - 'Whitney Houston ': 'Female' -}; - -test('celebrity names:', function(t) { - Object.keys(tests).forEach(function(k) { - var str = nlp(k).people().out('text'); - var msg = "'" + k + "' is a person - - have: '" + str + "'"; - t.equal(str, k, msg); - }); - t.end(); -}); diff --git a/test/unit/subset/place/places.test.js b/test/unit/subset/place/places.test.js deleted file mode 100644 index 8626df268..000000000 --- a/test/unit/subset/place/places.test.js +++ /dev/null @@ -1,37 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('known-regions:', function(t) { - [ - ['i want to go to Ohio to see George Harrison', 'ohio'], - ['we are visiting Gloucestershire, before we leave', 'gloucestershire'], - ['manitoba is nice this time of year', 'manitoba'] - ].forEach(function(a) { - var str = nlp(a[0]).match('#Region').out('normal'); - str_test(str, a[0], 
a[1], t); - }); - t.end(); -}); - -test('unknown-places:', function(t) { - [ - ['live in the Rekcjd Province', 'rekcjd province'], - ['live in the Lekfjs District', 'lekfjs district'], - ['visiting Tojbs Kjeh Region', 'tojbs kjeh region'], - ['visiting the State of Lkjfhe', 'state of lkjfhe'], - ['see you in West Nunavut', 'west nunavut'], - ['see you in western Hunan', 'western hunan'], - ['see you in Northern Hunan province', 'northern hunan province'] - ].forEach(function(a) { - var str = nlp(a[0]).places(0).out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('mixed continents-places:', function(t) { - var doc = nlp('in north africa, eastern asia, guatemala, europe, north america, and japan'); - t.equal(doc.places().length, 6, '6-places'); - t.end(); -}); diff --git a/test/unit/subset/possessives/possessives.test.js b/test/unit/subset/possessives/possessives.test.js deleted file mode 100644 index 8c1152eae..000000000 --- a/test/unit/subset/possessives/possessives.test.js +++ /dev/null @@ -1,37 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('possessives tagger', function(t) { - var arr = [ - `Spencer's`, - `Spencer Kelly's`, - `Spencer C. Kelly's`, - `Union Corp's`, - `Los Angeles's`, - ]; - arr.forEach((a) => { - var doc = nlp(a); - var m = doc.possessives(); - t.equal(m.length, 1, 'one possessive -' + a); - t.equal(m.out(), a, 'possessive match -' + a); - }); - t.end(); -}); - -test('possessives strip', function(t) { - var arr = [ - [`Spencer's`, 'Spencer'], - [`Corey Hart's`, 'Corey Hart'], - [`Corey M. Hart's`, 'Corey M. Hart'], - [`Spencer C. Kelly's`, 'Spencer C. 
Kelly'], - [`Agility Inc's`, 'Agility Inc'], - [`University of Wisconsin's`, 'University of Wisconsin'], - [`Los Angeles's`, 'Los Angeles'], - ]; - arr.forEach((a) => { - var doc = nlp(a[0]); - doc.possessives().strip(); - t.equal(doc.out('text'), a[1], a[0]); - }); - t.end(); -}); diff --git a/test/unit/subset/quotations/quotation-tag.test.js b/test/unit/subset/quotations/quotation-tag.test.js deleted file mode 100644 index 4ce2c2ea9..000000000 --- a/test/unit/subset/quotations/quotation-tag.test.js +++ /dev/null @@ -1,221 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -function testAllQuotes(a, t) { - var str = nlp(a[0]).match('#Quotation+').out('normal'); - str_test(str, a[0], a[1], t); -} - -test('quotation test:', function(t) { - [ - [`he is "really good"`, `really good`], - [`he is "really good" i guess`, `really good`], - [`he is "good" i guess`, `good`], - [`he is "completely and utterly great" i guess`, `completely and utterly great`], - [`“quote”`, `quote`], - [`“quote is here”`, `quote is here`] - ].forEach(function(a) { - var str = nlp(a[0]).match('#Quotation+').out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('Quotations - U+0022 to U+0022', function (t) { - [ - ['he is \u0022really good\u0022', 'really good'], - ['he is \u0022really good\u0022 i guess', 'really good'], - ['he is not \u0022good\u0022 at all :/', 'good'], - ['\u0022quote\u0022', 'quote'], - ['\u0022a quote here\u0022', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+FF02 to U+FF02', function (t) { - [ - ['he is \uFF02really good\uFF02', 'really good'], - ['he is \uFF02really good\uFF02 i guess', 'really good'], - ['he is not \uFF02good\uFF02 at all :/', 'good'], - ['\uFF02quote\uFF02', 'quote'], - ['\uFF02a quote here\uFF02', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+0027 to 
U+0027', function (t) { - [ - ['he is \u0027really good\u0027', 'really good'], - ['he is \u0027really good\u0027 i guess', 'really good'], - ['he is not \u0027good\u0027 at all :/', 'good'], - ['\u0027quote\u0027', 'quote'], - ['\u0027a quote here\u0027', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); -// -test('Quotations - U+201C to U+201D', function (t) { - [ - ['he is \u201Creally good\u201D', 'really good'], - ['he is \u201Creally good\u201D i guess', 'really good'], - ['he is not \u201Cgood\u201D at all :/', 'good'], - ['\u201Cquote\u201D', 'quote'], - ['\u201Ca quote here\u201D', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+2018 to U+2019', function (t) { - [ - ['he is \u2018really good\u2019', 'really good'], - ['he is \u2018really good\u2019 i guess', 'really good'], - ['he is not \u2018good\u2019 at all :/', 'good'], - ['\u2018quote\u2019', 'quote'], - ['\u2018a quote here\u2019', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+201F to U+201D', function (t) { - [ - ['he is \u201Freally good\u201D', 'really good'], - ['he is \u201Freally good\u201D i guess', 'really good'], - ['he is not \u201Fgood\u201D at all :/', 'good'], - ['\u201Fquote\u201D', 'quote'], - ['\u201Fa quote here\u201D', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+201B to U+2019', function (t) { - [ - ['he is \u201Breally good\u2019', 'really good'], - ['he is \u201Breally good\u2019 i guess', 'really good'], - ['he is not \u201Bgood\u2019 at all :/', 'good'], - ['\u201Bquote\u2019', 'quote'], - ['\u201Ba quote here\u2019', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+201E to U+201D', function (t) { - [ - ['he is \u201Ereally good\u201D', 'really good'], - ['he is \u201Ereally good\u201D i guess', 'really good'], - ['he is not \u201Egood\u201D at all :/', 'good'], - 
['\u201Equote\u201D', 'quote'], - ['\u201Ea quote here\u201D', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+2E42 to U+201D', function (t) { - [ - ['he is \u2E42really good\u201D', 'really good'], - ['he is \u2E42really good\u201D i guess', 'really good'], - ['he is not \u2E42good\u201D at all :/', 'good'], - ['\u2E42quote\u201D', 'quote'], - ['\u2E42a quote here\u201D', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+201A to U+2019', function (t) { - [ - ['he is \u201Areally good\u2019', 'really good'], - ['he is \u201Areally good\u2019 i guess', 'really good'], - ['he is not \u201Agood\u2019 at all :/', 'good'], - ['\u201Aquote\u2019', 'quote'], - ['\u201Aa quote here\u2019', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+00AB to U+00BB', function (t) { - [ - ['he is \u00ABreally good\u00BB', 'really good'], - ['he is \u00ABreally good\u00BB i guess', 'really good'], - ['he is not \u00ABgood\u00BB at all :/', 'good'], - ['\u00ABquote\u00BB', 'quote'], - ['\u00ABa quote here\u00BB', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+2039 to U+203A', function (t) { - [ - ['he is \u2039really good\u203A', 'really good'], - ['he is \u2039really good\u203A i guess', 'really good'], - ['he is not \u2039good\u203A at all :/', 'good'], - ['\u2039quote\u203A', 'quote'], - ['\u2039a quote here\u203A', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+2035 to U+2032', function (t) { - [ - ['he is \u2035really good\u2032', 'really good'], - ['he is \u2035really good\u2032 i guess', 'really good'], - ['he is not \u2035good\u2032 at all :/', 'good'], - ['\u2035quote\u2032', 'quote'], - ['\u2035a quote here\u2032', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+2036 to U+2033', function (t) { - [ - 
['he is \u2036really good\u2033', 'really good'], - ['he is \u2036really good\u2033 i guess', 'really good'], - ['he is not \u2036good\u2033 at all :/', 'good'], - ['\u2036quote\u2033', 'quote'], - ['\u2036a quote here\u2033', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+2037 to U+2034', function (t) { - [ - ['he is \u2037really good\u2034', 'really good'], - ['he is \u2037really good\u2034 i guess', 'really good'], - ['he is not \u2037good\u2034 at all :/', 'good'], - ['\u2037quote\u2034', 'quote'], - ['\u2037a quote here\u2034', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+301D to U+301E', function (t) { - [ - ['he is \u301Dreally good\u301E', 'really good'], - ['he is \u301Dreally good\u301E i guess', 'really good'], - ['he is not \u301Dgood\u301E at all :/', 'good'], - ['\u301Dquote\u301E', 'quote'], - ['\u301Da quote here\u301E', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+0060 to U+00B4', function (t) { - [ - ['he is \u0060really good\u00B4', 'really good'], - ['he is \u0060really good\u00B4 i guess', 'really good'], - ['he is not \u0060good\u00B4 at all :/', 'good'], - ['\u0060quote\u00B4', 'quote'], - ['\u0060a quote here\u00B4', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); - -test('Quotations - U+301F to U+301E', function (t) { - [ - ['he is \u301Freally good\u301E', 'really good'], - ['he is \u301Freally good\u301E i guess', 'really good'], - ['he is not \u301Fgood\u301E at all :/', 'good'], - ['\u301Fquote\u301E', 'quote'], - ['\u301Fa quote here\u301E', 'a quote here'] - ].forEach(a => testAllQuotes(a, t)); - t.end(); -}); diff --git a/test/unit/subset/quotations/quotations.test.js b/test/unit/subset/quotations/quotations.test.js deleted file mode 100644 index 400d0cd95..000000000 --- a/test/unit/subset/quotations/quotations.test.js +++ /dev/null @@ -1,73 +0,0 @@ -var test = 
require('tape'); -var nlp = require('../../lib/nlp'); - -test('quotation test', function(t) { - var arr = [ - ['so I said "nah forget it"', 'nah forget it'], - ['so I said "nah, forget it" go home to bel-air!', 'nah forget it'], - ['so I said \'nah, forget it\' go home to bel-air!', 'nah forget it'], - ['so I said "nah" go home to bel-air!', 'nah'], - ['so \'as if\' i said', 'as if'], - ['the \'truthiness\' i said', 'truthiness'], - ['yeah, “fun” and stuff', 'fun'], - ['“Fun” and stuff', 'fun'], - //dangling start/end - ['\'twas good cookin', ''], - ['twas good cookin\'', ''], - ['twas \'good cookin\'', 'good cookin'], - ['\'twas \'good cookin\'', 'twas good cookin'], - [`and "Dig Your own grave and Save".`, 'dig your own grave and save'], - ]; - arr.forEach(function(a) { - var r = nlp(a[0]); - var str = r.quotations().out('normal'); - var msg = a[0] + ' - ' + str; - t.equal(str, a[1], msg); - }); - t.end(); -}); - -test('multiple quotation test', function(t) { - var arr = [ - [`My "String" "with many" adjacent "nested" 'quotes'`, - [ - 'string', - 'with many', - 'nested', - 'quotes' - ] - ], - [`My "String 'with manys' adjacent" "nested" 'quotes'`, - [ - 'string with manys adjacent', - 'nested', - 'quotes' - ] - ], - [`"May's" 'third day' 'will be a "really cold" day' "in a" 'really cold "month"'`, - [ - 'may\'s', - 'third day', - 'will be a really cold day', - 'in a', - 'really cold month', - ] - ], - ]; - arr.forEach(function(a) { - var r = nlp(a[0]); - var str = r.quotations().out('array'); - var msg = a[0] + ' - ' + str; - t.deepEqual(str, a[1], msg); - }); - t.end(); -}); - -test('false-positives', function(t) { - var txt = `Probably the renovation right away from the amount of work, which has been done to the property. - I have one two, three, four five six properties, which came on the market in the month. 
- I think that the number one quite comfortable looking at the two properties, which I'm working on now.`; - var questions = nlp(txt).sentences().isQuestion().out('array'); - t.equal(questions.length, 0, 'no questions here'); - t.end(); -}); diff --git a/test/unit/subset/random.test.js b/test/unit/subset/random.test.js deleted file mode 100644 index 201c25f12..000000000 --- a/test/unit/subset/random.test.js +++ /dev/null @@ -1,41 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('random', function(t) { - var r = nlp('one two three four five six'); - var arr = r.terms().random().out('array'); - t.equal(arr.length, 1, 'default is size 1'); - - arr = r.terms().random(2).out('array'); - t.equal(arr.length, 2, 'size 2'); - - arr = r.terms().random(3).out('array'); - t.equal(arr.length, 3, 'size 3'); - - arr = r.terms().random(4).out('array'); - t.equal(arr.length, 4, 'size 4'); - - arr = r.terms().random(5).out('array'); - t.equal(arr.length, 5, 'size 5'); - - arr = r.terms().random(6).out('array'); - t.equal(arr.length, 6, 'size 6'); - - arr = r.terms().random(7).out('array'); - t.equal(arr.length, 6, '7 is too big'); - - arr = r.terms().random(17).out('array'); - t.equal(arr.length, 6, '17 is too big'); - - t.end(); -}); - -test('random-null', function(t) { - var r = nlp('toronto'); - var arr = r.people().random(5).out('array'); - t.equal(arr.length, 0, 'random can be empty'); - - arr = r.places().random(5).out('array'); - t.equal(arr.length, 1, 'random can be full-match'); - t.end(); -}); diff --git a/test/unit/subset/reference.ignore.js b/test/unit/subset/reference.ignore.js deleted file mode 100644 index ffa42a74d..000000000 --- a/test/unit/subset/reference.ignore.js +++ /dev/null @@ -1,61 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('reference-test', function(t) { - var orig = nlp('is'); - var subset = orig.terms().replace('is', 'was'); - t.equal(orig.out(), 'was', 'terms-original'); - 
t.equal(subset.out(), 'was', 'terms-subset'); - - orig = nlp('is'); - subset = orig.verbs().replace('is', 'was'); - t.equal(orig.out(), 'was', 'verbs-original'); - t.equal(subset.out(), 'was', 'verbs-subset'); - - orig = nlp('is'); - subset = orig.sentences().replace('is', 'was'); - t.equal(orig.out(), 'was', 'sentences-original'); - t.equal(subset.out(), 'was', 'sentences-subset'); - - orig = nlp('old'); - subset = orig.adjectives().replace('old', 'new'); - t.equal(orig.out(), 'new', 'adjectives-original'); - t.equal(subset.out(), 'new', 'adjectives-subset'); - - orig = nlp('baby'); - subset = orig.nouns().replace('baby', 'adult'); - t.equal(orig.out(), 'adult', 'nouns-original'); - t.equal(subset.out(), 'adult', 'nouns-subset'); - - orig = nlp('9'); - subset = orig.values().replace('9', '5'); - t.equal(orig.out(), '5', 'values-original'); - t.equal(subset.out(), '5', 'values-subset'); - - orig = nlp('toronto'); - subset = orig.topics().replace('toronto', 'vancouver'); - t.equal(orig.out(), 'vancouver', 'topics-original'); - t.equal(subset.out(), 'vancouver', 'topics-subset'); - - t.end(); -}); - -test('.all()-multiple', function(t) { - var orig = nlp('i am from new jersey'); - var sub1 = orig.match('new jersey'); - var sub2 = sub1.match('jersey'); - var sub3 = sub2.replace('jersey', 'york'); - - //check doc.out() - t.equal(orig.out('normal'), 'i am from new york', 'original-text'); - t.equal(sub1.out('normal'), 'new york', 'sub1-text'); - t.equal(sub2.out('normal'), 'york', 'sub2-text'); - t.equal(sub3.out('normal'), 'york', 'sub3-text'); - - //call .all().out() - t.equal(orig.all().out('normal'), 'i am from new york', 'orig-all'); - t.equal(sub1.all().out('normal'), 'i am from new york', 'sub1-all'); - t.equal(sub2.all().out('normal'), 'i am from new york', 'sub2-all'); - t.equal(sub3.all().out('normal'), 'i am from new york', 'sub3-all'); - t.end(); -}); diff --git a/test/unit/subset/reservedwords.test.js b/test/unit/subset/reservedwords.test.js deleted file 
mode 100644 index 367984d7e..000000000 --- a/test/unit/subset/reservedwords.test.js +++ /dev/null @@ -1,127 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('reserved words:', function(t) { - var reserved = [ - 'abstract', - 'boolean', - 'break', - 'byte', - 'case', - 'catch', - 'char', - 'class', - 'const', - 'constructor', - 'continue', - 'debugger', - 'default', - 'delete', - 'do', - 'double', - 'else', - 'enum', - 'export', - 'extends', - 'false', - 'final', - 'finally', - 'float', - 'for', - 'function', - 'goto', - 'if', - 'implements', - 'import', - 'in', - 'instanceof', - 'int', - 'interface', - 'let', - 'long', - 'native', - 'new', - 'null', - 'package', - 'private', - 'protected', - 'prototype', - 'public', - 'return', - 'short', - 'static', - 'super', - 'switch', - 'synchronized', - 'this', - 'throw', - 'throws', - 'transient', - 'true', - 'try', - 'typeof', - 'var', - 'void', - 'volatile', - 'while', - 'with', - 'yeild', - '__prototype__', - '&&', - '||', - '|', - "'", - '&', - 'Math.PI', - 12e34, - '#§$%', - 'π', - 'привет', - // 'hasOwnProperty', - 'café', - '$$$', - 1e2, - '{}', - '[]', - 'constructor', - 'prototype', - ')&@)^', - ' -@%@', - '-constructor', - '#!^@#$', - '..(' - ]; - var str = reserved.join(' '); - var r = nlp(str); - t.equal(r.out('text'), str, 'reserved-words-are-printed'); - t.equal(r.terms().length, reserved.length, 'reserved-length'); - t.ok(r.verbs().data(), 'runs verb subset'); - t.ok(r.values().data(), 'runs value subset'); - t.ok(r.nouns().data(), 'runs noun subset'); - t.ok(r.ngrams().data(), 'runs ngrams subset'); - t.ok(r.people().data(), 'runs people subset'); - t.ok(r.places().data(), 'runs places subset'); - t.ok(r.adjectives().data(), 'runs adjectives subset'); - t.ok(r.sentences().data(), 'runs sentences subset'); - t.ok(r.dates().data(), 'runs dates subset'); - t.ok(r.contractions().data(), 'runs contractions subset'); - t.ok(r.terms().data(), 'runs terms subset'); - t.end(); -}); - 
-test('co-erce reserved words', function(t) { - var r = nlp('constructor prototype'); - r.tag('Verb'); - t.ok(r.verbs().data(), 'runs verb subset'); - r.tag('Adjective'); - t.ok(r.adjectives().data(), 'runs adjective subset'); - r.tag('Value'); - t.ok(r.values().data(), 'runs values subset'); - r.tag('Person'); - t.ok(r.people().data(), 'runs values subset'); - r.tag('Noun'); - t.ok(r.nouns().data(), 'runs values subset'); - r.tag('Place'); - t.ok(r.places().data(), 'runs place subset'); - t.end(); -}); diff --git a/test/unit/subset/sentence/inflect.ignore.js b/test/unit/subset/sentence/inflect.ignore.js deleted file mode 100644 index 1e2ffbdae..000000000 --- a/test/unit/subset/sentence/inflect.ignore.js +++ /dev/null @@ -1,44 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('toPlural():', function(t) { - [ - ['cranberry', 'cranberries'], - ['a cranberry', 'the cranberries'], - ['a red cranberry', 'the red cranberries'], - ['mayor of chicago', 'mayors of chicago'], - ['chicago mayor', 'chicago mayors'] - ].forEach(function(a) { - var str = nlp(a[0]).sentences().toPlural().out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('toSingular:', function(t) { - var str = 'i\'d buy those nachos'; - var m = nlp(str).sentences().toSingular(); - t.equal(m.out('normal'), 'i\'d buy that nacho', str); - - str = 'i\'d buy these nachos'; - m = nlp(str).sentences().toSingular(); - t.equal(m.out('normal'), 'i\'d buy this nacho', str); - - str = 'i\'d buy nachos'; - m = nlp(str).sentences().toSingular(); - t.equal(m.out('normal'), 'i\'d buy a nacho', str); - - str = 'i\'d buy the nachos'; - m = nlp(str).sentences().toSingular(); - t.equal(m.out('normal'), 'i\'d buy a nacho', str); - - str = 'i\'d buy the eggs'; - m = nlp(str).sentences().toSingular(); - t.equal(m.out('normal'), 'i\'d buy an egg', str); - - str = 'men go'; - m = 
nlp(str).sentences().toPast().nouns().toSingular(); - t.equal(m.out('normal'), 'a man went', str); - t.end(); -}); diff --git a/test/unit/subset/sentence/insert.test.js b/test/unit/subset/sentence/insert.test.js deleted file mode 100644 index 67a3f12c5..000000000 --- a/test/unit/subset/sentence/insert.test.js +++ /dev/null @@ -1,38 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('prepend:', function(t) { - var r = nlp('john is nice. he lives in SOHO.'); - r.sentences().prepend('so'); - t.equal(r.out('text'), 'so john is nice. so he lives in SOHO.', 'prepend-without-capital'); - - r = nlp('It is nice. He lives in SOHO.'); - r.sentences().prepend('so'); - t.equal(r.out('text'), 'So it is nice. So he lives in SOHO.', 'prepend-with-capital'); - - r = nlp('John is nice. FBI are in SOHO.'); - r.sentences().prepend('so'); - t.equal(r.out('text'), 'So John is nice. So FBI are in SOHO.', 'prepend-with-persistent-capital'); - - r = nlp('It is nice. He lives in SOHO.'); - r.sentences().prepend('believe me'); - t.equal(r.out('text'), 'Believe me it is nice. Believe me he lives in SOHO.', 'multiple-word-prepend'); - - t.end(); -}); - -test('append:', function(t) { - var r = nlp('john is nice. he lives in SOHO'); - r.sentences().append('not'); - t.equal(r.out('text'), 'john is nice not. he lives in SOHO not', 'append-with-without-period'); - - r = nlp('It is nice! He lives in SOHO? I don\'t know...'); - r.sentences().append('dawg'); - t.equal(r.out('text'), 'It is nice dawg! He lives in SOHO dawg? I don\'t know dawg...', 'append-with-exclamations'); - - r = nlp('It is nice. He lives in SOHO.'); - r.sentences().append('believe me'); - t.equal(r.out('text'), 'It is nice believe me. 
He lives in SOHO believe me.', 'multiple-word-append'); - - t.end(); -}); diff --git a/test/unit/subset/sentence/misc.test.js b/test/unit/subset/sentence/misc.test.js deleted file mode 100644 index 0fb351185..000000000 --- a/test/unit/subset/sentence/misc.test.js +++ /dev/null @@ -1,13 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('setPunctuation:', function(t) { - var r = nlp('john is nice. he lives in SOHO.'); - r.sentences(1).toExclamation(); - t.equal(r.out(), 'john is nice. he lives in SOHO!', 'toexclamation-change'); - - r = nlp('john is nice. he lives in SOHO'); - r.sentences(1).toExclamation(); - t.equal(r.out(), 'john is nice. he lives in SOHO!', 'toexclamation-insert'); - t.end(); -}); diff --git a/test/unit/subset/sentence/question.test.js b/test/unit/subset/sentence/question.test.js deleted file mode 100644 index d930e5f66..000000000 --- a/test/unit/subset/sentence/question.test.js +++ /dev/null @@ -1,64 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('question-test :', function (t) { - [ - 'which party was it again?', - 'so then which day was it?', - 'he\'s fun?', - 'but who did you really go with??', - 'go where?!', - 'go with who!?', - 'then you said ... ?', - - //no question-mark.. 
- 'which party was it', - 'which day was it', - // 'but who did you go with', - 'what time did you show up', - `why'd you come so early`, - `when'll you show up`, - `where'd you go afterwards`, - `is it fun`, - `was it fun`, - `did you think it was fun`, - `so, is it fun`, - `so, where'd you go`, - `if you said so, why are you upset`, - `does wayne gretsky skate`, - `would you go to france`, - `can we go to france`, - `can you explain`, - `won't they go to france`, - `wouldn't they go to france`, - `can Jamaican nationals go to france`, - `Anyway the wind blows, what doesn't really matter to me`, - `is wayne gretskzy alive`, - `Do I need no sympathy`, - // `I'm what a poor boy` - ].forEach(function (str) { - var doc = nlp(str); - t.equal(doc.questions().length, 1, str); - t.equal(doc.statements().length, 0, ' -- 0-statements'); - }); - t.end(); -}); - -test('statements-test :', function (t) { - [ - 'the thing is, it isn\'t cool', - 'does the orchestra respond, no.', - 'where there is smoke, there\'s fire', - 'does the orchestra ever suck!', - 'when i go fishing i eat plankton', - 'let me run get a price take about 5-10 minutes to process and then I\'ll send you what I got.', - 'how he got out of his cage, i don\'t know', - 'who else but the rolling stones...', - 'what i\'d like to say is, thanks for the fish', - ].forEach(function (str) { - var doc = nlp(str); - t.equal(doc.statements().length, 1, str); - t.equal(doc.questions().length, 0, ' -- 0-questions'); - }); - t.end(); -}); diff --git a/test/unit/subset/sentence/tense.test.js b/test/unit/subset/sentence/tense.test.js deleted file mode 100644 index 86f726856..000000000 --- a/test/unit/subset/sentence/tense.test.js +++ /dev/null @@ -1,207 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('sentence-change-tense:', function(t) { - [ - ['john walks quickly', 'john walked quickly', 'john will walk quickly'], - ['he is quick', 'he was quick', 'he will be quick'], - ['the stool falls 
over', 'the stool fell over', 'the stool will fall over'], - ['i usually take the stairs', 'i usually took the stairs', 'i usually will take the stairs'], - ['i usually use the stairs', 'i usually used the stairs', 'i usually will use the stairs'], - ['he finishes first', 'he finished first', 'he will finish first'], - ['our house looks great', 'our house looked great', 'our house will look great'], - //infinitives - // ['he does what he can to stop', 'he did what he could to stop', 'he will do what he can to stop'], - ['goes to sleep', 'went to sleep', 'will go to sleep'], - - //passive - // ['cardboard is made of tree fibre', 'cardboard was made of tree fibre', 'cardboard will be made of tree fibre'], - - //grammatical-number - // ['we do what we can to stop', 'we did what we could to stop', 'we will do what we can to stop'], - - //multi-sentence - [ - 'this is one sentence. This makes two now.', - 'this was one sentence. This made two now.', - 'this will be one sentence. This will make two now.' - ] - - //support negative - // ['this isn\'t one sentence. This doesn\'t make two now.', 'this was not one sentence. This didn\'t make two now.', 'this won\'t be one sentence. 
This won\'t make two now.'] - ].forEach(function(a) { - var r = nlp(a[0]).sentences(); - - r.toPastTense(); - var str = r.out('text'); - t.equal(str, a[1], 'pastTense-' + str); - - r.toFutureTense(); - str = r.out('text'); - t.equal(str, a[2], 'futureTense-' + str); - - r.toPresentTense(); - str = r.out('text'); - t.equal(str, a[0], 'presentTense-' + str); - }); - t.end(); -}); - -test('copula-form', function(t) { - var m = nlp('john is nice').sentences(); - - m.toPastTense(); - t.equal(m.out(), 'john was nice', 'toPast-1'); - - m.toPresentTense(); - t.equal(m.out(), 'john is nice', 'toPres-1'); - - m.toFutureTense(); - t.equal(m.out(), 'john will be nice', 'toFuture-1'); - - m.toNegative(); - t.equal(m.out(), 'john will not be nice', 'toNeg-future'); - - //negative forms - m.toPastTense(); - t.equal(m.out(), 'john was not nice', 'toPast-neg'); - - m.toPresentTense(); - t.equal(m.out(), 'john is not nice', 'toPres-neg'); - - m.toFutureTense(); - t.equal(m.out(), 'john will not be nice', 'toFuture-neg'); - - t.end(); -}); -// // -test('conjugate-form', function(t) { - var m = nlp('john walks quickly').sentences(); - - m.toPastTense(); - t.equal(m.out(), 'john walked quickly', 'toPast-1'); - - m.toPresentTense(); - t.equal(m.out(), 'john walks quickly', 'toPres-1'); - - m.toFutureTense(); - t.equal(m.out(), 'john will walk quickly', 'toFuture-1'); - - m.toNegative(); - t.equal(m.out(), 'john will not walk quickly', 'toNeg'); - - //negative forms - m.toPastTense(); - t.equal(m.out(), 'john did not walk quickly', 'toPast-neg'); - - m.toPresentTense(); - t.equal(m.out(), 'john does not walk quickly', 'toPres-neg'); - - m.toFutureTense(); - t.equal(m.out(), 'john will not walk quickly', 'toFuture-neg'); - - t.end(); -}); - -test('particle-form', function(t) { - var m = nlp('the stool falls over').sentences(); - - m.toPastTense(); - t.equal(m.out(), 'the stool fell over', 'toPast-1'); - - m.toPresentTense(); - t.equal(m.out(), 'the stool falls over', 'toPres-1'); - - 
m.toFutureTense(); - t.equal(m.out(), 'the stool will fall over', 'toFuture-1'); - - m.toNegative(); - t.equal(m.out(), 'the stool will not fall over', 'toNeg'); - - //negative forms - m.toPastTense(); - t.equal(m.out(), 'the stool did not fall over', 'toPast-neg'); - - m.toPresentTense(); - t.equal(m.out(), 'the stool does not fall over', 'toPres-neg'); - - m.toFutureTense(); - t.equal(m.out(), 'the stool will not fall over', 'toFuture-neg'); - - t.end(); -}); - -test('contraction-cases', function(t) { - var arr = [[`I'm going to the shops`, `I went to the shops`], [`I'll go to the shops`, `I went to the shops`]]; - arr.forEach(a => { - var str = nlp(a[0]).sentences().toPastTense().out(); - t.equal(str, a[1], 'past-tense ' + a.join(' - ')); - }); - arr = [[`We're looking`, `We looked`], [`We'll look`, `We looked`], [`We are looking`, `We looked`]]; - arr.forEach(a => { - var str = nlp(a[0]).sentences().toPastTense().out(); - t.equal(str, a[1], 'past-tense ' + a.join(' - ')); - }); - arr = [[`I'm going to the shops`, `I will go to the shops`], [`I'll go to the shops`, `I will go to the shops`]]; - arr.forEach(a => { - var str = nlp(a[0]).sentences().toFutureTense().out(); - t.equal(str, a[1], 'future-tense ' + a.join(' - ')); - }); - arr = [[`I'm going to the shops`, `I go to the shops`], [`I'll go to the shops`, `I go to the shops`]]; - arr.forEach(a => { - var str = nlp(a[0]).sentences().toPresentTense().out(); - t.equal(str, a[1], 'present-tense ' + a.join(' - ')); - }); - arr = [[`I'm looking for a bug`, `I look for a bug`], [`I'll look for a bug`, `I look for a bug`]]; - arr.forEach(a => { - var str = nlp(a[0]).sentences().toPresentTense().out(); - t.equal(str, a[1], 'present-tense ' + a.join(' - ')); - }); - var str = nlp('I’m lookin’ for Amanda Hugginkiss').sentences().toPastTense().out(); - t.equal(str, 'i looked for Amanda Hugginkiss', 'present-tense slang'); - t.end(); -}); - - -test('pronoun-specific', function(t) { - //from present - var m = nlp('i am 
cool').sentences().toPresentTense(); - t.equal(m.out(), 'i am cool', 'toPresent-I'); - m = nlp('i am cool').sentences().toPastTense(); - t.equal(m.out(), 'i was cool', 'toPastTense-I'); - m = nlp('i am cool').sentences().toFutureTense(); - t.equal(m.out(), 'i will be cool', 'toFutureTense-I'); - - //from future - m = nlp('i will be cool').sentences().toFutureTense(); - t.equal(m.out(), 'i will be cool', 'toFutureTense-I-2'); - m = nlp('i will be cool').sentences().toPastTense(); - t.equal(m.out(), 'i was cool', 'toPastTense-I-2'); - m = nlp('i will be cool').sentences().toPresentTense(); - t.equal(m.out(), 'i am cool', 'toPresentTense-I-2'); - - //from past - m = nlp('i was cool').sentences().toPresentTense(); - t.equal(m.out(), 'i am cool', 'toPresentTense-I-3'); - m = nlp('i was cool').sentences().toPastTense(); - t.equal(m.out(), 'i was cool', 'toPastTense-I-3'); - m = nlp('i was cool').sentences().toFutureTense(); - t.equal(m.out(), 'i will be cool', 'toFutureTense-I-3'); - - //with negative - m = nlp('i was not cool').sentences().toPresentTense(); - t.equal(m.out(), 'i am not cool', 'neg-1'); - m = nlp('i wasn\'t cool').sentences().toPastTense(); - t.equal(m.out(), 'i was not cool', 'neg-2'); - m = nlp('i was not cool').sentences().toFutureTense(); - t.equal(m.out(), 'i will not be cool', 'neg-3'); - - //with adverbs - m = nlp('i was really cool').sentences().toPresentTense(); - t.equal(m.out(), 'i am really cool', 'toPresentTense-I-3'); - m = nlp('i was really cool').sentences().toPastTense(); - t.equal(m.out(), 'i was really cool', 'toPastTense-I-3'); - m = nlp('i was really cool').sentences().toFutureTense(); - t.equal(m.out(), 'i will be really cool', 'toFutureTense-I-3'); - t.end(); -}); diff --git a/test/unit/subset/sentence/toContinuous.test.js b/test/unit/subset/sentence/toContinuous.test.js deleted file mode 100644 index 5dd97ece9..000000000 --- a/test/unit/subset/sentence/toContinuous.test.js +++ /dev/null @@ -1,21 +0,0 @@ -var test = 
require('tape'); -var nlp = require('../../lib/nlp'); - -test('sentence-to-gerund:', function(t) { - [ - ['john walked', 'john is walking'], - // ['i should win', 'i should be winning'],//support (some) modals? - ['spencer will convert', 'spencer is converting'], - ['everyone will see', 'everyone is seeing'], - ['he is cool', 'he is being cool'], - ['he was good', 'he is being good'], - ['i am fun', 'i am being fun'], - ['i am not fun', 'i am not being fun'], - ['we will sing', 'we are singing'], - ['we really will sing', 'we are really singing'], - ].forEach(function(a) { - var str = nlp(a[0]).sentences().toContinuous().out('normal'); - t.equal(str, a[1], str + ' -> ' + a[1]); - }); - t.end(); -}); diff --git a/test/unit/subset/sentence/toNegative.test.js b/test/unit/subset/sentence/toNegative.test.js deleted file mode 100644 index 7d8237a6d..000000000 --- a/test/unit/subset/sentence/toNegative.test.js +++ /dev/null @@ -1,94 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('==negation==', function(T) { - T.test('negate:', function(t) { - [ - //copula-sentences - [`john is good`, `john is not good`], - [`they are good`, `they are not good`], - [`they will be good`, `they will not be good`], - [`they will really be good`, `they will not really be good`], - //different verb tenses - [`he walks`, `he does not walk`], - [`he will walk`, `he will not walk`], - [`he walked`, `he did not walk`], - [`he has walked`, `he has not walked`], - [`he will have walked`, `he will not have walked`], - [`he is walking`, `he is not walking`], - //add adverbs - [`he really walks`, `he really does not walk`], - [`he will really walk`, `he will not really walk`], - [`he really walked`, `he really did not walk`], - [`he has really walked`, `he has not really walked`], - [`he will have really walked`, `he will not have really walked`], - [`he is really walking`, `he is not really walking`], - //plural noun 
- [`they walk`, `they do not walk`], - //pronoun + infinitive - [`i like running`, `i do not like running`], - [`they swim`, `they do not swim`], - [`we enjoy playing`, `we do not enjoy playing`], - [`we swim`, `we do not swim`], - [`we do swim`, `we do not swim`], - [`i do care`, `i do not care`], - [`they do care`, `they do not care`], - - //does not, is not, are not, etc. - [`apples are bad`, `apples are not bad`], - [`he does like it`, `he does not like it`], - [`have died yet`, `have not died yet`], - //logical negations - ['john always walks', 'john never walks'], - ['john always walks quickly', 'john never walks quickly'], - ['everybody walks quickly', 'nobody walks quickly'], - - [`has played`, `has not played`], - [`he has played`, `he has not played`], - [`spencer is playing`, `spencer is not playing`], - [`he will play`, `he will not play`], - [`he will be playing`, `he will not be playing`], - [`he had played`, `he had not played`], - [`he plays`, `he does not play`], - [`he played`, `he did not play`], - [`he walked`, `he did not walk`] - // [`he quietly walked`, `he did not quietly walk`], - // [`he quietly walks`, `he does not quietly walk`], - // [`we quietly walked`, `we do not quietly walk`], - // [`we quietly walks`, `we do not quietly walk`] - ].forEach(function(a) { - var str = nlp(a[0]).sentences().toNegative().out('text'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); - - T.test('sentence un-negate:', function(t) { - [ - //copula-sentences - [`john is not good`, `john is good`], - [`they are not good`, `they are good`], - [`they will not be good`, `they will be good`], - //different verb tenses - [`he does not walk`, `he does walk`], - [`he did not walk`, `he did walk`], - [`he is not walking`, `he is walking`], - [`he has not been walking`, `he has been walking`], - [`he did not walk`, `he did walk`], - [`he does not walk`, `he does walk`], - - [`he has not walked`, `he has walked`], - [`he will not have walked`, `he will have 
walked`], - [`he is not walking`, `he is walking`], - // //logical negations - ['john never walks', 'john always walks'], - ['john never walks quickly', 'john always walks quickly'] - // ['everybody walks quickly', 'nobody walks quickly'], - ].forEach(function(a) { - var str = nlp(a[0]).sentences().toPositive().out('text'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); -}); diff --git a/test/unit/subset/sentence/tokenize.test.js b/test/unit/subset/sentence/tokenize.test.js deleted file mode 100644 index f6c105e12..000000000 --- a/test/unit/subset/sentence/tokenize.test.js +++ /dev/null @@ -1,76 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('sentence tokenize:', function(t) { - [ - ['Tony is nice. He lives in Japan.', 2], - ['I like that Color', 1], - [ - 'Hi there Dr. Joe, the price is 4.59 for N.A.S.A. Ph.Ds. I hope that\'s fine, etc. and you can attend Feb. 8th. Bye', - 3 - ], - ['Soviet bonds to be sold in the U.S. market. Everyone wins.', 2], - ['Hi there! Everyone wins!', 2], - ['Hi there!!! Everyone wins.', 2], - ['Hi there\u203C Everyone wins\u203C', 2], - ['Hi there\u203C Everyone wins.', 2], - ['he bought Yahoo! the company.', 1], - ['he is ill', 1], - ['he is ill.', 1], - ['he is ill\u2047', 1], - ['she is fine. he is ill.', 2], - ['she is fine. he is ill', 2], - ['lkajsdflkjeicclksdfjefifh', 1], - ['i think it is good ie. fantastic.', 1], - ['i think it is good i.e. fantastic.', 1], - ['You did what\u2048', 1], - ['You did what\u2048 How could you\u2049', 2], - ['i think it is good or else.', 1], - ['i think it is good… or else.', 1], - ['i think it is good… ', 1], - ['i think it is good ... or else.', 1], - ['i think it is good ... ', 1], - ['What\'s my age again? What\'s my age again?', 2], - ['the problem, eg. the javascript', 1], - ['Dr. Tony is nice. He lives on Elm St. in Vancouver BC. Canada', 2], - ['I made $5.60 today in 1 hour of work. 
The E.M.T.\'s were on time, but only barely.', 2], - - ['In some notations, A or B is shown as A|B. In others A or B is shown as A||B.', 2], - ['hello. 1234. ëėö.', 3], - - // Linux EOL - ['Hi there.\nEveryone wins.', 2], - ['Hi there!\n\nEveryone wins.', 2], - ['Hi there\nEveryone wins', 2], - ['Hi there.\n Everyone wins', 2], - ['Hi there!!\nEveryone wins\n\n', 2], - - // Mac EOL - ['Hi there.\rEveryone wins.', 2], - ['Hi there!\r\rEveryone wins.', 2], - ['Hi there\rEveryone wins', 2], - ['Hi there.\r Everyone wins', 2], - ['Hi there!!\rEveryone wins\r\r', 2], - - // Windows EOL - ['Hi there.\r\nEveryone wins.', 2], - ['Hi there!\r\n\r\nEveryone wins.', 2], - ['Hi there\r\nEveryone wins', 2], - ['Hi there.\r\n Everyone wins', 2], - ['Hi there!!\r\nEveryone wins\r\n\r\n', 2] - ].forEach(function(a) { - var num = nlp(a[0]).list.length; - var msg = '"' + a[0] + '" -> ' + num; - t.equal(num, a[1], msg); - }); - t.end(); -}); - -test('fancy tokenize:', function(t) { - var doc = nlp('boris becker ?? he is nice.'); - t.equal(doc.sentences().length, 2, 'sentence-split'); - - // doc = nlp('Is Trump the president of U.S. ? 
i guess so') - // t.equal(doc.sentences().length, 2, 'sentence-split-2') - t.end(); -}); diff --git a/test/unit/subset/sentence/whitespace.test.js b/test/unit/subset/sentence/whitespace.test.js deleted file mode 100644 index 495f33590..000000000 --- a/test/unit/subset/sentence/whitespace.test.js +++ /dev/null @@ -1,75 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('=Whitespace=', function(T) { - T.test('preserve whitespace:', function(t) { - [ - 'John Smith', - 'John Smith', - 'John Smith ', - 'John Smith ', - ' John', - ' John Smith ', - //no joins - 'he is nice', - 'he is nice', - 'he is nice', - 'he is nice ', - ' he is nice ', - //contractions - `he isn't nice`, - `he isn't nice`, - `he isn't nice`, - `he isn't nice `, - ` he isn't nice `, - //multiples - 'it is ipso facto', - 'it is ipso facto ', - 'it is ipso facto ', - 'it is ipso facto ', - '2nd of march, 2015' - ].forEach(function(a) { - var str = nlp(a).out('text'); - str_test(str, a, a, t); - }); - t.end(); - }); - - T.test('inter-sentence whitespace:', function(t) { - [ - 'John Smith is nice.', - ' John Smith is nice.', - ' John Smith is nice. ', - 'John Smith is nice. He lives in Spain.', - 'John Smith is nice. He lives in Spain.', - 'John Smith is nice. He lives in Spain. ', - ' John Smith is nice. He lives in Spain. ', - 'Dr. Smith is nice. He lives in Spain. ', - ' Dr. Smith is nice. He lives in Spain. ', - 'Dr. Smith is nice? He lives in Spain. ', - ' Dr. Smith is nice? He lives in Spain? ', - ' Dr. Smith is nice? He lives in UCLA? He does? ', - ' Dr. Smith is nice? He lives in Spain? He does?? 
' - ].forEach(function(a) { - var str = nlp(a).out('text'); - str_test(str, a, a, t); - }); - t.end(); - }); - - T.test('contraction whitespace:', function(t) { - [ - ['John\'s nice.', 'John is nice.'], - ['John Smith\'s nice.', 'John Smith is nice.'], - ['John isn\'t nice.', 'John is not nice.'], - ['John didn\'t go.', 'John did not go.'], - ['I wanna go.', 'I want to go.'], - ['they\'ve gone.', 'they have gone.'] - ].forEach(function(a) { - var str = nlp(a[0]).contractions().expand().all().out('text'); - str_test(str, a[0], a[1], t); - }); - t.end(); - }); -}); diff --git a/test/unit/subset/subset.test.js b/test/unit/subset/subset.test.js deleted file mode 100644 index 98277c866..000000000 --- a/test/unit/subset/subset.test.js +++ /dev/null @@ -1,62 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -var mustBe = function(arr) { - return arr.map(function(t) { - return t.normal; - }); -}; - -test('clauses', function(t) { - var m = nlp('he is nice - which is cool... but whatever'); - var have = mustBe(m.clauses().data()); - var want = ['he is nice', 'which is cool', 'but whatever']; - var msg = have.join(' -- '); - t.deepEqual(have, want, msg); - - m = nlp('he is nice. 
If so, then good'); - have = mustBe(m.clauses().data()); - want = ['he is nice', 'if so', 'then good']; - msg = have.join(' -- '); - t.deepEqual(have, want, msg); - - t.end(); -}); - -test('adjectives', function(t) { - var m = nlp('he is nice, cool and very fun'); - var have = mustBe(m.adjectives().data()); - var want = ['nice', 'cool', 'fun']; - var msg = have.join(' -- '); - t.deepEqual(have, want, msg); - - t.end(); -}); - -test('quotations', function(t) { - var have = nlp('My "String" "with many" adjacent "nested" \'quotes\'').quotations().out('array'); - var want = ['string', 'with many', 'nested', 'quotes']; - t.deepEqual(have, want, 'consecutive quoations'); - t.end(); -}); - -test('parentheses', function(t) { - var have = nlp('Use a pointed stick (a pencil) or congealed petroleum (an eraser) or a similar tool').parentheses().out('array'); - var want = ['a pencil', 'an eraser']; - t.deepEqual(have, want, 'two parentheses'); - - have = nlp('born in Canada (Toronto), Drake (Aubrey Graham) became a hit (a success)').parentheses().out('array'); - want = ['toronto', 'aubrey graham', 'a success']; - t.deepEqual(have, want, 'different-sized parentheses'); - t.end(); -}); - -test('contractions-subset', function(t) { - var m = nlp('he\'s nice. 
She could\'ve seen.'); - var have = mustBe(m.contractions().data()); - var want = ['he\'s', 'could\'ve']; - var msg = have.join(' -- '); - t.deepEqual(have, want, msg); - - t.end(); -}); diff --git a/test/unit/subset/terms/bestTag.js b/test/unit/subset/terms/bestTag.js deleted file mode 100644 index 29c7e76ec..000000000 --- a/test/unit/subset/terms/bestTag.js +++ /dev/null @@ -1,33 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('bestTag', function(t) { - var str = 'john smith was really working'; - var m = nlp(str); - var have = m.terms().data().map(function(o) { - return o.bestTag; - }); - var want = ['MaleName', 'LastName', 'Copula', 'Adverb', 'Gerund']; - var msg = str + ' - [' + have.join(', ') + ']'; - t.deepEqual(have, want, msg); - - str = 'he sang on June 5th 1991'; - m = nlp(str); - have = m.terms().data().map(function(o) { - return o.bestTag; - }); - want = ['Pronoun', 'PastTense', 'Preposition', 'Month', 'Ordinal', 'Year']; - msg = str + ' - [' + have.join(', ') + ']'; - t.deepEqual(have, want, msg); - - str = 'fastest shooter in Canada'; - m = nlp(str); - have = m.terms().data().map(function(o) { - return o.bestTag; - }); - want = ['Superlative', 'Noun', 'Preposition', 'Country']; - msg = str + ' - [' + have.join(', ') + ']'; - t.deepEqual(have, want, msg); - - t.end(); -}); diff --git a/test/unit/subset/topics/things.test.js b/test/unit/subset/topics/things.test.js deleted file mode 100644 index 98b00ce11..000000000 --- a/test/unit/subset/topics/things.test.js +++ /dev/null @@ -1,9 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('topics concat:', function(t) { - var things = nlp('spencer and danny are in Paris France, and germany for Google Inc and IBM').topics().out('array'); - var want = ['spencer', 'danny', 'paris france', 'germany', 'google inc', 'ibm']; - t.equal(things.join(', '), want.join(', '), 'found right things'); - t.end(); -}); diff --git 
a/test/unit/subset/topics/topics.test.js b/test/unit/subset/topics/topics.test.js deleted file mode 100644 index 2a65aad0d..000000000 --- a/test/unit/subset/topics/topics.test.js +++ /dev/null @@ -1,37 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('topics:', function(t) { - [ - ['James and Tony Hawk both live in Toronto. Tony Hawk is cool.', 'tony hawk'], - ['I live Toronto. I think Toronto is cool.', 'toronto'], - ['The EACD united in 1972. EACD must follow regulations.', 'eacd'], - // ['The Elkjsdflkjsdf sells hamburgers. I think the Elkjsdflkjsdf eats turky.', 'elkjsdflkjsdf'], - ['Toronto\'s citizens love toronto!', 'toronto'], - ].forEach(function(a) { - var arr = nlp(a[0]) - .topics() - .out('freq'); - str_test(arr[0].normal, a[0], a[1], t); - }); - t.end(); -}); - -test('topics-false-positives:', function(t) { - var arr = [ - 'somone ate her lunch', - 'everybody is dancing all night', - 'a man and a woman ate her son\'s breakfast', - 'my brother walks to school', - `She's coming by`, - `if she doesn't like something about us she can keep us off`, - ` She's it! 
She could be a soap opera.`, - `she's a little dare-devil!`, - ]; - arr.forEach(function(str, i) { - var doc = nlp(str).topics(); - t.equal(doc.length, 0, 'topics #' + i + ' -> ' + doc.out()); - }); - t.end(); -}); diff --git a/test/unit/subset/value/bigNumber.test.js b/test/unit/subset/value/bigNumber.test.js deleted file mode 100644 index 7aeca1d83..000000000 --- a/test/unit/subset/value/bigNumber.test.js +++ /dev/null @@ -1,73 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -var cardinal = function(str) { - return nlp(str).values().numbers()[0]; -}; - -test('a very large cardinal', function(t) { - t.equal(cardinal('nine trillion two hundred'), 9000000000200); - t.equal(cardinal('nine quadrillion two thousand and six'), 9000000000002006); - t.equal(cardinal('ninety quintillion two thousand and six'), 90000000000000002006); - t.equal(cardinal('ninety nine quintillion two thousand and six'), 99000000000000002006); - t.equal(cardinal('nine sextillion'), 9000000000000000000000); - // t.equal(cardinal('nine septillion'), 9000000000000000000000000); - // t.equal(cardinal('ninety nine septillion two thousand and six'), 99000000000000000002006); - // t.equal(cardinal('one hundred and twenty-three septillion, four hundred and fifty-six sextillion, seven hundred and eighty-nine quintillion, one hundred and twenty-three quadrillion, four hundred and fifty-six trillion, seven hundred and eighty-nine billion, one hundred and twenty-three million, four hundred and fifty-six thousand and seven hundred and eighty-nine'), 123456789123456789123456789); - // t.equal(cardinal('seven hundred and eighty-nine quintillion, one hundred and twenty-three quadrillion, four hundred and fifty-six trillion, seven hundred and eighty-nine billion, one hundred and twenty-three million, four hundred and fifty-six thousand and seven hundred and eighty-nine'), 789123456789123456789); - t.end(); -}); - -test('number ordinal', function(t) { - t.equal(nlp('two hundred 
sextillion').values().data()[0].niceOrdinal, '200,000,000,000,000,000,000,000th'); - t.equal(nlp('thirty seven quadrillion and two hundred').values().data()[0].niceOrdinal, '37,000,000,000,000,200th'); - t.equal(nlp('thirty seven quadrillion, two thousand').values().data()[0].niceOrdinal, '37,000,000,000,002,000th'); - t.equal(nlp('ninety nine quadrillion, two hundred thousand').values().data()[0].niceOrdinal, '99,000,000,000,200,000th'); - //javascript math can't do this. - // t.equal(nlp('thirty sextillion and two').values().data()[0].niceOrdinal, '30,000,000,000,000,000,000,002nd'); - // t.equal(nlp('ninety nine quadrillion, two hundred and fifty thousand').values().data()[0].niceOrdinal, '99,000,000,000,250,000th'); - t.end(); -}); - -test('text ordinal', function(t) { - t.equal(nlp('thirty quadrillion and two hundred').values().data()[0].textOrdinal, 'thirty quadrillion two hundredth'); - t.equal(nlp('nine trillion seven hundred fifty').values().data()[0].textOrdinal, 'nine trillion seven hundred and fiftieth'); - t.equal(nlp('a quintillion').values().data()[0].textOrdinal, 'one quintillionth'); - t.equal(nlp('seventy-two quintillion').values().data()[0].textOrdinal, 'seventy two quintillionth'); - t.end(); -}); - -test('from number', function(t) { - t.equal(nlp('9000000000200').values().toText().out(), 'nine trillion two hundred'); - t.equal(nlp('70000000000200').values().toText().out(), 'seventy trillion two hundred'); - t.equal(nlp('9000000000002006').values().toText().out(), 'nine quadrillion two thousand and six'); - t.equal(nlp('900,000,000').values().toText().out(), 'nine hundred million'); - t.equal(nlp('9,000,000,030').values().toText().out(), 'nine billion and thirty'); - t.equal(nlp('10,000,000,000').values().toText().out(), 'ten billion'); - // t.equal(nlp('900,000,000,037').values().toText().out(), 'nine hundred billion and thirty seven'); - //javascript can't do this - // t.equal(nlp('90000000000000002006').values().toText().out(), 'ninety 
quintillion two thousand and six'); - // t.equal(nlp('99000000000000002006').values().toText().out(), 'ninety nine quintillion two thousand and six'); - // t.equal(nlp('9000000000000000000000').values().toText().out(), 'nine sextillion'); - t.end(); -}); - -// test('cardinal numbers', function(t) { -// t.equal(cardinal('sixty-one trillion, six hundred and eighty-nine billion, four hundred and seventy-three million, four hundred and fifty-three thousand and five hundred and ninety'), 61689473453590); -// t.end(); -// }); - -// test('cardinal numbers in american form (with ands)', function(t) { -// t.equal(cardinal('six hundred eighty-nine billion, four hundred seventy-three million, four hundred fifty-three thousand, five hundred ninety'), 689473453590); -// t.end(); -// }); - -// test('ordinal numbers', function(t) { -// t.equal(cardinal('six hundred and eighty-nine billion, four hundred and seventy-three million, four hundred and fifty-three thousand and five hundred and ninetieth'), 689473453590); -// t.end(); -// }); - -// test('cardinal numbers in american form (with ands)', function(t) { -// t.equal(cardinal('six hundred eighty-nine billion, four hundred seventy-three million, four hundred fifty-three thousand, five hundred ninetieth'), 689473453590); -// t.end(); -// }); diff --git a/test/unit/subset/value/comparison.test.js b/test/unit/subset/value/comparison.test.js deleted file mode 100644 index 270a908e3..000000000 --- a/test/unit/subset/value/comparison.test.js +++ /dev/null @@ -1,86 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('isEqual:', function(t) { - var str = nlp('he is 7 years old').values().isEqual(7).out('normal'); - t.equal(str, '7 years old', 'isEqual 7'); - - str = nlp('he is seven years old').values().isEqual(7).out('normal'); - t.equal(str, 'seven years old', 'isEqual seven'); - - str = nlp("it's his 7th birthday").values().isEqual(7).out('normal'); - t.equal(str, '7th birthday', 'isEqual 7th'); - - str 
= nlp("it's his seventh birthday").values().isEqual(7).out('normal'); - t.equal(str, 'seventh birthday', 'isEqual seventh'); - - str = nlp('i have 7 potatoes and 12 tomatoes').values().isEqual(7).out('normal'); - t.equal(str, '7 potatoes', 'only 7'); - - str = nlp('i have 17 potatoes and fourteen tomatoes').values().isEqual('seventeen').out('normal'); - t.equal(str, '17 potatoes', 'only 17'); - - str = nlp('i have 15 books and eight hundred tomatoes').values().isEqual('fifteenth').out('normal'); - t.equal(str, '15 books', 'only 15'); - - str = nlp('i have 152 potatoes and eight hundred and two tomatoes').values().isEqual('152nd').out('normal'); - t.equal(str, '152 potatoes', 'only 152'); - - str = nlp('i have 9 potatoes and 77 tomatoes').values().isEqual(7).out('normal'); - t.equal(str, '', 'no equal-to'); - t.end(); -}); - -test('greaterThan:', function(t) { - var str = nlp('he is 8 years old').values().greaterThan(7).out('normal'); - t.equal(str, '8 years old', '8 greaterThan 7'); - - str = nlp('he is forty years old').values().greaterThan(7).out('normal'); - t.equal(str, 'forty years old', 'fourty greaterThan 7'); - - str = nlp('fifteen donuts').values().greaterThan(7).out('normal'); - t.equal(str, 'fifteen donuts', 'fifteen greaterThan 7'); - - str = nlp('my fifteenth donut').values().greaterThan(7).out('normal'); - t.equal(str, 'fifteenth donut', 'fifteenth greaterThan 7'); - - str = nlp('i have 9 potatoes and 77 tomatoes').values().greaterThan(700).out('normal'); - t.equal(str, '', 'no greaterThan'); - - t.end(); -}); - -test('lessThan:', function(t) { - var str = nlp('he is 8 years old').values().lessThan(700).out('normal'); - t.equal(str, '8 years old', '8 lessThan 700'); - - str = nlp('he is forty years old').values().lessThan('forty-one').out('normal'); - t.equal(str, 'forty years old', 'fourty lessThan forty-one'); - - str = nlp('my fifteenth book').values().lessThan(70).out('normal'); - t.equal(str, 'fifteenth book', 'fifteenth lessThan 70'); - - str = 
nlp('i have 9 potatoes and 77 tomatoes').values().lessThan(9).out('normal'); - t.equal(str, '', 'no lessThan'); - - t.end(); -}); - -test('negative comparisons:', function(t) { - var str = nlp('i am 8 years old').values().greaterThan(-2).out('normal'); - t.equal(str, '8 years old', '8 greaterThan -2'); - - str = nlp('i am eighty years old').values().greaterThan('-200').out('normal'); - t.equal(str, 'eighty years old', 'eighty greaterThan -200'); - - str = nlp('it is minus seven degrees out').values().lessThan('seven').out('normal'); - t.equal(str, 'minus seven degrees', 'minus seven lessThan seven'); - - str = nlp('i am minus two years old').values().isEqual('-2').out('normal'); - t.equal(str, 'minus two years old', 'minus two isEqual -2'); - - str = nlp('i am -2 years old').values().isEqual(-2).out('normal'); - t.equal(str, '-2 years old', '-2 isEqual -2'); - - t.end(); -}); diff --git a/test/unit/subset/value/money.test.js b/test/unit/subset/value/money.test.js deleted file mode 100644 index 566392087..000000000 --- a/test/unit/subset/value/money.test.js +++ /dev/null @@ -1,53 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('money-basic:', function(t) { - var r = nlp('it is $70.23'); - var m = r.match('#Money'); - t.equal(m.out('normal'), '$70.23', 'match-$70.23'); - - r = nlp('it is $703'); - m = r.match('#Money+'); - t.equal(m.out('normal'), '$703', 'match-$703'); - - r = nlp('it is five euros'); - m = r.match('#Money+'); - t.equal(m.out('normal'), 'five euros', 'match-five-euros'); - - r = nlp('i said five times, you should pay 12 dollars'); - m = r.match('#Money+'); - t.equal(m.out('normal'), '12 dollars', 'match-12 dollars'); - - r = nlp('you should pay sixty five dollars and four cents USD'); - m = r.match('#Money+'); - t.equal(m.out('normal'), 'sixty five dollars and four cents usd', 'match-long-usd'); - - t.end(); -}); - -test('money-has:', function(t) { - var tests = [ - ['$7', true], - ['$7.0', true], - ['$7.00', true], 
- ['$7.003', false], - - ['$7082.03', true], - ['$2,082.03', true], - ['€7.00', true], - ['¥70', true], - ['£0.20', true], - ['@0.20', false], - - ['8 cents', true], - ['60 pence', true], - ['sixty pence', true], - ['sixty USD', true] - ]; - tests.forEach(function(a) { - var r = nlp(a[0]); - var m = r.match('#Money'); - t.equal(m.found, a[1], "money-has: '" + a[0] + "'"); - }); - t.end(); -}); diff --git a/test/unit/subset/value/percent.test.js b/test/unit/subset/value/percent.test.js deleted file mode 100644 index bc63f04a0..000000000 --- a/test/unit/subset/value/percent.test.js +++ /dev/null @@ -1,58 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('percent-basic:', function(t) { - var m = nlp('it is 33%').match('#Percent'); - t.equal(m.out('normal'), '33%', 'match-33%'); - - //parse number - var arr = nlp('it is 50% of our budget').values().data(); - t.equal(arr[0].number, 50, 'match-50'); - - arr = nlp('it is 9,000% of our budget').values().data(); - t.equal(arr[0].number, 9000, 'match-9000'); - - //percent-decimal - arr = nlp('it is 0.5% of our budget').values().data(); - t.equal(arr[0].number, 0.5, 'match-0.5'); - - t.end(); -}); - -test('percent-conversion:', function(t) { - var str = '3% of the budget'; - var r = nlp(str).values().toNumber().all(); - t.equal(r.out(), str, '3% to number'); - - str = "it's 39% of the budget"; - r = nlp(str).values().toNumber().all(); - t.equal(r.out(), str, '39% to number'); - - str = '39% of the budget'; - r = nlp(str).values().toText().all(); - t.equal(r.out(), 'thirty nine percent of the budget', 'to text'); - - str = 'around 100% of the budget'; - r = nlp(str).values().toText().all(); - t.equal(r.out(), 'around one hundred percent of the budget', 'to text'); - - t.end(); -}); - -test('percent-tag:', function(t) { - var tests = [ - ['7%', true], - ['7.0%', true], - ['2.22%', true], - ['.2%', true], - ['0.2%', true], - ['2,999%', true], - ['2asdf99%', false], - ['99%3', false] - ]; - 
tests.forEach(function(a) { - var r = nlp(a[0]); - t.equal(r.has('#Percent'), a[1], "Percent-has: '" + a[0] + "'"); - }); - t.end(); -}); diff --git a/test/unit/subset/value/toCardinal.test.js b/test/unit/subset/value/toCardinal.test.js deleted file mode 100644 index a65685c66..000000000 --- a/test/unit/subset/value/toCardinal.test.js +++ /dev/null @@ -1,123 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -//tests 'borrowed' from Travis Savo's lib 💝 https://github.com/TSavo/english2number-js -var cardinal = function(str) { - return nlp(str).values().numbers()[0]; -}; - -test('should give right answers for numbers', function(t) { - t.equal(cardinal('0'), 0); - t.equal(cardinal('1'), 1); - t.equal(cardinal('2'), 2); - t.equal(cardinal('3'), 3); - t.equal(cardinal('4'), 4); - t.equal(cardinal('5'), 5); - t.equal(cardinal('6'), 6); - t.equal(cardinal('7'), 7); - t.equal(cardinal('8'), 8); - t.equal(cardinal('9'), 9); - t.equal(cardinal('10'), 10); - t.equal(cardinal('20'), 20); - t.equal(cardinal('30'), 30); - t.equal(cardinal('50'), 50); - t.equal(cardinal('90'), 90); - t.equal(cardinal('100'), 100); - t.end(); -}); -test('should give right answers for numbered ordinals', function(t) { - t.equal(cardinal('0th'), 0); - t.equal(cardinal('1st'), 1); - t.equal(cardinal('2nd'), 2); - t.equal(cardinal('3rd'), 3); - t.equal(cardinal('4th'), 4); - t.equal(cardinal('5th'), 5); - t.equal(cardinal('6th'), 6); - t.equal(cardinal('7th'), 7); - t.equal(cardinal('8th'), 8); - t.equal(cardinal('9th'), 9); - t.equal(cardinal('10th'), 10); - t.equal(cardinal('20th'), 20); - t.equal(cardinal('30th'), 30); - t.equal(cardinal('50th'), 50); - t.equal(cardinal('77th'), 77); - t.equal(cardinal('90th'), 90); - t.equal(cardinal('100th'), 100); - t.end(); -}); -test('should give right answers for english names of numbers', function(t) { - t.equal(cardinal('zero'), 0); - t.equal(cardinal('one'), 1); - t.equal(cardinal('two'), 2); - t.equal(cardinal('three'), 3); - 
t.equal(cardinal('four'), 4); - t.equal(cardinal('five'), 5); - t.equal(cardinal('six'), 6); - t.equal(cardinal('seven'), 7); - t.equal(cardinal('eight'), 8); - t.equal(cardinal('nine'), 9); - t.equal(cardinal('ten'), 10); - t.equal(cardinal('eleven'), 11); - t.equal(cardinal('twelve'), 12); - t.equal(cardinal('thirteen'), 13); - t.equal(cardinal('fourteen'), 14); - t.equal(cardinal('fifteen'), 15); - t.equal(cardinal('sixteen'), 16); - t.equal(cardinal('seventeen'), 17); - t.equal(cardinal('eighteen'), 18); - t.equal(cardinal('nineteen'), 19); - t.equal(cardinal('twenty'), 20); - t.equal(cardinal('twenty-five'), 25); - t.equal(cardinal('twenty five'), 25); - t.equal(cardinal('forty-four'), 44); - t.equal(cardinal('forty four'), 44); - t.equal(cardinal('fourty four'), 44); //typo - t.equal(cardinal('seventy'), 70); - t.equal(cardinal('seventy-seven'), 77); - t.equal(cardinal('eighty eight'), 88); - t.equal(cardinal('ninety nine'), 99); - t.equal(cardinal('one-hundred'), 100); - t.equal(cardinal('one hundred'), 100); - t.end(); -}); -test('should give right answers for english names of ordinal positions', function(t) { - t.equal(cardinal('zeroth'), 0); - t.equal(cardinal('first'), 1); - t.equal(cardinal('second'), 2); - t.equal(cardinal('third'), 3); - t.equal(cardinal('fourth'), 4); - t.equal(cardinal('fifth'), 5); - t.equal(cardinal('sixth'), 6); - t.equal(cardinal('seventh'), 7); - t.equal(cardinal('eighth'), 8); - t.equal(cardinal('ninth'), 9); - t.equal(cardinal('tenth'), 10); - t.equal(cardinal('eleventh'), 11); - t.equal(cardinal('twelfth'), 12); - t.equal(cardinal('thirteenth'), 13); - t.equal(cardinal('fourteenth'), 14); - t.equal(cardinal('fifteenth'), 15); - t.equal(cardinal('sixteenth'), 16); - t.equal(cardinal('seventeenth'), 17); - t.equal(cardinal('eighteenth'), 18); - t.equal(cardinal('nineteenth'), 19); - t.equal(cardinal('twentieth'), 20); - t.equal(cardinal('twenty first'), 21); - t.equal(cardinal('twenty second'), 22); - t.equal(cardinal('twenty 
third'), 23); - t.equal(cardinal('twenty fourth'), 24); - t.equal(cardinal('twenty-fifth'), 25); - t.equal(cardinal('forty-fourth'), 44); - t.equal(cardinal('seventieth'), 70); - t.equal(cardinal('seventy-seventh'), 77); - t.equal(cardinal('ninetieth'), 90); - t.equal(cardinal('ninety ninth'), 99); - t.equal(cardinal('one-hundredth'), 100); - t.end(); -}); - -test('should deal with negative numbers', function(t) { - t.equal(cardinal('negative thirty eight thousand two hundred sixty three'), -38263); - t.equal(cardinal('negative thirty eight thousand two hundred sixty three'), -38263); - t.equal(cardinal('negative zero'), -0); - t.end(); -}); diff --git a/test/unit/subset/value/toNumber.test.js b/test/unit/subset/value/toNumber.test.js deleted file mode 100644 index f2cad7239..000000000 --- a/test/unit/subset/value/toNumber.test.js +++ /dev/null @@ -1,146 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('to_number:', function(t) { - [ - ['twenty two thousand five hundred', 22500], - ['two thousand five hundred and sixty', 2560], - ['a hundred and two', 102], - ['a hundred', 100], - ['seven', 7], - ['seven grand', 7000], - ['104', 104], - ['13 thousand', 13000], - ['17,983', 17983], - ['nine hundred', 900], - ['twenty one hundred', 2100], - ['twenty one', 21], - ['seventy two', 72], - ['two hundred two', 202], - ['one thousand one', 1001], - ['minus five hundred', -500], - ['minus fifteen', -15], - ['five hundred million', 500000000], - // ['$12.03', 12.03], - // ['$12', 12], - ['5 hundred', 500], - ['5.2 thousand', 5200], - ['million', 1000000], - ['hundred one', 101], - ['minus fifty', -50], - ['twenty thousand', 20000], - ['four point six', 4.6], - ['nine hundred point five', 900.5], - ['sixteen hundred sixteen point eight', 1616.8], - ['four point seven nine', 4.79], - ['four point sixteen', 4.16], - ['twenty first', '21st'], - ['fifty ninth', '59th'], - ['nine hundred fiftieth', '950th'], - ['nine hundred and second', '902nd'], - 
['five thousand nine hundred fiftieth', '5950th'], - ['six hundred and fifty nine', 659], - ['six hundred and fifty nine thousand', 659000], - [950, 950], - [999999950, 999999950], - [8080999999950, 8080999999950], - ['fifteen million and two', 15000002], - ['six hundred and eighteen', 618], - ['two hundred thousand', 200000], - ['six million ninety', 6000090], - ['twenty-two hundred', 2200], - - ['two million five hundred thousand', 2500000], - ['one billion five hundred thousand', 1000500000], - ['one billion five hundred thousand and eight', 1000500008], - ['a million fifty thousand and eight', 1050008], - ['a million twenty five thousand and fifty-two', 1025052], - ['minus two million twenty five thousand and eighty', -2025080], - - ['7 hundred and 8 thousand', 708000], - ['2 hundred and sixty 9 thousand seven hundred', 269700], - ['2 hundred and six million 7 hundred thousand seven hundred', 206700700], - - ['minus 70', -70], - ['minus eight', -8], - ['minus 8 hundred', -800], - ['twenty-seven hundred', 2700], - ['minus eight thousand two hundred', -8200], - ['twenty-five', 25], - ['half a million', 500000], - ['five hundred eighteen', 518], - ['eighty eight point nine nine', 88.99], - ['minus eighty eight point nine nine', -88.99], - // ['1/2', 1 / 2], - // ['-1/5', -1 / 5], - // ['-1 1/10', -1 - 1 / 10], - // ['1 1/20', 1 + 1 / 20], - // ['1/2 million', 500000], - // ['1 1/2 million', 1500000], - ['negative five', -5], - ['negative hundred', -100], - // ['12:32', ''], - // ['123-1231', ''], - ['seven eleven', '7 11'], - ['ten-four', '10 4'], - ['one seven', '1 7'], - ['one ten', '1 10'], - ['one twelve', '1 12'], - ['one thirty', '1 30'], - ['nine fifty', '9 50'], - ['five six', '5 6'], - ['nine seventy', '9 70'], - ['nine two hundred', '9 200'], - ['ten one', '10 1'], - ['twelve one', '12 1'], - ['seventy five two', '75 2'], - // ['two hundred three hundred', '200 300'], //tricky - ['sixty fifteen hundred', '60 1500'], - ['one twenty', '1 20'], - ['twenty 
five twenty', '25 20'] - // ['',''], - // [null,''], - ].forEach(function(a) { - var num = nlp(a[0]).values().toNumber().out('text'); - var want = String(a[1]) || a[0]; - var msg = '\'' + a[0] + '\' - - have: \'' + num + '\' want:\'' + a[1] + '\''; - t.equal(num, String(want), msg); - }); - t.end(); -}); - -test('all-to-number:', function(t) { - var num = nlp('1st').values().numbers()[0]; - t.equal(num, 1, '1st'); - num = nlp('1').values().numbers()[0]; - t.equal(num, 1, '1'); - num = nlp('first').values().numbers()[0]; - t.equal(num, 1, 'first'); - num = nlp('one').values().numbers()[0]; - t.equal(num, 1, 'one'); - //long-numbers - num = nlp('55575').values().numbers()[0]; - t.equal(num, 55575, '55575'); - num = nlp('55,575').values().numbers()[0]; - t.equal(num, 55575, '55,575'); - num = nlp('55,575.279').values().numbers()[0]; - t.equal(num, 55575.279, '55,575.279'); - num = nlp('$55,575').values().numbers()[0]; - t.equal(num, 55575, '$55,575'); - //decimals - num = nlp('2.5').values().numbers()[0]; - t.equal(num, 2.5, '2.5'); - num = nlp('2.5th').values().numbers()[0]; - t.equal(num, 2.5, '2.5th'); - //two-terms - num = nlp('fifty seven').values().numbers()[0]; - t.equal(num, 57, 'fifty seven'); - num = nlp('fifty 7').values().numbers()[0]; - t.equal(num, 57, 'fifty 7'); - num = nlp('2 hundred').values().numbers()[0]; - t.equal(num, 200, '2 hundred'); - num = nlp('2 hundredth').values().numbers()[0]; - t.equal(num, 200, '2 hundredth'); - - t.end(); -}); diff --git a/test/unit/subset/value/toOrdinal.test.js b/test/unit/subset/value/toOrdinal.test.js deleted file mode 100644 index fea8265c8..000000000 --- a/test/unit/subset/value/toOrdinal.test.js +++ /dev/null @@ -1,52 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('to-numOrdinal:', function(t) { - var arr = [ - 'one', - '1', - '1.0', //not sure.. 
- '1st', - 'first' - ]; - arr.forEach(txt => { - var o = nlp(txt).values().data()[0] || {}; - t.equal(o.ordinal, '1st', txt + ' -> 1st'); - }); - arr = ['500', '500.0', '500th', 'five hundred', 'five hundredth']; - arr.forEach(txt => { - var o = nlp(txt).values().data()[0] || {}; - t.equal(o.ordinal, '500th', txt + ' -> 500th'); - }); - arr = ['2012', '2012.0', '2,012', '2012th', 'two thousand and twelve', 'two thousand and twelfth']; - arr.forEach(txt => { - var o = nlp(txt).values().data()[0] || {}; - t.equal(o.ordinal, '2012th', txt + ' -> 2012th'); - }); - t.end(); -}); - -test('to-textOrdinal:', function(t) { - var arr = [ - 'one', - '1', - '1.0', //not sure.. - '1st', - 'first' - ]; - arr.forEach(txt => { - var o = nlp(txt).values().data()[0] || {}; - t.equal(o.textOrdinal, 'first', txt + ' -> first'); - }); - arr = ['500', '500.0', '500th', 'five hundred', 'five hundredth']; - arr.forEach(txt => { - var o = nlp(txt).values().data()[0] || {}; - t.equal(o.textOrdinal, 'five hundredth', txt + ' -> five hundredth'); - }); - arr = ['2012', '2012.0', '2,012', '2012th', 'two thousand and twelve', 'two thousand and twelfth']; - arr.forEach(txt => { - var o = nlp(txt).values().data()[0] || {}; - t.equal(o.textOrdinal, 'two thousand and twelfth', txt + ' -> two thousand and twelfth'); - }); - t.end(); -}); diff --git a/test/unit/subset/value/toText.test.js b/test/unit/subset/value/toText.test.js deleted file mode 100644 index f0f1e6cec..000000000 --- a/test/unit/subset/value/toText.test.js +++ /dev/null @@ -1,70 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); -var str_test = require('../../lib/fns').str_test; - -test('to_text:', function(t) { - [ - // [-5, 'negative five'], - [5, 'five'], - [15, 'fifteen'], - [10, 'ten'], - [20, 'twenty'], - [75, 'seventy five'], - [97, 'ninety seven'], - [111, 'one hundred and eleven'], - [175, 'one hundred and seventy five'], - [900, 'nine hundred'], - [1175, 'one thousand one hundred and seventy five'], - 
[2000, 'two thousand'], - [2100, 'two thousand one hundred'], - [2102, 'two thousand one hundred and two'], - [70000, 'seventy thousand'], - [72000, 'seventy two thousand'], - [900000, 'nine hundred thousand'], - [900001, 'nine hundred thousand and one'], - [900200, 'nine hundred thousand two hundred'], - [900205, 'nine hundred thousand two hundred and five'], - [7900205, 'seven million nine hundred thousand two hundred and five'], - [90000000, 'ninety million'], - [900000000, 'nine hundred million'], - [900000080, 'nine hundred million and eighty'] - ].forEach(function(a) { - var str = nlp(a[0]).values().toText().out('text'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('all-to-text:', function(t) { - var arr = [ - 'one', - '1', - '1.0', //not sure.. - '1st', - 'first' - ]; - arr.forEach(txt => { - var str = nlp(txt).values().data()[0].text; - t.equal(str, 'one', txt + ' -> one'); - }); - arr = ['500', '500.0', '500th', 'five hundred', 'five hundredth']; - arr.forEach(txt => { - var str = nlp(txt).values().data()[0].text; - t.equal(str, 'five hundred', txt + ' -> five hundred'); - }); - arr = ['2012', '2012.0', '2,012', '2012th', 'two thousand and twelve', 'two thousand and twelfth']; - arr.forEach(txt => { - var str = nlp(txt).values().data()[0].text; - t.equal(str, 'two thousand and twelve', txt + ' -> two thousand and twelve'); - }); - t.end(); -}); - -test('big number:', function(t) { - var str = nlp('twenty-three quadrillion').values().toText().out(); - t.equal(str, 'twenty three quadrillion', 'quadrillion'); - - // str = nlp('nine quintillion two hundred').values().toText().out(); - // t.equal(str, 'nine quintillion two hundred', 'quantillion'); - t.end(); -}); diff --git a/test/unit/subset/value/value.test.js b/test/unit/subset/value/value.test.js deleted file mode 100644 index f13668d3c..000000000 --- a/test/unit/subset/value/value.test.js +++ /dev/null @@ -1,204 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); 
-var str_test = require('../../lib/fns').str_test; - -test('value-lumper-splitter:', function(t) { - var r = nlp('202 199'); - t.equal(r.values().length, 2, 'two-numbers'); - - r = nlp('two hundred and fifty times six'); - t.equal(r.values().length, 2, 'two-numbers2'); - - r = nlp('one two'); - t.equal(r.values().length, 2, 'two-numbers3'); - - r = nlp('fifth ninth'); - t.equal(r.values().length, 2, 'two-numbers4'); - t.end(); -}); - -test('value-basic:', function(t) { - var r = nlp('third month of 2019'); - r.values().toNumber(); - t.equal(r.out(), '3rd month of 2019', 'toNumber'); - - r.values().toText(); - t.equal(r.out(), 'third month of two thousand and nineteen', 'toText'); - - r = nlp('third month of two thousand and nineteen'); - r.values().toCardinal(); - t.equal(r.out(), 'three month of two thousand and nineteen', 'toCardinal'); - - r = nlp('three month of two thousand nineteen'); - r.values().toOrdinal(); - t.equal(r.out(), 'third month of two thousand and nineteenth', 'toOrdinal'); - - r.values().toNumber().all(); - t.equal(r.out(), '3rd month of 2019th', 'toNumber2'); - - t.end(); -}); - -test('value-to_ordinal:', function(t) { - [[11, '11th'], [5, '5th'], [22, '22nd']].forEach(function(a) { - var str = nlp(a[0]).values().toOrdinal().out('normal'); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('value-number:', function(t) { - [ - ['five hundred feet', 500], - ['fifty square feet', 50], - ['90 hertz', 90], - // ['5 six-ounce containers', 5], - ['twelve 2-gram containers', 12], - ['thirty-seven forever-21 stores', 37] - ].forEach(function(a) { - var str = nlp(a[0]).values().toNumber().term(0).first().out('normal'); - a[1] = String(a[1]); - str_test(str, a[0], a[1], t); - }); - t.end(); -}); - -test('add/subtract:', function(t) { - var r = nlp('beginning of 2019').values().add(2).all(); - t.equal(r.out(), 'beginning of 2021', 'add-2-cardinal'); - - r = nlp('beginning of the 2019th').values().add(2).all(); - t.equal(r.out(), 'beginning of 
the 2021st', 'add-2-ordinal'); - - r = nlp('beginning of the 2019th').values().add(-2).all(); - t.equal(r.out(), 'beginning of the 2017th', 'add-minus-2-ordinal'); - - r = nlp('beginning of 2019').values().subtract(2).all(); - t.equal(r.out(), 'beginning of 2017', 'subtract-2-cardinal'); - - r = nlp('beginning of the 2019th').values().subtract(2).all(); - t.equal(r.out(), 'beginning of the 2017th', 'subtract-2-ordinal'); - - r = nlp('seventeen years old').values().add(2).all(); - t.equal(r.out(), 'nineteen years old', 'text-add-2-ordinal'); - r = nlp('seventeenth birthday').values().add(2).all(); - t.equal(r.out(), 'nineteenth birthday', 'text-add-2-ordinal'); - - r = nlp('seventeen years old').values().subtract(2).all(); - t.equal(r.out(), 'fifteen years old', 'text-subtract-2-cardinal'); - r = nlp('seventeenth birthday').values().subtract(2).all(); - t.equal(r.out(), 'fifteenth birthday', 'text-subtract-2-cardinal'); - - r = nlp('seven apples and 1,231 peaches').values().add(50).all(); - t.equal(r.out(), 'fifty seven apples and 1,281 peaches', 'two-add-50s'); - t.end(); -}); - -test('increment:', function(t) { - var r = nlp('seven apples and 231 peaches'); - r.values().increment(); - t.equal(r.out(), 'eight apples and 232 peaches', 'increment-cardinal'); - r.values().decrement(); - t.equal(r.out(), 'seven apples and 231 peaches', 'decrement-cardinal'); - - r = nlp('seventh place and 12th place'); - r.values().increment().increment(); - t.equal(r.out(), 'ninth place and 14th place', 'increment-ordinal'); - r.values().decrement().decrement(); - t.equal(r.out(), 'seventh place and 12th place', 'decrement-ordinal'); - t.end(); -}); - -test('nounit:', function(t) { - var r = nlp('seven apples and 231 peaches'); - var arr = r.values().out('array'); - t.deepEqual(arr, ['seven apples', '231 peaches']); - - arr = r.values().noUnits().out('array'); - t.deepEqual(arr, ['seven', '231']); - t.end(); -}); - -test('value-unit:', function(t) { - [ - ['five hundred feet', 
'feet'], - ['fifty hertz', 'hertz'], - ['100 dollars', 'dollars'], - // ['$100', 'dollar'], - // ['¥2.5', 'yen'], - // ['€3,000,100', 'euro'], - // ['EUR 9.99', 'eur'], - // ['5 g', 'g'], - // ['2 in', 'in'], - // ['5 g sugar', 'g'], - ['3 grams', 'grams'], - ['2 inches', 'inches'], - ['10 grams of sugar', 'grams'], - ['fifty inches of snow', 'inches'], - ['7 years', 'years'], - ['7.5 days', 'days'], - - ['7th year', 'year'], - ['7th years', ''], - ['1 day', 'day'], - ['one book', 'book'], - ['first book', 'book'], - ['7 day', ''], - ].forEach(function(a) { - var r = nlp(a[0]).values().units(); - str_test(r.out('normal'), a[0], a[1], t); - }); - t.end(); -}); - -test('number splits', function(t) { - var arr = [ - '12, 34, 56', - '12 34 56', - '12, 34, 56', - '1 2 4' - ]; - arr.forEach((str) => { - var tokens = nlp(str).values().out('array'); - t.equal(tokens.length, 3, str); - }); - t.end(); -}); - -// test('value-measurement:', function(t) { -// [ -// ['five hundred feet', 'Distance'], -// ['100 kilometers', 'Distance'], -// ['fifty hertz', 'Frequency'], -// ['59 thousand $', 'Money'], -// ['100 mb', 'Data'], -// ['50 руб', 'Money'], -// ['EUR 9.99', 'Money'], -// ['100 dollars', 'Money'], -// ['256 bitcoins', 'Money'], -// ].forEach(function (a) { -// var str = nlp.value(a[0]).measurement; -// str_test(str, a[0], a[1], t); -// }); -// t.end(); -// }); -// -// test('value-of_what:', function(t) { -// [ -// ['nine kg', 'kg'], -// ['5 kg of copper', 'copper'], -// ['many of these stories', 'many of these stories'], -// ['room full of beautiful creatures', 'full of beautiful creatures'], -// ['boxes of bags of food', 'boxes of bags of food'], -// ['5 boxes of water', 'boxes of water'], -// ['6 of kids', 'kids'], -// ['10 kids', 'kids'], -// ['just nothing', 'just nothing'], -// ['EUR 77', 'eur'], -// ['kg', 'kg'] -// ].forEach(function (a) { -// var str = nlp.value(a[0]).of_what; -// str_test(str, a[0], a[1], t); -// }); -// t.end(); -// }); diff --git 
a/test/unit/subset/verb/conjugate.test.js b/test/unit/subset/verb/conjugate.test.js deleted file mode 100644 index 0b2041775..000000000 --- a/test/unit/subset/verb/conjugate.test.js +++ /dev/null @@ -1,252 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -var arr = [ - { - Infinitive: 'convolute', - PresentTense: 'convolutes', - Gerund: 'convoluting', - PastTense: 'convoluted' - }, - { - PresentTense: 'presents', - Gerund: 'presenting', - PastTense: 'presented', - Infinitive: 'present' - }, - { - PresentTense: 'angulates', - Gerund: 'angulating', - PastTense: 'angulated', - Infinitive: 'angulate' - }, - { - PresentTense: 'conjures', - Gerund: 'conjuring', - PastTense: 'conjured', - Infinitive: 'conjure' - }, - { - PresentTense: 'denounces', - Gerund: 'denouncing', - PastTense: 'denounced', - Infinitive: 'denounce' - }, - { - PresentTense: 'watches', - Gerund: 'watching', - PastTense: 'watched', - Infinitive: 'watch' - }, - { - PresentTense: 'tingles', - Gerund: 'tingling', - PastTense: 'tingled', - Infinitive: 'tingle' - }, - { - PresentTense: 'mortises', - Gerund: 'mortising', - PastTense: 'mortised', - Infinitive: 'mortise' - }, - { - PresentTense: 'disguises', - Gerund: 'disguising', - PastTense: 'disguised', - Infinitive: 'disguise' - }, - { - Infinitive: 'effect', - Gerund: 'effecting', - PastTense: 'effected', - PresentTense: 'effects' - }, - { - Infinitive: 'want', - Gerund: 'wanting', - PastTense: 'wanted', - PresentTense: 'wants' - }, - { - Infinitive: 'power', - Gerund: 'powering', - PastTense: 'powered', - PresentTense: 'powers' - }, - { - Infinitive: 'overcompensate', - PresentTense: 'overcompensates', - PastTense: 'overcompensated', - Gerund: 'overcompensating' - }, - { - Infinitive: 'ice', - PresentTense: 'ices', - PastTense: 'iced', - Gerund: 'icing' - }, - { - Infinitive: 'buy', - PresentTense: 'buys', - PastTense: 'bought', - Gerund: 'buying' - }, - { - Infinitive: 'flower', - PresentTense: 'flowers', - PastTense: 
'flowered', - Gerund: 'flowering' - }, - { - Infinitive: 'rage', - PresentTense: 'rages', - PastTense: 'raged', - Gerund: 'raging' - }, - { - Infinitive: 'drive', - PresentTense: 'drives', - PastTense: 'drove', - Gerund: 'driving' - }, - { - Infinitive: 'foul', - PresentTense: 'fouls', - PastTense: 'fouled', - Gerund: 'fouling' - }, - { - Infinitive: 'overthrow', - PresentTense: 'overthrows', - Gerund: 'overthrowing', - PastTense: 'overthrew' - }, - { - Infinitive: 'aim', - PresentTense: 'aims', - PastTense: 'aimed', - Gerund: 'aiming' - }, - { - PresentTense: 'unifies', - Gerund: 'unifying', - PastTense: 'unified', - Infinitive: 'unify' - }, - { - PresentTense: 'addresses', - Gerund: 'addressing', - PastTense: 'addressed', - Infinitive: 'address' - }, - { - Infinitive: 'bumble', - PresentTense: 'bumbles', - PastTense: 'bumbled', - Gerund: 'bumbling' - }, - { - Infinitive: 'snipe', - PresentTense: 'snipes', - PastTense: 'sniped', - Gerund: 'sniping' - }, - { - PresentTense: 'relishes', - Gerund: 'relishing', - PastTense: 'relished', - Infinitive: 'relish' - }, - { - Infinitive: 'lengthen', - Gerund: 'lengthening', - PastTense: 'lengthened', - PresentTense: 'lengthens' - }, - { - Infinitive: 'farm', - PresentTense: 'farms', - PastTense: 'farmed', - Gerund: 'farming' - }, - { - Infinitive: 'develop', - PresentTense: 'develops', - PastTense: 'developed', - Gerund: 'developing' - }, - { - Infinitive: 'study', - PresentTense: 'studies', - PastTense: 'studied', - Gerund: 'studying' - }, - { - Infinitive: 'criticise', - PresentTense: 'criticises', - PastTense: 'criticised', - Gerund: 'criticising' - }, - { - Infinitive: 'speak', - PresentTense: 'speaks', - PastTense: 'spoke', - Gerund: 'speaking' - }, - { - Infinitive: 'fuzz', - PresentTense: 'fuzzes', - PastTense: 'fuzzed', - Gerund: 'fuzzing' - }, - { - Infinitive: 'invest', - PresentTense: 'invests', - PastTense: 'invested', - Gerund: 'investing' - }, - { - Infinitive: 'age', - PresentTense: 'ages', - PastTense: 
'aged', - Gerund: 'ageing' - }, - { - Infinitive: 'shed', - PresentTense: 'sheds', - PastTense: 'shed', - Gerund: 'shedding' - }, - { - Infinitive: 'ace', - PresentTense: 'aces', - PastTense: 'aced', - Gerund: 'acing' - }, - { - Infinitive: 'egg', - PresentTense: 'eggs', - PastTense: 'egged', - Gerund: 'egging' - } -]; -test('conjugation:', function(t) { - var test_conjugation = function(inf, o, form, original) { - var msg = 'from ' + original + ' to ' + form + ': [' + o[original] + '] -> [' + inf[form] + ']'; - t.equal(inf[form], o[form], msg); - }; - - arr.forEach(function(o) { - var forms = ['Infinitive', 'PastTense', 'PresentTense', 'Gerund']; - for (var i = 0; i < forms.length; i++) { - var from = forms[i]; - var inf = nlp(o[from]).tag('Verb').verbs().conjugate()[0]; - test_conjugation(inf, o, 'Infinitive', from); - test_conjugation(inf, o, 'PastTense', from); - test_conjugation(inf, o, 'PresentTense', from); - test_conjugation(inf, o, 'Gerund', from); - } - }); - t.end(); -}); diff --git a/test/unit/subset/verb/contractions.test.js b/test/unit/subset/verb/contractions.test.js deleted file mode 100644 index fc0c3ba57..000000000 --- a/test/unit/subset/verb/contractions.test.js +++ /dev/null @@ -1,19 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('conjugate-contractions:', function(t) { - [ - [`i'm good`, 'i was good'], - [`they're good`, 'they were good'], - //TODO: missing auxillary - // [`we've said`, 'we said'], //or 'we have said' - // [`they'd said`, 'they said'], //or 'they have said' - // (ambiguous) - // [`he's good`, 'he was good'], - ].forEach(function(a) { - var doc = nlp(a[0]); - doc.verbs().toPastTense(); - t.equal(doc.out(), a[1], a[1]); - }); - t.end(); -}); diff --git a/test/unit/subset/verb/modal.test.js b/test/unit/subset/verb/modal.test.js deleted file mode 100644 index 401d9adf9..000000000 --- a/test/unit/subset/verb/modal.test.js +++ /dev/null @@ -1,33 +0,0 @@ -var test = require('tape'); -var nlp = 
require('../../lib/nlp'); - -//ignore some modals during conjugation, i guess -test('ignore-would-behaviour', function(t) { - var str = nlp('he would walk').sentences().toPastTense().out(); - t.equal(str, 'he walked', 'would-past'); - - str = nlp('he would walk').sentences().toFutureTense().out(); - t.equal(str, 'he will walk', 'would-future'); - - str = nlp('he would walk').sentences().toPresentTense().out(); - t.equal(str, 'he walks', 'would-present'); - - str = nlp('he would walk').sentences().toContinuous().out(); - t.equal(str, 'he is walking', 'would-continuous'); - - t.end(); -}); - - -test('ignore-would-behaviour', function(t) { - var doc = nlp(`best look after`).verbs(); - var out = doc.conjugation(); - t.equal(doc.length, 1, 'one-verb'); - t.ok(out, 'no-error'); - t.end(); -}); - - -//can/could -//might -//should diff --git a/test/unit/subset/verb/parts.test.js b/test/unit/subset/verb/parts.test.js deleted file mode 100644 index e8b499f1d..000000000 --- a/test/unit/subset/verb/parts.test.js +++ /dev/null @@ -1,91 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('verb-parts:', function(t) { - var tests = [ - ['john is walking', '', 'is', ''], - ['john was walking', '', 'was', ''], - ['john will be walking', '', 'will be', ''], - ['john has been walking', '', 'has been', ''], - ['john had been walking', '', 'had been', ''], - ['john would have had been walking', '', 'would have had been', ''], - //negatives - ['john is not walking', 'not', 'is', ''], - ['john was not walking', 'not', 'was', ''], - ['john will not be walking', 'not', 'will be', ''], - ['john will be not walking', 'not', 'will be', ''], - ['john has not been walking', 'not', 'has been', ''], - ['john has been not walking', 'not', 'has been', ''], - ['john had not been walking', 'not', 'had been', ''], - ['john had been not walking', 'not', 'had been', ''], - ['john would be walking', '', 'would be', ''], - ['john would not be walking', 'not', 'would be', ''], 
- ['john would be not walking', 'not', 'would be', ''], - ['john would not have had been walking', 'not', 'would have had been', ''], - ['john would have not had been walking', 'not', 'would have had been', ''], - ['john would have had not been walking', 'not', 'would have had been', ''], - ['john would have had been not walking', 'not', 'would have had been', ''], - //adverbs + negatives combinations - ['john is really walking', '', 'is', 'really'], - ['john really is walking', '', 'is', 'really'], - ['john is walking really', '', 'is', 'really'], - ['john is not really walking', 'not', 'is', 'really'], - ['john is really not walking', 'not', 'is', 'really'], - ['john really is not walking', 'not', 'is', 'really'], - ['john is not walking really', 'not', 'is', 'really'], - ['john has really been not walking', 'not', 'has been', 'really'], - ['john has been really not walking', 'not', 'has been', 'really'], - ['john has been not really walking', 'not', 'has been', 'really'], - ['john has been not walking really', 'not', 'has been', 'really'], - ['john would really not have had been walking', 'not', 'would have had been', 'really'], - ['john would not really have had been walking', 'not', 'would have had been', 'really'], - ['john would not have really had been walking', 'not', 'would have had been', 'really'], - ['john would not have had really been walking', 'not', 'would have had been', 'really'], - ['john would not have had been really walking', 'not', 'would have had been', 'really'], - ['john would not have had been walking really', 'not', 'would have had been', 'really'] - ]; - tests.forEach(function(a) { - var arr = nlp(a[0]).verbs().data(); - t.equal(arr.length, 1, '#verbs - ' + arr.length); - t.equal(arr[0].parts.negative, a[1], "neg-test - '" + a[0] + "'"); - t.equal(arr[0].parts.auxiliary, a[2], "aux-test - '" + a[0] + "'"); - t.equal(arr[0].parts.verb, 'walking', "verb-test - '" + a[0] + "'"); - t.equal(arr[0].parts.adverbs, a[3], "adverb-test - '" + 
a[0] + "'"); - }); - t.end(); -}); - -//dont take it too-far -test('verb-greedy:', function(t) { - var arr = nlp('he would be, had he survived').verbs().data(); - t.equal(arr.length, 3, 'split-on-clause'); - - arr = nlp('we walked, talked, and sang').verbs().data(); - t.equal(arr.length, 3, 'split-on-list'); - - arr = nlp('we walked, talked, and quickly sang').verbs().data(); - t.equal(arr.length, 3, 'split-on-list2'); - - arr = nlp('we suddenly walked, talked, and abruptly sang').verbs().data(); - t.equal(arr.length, 3, 'split-on-list3'); - - arr = nlp('we really').verbs().data(); - t.equal(arr.length, 0, 'adverb-isnt-a-verb'); - - arr = nlp('we really really').verbs().data(); - t.equal(arr.length, 0, 'two-adverbs-isnt-a-verb'); - - arr = nlp('not good').verbs().data(); - t.equal(arr.length, 0, 'not-isnt-a-verb'); - - var str = nlp('we must not').verbs().out('normal'); - t.equal(str, 'must not', 'verb-not'); - - str = nlp('we must really').verbs().out('normal'); - t.equal(str, 'must really', 'verb-adverb'); - - str = nlp('we must really not').verbs().out('normal'); - t.equal(str, 'must really not', 'verb-adverb-not'); - - t.end(); -}); diff --git a/test/unit/subset/verb/phrasal.test.js b/test/unit/subset/verb/phrasal.test.js deleted file mode 100644 index 57e83edad..000000000 --- a/test/unit/subset/verb/phrasal.test.js +++ /dev/null @@ -1,22 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('phrasal-verbs:', function(t) { - [ - [`he is really good`, ['he', 'is', 'really', 'good']], - [`he is upset about it`, ['he', 'is', 'upset', 'about', 'it']], - [`he will mess about with it`, ['he', 'will', 'mess about', 'with', 'it']], - - [`come forward`, ['come forward']], - [`come together`, ['come together']], - [`come apart`, ['come apart']], - - [`frighten back`, ['frighten', 'back']], - [`frighten away`, ['frighten away']] - ].forEach(function(a) { - var terms = nlp(a[0]).out('array'); - var msg = terms.join(' ') + ' -- ' + a[1].join(' 
'); - t.equal(terms.join(' '), a[1].join(' '), msg); - }); - t.end(); -}); diff --git a/test/unit/subset/verb/plural.test.js b/test/unit/subset/verb/plural.test.js deleted file mode 100644 index ac819cad7..000000000 --- a/test/unit/subset/verb/plural.test.js +++ /dev/null @@ -1,25 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('plural-verbs:', function(t) { - var r = nlp('i look at'); - var len = r.verbs().isPlural().length; - t.equal(len, 0, 'i singular'); - - r = nlp('we look at it. They report on it'); - len = r.verbs().isPlural().length; - t.equal(len, 2, 'they plural'); - - r = nlp('lkjsdf are cool'); - var str = r.verbs().isPlural().out('normal'); - t.equal(str, 'are', 'are plural'); - - r = nlp('lkjsdf does eat bugs'); - str = r.verbs().isPlural().out('normal'); - t.equal(str, 'does eat', 'does plural'); - - r = nlp('lkjsdf is cool'); - str = r.verbs().isPlural().out('normal'); - t.equal(str, '', 'is singular'); - t.end(); -}); diff --git a/test/unit/subset/verb/toAdjective.test.js b/test/unit/subset/verb/toAdjective.test.js deleted file mode 100644 index 8d292df3c..000000000 --- a/test/unit/subset/verb/toAdjective.test.js +++ /dev/null @@ -1,16 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('verb-to-adjective:', function(t) { - [ - ['walk', 'walkable'], - ['sing', 'singable'], - ['win', 'winnable'], - ['convert', 'convertible'], - ['see', 'visible'] - ].forEach(function(a) { - var str = nlp(a[0]).verbs().asAdjective()[0]; - t.equal(str, a[1], str + ' -> ' + a[1]); - }); - t.end(); -}); diff --git a/test/unit/subset/verb/toGerund.test.js b/test/unit/subset/verb/toGerund.test.js deleted file mode 100644 index 972e6d075..000000000 --- a/test/unit/subset/verb/toGerund.test.js +++ /dev/null @@ -1,19 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('verb-to-gerund:', function(t) { - [ - ['walk', 'is walking'], - ['sing', 'is singing'], - ['win', 'is winning'], - 
['will convert', 'is converting'], - ['see', 'is seeing'], - ['is', 'is being'], - ['was', 'is being'], - ['am', 'is being'] - ].forEach(function(a) { - var str = nlp(a[0]).verbs().toGerund().out('normal'); - t.equal(str, a[1], str + ' -> ' + a[1]); - }); - t.end(); -}); diff --git a/test/unit/subset/verb/toNegative.test.js b/test/unit/subset/verb/toNegative.test.js deleted file mode 100644 index b18b963e7..000000000 --- a/test/unit/subset/verb/toNegative.test.js +++ /dev/null @@ -1,46 +0,0 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); - -test('verb-to-negative:', function(t) { - [ - [`he is nice`, 'is not'], - [`she was nice`, 'was not'], - - [`she has walked`, 'has not walked'], - [`she had walked`, 'had not walked'], - [`we have had problems`, 'have not had'], - [`we would walk`, 'would not walk'], - [`we would have walked`, 'would not have walked'], - - //conjugations - [`she walked`, 'did not walk'], - [`it all came apart`, 'all did not come apart'], - - //phrasals - [`he would come forward`, 'would not come forward'], - [`we come together`, 'do not come together'], - [`he was frightened`, 'was not'], - [`i didn't want to`, 'didn\'t want'], - - //===singular - // pastTense - - ['john played', 'did not play'], - // presentTense - - ['john plays', 'does not play'], - // futureTense - - ['john will play', 'will not play'], - - ///===plural - // pastTense - - ['we played', 'did not play'], - // presentTense - - ['we play', 'do not play'], - // futureTense - - ['we will play', 'will not play'] - ].forEach(function(a) { - var vb = nlp(a[0]).verbs().toNegative(); - var str = vb.out('normal'); - t.equal(str, a[1], '\'' + str + '\' - - want: ' + a[1]); - }); - t.end(); -}); diff --git a/test/unit/tagger/emoji.test.js b/test/unit/tagger/emoji.test.js deleted file mode 100644 index b37c514f0..000000000 --- a/test/unit/tagger/emoji.test.js +++ /dev/null @@ -1,46 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('keyword 
emojis', function(t) { - [ - ['he is so nice :heart:', ':heart:'], - [':cool: :wine_glass: yeah party', ':cool: :wine_glass:'], - ['to be or not to be: this is a question :cookie:', ':cookie:'] - ].forEach(function(a) { - var have = nlp(a[0]).match('#Emoji').out('normal'); - var msg = "have: '" + have + "' want: '" + a[1] + "'"; - t.equal(have, a[1], msg); - }); - t.end(); -}); - -test('unicode emojis', function(t) { - [ - ['nice job 💯 ❤️', '💯 ❤️'], - ['💚 good job 🎇', '💚 🎇'], - ['visit Brunei', ''], - ['visit Brunei 🇧🇳', '🇧🇳'], - ['visit Brunei 🇧🇳🇧🇳🇧🇳', '🇧🇳🇧🇳🇧🇳'] - ].forEach(function(a) { - var have = nlp(a[0]).match('#Emoji').out('normal'); - var msg = "have: '" + have + "' want: '" + a[1] + "'"; - t.equal(have, a[1], msg); - }); - t.end(); -}); - -test('emoticon emojis', function(t) { - [ - ['nice job :)', ':)'], - [';) good work', ';)'], - [';( oh no :(', ';( :('], - ['to be: that is th3 question', ''], - ['3 3 3 sad', '3 3 3'] - // ['33', '33'], - ].forEach(function(a) { - var have = nlp(a[0]).match('#Emoji').out('normal'); - var msg = "have: '" + have + "' want: '" + a[1] + "'"; - t.equal(have, a[1], msg); - }); - t.end(); -}); diff --git a/test/unit/tagger/inline.test.js b/test/unit/tagger/inline.test.js deleted file mode 100644 index 792ea6673..000000000 --- a/test/unit/tagger/inline.test.js +++ /dev/null @@ -1,29 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('inline tagging linear:', function(t) { - var r = nlp('one two three four'); - - r.match('one two three').tag('. 
#Person .'); - var found = r.match('#Person').out('normal'); - t.equal(found, 'two', 'skip-tag-skip'); - - r.match('one two three').tag('#FooBar .'); - found = r.match('#FooBar').out('normal'); - t.equal(found, 'one', 'tag-skip-null'); - - r.match('two three').tag('#Two #Three #Four'); - t.equal(r.match('#Two').out('normal'), 'two', 'two-is-two'); - t.equal(r.match('#Three').out('normal'), 'three', 'three-is-three'); - t.equal(r.match('#Four').out('normal'), '', 'four is ignored'); - - t.end(); -}); - -test('compound tags from lexicon:', function(t) { - var doc = nlp('it was cold'); - var arr = doc.verbs().conjugation(); - t.equal(arr.length, 1, 'one verb'); - t.equal(arr[0], 'Past', 'past-tense'); - t.end(); -}); diff --git a/test/unit/tagger/lexicon.test.js b/test/unit/tagger/lexicon.test.js deleted file mode 100644 index 2f55bc9db..000000000 --- a/test/unit/tagger/lexicon.test.js +++ /dev/null @@ -1,103 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var pos_test = require('../lib/fns').pos_test; - -test('default lexicon:', function(t) { - [ - ['great', 'Adjective'], - ['walked', 'PastTense'], - ['singing', 'Gerund'], - ['funniest', 'Superlative'], - ['sillier', 'Comparative'], - ['the', 'Determiner'], - ['iraqi', 'Demonym'], - ['december', 'Date'], - ['fifth', 'Value'], - ['suddenly', 'Adverb'], - ['shanghai', 'City'], - ['google', 'Organization'] - ].forEach(function(a) { - var r = nlp(a[0]); - pos_test(r, [a[1]], t); - }); - t.end(); -}); - -test('root-in-lexicon:', function(t) { - [ - ['wash', 'Infinitive'], - ['rewash', 'Infinitive'], - ['re-wash', 'Infinitive'], - ['re-washed', 'PastTense'], - ['rewashed', 'PastTense'], - ['rewashes', 'PresentTense'], - ['rewashing', 'Gerund'], - - ['repurchase', 'Infinitive'], - ['re-purchase', 'Infinitive'], - ['unpurchase', 'Infinitive'], - ['purchased', 'PastTense'], - ['unpurchasing', 'Gerund'], - ['unpurchases', 'PresentTense'], - ['resolve', 'Infinitive'], - ['restructure', 'Infinitive'], - 
['reconcile', 'Infinitive'], - ['repeat', 'Infinitive'], - ].forEach(function(a) { - var r = nlp(a[0]); - pos_test(r, [a[1]], t); - }); - t.end(); -}); - -test('adjusted lexicon:', function(t) { - //place new words - var lexicon = { - paris: 'Person', - lkjj: 'Adjective', - 'donkey kong': 'City' - }; - - var arr = [ - ['paris is nice', ['Person', 'Copula', 'Adjective']], - ['he is lkjj', ['Pronoun', 'Copula', 'Adjective']], - ['donkey kong wins the award', ['City', 'City', 'Verb', 'Determiner', 'Noun']] - ]; - arr.forEach(function(a) { - var r = nlp(a[0], lexicon); - pos_test(r, a[1], t); - }); - // - //set gender from lexicon - var terms = nlp('Kelly', lexicon); - pos_test(terms, ['FemaleName'], t); - //set as male: - lexicon = { - kelly: 'MaleName' - }; - terms = nlp('Kelly', lexicon); - pos_test(terms, ['MaleName'], t); - //gender follows lumping - terms = nlp('Kelly Gruber', lexicon); - pos_test(terms, ['MaleName', 'LastName'], t); - - t.end(); -}); - -test('tricky lexicon:', function(t) { - var lexicon = { - 'bed bath and beyond': 'Organization' - }; - var r = nlp('shopping at Bed Bath and Beyond, the store', lexicon); - var str = r.organizations().out('normal'); - t.equal(str, 'bed bath and beyond', 'four-word'); - - r = nlp('shopping at Bed, Bath, and-beyond the store', lexicon); - str = r.organizations().out('normal'); - t.equal(str, 'bed bath and beyond', 'partially-hyphenated-word'); - - r = nlp('shopping at Bed-bath and-beyond the store', lexicon); - str = r.organizations().out('normal'); - t.equal(str, 'bed bath and beyond', 'many-hyphenated-word'); - t.end(); -}); diff --git a/test/unit/tagger/organization.test.js b/test/unit/tagger/organization.test.js deleted file mode 100644 index 8d76f5b16..000000000 --- a/test/unit/tagger/organization.test.js +++ /dev/null @@ -1,25 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('organization test', function(t) { - var arr = [ - 'google', - 'google inc', - 'Capital One', - 'HSBC', - 
'NASA', - '7-eleven', - 'al qaeda', - 'FBI', - 'monsanto', - 'Johnson & Johnson' - // 'Johnson & Johnson LLC', - ]; - arr.forEach(function(str) { - var r = nlp(str); - var orgs = r.organizations(); - var msg = orgs.out('text') + ' - ' + str; - t.equal(orgs.out('text'), str, msg); - }); - t.end(); -}); diff --git a/test/unit/tagger/penn.test.js b/test/unit/tagger/penn.test.js deleted file mode 100644 index d6c01bfbf..000000000 --- a/test/unit/tagger/penn.test.js +++ /dev/null @@ -1,59 +0,0 @@ -var test = require('tape') -var nlp = require('../lib/nlp') -var penn = require('../lib/pennSample') - -var softMapping = { - CC: 'Conjunction', - CD: 'Cardinal', - DT: 'Determiner', - FW: 'Expression', - IN: 'Preposition', - JJ: 'Adjective', - JJR: 'Comparative', - JJS: 'Superlative', - MD: 'Verb', - NN: 'Noun', - NNS: 'Noun', - NNP: 'Noun', - NNPS: 'Noun', - POS: 'Possessive', - PRP: 'Pronoun', - PRP$: 'Pronoun', - RB: 'Adverb', - RBR: 'Comparative', - RBS: 'Superlative', - TO: 'Conjunction', - UH: 'Expression', - VB: 'Verb', - VBD: 'Verb', - VBG: 'Verb', - VBN: 'Verb', // past participle - VBP: 'Verb', // non-3rd person singular present - VBZ: 'Verb', // 3rd person singular present - WDT: 'Determiner', - WP: 'Pronoun', - WP$: 'Noun', - WRB: 'Adverb' -} - -test('pennTreebank-test:', function(t) { - penn.forEach((o, index) => { - var terms = nlp(o.text).terms() - o.pos = o.pos.split(', ') - t.equal(terms.length, o.pos.length, 'tokenize#' + index) - - var equal = true - var msg = '' - for (var i = 0; i < o.pos.length; i++) { - var want = softMapping[o.pos[i]] - var term = terms.list[i].terms[0] - if (!term.tags[want]) { - equal = false - msg += " - '" + term.normal + "' " + want - break - } - } - t.ok(equal, msg + ' - "' + o.text + '"') - }) - t.end() -}) diff --git a/test/unit/tagger/people.test.js b/test/unit/tagger/people.test.js deleted file mode 100644 index 0faf6d442..000000000 --- a/test/unit/tagger/people.test.js +++ /dev/null @@ -1,34 +0,0 @@ -var test = 
require('tape'); -var nlp = require('../lib/nlp'); - -test('people:', function(t) { - var doc = nlp('Mary is in the boat. Nancy is in the boat. Fred is in the boat. Jack is too.'); - var arr = doc.people().out('array'); - t.deepEqual(arr, ['mary', 'nancy', 'fred', 'jack'], 'people-easy'); - - doc = nlp('jean jacket. jean Slkje'); - arr = doc.people().out('array'); - t.deepEqual(arr, ['jean slkje'], 'people-context'); - - doc = nlp('The Bill was passed by James MacCarthur'); - arr = doc.people().out('array'); - t.deepEqual(arr, ['james maccarthur'], 'the-bill'); - - doc = nlp('Rod MacDonald bought a Rod'); - arr = doc.people().out('array'); - t.deepEqual(arr, ['rod macdonald',], 'the-rod-1'); - - doc = nlp('Rod L. MacDonald bought a lightening rod'); - arr = doc.people().out('array'); - t.deepEqual(arr, ['rod l macdonald',], 'the-rod-2'); - - doc = nlp('Matt \'the doctor\' Smith lasted three seasons.'); - arr = doc.people().out('array'); - t.deepEqual(arr, ['matt the doctor smith',], 'nickname-1'); - - doc = nlp('Randal Kieth Orton and Dwayne \'the rock\' Johnson had a really funny fight.'); - arr = doc.people().out('array'); - t.deepEqual(arr, ['randal kieth orton','dwayne the rock johnson',], 'nickname-2'); - - t.end(); -}); diff --git a/test/unit/tagger/single-word.test.js b/test/unit/tagger/single-word.test.js deleted file mode 100644 index 4c9af53d2..000000000 --- a/test/unit/tagger/single-word.test.js +++ /dev/null @@ -1,74 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('specific_noun :', function(t) { - [ - // ['five hundred feet', 'Value'], - // ['50 square feet', 'Value'], - // ['90 hertz', 'Value'], - // ['two books', 'Value'], - ['two hundred', 'Value'], - ['4 hundred and ten', 'Value'], - ['4 and a half million', 'Value'], - ['499 thousand', 'Value'], - ['499', 'Value'], - ['4,899', 'Value'], - - ['John Smith', 'Person'], - ['dr. 
John Smith', 'Person'], - ['John Smith jr.', 'Person'], - ['John Jacob Smith', 'Person'], - ['Jani K. Smith', 'Person'], - - ['asdfefs', 'Noun'], - ['octopus', 'Noun'], - ['tree', 'Noun'], - // ['i', 'Noun'], - - ['FBI', 'Organization'], - ['F.B.I.', 'Organization'], - ['Fun ltd.', 'Organization'], - ['Fun co', 'Organization'], - ['Smith & Rogers', 'Organization'], - ['Google', 'Organization'], - - ['tuesday', 'Date'], - ['february', 'Date'], - ['february fifth', 'Date'], - ['tuesday march 5th', 'Date'], - ['tuesday march 5th, 2015', 'Date'] - ].forEach(function(a) { - var r = nlp(a[0]); - - if (a[1] === 'Person') { - t.equal(r.people().out(), a[0], a[0] + ' - is_person'); - } else { - t.equal(r.people().out(), '', a[0] + ' - not-a-person'); - } - - if (a[1] === 'Place') { - t.equal(r.places().out(), a[0], a[0] + ' - is_place'); - } else { - t.equal(r.places().out(), '', a[0] + ' - not-a-place'); - } - - if (a[1] === 'Value') { - t.equal(r.values().noDates().out(), a[0], a[0] + ' - is_value'); - } else { - t.equal(r.values().noDates().out(), '', a[0] + ' - not-a-value'); - } - - if (a[1] === 'Date') { - t.equal(r.dates().out(), a[0], a[0] + ' - is_date'); - } else { - t.equal(r.dates().out(), '', a[0] + ' - not-a-date'); - } - - if (a[1] === 'Organization') { - t.equal(r.organizations().out(), a[0], a[0] + ' - is_organization'); - } else { - t.equal(r.organizations().out(), '', a[0] + ' - not-a-org'); - } - }); - t.end(); -}); diff --git a/test/unit/tagger/swears.test.js b/test/unit/tagger/swears.test.js deleted file mode 100644 index 0eb72992f..000000000 --- a/test/unit/tagger/swears.test.js +++ /dev/null @@ -1,28 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -//nsfw! 
- -test('swears:', function(t) { - var m = nlp('shit, i am tired').match('^#Expression'); - t.ok(m.found, 'swear-1'); - - m = nlp('the shit keeps piling up').match('the #Noun'); - t.ok(m.found, 'swear-2'); - - m = nlp('damn them all').match('^#Verb'); - t.ok(m.found, 'swear-3'); - - m = nlp('fuck the government').match('^#Verb'); - t.ok(m.found, 'swear-4'); - - // m = nlp('when hell freezes over').match('^when #Noun'); - // t.ok(m.found, 'swear-5'); - - // m = nlp('he fucked up').match('he #Verb #Particle'); - // t.ok(m.found, 'swear-6'); - - m = nlp('it is fucked up').match('is #Adjective #Adjective'); - t.ok(m.found, 'swear-7'); - - t.end(); -}); diff --git a/test/unit/tagger/tagCustom.test.js b/test/unit/tagger/tagCustom.test.js deleted file mode 100644 index 82fdcb5b8..000000000 --- a/test/unit/tagger/tagCustom.test.js +++ /dev/null @@ -1,10 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('tag-multiples:', function(t) { - var r = nlp('twas brillig in the doofgafoof.'); - r.match('brillig').tag(['Foo', 'Barr']); - t.ok(r.match('#Foo').found, 'tagged-foo'); - t.ok(r.match('#barr').found, 'tagged-barr'); - t.end(); -}); diff --git a/test/unit/tagger/tagWord.test.js b/test/unit/tagger/tagWord.test.js deleted file mode 100644 index 59dbbabdd..000000000 --- a/test/unit/tagger/tagWord.test.js +++ /dev/null @@ -1,71 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -//test a word from each file in ./data/** -test('pos from-lexicon', function(t) { - var arr = [ - ['toronto', 'City'], - ['mexico', 'Country'], - ['Jamaica', 'Country'], - ['legendary', 'Adjective'], - ['above', 'Adjective'], - ['moderate', 'Adjective'], - ['extreme', 'Adjective'], - ['august', 'Month'], - ['saturday', 'WeekDay'], - ['minute', 'Duration'], - ['valentines day', 'Holiday'], - ['ash wednesday', 'Holiday'], - ['really', 'Adverb'], - ['each', 'Determiner'], - ['voila', 'Expression'], - ['new england', 'Place'], - ['hers', 'Possessive'], - 
['onto', 'Preposition'], - ['blvd', 'Place'], - ['belgian', 'Demonym'], - ['cactus', 'Singular'], - ['cacti', 'Plural'], - ['economy', 'Noun'], - ['engineer', 'Noun'], - ['clothing', 'Noun'], - ['duran duran', 'Organization'], - ['american express', 'Organization'], - ['brotherhood', 'Noun'], - ['oakland athletics', 'SportsTeam'], - ['jamie', 'Person'], - ['claire', 'FemaleName'], - ['arthur', 'MaleName'], - ['¥', 'Currency'], - ['pence', 'Currency'], - ['seven', 'Value'], - ['seventeen', 'Value'], - ['twenty', 'Value'], - ['thousand', 'Value'], - ['eighteenth', 'Value'], - ['tbsp', 'Unit'], - ['wrote', 'PastTense'], - ['write', 'Verb'], - ['survive', 'Verb'], - ['attempt', 'Verb'], - ['mc\'adams', 'LastName'], - ['Müller', 'LastName'], - ['muller', 'LastName'], - ['425-1231', 'PhoneNumber'], - ['823-425-1231', 'PhoneNumber'], - ['823 425-1231', 'PhoneNumber'], - ['(823) 425-1231', 'PhoneNumber'], - ['invest', 'Verb'], - ['investing', 'Verb'], - [`wallys'`, 'Possessive'], - [`JDI University'`, 'Organization'], - ['ocean', 'Noun'], - ['shiver', 'Verb'] - ]; - arr.forEach(function(a) { - var term = nlp(a[0]).list[0].terms[0]; - var msg = '\'' + term.normal + '\' has - ' + a[1] + ' (' + Object.keys(term.tags).join(',') + ')'; - t.equal(term.tags[a[1]], true, msg); - }); - t.end(); -}); diff --git a/test/unit/tagger/tagger.test.js b/test/unit/tagger/tagger.test.js deleted file mode 100644 index 156bbdbc7..000000000 --- a/test/unit/tagger/tagger.test.js +++ /dev/null @@ -1,117 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); -var pos_test = require('../lib/fns').pos_test; - -test('=Tagger=', function(T) { - T.test('pos-basic-tag:', function(t) { - [ - ['John is pretty', ['Person', 'Copula', 'Adjective']], - ['John was lofty', ['Person', 'Copula', 'Adjective']], - ['John Smith was lofty', ['FirstName', 'LastName', 'Copula', 'Adjective']], - ['asdfes was lofty', ['Noun', 'Copula', 'Adjective']], - ['asdfes lksejfj was lofty', ['Noun', 'Noun', 
'Copula', 'Adjective']], - ['Spencer Kelly is in Canada', ['Person', 'Person', 'Copula', 'Preposition', 'Place']], - ['He is in Canada', ['Pronoun', 'Copula', 'Preposition', 'Place']], - ['5 red roses', ['Value', 'Adjective', 'Noun']], - // ['3 trains', ['Value', 'Noun']], - ['3 trainers', ['Value', 'Noun']], - ['5 buses', ['Value', 'Noun']], - ['101010101010101010101010101010101010101010', ['NumericValue']], - - ['walk the walk', ['Verb', 'Determiner', 'Noun']], - ['Peter the man', ['Person', 'Determiner', 'Noun']], - // ['book the flight', ['Verb', 'Determiner', 'Noun']], - - //slang, contractions - // ['u r nice', ['Pronoun', 'Copula', 'Adjective']], - ['canadian bacon', ['Demonym', 'Noun']], - ['canadian dollar', ['Currency', 'Currency']], - - //possessive rules - ['john lkjsdf\'s', ['Person', 'Possessive']], - ['john lkjsdf\'s house', ['Person', 'Possessive', 'Noun']], - ['john Lkjsdf\'s house', ['Person', 'Possessive', 'Noun']], - ['john Lkjsdf\'s House', ['Person', 'Possessive', 'Noun']], - ['mark\'s question mark', ['Possessive', 'Noun', 'Noun']], - - //question-words - ['who is good?', ['QuestionWord', 'Copula', 'Adjective']], - ['which is good?', ['QuestionWord', 'Copula', 'Adjective']], - // ['bacon which is good', ['Noun', 'Pronoun', 'Copula', 'Adjective']], - // ['bacon which really is good', ['Noun', 'Pronoun', 'Adverb', 'Copula', 'Adjective']], - // ['Douglas who really is good', ['Person', 'Pronoun', 'Adverb', 'Copula', 'Adjective']], - - //web text things - ['lkj@fun.com', ['Email']], - ['j@f.ti', ['Email']], - ['j@ti', ['Noun']], - ['@ti', ['AtMention']], - ['#funtimes', ['HashTag']], - ['http://fun.com/cool?fun=yes', ['Url']], - ['#cool fun.com @cooman', ['HashTag', 'Url', 'AtMention']], - - //determiner-corrections - ['this rocks dude', ['Determiner', 'Verb', 'Noun']], - ['that rocks dude', ['Determiner', 'Verb', 'Noun']], - ['the rocks dude', ['Determiner', 'Plural', 'Noun']], - ['these rocks dude', ['Determiner', 'Plural', 'Noun']], - ['those 
rocks dude', ['Determiner', 'Plural', 'Noun']], - ['the test string', ['Determiner', 'Noun', 'Noun']], - - //people - ['John swim', ['Person', 'Verb']], - ['John, John', ['Person', 'Person']], - ['John, you', ['FirstName', 'Pronoun']], - ['John you', ['MaleName', 'Pronoun']], - ['you John you', ['Pronoun', 'Person', 'Pronoun']], - // ['10 + 9', ['Value', 'Symbol', 'Value']], - // ['2 * 90 = 180', ['Value', 'Symbol', 'Value', 'Symbol', 'Value']], - // ['one - seventy-six', ['Value', 'Symbol', 'Value']], - ['The stream runs', ['Determiner', 'Noun', 'Verb']], - ['The stream really runs', ['Determiner', 'Noun', 'Adverb', 'Verb']], - ['The nice stream really runs', ['Determiner', 'Adjective', 'Noun', 'Adverb', 'Verb']], - - ['he is walking', ['Pronoun', 'Copula', 'Gerund']], - ['walking is fun', ['Activity', 'Copula', 'Adjective']], - ['walking\'s great', ['Activity', 'Copula', 'Adjective']], - ['jack cheered', ['Person', 'PastTense']], - ['jack guarded', ['Person', 'PastTense']], - ['jack is guarded', ['Person', 'Copula', 'Adjective']], - ['jack seems guarded', ['Person', 'Verb', 'Adjective']], - //more - ['there are reasons', ['Noun', 'Copula', 'Plural']], - ['there were many walks', ['Noun', 'Copula', 'Adjective', 'Plural']], - ['there were the walks', ['Noun', 'Copula', 'Determiner', 'Noun']], - - ['it was fixed', ['Noun', 'Copula', 'PastTense']], - ['it will be boxed', ['Noun', 'Verb', 'Verb', 'PastTense']], - //ambiguous adverbs - ['it was pretty', ['Noun', 'Copula', 'Adjective']], - ['it was pretty cool', ['Noun', 'Copula', 'Adverb', 'Adjective']], - // ['it was really pretty cool', ['Noun', 'Copula', 'Adverb', 'Adverb', 'Adjective']], - ['it was just', ['Noun', 'Copula', 'Adjective']], - ['it was just gorgeous', ['Noun', 'Copula', 'Adverb', 'Adjective']], - - ['N.V.,', ['Noun']], - ['16.125', ['Cardinal']], - ['$19', ['Money']], - ['butterfly', ['Singular']], - ['he blamed the girl', ['Pronoun', 'PastTense', 'Determiner', 'Singular']], - ['his fine', 
['Possessive', 'Noun']], - ['contracted AIDS', ['PastTense', 'Acronym']], - ['city/town', ['Noun', 'Noun']], - ['boyfriend to Jane', ['Noun', 'Conjunction', 'Person']], - // ['boyfriend of Jane', ['Noun', 'Conjunction', 'Person']], - ['his fines', ['Possessive', 'Noun']], - ['100+ rumours', ['Value', 'Plural']], - ['John & John,', ['Noun', 'Noun', 'Noun']], - - //abbreviations - ['col. Patrick said march and feb. etc.', ['Abbreviation', 'Person', 'PastTense', 'Month', 'Conjunction', 'Abbreviation', 'Abbreviation']] - ].forEach(function(a) { - var terms = nlp(a[0]).terms(); //.data(); - pos_test(terms, a[1], t); - }); - t.end(); - }); -}); diff --git a/test/unit/tagger/topics.test.js b/test/unit/tagger/topics.test.js deleted file mode 100644 index 1a5baefac..000000000 --- a/test/unit/tagger/topics.test.js +++ /dev/null @@ -1,26 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('proper-nouns', function(t) { - var arr = [ - ['I met John Smith in Toronto.', ['john smith', 'toronto']], - ['Toronto and Vancouver Canada.', ['toronto', 'vancouver canada']], - ['we ate shellfish at 23 Main st.', []], - ['google is suing motorola inc.', ['google', 'motorola inc']], - ['the doctor and his brother see the mayor of france', ['france']], - ]; - arr.forEach((a) => { - var out = nlp(a[0]).match('#ProperNoun+').out('array'); - t.deepEqual(out, a[1], a[0]); - }); - t.end(); -}); - -//after we change pos, untag propernoun -test('remove-proper-nouns', function(t) { - var doc = nlp('do what Theresa May'); - t.equal(doc.match('may').has('#ProperNoun'), true, 'propernoun-init'); - doc.match('may').tag('Verb'); - t.equal(doc.match('may').has('#ProperNoun'), false, 'propernoun-missing'); - t.end(); -}); diff --git a/test/unit/tagger/untag.test.js b/test/unit/tagger/untag.test.js deleted file mode 100644 index 3a3586eae..000000000 --- a/test/unit/tagger/untag.test.js +++ /dev/null @@ -1,68 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - 
-test('tag inference:', function(t) { - var m = nlp('aasdf2').unTag('Noun').unTag('NounPhrase'); - var term = m.list[0].terms[0]; - t.equal(Object.keys(term.tags).length, 0, 'aasdf2 has no tags'); - //give it a specific tag- - m.tag('SportsTeam'); - term = m.list[0].terms[0]; - t.equal(term.tags.Noun, true, 'aasdf2 now has Noun'); - t.equal(term.tags.Organization, true, 'aasdf2 now has Organization(inferred)'); - //give it a redundant tag- - m.tag('Organization'); - term = m.list[0].terms[0]; - t.equal(term.tags.Noun, true, 'aasdf2 still has Noun'); - t.equal(term.tags.Organization, true, 'aasdf2 still has Organization'); - t.end(); -}); - -test('untag inference:', function(t) { - var m = nlp('aasdf'); - m.tag('FemaleName'); - var term = m.list[0].terms[0]; - t.equal(term.tags.FemaleName, true, 'aasdf first has FemaleName'); - t.equal(term.tags.Person, true, 'aasdf first has person'); - t.equal(term.tags.Noun, true, 'aasdf first has noun'); - //remove the assumption.. - term.unTag('Noun'); - t.equal(term.tags.Noun, undefined, 'aasdf now has no noun'); - t.equal(term.tags.Person, undefined, 'aasdf now has no person(inferred)'); - t.equal(term.tags.FemaleName, undefined, 'aasdf now has no FemaleName(inferred)'); - t.end(); -}); - -test('tag idempodence:', function(t) { - var m = nlp('walk').tag('Verb'); - var term = m.list[0].terms[0]; - t.equal(term.tags.Verb, true, 'walk has Verb'); - t.equal(term.tags.Value, undefined, 'walk has no Value'); - //untag irrelevant stuff - term.unTag('Value'); - term.unTag('Determiner'); - term.unTag('Country'); - term.unTag('Place'); - t.equal(term.tags.Verb, true, 'walk has Verb after'); - t.equal(term.tags.Value, undefined, 'walk has no Value after'); - t.end(); -}); - -test('tags are self-removing', function(t) { - var terms = ['Person', 'Place', 'PastTense', 'FemaleName', 'Infinitive', 'HashTag', 'Month']; - terms.forEach(function(tag) { - var m = nlp('aasdf').tag(tag).unTag(tag); - var t0 = m.list[0].terms[0]; - 
t.equal(t0.tags[tag], undefined, 'tag removes self ' + tag); - }); - t.end(); -}); - -test('untag wildcard', function(t) { - var r = nlp('we live in Toronto Canada and it is cold'); - r.match('#Place+').unTag('*'); - t.equal(r.match('#Place').found, false, 'place-tag-is-gone'); - var term = r.list[0].terms[3]; - t.equal(Object.keys(term.tags).length, 0, 'toronto-has-no-tags-now'); - t.end(); -}); diff --git a/test/unit/tagger/web.test.js b/test/unit/tagger/web.test.js deleted file mode 100644 index e78910afa..000000000 --- a/test/unit/tagger/web.test.js +++ /dev/null @@ -1,68 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('=Web Terminology=', function(T) { - T.test('is-email:', function(t) { - [ - [`s@s.com`, true], - [`sasdf@sasdf.com`, true], - [`sasdf@sasdf.ti`, true], - [`sasdf@sasdf.t`], - [`sasdf@sasdft`], - [`sasdfsasdft.com`], - [`@sasdft.com`], - [`_@_.com`, true], - [`_@_._`], - [`sas df@sasdf.com`], - [`sasdf@sa sdf.com`] - ].forEach(function(a) { - var term = nlp(a[0]).list[0].terms[0]; - var msg = a[0] + ' is email: ' + a[1]; - t.equal(term.tags['Email'], a[1], msg); - }); - t.end(); - }); - - T.test('is-hashtag:', function(t) { - [ - [`#lkjsdf`, true], - [`#ll`, true], - [`#22ll`, true], - [`#_22ll`, true], - // [`#l`,], - [`# l`], - [`l#l`] - ].forEach(function(a) { - var term = nlp(a[0]).list[0].terms[0]; - var msg = a[0] + ' is hashtag: ' + a[1]; - t.equal(term.tags['HashTag'], a[1], msg); - }); - t.end(); - }); - - T.test('is-url:', function(t) { - [ - [`http://cool.com/fun`, true], - [`https://cool.com`, true], - [`https://cool.com/`, true], - [`https://www.cool.com/`, true], - [`http://subdomain.cool.com/`, true], - [`www.fun.com/`, true], - [`www.fun.com`, true], - [`www.fun.com/foobar/fun`, true], - [`www.subdomain.cool.com/`, true], - [`wwwsubdomain.cool.com/`, true], - [`woo.br`, true], - [`woohoo.biz`, true], - [`woop.org/news`, true], - [`http://woop.org/news?foo=bar`, true], - [`http:subdomain.cool.com/`], 
- [`coolcom`] - ].forEach(function(a) { - var term = nlp(a[0]).list[0].terms[0]; - var msg = a[0] + ' is url: ' + a[1]; - t.equal(term.tags['Url'], a[1], msg); - }); - t.end(); - }); -}); diff --git a/test/unit/term/hyphens.test.js b/test/unit/term/hyphens.test.js deleted file mode 100644 index 4c541ca34..000000000 --- a/test/unit/term/hyphens.test.js +++ /dev/null @@ -1,52 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('hyphen-tokenize', function(t) { - var r = nlp('super-cool work'); - t.equal(r.terms().length, 3, 'super, cool'); - t.equal(r.out('text'), 'super-cool work', 'preserve hyphen'); - t.equal(r.out('normal'), 'super cool work', 'normalize-out hyphen'); - - r = nlp('http://about.com/my-summer-vacation'); - t.equal(r.terms().length, 1, 'url hyphen'); - r = nlp('http://about.com/my-summer'); - t.equal(r.terms().length, 1, 'url hyphen2'); - - r = nlp('421-0059'); - t.equal(r.terms().length, 1, 'phoneNuumber hyphen'); - - r = nlp('sept-2'); - t.equal(r.terms().length, 2, 'date hyphen'); - - r = nlp('-2 degrees'); - t.equal(r.terms().length, 2, 'minus hyphen'); - - r = nlp('re-enactment'); - t.equal(r.out('normal'), 'reenactment', 're-hyphen'); - r = nlp('un-do'); - t.equal(r.out('normal'), 'undo', 'un-hyphen'); - - t.end(); -}); - -test('hyphenate', function(t) { - var str = 'it is cool. he is nice'; - var m = nlp(str); - m.hyphenate(); - t.equal(m.terms().length, 6, 'seperate terms'); - t.equal(m.out('text'), 'it-is-cool. he-is-nice', 'hyphenate'); - m.dehyphenate(); - t.equal(m.out('text'), str, 'dehyphenate'); - - str = 'i payed seven-hundred for the sandwich'; - m = nlp(str); - m.values().dehyphenate(); - t.equal(m.out('text'), 'i payed seven hundred for the sandwich', 'dehyphenate-values'); - - str = 'he is the king of rock. she is the queen of cool.'; - m = nlp(str); - m.match('(king|queen) of (#Noun|#Adjective)').hyphenate(); - t.equal(m.out('text'), 'he is the king-of-rock. 
she is the queen-of-cool.', 'hyphenate-match'); - - t.end(); -}); diff --git a/test/unit/term/term_negate.test.js b/test/unit/term/term_negate.test.js deleted file mode 100644 index 64fb9e16a..000000000 --- a/test/unit/term/term_negate.test.js +++ /dev/null @@ -1,30 +0,0 @@ -var test = require('tape'); -var nlp = require('../lib/nlp'); - -test('verb negate:', function(t) { - [ - ['is', 'is not'], - ['will', 'will not'], - ['will be', 'will not be'], - ['was', 'was not'], - - ['walks', 'does not walk'], - ['walked', 'did not walk'], - // ['walking', 'not walking'], - // ['walk', 'do not walk'], - ['will walk', 'will not walk'], - ['will have walked', 'will not have walked'], - - // ['corrupted', 'did not corrupt'], - ['jumped', 'did not jump'], - ['stunk up', 'did not stink up'], - - [`would study`, `would not study`], - [`could study`, `could not study`], - [`should study`, `should not study`] - ].forEach(function(a) { - var str = nlp(a[0]).verbs().toNegative().out('normal'); - t.equal(str, a[1], a[1] + ' --- ' + str); - }); - t.end(); -}); diff --git a/tests/_backlog/backlog.ignore.js b/tests/_backlog/backlog.ignore.js new file mode 100644 index 000000000..cd74e4ac5 --- /dev/null +++ b/tests/_backlog/backlog.ignore.js @@ -0,0 +1,95 @@ +const test = require('tape') +const nlp = require('../_lib') + +// test('match-from-object :', function(t) { +// const m = nlp('spencer is really cool').match({ +// spencer: true, +// }) +// t.equal(m.out('normal'), 'spencer', 'just-spencer') +// t.equal(m.length, 1, 'one-result') +// t.end() +// }) + +/* +test('replace-capture-group', function(t) { + const m = nlp('John eats glue').replace('john [#Verb]', 'sniffs') + t.equal(m.out('text'), 'John sniffs glue', 'capture-2-simple') + // + // m = nlp('John eats glue. john is fun.').replace('[john]', '$1 smith'); + // t.equal(m.out('text'), 'John smith eats glue. john smith is fun.', 'capture-group-multiple'); + // + // m = nlp('John Smith eats glue').replace('[#Person+]', 'dr. 
$1'); + // t.equal(m.out('text'), 'dr. John Smith eats glue', 'capture-two'); + // + // m = nlp('ralf eats the glue').replace('ralf [#Verb]', 'he $1'); + // t.equal(m.out('text'), 'he eats the glue', 'simple subset'); + // + // m = nlp('John eats the glue').replace('the [#Noun]', 'the cyber-$1'); + // t.equal(m.out('text'), 'John eats the cyber-glue', 'capture-group as subset'); + // + t.end() +}) +*/ + +//test object-form +// test('not-from-object :', function(t) { +// const m = nlp('spencer is not really cool.') +// const r = m.not({ +// not: true, +// really: true, +// }) +// t.equal(m.out('normal'), 'spencer is not really cool.', 'double-obj-remains') +// t.equal(r.out('normal'), 'spencer is cool.', 'spencer-double-obj') + +// m = nlp('everyone is cool. I said hi to everyone.').not({ +// everyone: true, +// totally: true, +// }) +// t.equal(m.out('normal'), 'is cool. i said hi to', 'not-everyone') + +// m = nlp('spencer is really, secretly, very cool.') +// const adv = m.match('#Adverb').not({ +// really: true, +// }) +// t.equal(adv.out('normal'), 'secretly very', 'not-subset') +// t.equal(adv.length, 2, 'one-result-obj') + +// const adv2 = m.match('#Adverb').not('secretly') +// t.equal(adv2.out('normal'), 'really very', 'not-subset2') +// t.equal(adv2.length, 2, 'two-results-obj') + +// t.end() +// }) + +// test('normalize quotes ', function(t) { +// const str = `،one’ «two» ‘three’ “four” 'five' "six."` +// const doc = nlp(str) +// t.equal(doc.text(), str, 'text out-3') +// t.equal(doc.text(), 'one two three four five six.', 'normal out-3') +// t.end() +// }) + +// test('false-positives', function(t) { +// const txt = `Probably the renovation right away from the amount of work, which has been done to the property. +// I have one two, three, four five six properties, which came on the market in the month. 
+// I think that the number one quite comfortable looking at the two properties, which I'm working on now.` +// const questions = nlp(txt) +// .sentences() +// .isQuestion() +// .out('array') +// t.equal(questions.length, 0, 'no questions here') +// t.end() +// }) + +// let r = nlp('Homer, have you been eating that sandwich again?') +// .terms() +// .slice(0, 3) +// t.equal(r.out('text'), 'Homer, have you', 'result.slice') + +// str = 'men go' +// m = nlp(str) +// .sentences() +// .toPastTense() +// .nouns() +// .toSingular() +// t.equal(m.out('normal'), 'a man went', str) diff --git a/tests/_backlog/clone.ignore.js b/tests/_backlog/clone.ignore.js new file mode 100644 index 000000000..6f7ea54cb --- /dev/null +++ b/tests/_backlog/clone.ignore.js @@ -0,0 +1,34 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('nlp.clone() -change original', function(t) { + let nlp2 = nlp.clone() + t.ok(nlp('bat').has('#Noun'), 'nlp1-init') + t.ok(nlp2('bat').has('#Noun'), 'nlp2-init') + + //change nlp1 + nlp.extend((Doc, world) => { + world.addWords({ + bat: 'Man', + }) + }) + t.ok(nlp('bat').has('#Man'), 'nlp1-changed') + t.ok(nlp2('bat').has('#Man') === false, 'nlp2-unchanged') + + //change nlp2 + nlp2.extend((Doc, world) => { + world.addWords({ + bat: 'ManTwo', + }) + }) + t.ok(nlp('bat').has('#ManTwo') === false, 'nlp1-changed') + t.ok(nlp2('bat').has('#ManTwo') === true, 'nlp2-unchanged') + + //try nlp3 + let nlp3 = nlp.clone() + t.ok(nlp3('bat').has('#Noun'), 'nlp3-normal-default') + t.ok(nlp3('bat').has('#Man') === false, 'nlp3-normal') + t.ok(nlp3('bat').has('#ManTwo') === false, 'nlp3-normal-again') + + t.end() +}) diff --git a/tests/_backlog/pronoun.ignore.js b/tests/_backlog/pronoun.ignore.js new file mode 100644 index 000000000..3ae62801c --- /dev/null +++ b/tests/_backlog/pronoun.ignore.js @@ -0,0 +1,92 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('pronoun:', function(t) { + ;[ + ['John', 'he'], + ['John Smith', 'he'], 
+ ['Jane', 'she'], + // ['turtle', 'it'], + // ['turtles', 'they'], + // ['Toronto', 'it'], + // ['studying', 'it'], + // ['horses', 'they'], + // ['road bikes', 'they'], + // ['NHL goaltenders', 'they'], + ['Tony Danza', 'he'], + ['Tanya Danza', 'she'], + ['Mrs. Tanya Danza', 'she'], + // ['John G. Fishermore Institute', 'it'], + // ['John Fisher & sons', 'it'], + ].forEach(function(a) { + const str = nlp(a[0]) + .people() + .pronoun()[0] + const msg = a[0] + ' -> ' + str + t.equal(str, a[1], msg) + }) + t.end() +}) + +// #Determiner, #Possessive - "his book", "her book", "their book", +// #Adjective, #Possessive - "the book is his", "the book is hers", "the book is theirs", +// #Pronoun, #Object - "the book belongs to him", "the book belongs to her", "the book belongs to them", and for completeness, +// #Pronoun, #Subject - "he owns the book", "she owns the book", "they own the book". + +test('to her:', function(t) { + let doc = nlp('her book belongs to her') //Det ... Pronoun + // her book... + let m = doc + .match('^her') + .match('#Determiner') + .match('#Possessive') + t.ok(m.found, 'her book - det/poss') + + // ...to her + m = doc + .match('her$') + .match('#Pronoun') + .match('#Possessive') + t.ok(m.found, 'to her - det/poss') + t.end() +}) + +test('to him:', function(t) { + let doc = nlp('his book belongs to him') //Det ... Pronoun + // his book... + let m = doc + .match('^hi') + .match('#Determiner') + .match('#Possessive') + t.ok(m.found, 'him book - det/poss') + + // ...to him + m = doc + .match('him$') + .match('#Pronoun') + .match('#Possessive') + t.ok(m.found, 'to him - det/poss') + t.end() +}) + +test('is his forms:', function(t) { + let doc = nlp('his book is his') //Det ... Adjective + //...is his + let m = doc + .match('his$') + .match('#Adjective') + .match('#Possessive') + t.ok(m.found, 'is his - adj/poss') + t.end() +}) + +test('is his forms:', function(t) { + let doc = nlp('her book is hers') //Det ... 
Adjective + //...is hers + let m = doc + .match('hers$') + .match('#Adjective') + .match('#Possessive') + t.ok(m.found, 'is hers - adj/poss') + t.end() +}) diff --git a/tests/_backlog/toAdjective.ignore.js b/tests/_backlog/toAdjective.ignore.js new file mode 100644 index 000000000..73a6e0a34 --- /dev/null +++ b/tests/_backlog/toAdjective.ignore.js @@ -0,0 +1,19 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('verb-to-adjective:', function(t) { + let arr = [ + ['walk', 'walkable'], + ['sing', 'singable'], + ['win', 'winnable'], + ['convert', 'convertible'], + ['see', 'visible'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .verbs() + .asAdjective()[0] + t.equal(str, a[1], str + ' -> ' + a[1]) + }) + t.end() +}) diff --git a/tests/_lib.js b/tests/_lib.js new file mode 100644 index 000000000..f260b728f --- /dev/null +++ b/tests/_lib.js @@ -0,0 +1,11 @@ +if (typeof process !== 'undefined' && typeof module !== 'undefined') { + let nlp + if (process.env.TESTENV === 'prod') { + console.warn('== production build test 🚀 ==') + nlp = require('../') + } else { + nlp = require('../src') + } + + module.exports = nlp +} diff --git a/tests/_pennSample.js b/tests/_pennSample.js new file mode 100644 index 000000000..65f7ced8e --- /dev/null +++ b/tests/_pennSample.js @@ -0,0 +1,325 @@ +//(very) small subset of the Penn-treebank that should always pass +module.exports = [ + { + text: + 'Dr.
Talcott led a team of researchers from the National Cancer Institute and the medical schools of Harvard University and Boston University.', + tags: 'NNP, NNP, VBD, DT, NN, IN, NNS, IN, DT, NNP, NNP, NNP, CC, DT, JJ, NNS, IN, NNP, NNP, CC, NNP, NNP', + }, + { + text: 'The monthly sales have been setting records every month since March.', + tags: 'DT, JJ, NNS, VBP, VBN, VBG, NNS, DT, NN, IN, NNP', + }, + { + text: ' Cray Computer will be a concept stock, he said.', + tags: 'NNP, NNP, MD, VB, DT, NN, NN, PRP, VBD', + }, + { + text: 'Esso said the Whiting field started production Tuesday.', + tags: 'NNP, VBD, DT, NNP, NN, VBD, NN, NNP', + }, + { + text: 'Pick a country, any country.', + tags: 'VB, DT, NN, DT, NN', + }, + { + text: 'They fell into oblivion after the 1929 crash.', + tags: 'PRP, VBD, IN, NN, IN, DT, CD, NN', + }, + { + text: 'Political and currency gyrations can whipsaw the funds.', + tags: 'JJ, CC, NN, NNS, MD, VB, DT, NNS', + }, + { + text: 'They cite a lack of imbalances that provide early warning signals of a downturn.', + tags: 'PRP, VBP, DT, NN, IN, NNS, WDT, VBP, JJ, NN, NNS, IN, DT, NN', + }, + { + text: 'Characters drink Salty Dogs, whistle Johnny B. Goode and watch Bugs Bunny reruns.', + tags: 'NNS, VBP, NNP, NNP, VBP, NNP, NNP, NNP, CC, VBP, NNP, NNP, NNS', + }, + { + text: 'They read Mickey Spillane and talk about Groucho and Harpo.', + tags: 'PRP, VBP, NNP, NNP, CC, VBP, IN, NNP, CC, NNP', + }, + { + text: ' Consider Jim Courter.', + tags: 'VB, NNP, NNP', + }, + { + text: 'But it resists yielding political ground.', + tags: 'CC, PRP, VBZ, VBG, JJ, NN', + }, + { + text: ' In Asia, as in Europe, a new order is taking shape, Mr. 
Baker said.', + tags: 'IN, NNP, IN, IN, NNP, DT, JJ, NN, VBZ, VBG, NN, NNP, NNP, VBD', + }, + { + text: 'And South Carolina says it is getting results.', + tags: 'CC, NNP, NNP, VBZ, PRP, VBZ, VBG, NNS', + }, + { + text: " It was full of violence and gangs and kids cutting class, says Linda Ward, the school's principal.", + tags: 'PRP, VBD, JJ, IN, NN, CC, NNS, CC, NNS, VBG, NN, VBZ, NNP, NNP, DT, NN, NN', + }, + { + text: 'I believe in the system.', + tags: 'PRP, VBP, IN, DT, NN', + }, + { + text: 'Mrs. Yeargin declined.', + tags: 'NNP, NNP, VBD', + }, + { + text: 'Yeargin won widespread local support.', + tags: 'NNP, VBD, JJ, JJ, NN', + }, + { + text: 'But Learning Materials matched on 66.5 of 69 subskills.', + tags: 'CC, NNP, NNPS, VBD, IN, CD, IN, CD, NNS', + }, + { + text: 'The two banks merged in 1985.', + tags: 'DT, CD, NNS, VBD, IN, CD', + }, + { + text: "He said the company's core business remains strong.", + tags: 'PRP, VBD, DT, NN, NN, NN, VBZ, JJ', + }, + { + text: 'Estimated volume was a moderate 3.5 million ounces.', + tags: 'VBN, NN, VBD, DT, JJ, CD, CD, NNS', + }, + { + text: 'Mr. Gillespie at Viacom says the ratings are rising.', + tags: 'NNP, NNP, IN, NNP, VBZ, DT, NNS, VBP, VBG', + }, + { + text: 'Ad Notes....', + tags: 'NNP, NNPS', + }, + { + text: 'The business had been handled by VanSant Dugdale, Baltimore.', + tags: 'DT, NN, VBD, VBN, VBN, IN, NNP, NNP, NNP', + }, + { + text: ' The economy is clearly slowing, says Robert Black, president of the Richmond Federal Reserve Bank.', + tags: 'DT, NN, VBZ, RB, VBG, VBZ, NNP, NNP, NN, IN, DT, NNP, NNP, NNP, NNP', + }, + { + text: 'They will mature Dec. 21.', + tags: 'PRP, MD, VB, NNP, CD', + }, + { + text: 'Lancaster Colony Corp. said it acquired Reames Foods Inc. 
in a cash transaction.', + tags: 'NNP, NNP, NNP, VBD, PRP, VBD, NNP, NNP, NNP, IN, DT, NN, NN', + }, + { + text: 'NL is officially making the offer.', + tags: 'NNP, VBZ, RB, VBG, DT, NN', + }, + { + text: "The Japanese fret openly about the U.S. public's rancor.", + tags: 'DT, NNP, NN, RB, IN, DT, NNP, NN, NN', + }, + { + text: 'They operate ships and banks.', + tags: 'PRP, VBP, NNS, CC, NNS', + }, + { + text: + 'Adds Takeshi Kondo, senior vice president of C. Itoh America Inc.: We have a great interest in making investments, particularly in new ventures.', + tags: 'VBZ, NNP, NNP, JJ, NN, NN, IN, NNP, NNP, NNP, NNP, PRP, VBP, DT, JJ, NN, IN, VBG, NNS, RB, IN, JJ, NNS', + }, + { + text: 'But many banks are turning away from strict price competition.', + tags: 'CC, JJ, NNS, VBP, VBG, RB, IN, JJ, NN, NN', + }, + { + text: 'One big reason: thin margins.', + tags: 'CD, JJ, NN, JJ, NNS', + }, + { + text: 'Buy a new Chevrolet.', + tags: 'VB, DT, JJ, NNP', + }, + { + text: 'Buy a diamond necklace.', + tags: 'VB, DT, NN, NN', + }, + { + text: 'They are keeping a close watch on the yield on the S&P 500.', + tags: 'PRP, VBP, VBG, DT, JJ, NN, IN, DT, NN, IN, DT, NNP, CD', + }, + { + text: 'In fact, the market has always tanked.', + tags: 'IN, NN, DT, NN, VBZ, RB, VBN', + }, + { + text: 'Always.', + tags: 'RB', + }, + { + text: 'China pulled out of the program in July.', + tags: 'NNP, VBD, VB, IN, DT, NN, IN, NNP', + }, + { + text: 'But regulators are wary.', + tags: 'CC, NNS, VBP, JJ', + }, + { + text: 'He also is a consensus manager, insiders say.', + tags: 'PRP, RB, VBZ, DT, NN, NN, NNS, VBP', + }, + { + text: 'Compromises are possible.', + tags: 'NNS, VBP, JJ', + }, + { + text: 'The company acknowledges some problems.', + tags: 'DT, NN, VBZ, DT, NNS', + }, + { + text: + 'A number of cities including Minneapolis, Philadelphia and Houston have vacant grain elevators, Eggers says.', + tags: 'DT, NN, IN, NNS, VBG, NNP, NNP, CC, NNP, VBP, JJ, NN, NNS, NNP, VBZ', + }, + { + text: 
+ 'They suffered from malnutrition, chest diseases, cardiovascular disorders, skin problems, infectious diseases and the aftereffects of assaults and rape.', + tags: 'PRP, VBD, IN, NN, NN, NNS, JJ, NNS, NN, NNS, JJ, NNS, CC, DT, NNS, IN, NNS, CC, NN', + }, + { + text: 'That was the law.', + tags: 'DT, VBD, DT, NN', + }, + { + text: 'It was censorship.', + tags: 'PRP, VBD, NN', + }, + { + text: 'It was outrageous.', + tags: 'PRP, VBD, JJ', + }, + { + text: 'But the court disagreed.', + tags: 'CC, DT, NN, VBD', + }, + { + text: 'The man was Charles Z. Wick.', + tags: 'DT, NN, VBD, NNP, NNP, NNP', + }, + { + text: ' Bob has handled the extraordinary growth of the company quite brilliantly, said Mr. Newhouse.', + tags: 'NNP, VBZ, VBN, DT, JJ, NN, IN, DT, NN, RB, RB, VBD, NNP, NNP', + }, + { + text: + 'This species of congressional action is predicated on an interpretation of the appropriations clause that is erroneous and unconstitutional.', + tags: 'DT, NN, IN, JJ, NN, VBZ, VBN, IN, DT, NN, IN, DT, NNS, NN, WDT, VBZ, JJ, CC, JJ', + }, + { + text: 'President Reagan learned that lesson.', + tags: 'NNP, NNP, VBD, DT, NN', + }, + { + text: 'Mr. Sidak served as an attorney in the Reagan administration.', + tags: 'NNP, NNP, VBD, IN, DT, NN, IN, DT, NNP, NN', + }, + { + text: 'The death of the Herald, a newsstand paper in a freeway town, was perhaps inevitable.', + tags: 'DT, NN, IN, DT, NNP, DT, NN, NN, IN, DT, NN, NN, VBD, RB, JJ', + }, + { + text: ' The Herald was a survivor from a bygone age, said J. 
Kendrick Noble, a media analyst with PaineWebber Inc.', + tags: 'DT, NNP, VBD, DT, NN, IN, DT, JJ, NN, VBD, NNP, NNP, NNP, DT, NNS, NN, IN, NNP, NNP', + }, + { + text: 'The reaction in the newsroom was emotional.', + tags: 'DT, NN, IN, DT, NN, VBD, JJ', + }, + { + text: + 'The program traders, on the other hand, portray old-fashioned stock pickers as the Neanderthals of the industry.', + tags: 'DT, NN, NNS, IN, DT, JJ, NN, VBP, JJ, JJ, NN, NNS, IN, DT, NNS, IN, DT, NN', + }, + { + text: 'Reducing volatility.', + tags: 'VBG, NN', + }, + { + text: 'Ballot watchers say attention already is focused on the 1990 elections.', + tags: 'NN, NNS, VBP, NN, RB, VBZ, VBN, IN, DT, CD, NNS', + }, + { + text: 'Colleges, she says, are eyeing registration through 900 service.', + tags: 'NNS, PRP, VBZ, VBP, VBG, NN, IN, CD, NN', + }, + { + text: 'FAMILY PETS are improving recovery rates of patients at Columbia Hospital, Milwaukee.', + tags: 'NN, NNS, VBP, VBG, NN, NNS, IN, NNS, IN, NNP, NNP, NNP', + }, + { + text: 'The appointment takes effect Nov. 
13.', + tags: 'DT, NN, VBZ, NN, NNP, CD', + }, + { + text: 'Heiwado Co.', + tags: 'NNP, NNP', + }, + { + text: 'Guaranteed by Svenska Handelsbanken.', + tags: 'VBN, IN, NNP, NNP', + }, + { + text: 'Mitsubishi Pencil Co.', + tags: 'NNP, NNP, NNP', + }, + { + text: 'Koizumi Sangyo Corp.', + tags: 'NNP, NNP, NNP', + }, + { + text: 'A stadium craze is sweeping the country.', + tags: 'DT, NN, NN, VBZ, VBG, DT, NN', + }, + { + text: 'Stock prices closed higher in Stockholm, Amsterdam and Frankfurt and lower in Zurich.', + tags: 'NN, NNS, VBD, JJR, IN, NNP, NNP, CC, NNP, CC, JJR, IN, NNP', + }, + { + text: 'A faster version, the SuperDot, was launched in 1984.', + tags: 'DT, JJR, NN, DT, NNP, VBD, VBN, IN, CD', + }, + { + text: 'Valley Federal is currently being examined by regulators.', + tags: 'NNP, NNP, VBZ, RB, VBG, VBN, IN, NNS', + }, + { + text: 'Columbia has only about 10 million common shares in public hands.', + tags: 'NNP, VBZ, RB, IN, CD, CD, JJ, NNS, IN, JJ, NNS', + }, + { + text: 'But the concept is workable.', + tags: 'CC, DT, NN, VBZ, JJ', + }, + { + text: "Mr. 
Spiegel's next career move is a subject of speculation on Wall Street.", + tags: 'NNP, NNP, JJ, NN, NN, VBZ, DT, NN, IN, NN, IN, NNP, NNP', + }, + { + text: 'Wedtech management used the merit system.', + tags: 'NNP, NN, VBD, DT, NN, NN', + }, + { + text: 'Numerous other scandals, among them the ones at HUD, have the same characteristics as Wedtech.', + tags: 'JJ, JJ, NNS, IN, PRP, DT, NNS, IN, NNP, VBP, DT, JJ, NNS, IN, NNP', + }, + { + text: 'Railroad companies and some ports are reaping a sudden windfall of business.', + tags: 'NN, NNS, CC, DT, NNS, VBP, VBG, DT, JJ, NN, IN, NN', + }, + { + text: ' The recent rally in precious metals was a result of uncertainty and volatility in equities, he said.', + tags: 'DT, JJ, NN, IN, JJ, NNS, VBD, DT, NN, IN, NN, CC, NN, IN, NNS, PRP, VBD', + }, +] diff --git a/tests/abbreviation.test.js b/tests/abbreviation.test.js new file mode 100644 index 000000000..e02f53451 --- /dev/null +++ b/tests/abbreviation.test.js @@ -0,0 +1,16 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('abbreviations', function(t) { + let doc = nlp(`mr. and Mrs. Kelly live on Shoreditch st.`) + doc.abbreviations().stripPeriods() + t.equal(doc.text(), 'mr and Mrs Kelly live on Shoreditch st.', 'no-periods') + + doc + .abbreviations() + .addPeriods() + .addPeriods() + t.equal(doc.text(), 'mr. and Mrs. Kelly live on Shoreditch st.', 'one-period') + + t.end() +}) diff --git a/tests/acronym.test.js b/tests/acronym.test.js new file mode 100644 index 000000000..93632cf28 --- /dev/null +++ b/tests/acronym.test.js @@ -0,0 +1,16 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('acronyms', function(t) { + let doc = nlp(`mr. and Mrs. Smith are in the FBI and the c.i.a.`) + doc.acronyms().stripPeriods() + t.equal(doc.text(), 'mr. and Mrs. Smith are in the FBI and the cia.', 'no-periods') + + doc + .acronyms() + .addPeriods() + .addPeriods() + t.equal(doc.text(), 'mr. and Mrs. 
Smith are in the F.B.I and the c.i.a.', 'one-period') + + t.end() +}) diff --git a/tests/append.test.js b/tests/append.test.js new file mode 100644 index 000000000..23d65c972 --- /dev/null +++ b/tests/append.test.js @@ -0,0 +1,74 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('append parent', function(t) { + let doc = nlp(`one two three`) + doc.append('four five') + t.equal(doc.text(), 'one two three four five', 'appended in parent') + t.end() +}) + +test('append one child', function(t) { + let doc = nlp(`one two three`) + let m = doc.match('three') + m.append('four five') + t.equal(doc.text(), 'one two three four five', 'appended in parent') + t.end() +}) + +test('append multi', function(t) { + let doc = nlp('one two. three four') + doc.append('oooo') + t.equal(doc.text(), 'one two oooo. three four oooo') + t.end() +}) + +test('append two children', function(t) { + let doc = nlp(`one two three`) + let m = doc.match('two three') + let m2 = m.match('three') + m2.append('four five') + t.equal(m.text('normal'), 'two three four five', 'append in child 1') + t.equal(m2.text('normal'), 'three four five', 'append in child 2') + t.equal(doc.text(), 'one two three four five', 'appended in parent') + t.end() +}) + +test('append in middle', function(t) { + let doc = nlp(`one two three six`) + let m = doc.match('three').append('four five') + t.equal(m.text('normal'), 'three four five', 'append in child 1') + t.equal(doc.text(), 'one two three four five six', 'inserted in parent') + t.end() +}) + +test('append in middle many children', function(t) { + let doc = nlp(`one two three six`) + let mx = doc + .match('one two three') + .match('three') + .match('.') + .match('three') + mx.append('four five') + t.equal(mx.text('normal'), 'three four five', 'append in child n') + t.equal(doc.text(), 'one two three four five six', 'inserted in parent') + t.end() +}) + +test('append check false-positives', function(t) { + let doc = nlp(`one two three five`) + let 
one = doc.match('one') + let twoThree = doc.match('two three') + let noFour = doc.match('four') + let five = doc.match('five') + doc.match('three').append('four') + t.equal(one.text('normal'), 'one', 'not in one') + t.equal(twoThree.text('normal'), 'two three', 'not in twoThree') + t.equal(noFour.text('normal'), '', 'still no four') + t.equal(five.text('normal'), 'five', 'not in five') + t.equal(doc.text(), 'one two three four five', 'inserted in parent') + t.equal(doc.match('four').text('normal'), 'four', 'now has four') + t.equal(doc.match('four five').text('normal'), 'four five', 'now has four-five') + t.equal(doc.match('. four').text('normal'), 'three four', 'now has three four') + t.end() +}) diff --git a/tests/bestTag.test.js b/tests/bestTag.test.js new file mode 100644 index 000000000..0f19d7ba7 --- /dev/null +++ b/tests/bestTag.test.js @@ -0,0 +1,36 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('bestTag', function(t) { + let str = 'john smith was really working' + let m = nlp(str) + let have = m + .terms() + .json({ terms: { bestTag: true } }) + .map(p => p.terms[0].bestTag) + let want = ['MaleName', 'LastName', 'Copula', 'Adverb', 'Gerund'] + let msg = str + ' - [' + have.join(', ') + ']' + t.deepEqual(have, want, msg) + + str = 'he sang in June' + m = nlp(str) + have = m + .terms() + .json({ terms: { bestTag: true } }) + .map(p => p.terms[0].bestTag) + want = ['Pronoun', 'PastTense', 'Preposition', 'Month'] + msg = str + ' - [' + have.join(', ') + ']' + t.deepEqual(have, want, msg) + + str = 'fastest shooter in Canada' + m = nlp(str) + have = m + .terms() + .json({ terms: { bestTag: true } }) + .map(p => p.terms[0].bestTag) + want = ['Superlative', 'Singular', 'Preposition', 'Country'] + msg = str + ' - [' + have.join(', ') + ']' + t.deepEqual(have, want, msg) + + t.end() +}) diff --git a/tests/canBe.test.js b/tests/canBe.test.js new file mode 100644 index 000000000..ceb96e673 --- /dev/null +++ b/tests/canBe.test.js @@ -0,0 +1,18 
@@ +const test = require('tape') +const nlp = require('./_lib') + +test('canBe', function(t) { + let doc = nlp(`spencer was going crazy. He walks quickly.`) + + let canBeNoun = doc.canBe('Noun') + t.equal(canBeNoun.length, 2, 'two results') + t.equal(canBeNoun.terms(0).text('normal'), 'spencer', 'first result') + t.equal(canBeNoun.terms(1).text(), 'He', 'second result') + + let canBeVerb = nlp('spencer kelly').canBe('Verb') + t.equal(canBeVerb.length, 0, 'no results') + + let canBeMisc = nlp('spencer kelly').canBe('asdf') + t.equal(canBeMisc.length, 1, 'all results are one') + t.end() +}) diff --git a/tests/case.test.js b/tests/case.test.js new file mode 100644 index 000000000..a1d16ee11 --- /dev/null +++ b/tests/case.test.js @@ -0,0 +1,52 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('sanity-check case:', function(t) { + let str = 'John xoo, John fredman' + let r = nlp(str) + str = r.toUpperCase().out('text') + t.equal(str, 'JOHN XOO, JOHN FREDMAN', 'uppercase') + + str = r.toLowerCase().out('text') + t.equal(str, 'john xoo, john fredman', 'lowercase') + + str = r.toCamelCase().out('text') + t.equal(str, 'johnXooJohnFredman', 'camelcase') //removes comma + t.end() +}) + +test('camel case:', function(t) { + let doc = nlp('and check this out! a walk-in microwave.') + doc.hyphenated().toCamelCase() + t.equal(doc.text(), 'and check this out! a walkIn microwave.', 'hyphenated-camelcase') + t.end() +}) + +test('tricky case:', function(t) { + let str = 'i am spencer kelly here with Amy Adams.'
+ let r = nlp(str) + r.match('#Person').toUpperCase() + str = r.out('text') + t.equal(str, 'i am SPENCER KELLY here with AMY ADAMS.', 'tricky-uppercase') + + str = 'the Spencer Kelly Festival of Silly Walks' + r = nlp(str) + r.match('@titleCase+').toCamelCase() + t.equal(r.out('text'), 'the SpencerKellyFestival of SillyWalks', 'tricky-camelcase') + + t.end() +}) + +test('unicode case:', function(t) { + let doc = nlp(`ümasdfs`) + doc.toTitleCase() + t.equal(doc.text(), 'Ümasdfs', 'unicode-titlecase') + + doc = nlp(`Ümasdfs`) + doc.toUpperCase() + t.equal(doc.text(), 'ÜMASDFS', 'unicode-uppercase') + doc.toLowerCase() + t.equal(doc.text(), 'ümasdfs', 'unicode-lowercase') + + t.end() +}) diff --git a/tests/celebrity-names.test.js b/tests/celebrity-names.test.js new file mode 100644 index 000000000..5ccf94610 --- /dev/null +++ b/tests/celebrity-names.test.js @@ -0,0 +1,159 @@ +const test = require('tape') +const nlp = require('./_lib') + +//list of famous names taken from https://gate.ac.uk/gate/plugins/ANNIE/resources/gazetteer/ +const tests = { + 'john stewart': 'Male', + 'martha stewart': 'Female', + // 'Gaugin': 'Male', + // 'Gauguin': 'Male', + 'George Bush': 'Male', + 'Hillary Clinton': 'Female', + 'Hillary Rodham Clinton': 'Female', + // 'John the Baptist': 'Male', + 'Margaret Thatcher': 'Female', + Messiaen: 'Male', + Mozart: 'Male', + Nixon: 'Male', + 'Pope John Paul II': 'Male', + 'Richard Nixon': 'Male', + 'Ronald Reagan': 'Male', + 'Saddam Hussain': 'Male', + 'Saint - Saens': 'Male', + // 'Shostakovich': 'Male', + // 'Strauss': 'Male', + // 'Thatcher': 'Female', + // 'The Queen': 'Female', + // 'the Queen': 'Female', + 'Van Gogh': 'Male', + 'Virgin Mary': 'Female', + Vivaldi: 'Male', + 'van Gogh': 'Male', + 'Carl Marx': 'Male', + Lenin: 'Male', + Stalin: 'Male', + 'George W.Bush': 'Male', + 'Mitt Romney': 'Male', + 'Barack Obama': 'Male', + Obama: 'Male', + 'Lady Gaga': 'Female', + 'Kanye West': 'Male', + 'Abu Hamza': 'Male', + 'Abu Hamza Al - Masri': 
'Male', + 'Osama bin Laden': 'Male', + 'Osama Bin Laden': 'Male', + Mubarek: 'Male', + 'Muhammad Ali': 'Male', + 'Jennifer Aniston': 'Female', + 'Tyra Banks': 'Female', + 'Mario Batali': 'Male', + 'David Beckham': 'Male', + 'Halle Berry': 'Female', + 'Tom Brady': 'Male', + 'Matthew Broderick': 'Male', + 'Nathan Lane': 'Male', + 'Mel Brooks': 'Male', + 'Dan Brown': 'Male', + 'Jerry Bruckheimer': 'Male', + 'Kobe Bryant': 'Male', + 'Gisele Bundchen': 'Female', + 'Jim Carrey': 'Male', + 'Dave Chappelle': 'Male', + 'Sean Combs': 'Male', + 'Katie Couric': 'Female', + 'Simon Cowell': 'Male', + 'Tom Cruise': 'Male', + 'Johnny Depp': 'Male', + 'Cameron Diaz': 'Female', + 'Leonardo DiCaprio': 'Male', + 'Celine Dion': 'Female', + 'Jodie Foster': 'Female', + 'John Grisham': 'Male', + 'Tom Hanks': 'Male', + 'Paris Hilton': 'Female', + 'Eric Idle': 'Male', + 'Mike Nichols': 'Male', + 'Peter Jackson': 'Male', + 'LeBron James': 'Male', + 'Derek Jeter': 'Male', + 'Scarlett Johansson': 'Female', + 'Elton John': 'Male', + 'Angelina Jolie': 'Female', + 'Michael Jordan': 'Male', + 'Nicole Kidman': 'Female', + 'Heidi Klum': 'Female', + 'Emeril Lagasse': 'Male', + 'Jay Leno': 'Male', + 'David Letterman': 'Male', + 'Adriana Lima': 'Female', + 'Rush Limbaugh': 'Male', + 'George Lopez': 'Male', + 'Jennifer Lopez': 'Female', + 'George Lucas': 'Male', + 'Paul McCartney': 'Male', + 'Dr. Phil McGraw': 'Male', + 'Phil Mickelson': 'Male', + 'Kate Moss': 'Female', + 'Neil Diamond': 'Male', + "Bill O'Reilly": 'Male', + "Shaquille O'Neal": 'Male', + 'Carson Palmer': 'Male', + 'James Patterson': 'Male', + 'Ty Pennington': 'Male', + 'Regis Philbin': 'Male', + 'Brad Pitt': 'Male', + 'Wolfgang Puck': 'Male', + 'Rachael Ray': 'Female', + 'Nicole Richie': 'Female', + 'Alex Rodriguez': 'Male', + 'Ray Romano': 'Male', + Ronaldo: 'Male', + 'Valentino Rossi': 'Male', + 'J.K. 
Rowling': 'Female', + 'Adam Sandler': 'Male', + 'Diane Sawyer': 'Female', + 'Michael Schumacher': 'Male', + 'Ryan Seacrest': 'Male', + 'Jerry Seinfeld': 'Male', + 'Maria Sharapova': 'Female', + 'Jessica Simpson': 'Female', + 'Will Smith': 'Male', + 'Annika Sorenstam': 'Female', + 'Steven Spielberg': 'Male', + 'Bruce Springsteen': 'Male', + 'Howard Stern': 'Male', + 'Rod Stewart': 'Male', + 'Kiefer Sutherland': 'Male', + 'Donald Trump': 'Male', + 'Rick Warren': 'Male', + 'Denzel Washington': 'Male', + 'Sardinia F Jones': 'Female', + 'Andrew Lloyd Webber': 'Male', + 'Michelle Wie': 'Female', + 'Serena Williams': 'Female', + 'Venus Williams': 'Female', + 'Oprah Winfrey': 'Female', + 'Reese Witherspoon': 'Female', + 'Dick Wolf': 'Male', + 'Tiger Woods': 'Male', + 'Renee Zellweger': 'Female', + 'Whitney Houston': 'Female', + 'Adolf Hitler': 'Male', + 'Shania Twain': 'Female', + 'Hulk Hogan': 'Male', + 'Michelle Obama': 'Female', + 'Ashton Kutcher': 'Male', + 'Cardinal Wolsey': 'Male', + 'Slobodan Milosevic': 'Male', +} + +test('celebrity names:', function(t) { + Object.keys(tests).forEach(function(k) { + const str = nlp(k) + .people() + .text() + const msg = "'" + k + "' is a person - - have: '" + str + "'" + t.equal(str, k, msg) + }) + t.end() +}) diff --git a/tests/clause.test.js b/tests/clause.test.js new file mode 100644 index 000000000..9a941724c --- /dev/null +++ b/tests/clause.test.js @@ -0,0 +1,48 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('clauses-parentheses:', function(t) { + let m = nlp("i said, 'did you have to do that' and then left, like nothing happened (which it didn't).").clauses() + t.equal(m.length, 5, 'found 5 clauses') + t.equal(m.eq(0).text(), 'i said', 'clause 1') + t.equal(m.eq(1).text(), `did you have to do that`, 'clause 2') + t.equal(m.eq(2).text(), `and then left`, 'clause 3') + t.equal(m.eq(3).text(), `like nothing happened`, 'clause 4') + t.equal(m.eq(4).text(), `which it didn't`, 'clause 5') + t.end() +}) + 
+test('clauses-commas:', function(t) { + let doc = nlp(`in Toronto, Canada`).clauses() + t.equal(doc.length, 1, 'place-comma') + + // doc = nlp(`July 4, 1776`).clauses() + // t.equal(doc.length, 1, 'date-comma') + + doc = nlp(`“You have a spider on your nose!” my friend yelled.`).clauses() + t.equal(doc.length, 2, 'found 2 clauses-1') + t.end() +}) + +test('clauses-condition:', function(t) { + let m = nlp('if you must, go to the basement').clauses() + t.equal(m.length, 2, 'found 2 clauses2') + t.equal(m.eq(0).text(), 'if you must', 'clause 1') + t.equal(m.eq(1).text(), `go to the basement`, 'clause 2') + t.end() +}) + +test('clauses-conjunction:', function(t) { + let m = nlp(`it is cool but it is not`).clauses() + t.equal(m.length, 2, 'found 2 clauses3') + t.equal(m.eq(0).text(), 'it is cool', 'clause 1') + t.equal(m.eq(1).text(), `but it is not`, 'clause 2') + t.end() +}) + +test('clauses-list:', function(t) { + let m = nlp('he is nice, cool and fun.').clauses() + t.equal(m.length, 1, 'found 1 clause') + t.equal(m.eq(0).text(), 'he is nice, cool and fun.', 'clause 1') + t.end() +}) diff --git a/tests/constructor.test.js b/tests/constructor.test.js new file mode 100644 index 000000000..d74ac7c9a --- /dev/null +++ b/tests/constructor.test.js @@ -0,0 +1,48 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('extra exports:', function(t) { + t.ok(nlp.version, 'version number exported') + + t.doesNotThrow(function() { + nlp.verbose(true) + nlp.verbose(false) + }, 'can set verbosity') + + t.end() +}) + +test('tokenize() runs without pos-tagging', function(t) { + const str = 'Miss Hoover, I glued my head to my shoulder.' 
+ const r = nlp.tokenize(str) + t.equal(r.out('text'), str, 'tokenize output is same') + + t.equal(r.list.length, 1, 'sentence-parser-working') + + const found = r.match('#Noun').found + t.equal(found, false, 'no sneaky-tagging') + + t.end() +}) + +//make sure it can handle garbage inputs +test('garbage:', function(t) { + const garbage = ['', ' ', null, '\n\n', []] //{} + garbage.forEach(function(g, i) { + let num = nlp(g).list.length + let msg = typeof g + ' text input #' + i + t.equal(num, 0, msg) + }) + let str = nlp(2).out() + t.equal(str, '2', 'integer-casted') + str = nlp(2.2).out() + t.equal(str, '2.2', 'float-casted') + + //garbage in lexicon too + str = nlp('hello', null).out() + t.equal(str, 'hello', 'null-lexicon') + + str = nlp('hello', 2).out() + t.equal(str, 'hello', 'int-lexicon') + t.end() +}) diff --git a/tests/contractions/contract.test.js b/tests/contractions/contract.test.js new file mode 100644 index 000000000..b7e84eb5e --- /dev/null +++ b/tests/contractions/contract.test.js @@ -0,0 +1,64 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('contract basic', function(t) { + let r = nlp(`he is cool.`) + r.contract() + t.equal(r.out('text'), `he's cool.`, 'expanded-contract') + + r = nlp(`he's cool.`) + r.contract() + t.equal(r.out('text'), `he's cool.`, 'contracted-contract') + + r = nlp(`please do not eat the marshmellow`) + r.contract() + t.equal(r.out('text'), `please don't eat the marshmellow`, 'expanded-contract') + + r = nlp(`please don't eat the marshmellow`) + r.contract() + t.equal(r.out('text'), `please don't eat the marshmellow`, 'contracted-contract') + + r = nlp(`i have stood`) + r.contract() + t.equal(r.out('text'), `i've stood`, 'expanded-contract') + + r = nlp(`i've stood`) + r.contract() + t.equal(r.out('text'), `i've stood`, 'contracted-contract') + + r = nlp('i am good') + r.contract() + t.equal(r.out('text'), `i'm good`, 'contract-1') + r.contractions().expand() + t.equal(r.out('text'), `i am good`, 
'expand-2') + r.contract() + t.equal(r.out('text'), `i'm good`, 'contract-2') + + r.contractions() + .contract() + .contract() + .contract() + t.equal(r.out('text'), `i'm good`, 'contract-n') + + t.end() +}) + +test('avoid contraction messes', function(t) { + let doc = nlp('Tony, is').contract() + t.equal(doc.text('reduced'), 'tony is', 'avoid-contraction 1') + + doc = nlp('(Tony) is').contract() + t.equal(doc.text('reduced'), 'tony is', 'avoid-contraction 2') + + doc = nlp(`'Tony' is`).contract() + t.equal(doc.text('reduced'), 'tony is', 'avoid-contraction 3') + + doc = nlp('Tony-is').contract() + t.equal(doc.text('reduced'), 'tony is', 'avoid-contraction 4') + + doc = nlp(`Tony +is`).contract() + t.equal(doc.text('reduced'), 'tony is', 'avoid-contraction 5') + + t.end() +}) diff --git a/tests/contractions/contraction.test.js b/tests/contractions/contraction.test.js new file mode 100644 index 000000000..1c9d40a52 --- /dev/null +++ b/tests/contractions/contraction.test.js @@ -0,0 +1,28 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('match contractions/possessives', function(t) { + let doc = nlp(`i think it's spencer's`) + t.equal(doc.has('it'), true, 'has it') + t.equal(doc.has('spencer'), true, 'has spencer') + t.equal(doc.has(`spencer's`), true, "has spencer's") + t.equal(doc.has(`i'm`), false, 'not false-positive') + t.equal(doc.has(`it'll`), false, 'not false-positive-2') + t.end() +}) + +test('contraction whitespace', function(t) { + let doc = nlp(`i didn't know.`) + t.equal(doc.text(), `i didn't know.`, 'init-whitespace') + + doc.contractions().expand() + t.equal(doc.text(), `i did not know.`, 'expanded-whitespace') + + doc = nlp(`i didn't.`) + t.equal(doc.text(), `i didn't.`, 'init-period') + + doc.contractions().expand() + t.equal(doc.text(), `i did not.`, 'expanded-period') + + t.end() +}) diff --git a/tests/contractions/expand.test.js b/tests/contractions/expand.test.js new file mode 100644 index 000000000..6643d3a69 --- 
/dev/null +++ b/tests/contractions/expand.test.js @@ -0,0 +1,94 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('basic is contractions', function(t) { + let r = nlp(`he is cool.`) + r.contractions().expand() + t.equal(r.out('text'), `he is cool.`, 'expanded-expand') + + r = nlp(`he's cool.`) + r.contractions().expand() + t.equal(r.out('text'), `he is cool.`, 'contracted-expand') + + r = nlp(`that's really great.`) + r.contractions().expand() + t.equal(r.out('text'), `that is really great.`, 'contracted-expand') + + r = nlp(`she'll, eat icecream`) + r.contractions().expand() + t.equal(r.out('text'), `she will, eat icecream`, 'with-punctuation') + + r = nlp("we're not gonna take it, no we're not gonna take it") + r.contractions().expand() + t.equal(r.out('text'), `we are not going to take it, no we are not going to take it`, 'expand gonna twice') + + r = nlp("we're we're gonna gonna") + r.contractions().expand() + t.equal(r.out('text'), `we are we are going to going to`, 'expand consecutive') + t.end() +}) + +test('do-not contractions', function(t) { + let r = nlp(`please do not eat the marshmellow`) + r.contractions().expand() + t.equal(r.out('text'), `please do not eat the marshmellow`, 'expanded-expand') + + r = nlp(`please don't eat the marshmellow`) + r.contractions().expand() + t.equal(r.out('text'), `please do not eat the marshmellow`, 'contracted-expand') + + t.end() +}) + +test('have contractions', function(t) { + let r = nlp(`i have stood`) + r.contractions().expand() + t.equal(r.out('text'), `i have stood`, 'expanded-expand') + + r = nlp(`i've stood`) + r.contractions().expand() + t.equal(r.out('text'), `i have stood`, 'contracted-expand') + + t.end() +}) + +test('repeated contract-expand', function(t) { + let r = nlp(`i'm good`) + r.contractions().expand() + + r.contractions() + .expand() + .expand() + .expand() + t.equal(r.out('text'), `i am good`, 'expand-n') + t.end() +}) + +test('contracted', function(t) { + let r = nlp(`I'll 
go to Toronto. I will see.`) + let m = r.contractions() + let str = m.out('text') + t.equal(str, `I'll`, 'contracted') + + t.equal(m.length, 1, 'no-expanded') + t.end() +}) + +test('would-or-did', function(t) { + let r = nlp(`i'd contemplate`) + let str = r + .contractions() + .expand() + .all() + .out('text') + t.equal(str, `i would contemplate`, 'i-would') + + r = nlp(`i'd contemplated`) + str = r + .contractions() + .expand() + .all() + .out('text') + t.equal(str, `i had contemplated`, 'i-had') + t.end() +}) diff --git a/tests/emoji.test.js b/tests/emoji.test.js new file mode 100644 index 000000000..ddd667372 --- /dev/null +++ b/tests/emoji.test.js @@ -0,0 +1,72 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('keyword emojis', function(t) { + ;[ + ['he is so nice :heart:', ':heart:'], + [':cool: :wine_glass: yeah party', ':cool: :wine_glass:'], + ['to be or not to be: this is a question :cookie:', ':cookie:'], + ].forEach(function(a) { + const have = nlp(a[0]) + .match('#Emoji') + .text() + .trim() + const msg = "have: '" + have + "' want: '" + a[1] + "'" + t.equal(have, a[1], msg) + }) + t.end() +}) + +test('unicode emojis', function(t) { + ;[ + ['nice job 💯 ❤️', '💯 ❤️'], + ['💚 good job 🎇', '💚 🎇'], + ['visit Brunei', ''], + ['visit Brunei 🇧🇳', '🇧🇳'], + ['visit Brunei 🇧🇳🇧🇳🇧🇳', '🇧🇳🇧🇳🇧🇳'], + ].forEach(function(a) { + const have = nlp(a[0]) + .match('#Emoji') + .out('normal') + const msg = "have: '" + have + "' want: '" + a[1] + "'" + t.equal(have, a[1], msg) + }) + t.end() +}) + +test('emoticon emojis', function(t) { + ;[ + ['nice job :)', ':)'], + [';) good work', ';)'], + [';( oh no :(', ';( :('], + ['to be: that is th3 question', ''], + ['3 3 3 sad', '3 3 3'], + // ['33', '33'], + ].forEach(function(a) { + const have = nlp(a[0]) + .match('#Emoticon') + .out('normal') + const msg = "have: '" + have + "' want: '" + a[1] + "'" + t.equal(have, a[1], msg) + }) + t.end() +}) + +test('result methods', function(t) { + const text = 'this :cookie: 
<3 💯 so good. It is really nice. Yes it is <3' + + //has method + const m = nlp(text) + t.equal(m.match('#Emoji').found, true, 'nlp.has positive') + t.equal(m.match('#SportsTeam').found, false, 'nlp.has neg') + + //filter string + let small = m.if('(#Emoji|#Emoticon)') + t.equal(small.out('text'), 'this :cookie: <3 💯 so good. Yes it is <3', 'nlp.filter string') + + //filter method + small = m.ifNo('(#Emoji|#Emoticon)') + t.equal(small.out('normal'), 'it is really nice.', 'nlp.filter method') + + t.end() +}) diff --git a/tests/full-api.test.js b/tests/full-api.test.js new file mode 100644 index 000000000..198039811 --- /dev/null +++ b/tests/full-api.test.js @@ -0,0 +1,223 @@ +const test = require('tape') +const nlp = require('./_lib') + +//run every method once, and check against runtime errors +test('constructor api', function(t) { + const fns = { + tokenize: '()=>{ nlp.tokenize("you\'re sure you haven\'t just made thousands of mistakes?") }', + extend: "()=>{ nlp.extend((Doc, world)=>world.addWords({bort:'FirstName'})) }", + clone: "()=>{ let b=nlp.clone(); b.extend((Doc, world)=>{world.addWords({ 'a hoy hoy' : 'Greeting'})}) }", + load: "()=>{ let json=nlp('Tony Hawk').export();nlp.load(json).match('#Person') }", + verbose: "()=>{ nlp.verbose(false);nlp('I am the very model of a modern Major-General') }", + version: '()=>{ nlp.version}', + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('utils api', function(t) { + const fns = { + all: "()=>{ nlp('this is yelling').match('#Verb').toTitleCase().all().text() }", + found: "()=>{ nlp('oh say can you see?').match('see').found }", + parent: '()=>{ }', + parents: '()=>{ }', + tagger: '()=>{ }', + wordCount: '()=>{ }', + length: "()=>{ nlp('jackie kennedy and aristotle onassis').match('#Person+').length }", + clone: "()=>{ nlp('would somebody please think of the children').clone().toUpperCase().parent().text() }", + cache: "()=>{ let doc=nlp(\"I'm looking for Amanda 
Hugginkiss\").cache({root:true});doc.match('~look~') }", + uncache: '()=>{ let doc=nlp("urine-soaked hell-hole").uncache();doc.tag("Insult") }', + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('accessors api', function(t) { + const fns = { + first: '()=>{ }', + last: '()=>{ }', + slice: "()=>{ nlp('Homer, have you been eating that sandwich again?').terms().slice(0, 3).text() }", + eq: '()=>{ }', + firstTerm: '()=>{ }', + lastTerm: '()=>{ }', + termList: '()=>{ }', + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('match api', function(t) { + const fns = { + match: "()=>{ nlp('we understand, we are from the land of chocolate.').match('land of #Noun').text() }", + not: "()=>{ nlp('wait, there’s a new mexico?').match('#Place').not('new').text() }", + matchOne: '()=>{ }', + if: "()=>{ nlp('We’re here, we’re clear, we don’t want anymore bears.').clauses().if('anymore').text() }", + ifNo: "()=>{ nlp('We’re here, we’re clear, we don’t want anymore bears.').clauses().ifNo('anymore').text() }", + has: "()=>{ nlp('I am the very model of a modern Major-General').has('#Pronoun') }", + lookBehind: '()=>{ }', + lookAhead: '()=>{ }', + before: "()=>{ nlp('one two three four five').before('three').text() }", + after: "()=>{ nlp('one two three four five').after('three').text() }", + lookup: "()=>{ nlp('chocolate microscopes? double guitars?').lookup(['double guitars']).length }", + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('case api', function(t) { + const fns = { + toUpperCase: "()=>{ nlp('Dental plan. Lisa needs braces.').match('dental .').toUpperCase().text() }", + toLowerCase: "()=>{ nlp('Careful! 
They’re RUFFLED!!').toLowerCase().text() }", + toTitleCase: "()=>{ nlp('jupiter, pluto and mars').match('#Noun').toTitleCase().all().text() }", + toCamelCase: "()=>{ nlp('natural language processing').toCamelCase().text() }", + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('whitespace api', function(t) { + const fns = { + pre: '()=>{ nlp("we\'re here. we\'re clear. we don\'t want anymore bears.").pre(" ") }', + post: "()=>{ nlp(\"we're here. we're clear. we don't want anymore bears.\").post('!') }", + trim: "()=>{ nlp(' Lenny and Carl ').match('#Person').trim().text() }", + hyphenate: "()=>{ nlp('natural language processing').hyphenate().text() }", + dehyphenate: "()=>{ nlp('natural-language processing').dehyphenate().text() }", + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('tag api', function(t) { + const fns = { + tag: + "()=>{ nlp('Michael Apple ate a delicious apple.').match('#FirstName apple').tag('Person').all().match('#Person+').text() }", + tagSafe: '()=>{ }', + unTag: + "()=>{ nlp('they made a catch & scored a run').match('(run|catch)').unTag('#Verb').all().match('#Verb').out('array') }", + canBe: "()=>{ nlp('it’s fusilli jerry!').canBe('Person').text() }", + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('loops api', function(t) { + const fns = { + map: "()=>{ nlp('yahoo serious festival').terms().map((m)=> m.toUpperCase()).text() }", + forEach: "()=>{ nlp('Oh, no! 
Bette Midler!').match('#Person+').forEach((m,i)=> m.text()) }", + filter: "()=>{ nlp('Hey, anymore arboretum’s around here?').terms().filter(m => m.has('#Plural') ).length }", + find: + "()=>{ nlp('Always do the opposite of what bart says') .terms() .find(t => t.out('normal').match(/b[ao]rt/)) .text() }", + some: "()=>{ nlp('Don’t make me run, I’m full of chocolate!').terms().some(m => m.out('normal')==='run' )}", + random: "()=>{ nlp('one two three four').terms().random(2).out('array') }", + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('insert api', function(t) { + const fns = { + replaceWith: "()=>{ nlp('it was the worst of times').match('worst').replaceWith('blurst', true).all().text() }", + replace: "()=>{ nlp('trust me folks, big league.').replace('big league','bigly').all().text() }", + delete: "()=>{ nlp('you don’t win friends with salad').delete('do not').text() }", + append: "()=>{ nlp('i know so many words').insertAfter('bigly').all().text() }", + prepend: "()=>{ nlp('stupid flanders').match('flanders').insertBefore('sexy').all().text() }", + concat: "()=>{ nlp('My name is Otto').concat('and i love to get blotto').all().length }", + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('transform api', function(t) { + const fns = { + sort: "()=>{ nlp('Larry, Curly, Moe').terms().sort('alphabetical').out('array') }", + reverse: '()=>{ }', + normalize: + "()=>{ nlp(' so... you like DONUTS? have all the donuts in the WORLD!!!').normalize().all().get(0).text() }", + unique: '()=>{ }', + split: "()=>{ nlp('Monorail...Once again! Monorail... Monorail!').splitOn('monorail').eq(0).text() }", + splitAfter: "()=>{ nlp('Monorail...Once again! Monorail... Monorail!').splitAfter('monorail').eq(0).text() }", + splitBefore: "()=>{ nlp('Monorail...Once again! Monorail... 
Monorail!').splitBefore('monorail').eq(0).text()}", + segment: '()=>{ }', + join: '()=>{ }', + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('out api', function(t) { + const fns = { + text: "()=>{ nlp('you might say there’s a little Uter in all of us').match('#Adjective uter').out('array') }", + debug: '()=>{ }', + out: '()=>{ }', + json: + "()=>{ nlp('The stage was set for the Alan Parsons Project! Which I believe was some sort of hovercraft.').data() }", + + export: '()=>{ }', + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('selectors api', function(t) { + const fns = { + terms: "()=>{ nlp('we should all be more like little Ruttiger').terms().json() }", + clauses: + "()=>{ nlp('All right, Colossus, you’re free to go, but stay away from Death Mountain').clauses().data() }", + hyphenated: '()=>{ }', + phoneNumbers: "()=>{ nlp('Moe Sizlak. That’s right. I’m a surgeon. (800) 555-0000.').phoneNumbers().json() }", + hashTags: "()=>{ nlp('oh, but where is the #anykey').hashTags().json() }", + emails: '()=>{ }', + emoticons: '()=>{ }', + emoji: '()=>{ }', + atMentions: '()=>{ }', + urls: "()=>{ nlp('thank you http://simpsons.wikia.com').urls().json()}", + adverbs: '()=>{ }', + pronouns: '()=>{ }', + conjunctions: '()=>{ }', + prepositions: '()=>{ }', + abbreviations: '()=>{ }', + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) + +test('subsets api', function(t) { + const fns = { + contractions: '()=>{ }', + parentheses: "()=>{ nlp('Use a pointed stick (a pencil) or a similar tool').parentheses().data() }", + possessives: "()=>{ nlp('moe’s tavern').possessives().text() }", + quotations: '()=>{ nlp(\'the he said "crazy like a fox!".\').quotations().data().length }', + acronyms: '()=>{ }', + lists: '()=>{ }', + nouns: '()=>{ }', + verbs: `()=>{ nlp('Moe Sizlak. That’s right. 
I’m a surgeon.').verbs() }`, + } + Object.keys(fns).forEach(k => { + t.doesNotThrow(eval(fns[k]), k) + }) + t.end() +}) diff --git a/tests/hyphens.test.js b/tests/hyphens.test.js new file mode 100644 index 000000000..870d539f7 --- /dev/null +++ b/tests/hyphens.test.js @@ -0,0 +1,62 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('hyphen-tokenize', function(t) { + let r = nlp('super-cool work') + t.equal(r.terms().length, 3, 'super, cool') + t.equal(r.out('text'), 'super-cool work', 'preserve hyphen') + t.equal(r.out('normal'), 'super cool work', 'normalize-out hyphen') + + r = nlp('http://about.com/my-summer-vacation') + t.equal(r.terms().length, 1, 'url hyphen') + r = nlp('http://about.com/my-summer') + t.equal(r.terms().length, 1, 'url hyphen2') + + r = nlp('421-0059') + t.equal(r.terms().length, 1, 'phoneNuumber hyphen') + + r = nlp('sept-2') + t.equal(r.terms().length, 2, 'date hyphen') + + r = nlp('-2 degrees') + t.equal(r.terms().length, 2, 'minus hyphen') + + r = nlp('re-enactment') + t.equal(r.out('normal'), 'reenactment', 're-hyphen') + r = nlp('un-do') + t.equal(r.out('normal'), 'undo', 'un-hyphen') + + t.end() +}) + +test('hyphenate', function(t) { + let str = 'it is cool. he is nice' + let m = nlp(str) + m.hyphenate() + t.equal(m.terms().length, 6, 'seperate terms') + t.equal(m.out('text'), 'it-is-cool. he-is-nice', 'hyphenate') + m.dehyphenate() + t.equal(m.out('text'), str, 'dehyphenate') + + str = 'i payed seven-hundred for the sandwich' + m = nlp(str) + m.match('#Value').dehyphenate() + t.equal(m.out('text'), 'i payed seven hundred for the sandwich', 'dehyphenate-values') + + str = 'he is the king of rock. she is the queen of cool.' + m = nlp(str) + m.match('(king|queen) of (#Noun|#Adjective)').hyphenate() + t.equal(m.out('text'), 'he is the king-of-rock. she is the queen-of-cool.', 'hyphenate-match') + + t.end() +}) + +test('hasHyphen', function(t) { + let doc = nlp(`super-cool and hunky-dory. 
Connected with-a-dash.`) + let arr = doc.match('@hasHyphen+ .').out('array') + t.equal(arr.length, 3, 'three found') + t.equal(arr[0], 'super-cool', 'first found') + t.equal(arr[1], 'hunky-dory.', 'second found') + // t.equal(arr[2], 'with-a-dash', 'third found') //FIXME:hyphens + t.end() +}) diff --git a/tests/lists.test.js b/tests/lists.test.js new file mode 100644 index 000000000..b37f0a161 --- /dev/null +++ b/tests/lists.test.js @@ -0,0 +1,52 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('list-types', function(t) { + let doc = nlp('he is nice, cool, and really fun.').lists() + t.equal(doc.length, 1, 'found adj list') + t.equal(doc.things().length, 3, 'three adjs') + + doc = nlp('his sweat, blood, and tears').lists() + t.equal(doc.length, 1, 'found noun list') + t.equal(doc.things().length, 3, 'three nouns') + + doc = nlp('we ran, biked, swam, and then ate').lists() + t.equal(doc.length, 1, 'found verb list') + t.equal(doc.things().length, 4, 'four verbs') + + doc = nlp('there is Spencer Kelly, Donald Glover, and Justin Trudeau').lists() + t.equal(doc.length, 1, 'found person list') + t.equal(doc.things().length, 3, 'three people') + + t.end() +}) + +// test('support-2-part-list', function(t) { +// const doc = nlp('spencer is cool, and great. He is nice and tired.') +// let m = doc.lists() +// t.equal(m.length, 2, 'has two lists') +// t.end() +// }) + +test('muti-word things', function(t) { + const doc = nlp('spencer is nice, quite warm, and tired.') + let m = doc.lists() + t.equal(m.length, 1, 'has one list') + t.equal(m.things().length, 3, 'has three things') + t.end() +}) + +test('add', function(t) { + const doc = nlp('spencer is nice, warm and tired.') + doc.lists().add('CRAAZY') + t.equal(doc.text(), 'spencer is nice, warm, CRAAZY and tired.', 'without no-oxford') + t.end() +}) + +test('hasOxfordComma', function(t) { + const doc = nlp('spencer is cool, fun, and great. 
He is nice, tired and not smart.') + let m = doc.lists().hasOxfordComma() + t.equal(m.length, 1, 'only one has oxford-comma') + t.equal(m.text(), 'cool, fun, and great', 'first-one has oxford-comma') + t.end() +}) diff --git a/tests/load.test.js b/tests/load.test.js new file mode 100644 index 000000000..d7cb440cc --- /dev/null +++ b/tests/load.test.js @@ -0,0 +1,53 @@ +const test = require('tape') +const nlp = require('./_lib') + +// do an elaborate comparison between json objects +const isEqual = function(a, b, t) { + let jsonA = a.json() + let jsonB = b.json() + t.equal(a.length, b.length, 'same length') + t.equal(a.text(), b.text(), 'same text') + jsonA.forEach((o, i) => { + t.equal(o.text, jsonB[i].text, o.text) + t.equal(o.terms.length, jsonB[i].terms.length, 'terms-length ' + i) + o.terms.forEach(term => { + term.tags.forEach(tag => { + let p = b.eq(i) + t.equal(p.has('#' + tag), true, p.text() + ' has ' + tag) + }) + }) + }) +} + +test('load-export basic', function(t) { + let a = nlp('it was cold. John K. Smith was verrrrry cold ! ') + let b = nlp.load(a.export()) + isEqual(a, b, t) + t.end() +}) + +test('load-export-empty', function(t) { + let a = nlp('') + let b = nlp.load(a.export()) + isEqual(a, b, t) + + t.end() +}) + +test('load-export-garbage', function(t) { + let a = nlp('[]. oh yeah. function the null prototype. - \n "two| (time()7 77') + let b = nlp.load(a.export()) + isEqual(a, b, t) + + t.end() +}) + +test('export-unknown tag', function(t) { + let a = nlp('cookie monster was a boomer. ok boomer', { boomer: 'Generation' }) + a.match('. 
monster').tag('Character') + a.match('ok boomer').tag('Diss') + let json = a.export() + let b = nlp.load(json) + isEqual(a, b, t) + t.end() +}) diff --git a/tests/lookahead.test.js b/tests/lookahead.test.js new file mode 100644 index 000000000..d9fbc4c1e --- /dev/null +++ b/tests/lookahead.test.js @@ -0,0 +1,71 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('look-ahead', function(t) { + let doc = nlp(`i walked to a store today. the store was very nice`) + let m = doc.match('store').lookAhead('#Adjective') + t.deepEqual(m.out('array'), ['nice'], 'found all upcoming adjectives') + + let m2 = doc.match('store').lookAhead('.') + t.deepEqual(m2.out('array'), ['today.', 'was', 'very', 'nice'], 'found all upcoming words') + + let m3 = doc.match('store').lookAhead('farmer') + t.equal(m3.found, false, 'lookahead empty') + + let m4 = doc.match('nice').lookAhead('.') + t.equal(m4.found, false, 'lookahead on edge empty') + + let m5 = nlp('it is raining') + .match('raining') + .lookAhead() + t.equal(m5.found, false, 'lookahead at end empty') + + let m6 = nlp('it is raining today and tomorrow') + .match('raining') + .lookAhead() + t.equal(m6.text(), 'today and tomorrow', 'lookahead blank finds all') + + t.end() +}) + +test('lookahead from parent is blank', function(t) { + let doc = nlp('it is raining') + t.equal(doc.lookAhead('.').found, false, 'no after 1') + + doc = nlp('oh wow, it is raining. it is snowing? it is very cold.') + t.equal(doc.lookAhead('.').found, false, 'no after 1') + t.end() +}) + +test('look-behind', function(t) { + let doc = nlp(`i walked to a store. 
the store was very nice`) + let m = doc.match('store').lookBehind('#Determiner') + t.deepEqual(m.out('array'), ['a', 'the'], 'found previous determiners') + + let m2 = doc.match('store').lookBehind('.') + t.deepEqual(m2.out('array'), ['i', 'walked', 'to', 'a', 'the'], 'found all previous words') + + let m3 = doc.match('store').lookBehind('farmer') + t.equal(m3.found, false, 'lookbehind not found') + + let m4 = doc.match('i').lookBehind('.') + t.equal(m4.found, false, 'lookbehind on edge empty') + + let m6 = nlp('it is raining today and tomorrow') + .match('raining') + .lookBehind() + t.equal(m6.text(), 'it is', 'lookbehind blank finds all') + + t.end() +}) + +test('look-behind-last', function(t) { + let doc = nlp(`a priest walked into a bar`) + let m = doc + .match('bar') + .lookBehind('a') + .last() + m.replace('the') + t.equal(doc.text(), `a priest walked into the bar`, 'lookbehind most-recent') + t.end() +}) diff --git a/tests/lookup.test.js b/tests/lookup.test.js new file mode 100644 index 000000000..c0f1bd5c4 --- /dev/null +++ b/tests/lookup.test.js @@ -0,0 +1,16 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('lookup tests', function(t) { + let doc = nlp("he isn't AT Spencer's house of pain. The haunted house of not pain. 
Third sentence spencer.") + let m = doc.lookup(['house of pain', 'house of']) + t.equal(m.length, 3, 'found redundant match') + + m = doc.lookup(["spencer's"]) + t.equal(m.length, 1, 'found possessive lookup') + + m = doc.lookup('spencer') + t.equal(m.length, 2, 'end of line lookup') + + t.end() +}) diff --git a/tests/loop-mutate.test.js b/tests/loop-mutate.test.js new file mode 100644 index 000000000..e86883791 --- /dev/null +++ b/tests/loop-mutate.test.js @@ -0,0 +1,36 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('foreach replace', function(t) { + let doc = nlp('guns and roses') + doc.match('guns').forEach(p => { + p.replaceWith('flowers') + }) + t.equal(doc.text(), 'flowers and roses', 'simple loop-replace') + + doc = nlp('guns and roses. roses and guns') + doc.match('guns').forEach(p => { + p.replaceWith('flowers') + }) + t.equal(doc.text(), 'flowers and roses. roses and flowers', 'two loop-replacements') + + doc = nlp('guns and roses') + doc.match('guns').forEach(p => { + p.replaceWith('flowers, kittens') + }) + t.equal(doc.text(), 'flowers, kittens and roses', 'loop-replace-grow') + + doc = nlp('guns, bombs, and roses') + doc.match('guns bombs').forEach(p => { + p.replaceWith('flowers') + }) + t.equal(doc.text(), 'flowers, and roses', 'loop-replace-shrink') + + doc = nlp('the end') + doc.match('end').forEach(p => { + p.replaceWith('more words') + }) + t.equal(doc.text(), 'the more words', 'loop-replace-expand-end') + + t.end() +}) diff --git a/tests/loops.test.js b/tests/loops.test.js new file mode 100644 index 000000000..8f7e694ab --- /dev/null +++ b/tests/loops.test.js @@ -0,0 +1,88 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('map-stuff', function(t) { + let doc = nlp('and').map(d => { + return d.replaceWith('or') + }) + t.equal(doc.text(), 'or', 'replace-with') + + doc = nlp('one two three. 
three four five.').map(d => { + return d.match('three') + }) + t.equal(doc.eq(0).text(), 'three', 'match-one') + t.equal(doc.eq(1).text(), 'three', 'match-two') + + t.end() +}) + +test('foreach-stuff', function(t) { + let doc = nlp('one two three. three four five.').forEach(p => { + p.toUpperCase() + }) + t.equal(doc.out('text'), 'ONE TWO THREE. THREE FOUR FIVE.', 'foreach-uppercase') + t.end() +}) + +test('filter-stuff', function(t) { + let doc = nlp('one two three. three four five.').filter(p => { + return p.has('four') + }) + t.equal(doc.out('normal'), 'three four five.', 'filter-has') + + doc = nlp('one two three. three four five.') + .terms() + .filter(p => { + return p.has('four') + }) + t.equal(doc.out('normal'), 'four', 'filter-four') + + doc = nlp('one two three. three four five.') + .terms() + .filter(p => { + return p.has('asdf') + }) + t.equal(doc.out('normal'), '', 'empty-filter') + t.end() +}) + +test('find-stuff', function(t) { + let doc = nlp('one two three. three four five.').find(m => m.has('four')) + t.equal(doc && doc.out('normal') === 'three four five.', true, 'found four') + + doc = nlp('one two three. three four five.').find(m => m.has('asdf')) + t.equal(doc, undefined, 'undefined find result') + t.end() +}) + +test('some-stuff', function(t) { + let bool = nlp('one two three. three four five.').some(m => m.has('three')) + t.equal(bool, true, 'found-three') + + bool = nlp('one two three. three four five.').some(m => m.has('asdf')) + t.equal(bool, false, 'not-found') + t.end() +}) + +test('map array return', function(t) { + let doc = nlp('Larry, Curly, and Moe') + let people = doc.match('#Noun') // (any one noun) + people.sort('alpha') + let arr = people.map(d => d.text('normal')) + t.deepEqual(arr, ['curly, ', 'larry, ', 'moe'], 'got array in response') + t.end() +}) + +// test('reduce-stuff', function(t) { +// let doc = nlp('one two three. 
three four five.') +// .terms() +// .reduce((p, d) => { +// if (!p.has('two')) { +// return d +// } +// return null +// }) +// doc.debug() +// // t.equal(doc.out('text'), 'ONE TWO THREE. THREE FOUR FIVE.', 'foreach-uppercase') +// t.end() +// }) diff --git a/tests/match/and-operand.test.js b/tests/match/and-operand.test.js new file mode 100644 index 000000000..7fa041e20 --- /dev/null +++ b/tests/match/and-operand.test.js @@ -0,0 +1,43 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('and-match', function(t) { + let doc = nlp('june and july cool') + let m = doc.match('(#Date && july)') + t.equal(m.out(), 'july', 'found july') + + m = doc.match('(#Date && !july)') + t.equal(m.out(), 'june', 'found not july') + + m = doc.match('(and && !foo && #Conjunction && .)') + t.equal(m.out(), 'and', 'three-match') + + t.end() +}) + +test('and-match-more', function(t) { + let doc = nlp('toronto and montreal. Sydney and Paris.') + let m = doc.match('(#Place && .)') + t.equal(m.length, 4, 'found all four') + + m = doc.match('(#Place && /e/)') + t.equal(m.out(), 'montreal. Sydney', 'found e words') + + m = doc.match('(#Place && !#Verb)') + t.equal(m.length, 4, 'and not') + + m = doc.match('(#Place && #Verb)') + t.equal(m.length, 0, 'no and') + + m = doc.match('(#Place && #Noun && * && .{1,3})') + t.equal(m.length, 4, 'four towns') + + t.end() +}) + +// test('and-match-multi', function(t) { +// let doc = nlp('toronto and montreal. Sydney and Paris.') +// let m = doc.match('(#Place and && toronto .)') +// t.equal(m.out(), 'toronto and', 'found one multi') +// t.end() +// }) diff --git a/tests/match/before-after.test.js b/tests/match/before-after.test.js new file mode 100644 index 000000000..866393c25 --- /dev/null +++ b/tests/match/before-after.test.js @@ -0,0 +1,65 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('before-basic', function(t) { + let doc = nlp('one two three four five. 
one three four') + let arr = doc.before('three four').out('array') + t.equal(arr.length, 2, 'two-matches') + t.equal(arr[0], 'one two', 'first-match') + t.equal(arr[1], 'one', 'second-match') + + doc = nlp('one two three four five. one three four. three four') + arr = doc.before('three').out('array') + t.equal(arr.length, 2, 'two-matches') + t.equal(arr[0], 'one two', 'first-match') + t.equal(arr[1], 'one', 'second-match') + t.end() +}) + +test('before-match:', function(t) { + let r = nlp('one two three four five').before('two') + t.equal(r.out('normal'), 'one', 'before-two') + + r = nlp('one two three four five').before('three . five') + t.equal(r.out('normal'), 'one two', 'before-several') + + r = nlp('one two three four five').before('one two') + t.equal(r.out('normal'), '', 'no-before-start') + + // r = nlp('one two three four').before('.'); //tricky + // t.equal(r.out('normal'), '', 'before-any'); + + r = nlp('one two three four. No, not here. He said two days a week.').before('two') + let arr = r.out('array') + t.equal(arr[0], 'one', 'before-twice-1') + t.equal(arr[1], 'He said', 'before-twice-2') + + r = nlp('it was all the way over to two. It was the number two.').before('it') + t.equal(r.found, false, 'no-empty-matches') + + t.end() +}) + +test('after-match:', function(t) { + let r = nlp('one two three four five').after('two') + t.equal(r.out('normal'), 'three four five', 'after-one') + + r = nlp('one two three four five').after('one . three') + t.equal(r.out('normal'), 'four five', 'after-several') + + r = nlp('one two three four five').after('four five') + t.equal(r.out('normal'), '', 'no-afters-end') + + r = nlp('one two three four').after('.') + t.equal(r.out('normal'), 'two three four', 'after-any') + + r = nlp('one two three four. No, not here. He said two days a week.').after('two') + let arr = r.out('array') + t.equal(arr[0], 'three four.', 'after-twice-1') + t.equal(arr[1], 'days a week.', 'after-twice-2') + + r = nlp('all the way over to two. 
It was the number two.').after('two') + t.equal(r.found, false, 'no-empty-matches') + + t.end() +}) diff --git a/tests/match/capture.test.js b/tests/match/capture.test.js new file mode 100644 index 000000000..3c6ce8c58 --- /dev/null +++ b/tests/match/capture.test.js @@ -0,0 +1,40 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('match-capture-group', function(t) { + let m = nlp('John eats glue').match('[john]') + t.equal(m.out('text'), 'John', 'capture-group-simple') + + m = nlp('John Smith eats glue').match('[#Person+]') + t.equal(m.out('text'), 'John Smith', 'capture-two') + + m = nlp('ralf eats the glue').match('ralf [#Verb] the') + t.equal(m.out('normal'), 'eats', 'simple subset') + + m = nlp('ralf eats the glue').match('[ralf] [#Verb] the') + t.equal(m.out('normal'), 'ralf eats', 'two-word capture') + + m = nlp('i saw ralf eat the glue Mrs. Hoover').match('ralf [#Verb the glue] mrs') + t.equal(m.out('normal'), 'eat the glue', 'three-word capture') + + m = nlp('ralf eats the glue').match('* [#Verb]') + t.equal(m.out('normal'), 'eats', 'capture after wildcard') + + // m = nlp('ralf eats the glue').match('ralf eats [*]') + // t.equal(m.out('normal'), 'the glue', 'wildcard capture at the end') + + // m = nlp('ralf eats the glue').match('ralf eats [*] glue') + // t.equal(m.out('normal'), 'the', 'wildcard capture in the middle') + + m = nlp('saw the Toronto International Documentary Film Festival yesterday').match('saw the? 
[#Noun+] yesterday') + t.equal(m.trim().out('text'), 'Toronto International Documentary Film Festival', 'greedy capture') + + t.end() +}) + +test('tricky capture', function(t) { + let doc = nlp.tokenize('during august') + let m = doc.match('^(on|during|in) [.]') + t.equal(m.text('normal'), 'august', 'found capture') + t.end() +}) diff --git a/tests/match/doc-match.test.js b/tests/match/doc-match.test.js new file mode 100644 index 000000000..99a4dcc9e --- /dev/null +++ b/tests/match/doc-match.test.js @@ -0,0 +1,37 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('doc-as-input', function(t) { + let doc = nlp('if so, he is the best, that i see. he is the greatest') + let m = doc.match('he is the .') + let found = doc.match(m) + t.equal(found.length, 2, 'found both phrases') + t.equal(found.eq(0).text('reduced'), 'he is the best', 'found first match') + t.equal(found.eq(1).text('reduced'), 'he is the greatest', 'found second match') + t.end() +}) + +test('doc-sibling-as-input', function(t) { + let doc = nlp('he is the best, that i see. he is the greatest') + let childA = doc.if('greatest') + let childB = doc.match('he is') + let found = childA.match(childB) + t.equal(found.length, 1, 'found self in sibling') + //try false-positive example + childA = doc.if('foobar') + childB = doc.match('he is') + found = childA.match(childB) + t.equal(found.length, 0, 'false-positive not found') + t.end() +}) + +test('split-doc-input', function(t) { + let doc = nlp('he is the best, that i see. 
he is the greatest') + let childA = doc.if('greatest') + let childB = doc.match('he is') + let found = childA.splitAfter(childB) + t.equal(found.length, 2, 'split from sibling') + t.equal(found.eq(0).text('reduced'), 'he is', 'found first match') + t.equal(found.eq(1).text('reduced'), 'the greatest', 'found second match') + t.end() +}) diff --git a/tests/match/encoding.test.js b/tests/match/encoding.test.js new file mode 100644 index 000000000..728dac883 --- /dev/null +++ b/tests/match/encoding.test.js @@ -0,0 +1,70 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('encoding-match:', function(t) { + const r = nlp('it is * nice') + const str = r + .match('is \\*') + .trim() + .out() + t.equal(str, 'is *', 'encode asterix') + + // r = nlp('it is + nice'); + // str = r.match('is \\+ nice').trim().out(); + // t.equal(str, 'is + nice', 'encode plus'); + + t.end() +}) + +test('reserved-word-in-src:', function(t) { + const r = nlp('buy eggs constructor yeah prototype') + t.equal(r.has(`backburner`), false, 'single') + t.equal(r.has(`#Foo`), false, 'tag') + t.equal(r.has(`(upcoming|backburner)`), false, 'anyOf') + t.equal(r.has(`lala+`), false, 'manyOf') + t.equal(r.has(`nword{2,4}`), false, 'someOf') + t.end() +}) + +test('reserved-word-in-match:', function(t) { + const r = nlp('fo foo fulala repeat') + t.equal(r.has(`constructor`), false, 'single') + t.equal(r.has(`#constructor`), false, 'tag') + t.equal(r.has(`(upcoming|constructor)`), false, 'anyOf') + t.equal(r.has(`constructor+`), false, 'manyOf') + t.equal(r.has(`constructor{2,4}`), false, 'someOf') + t.end() +}) + +test('test-infinite-loop', function(t) { + const weirdDoc = nlp('^ ? * . + $') + weirdDoc.match('is?') + weirdDoc.match('.?') + weirdDoc.match('*') + weirdDoc.match('.+') + weirdDoc.match('+') + weirdDoc.match('?') + weirdDoc.match('.') + weirdDoc.match('? * . +') + weirdDoc.not('?') + weirdDoc.not('*') + weirdDoc.not('^') + weirdDoc.not('$') + weirdDoc.not('+') + weirdDoc.not('? 
* . +') + t.ok(true, 'didnt regress') + + let str = 'And you are?. Marshal' + let have = nlp(str) + .all() + .out() + t.equal(have, str, 'regression #1') + + str = `- where is she.Oh. you guys don't know?` + have = nlp(str) + .all() + .out() + t.equal(have, str, 'regression #2') + + t.end() +}) diff --git a/tests/match/fancy-match.test.js b/tests/match/fancy-match.test.js new file mode 100644 index 000000000..a171319aa --- /dev/null +++ b/tests/match/fancy-match.test.js @@ -0,0 +1,47 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('matchOne', function(t) { + let doc = nlp('one two three four five. one three four') + let arr = doc.matchOne('three four').out('array') + t.equal(arr.length, 1, 'one-match') + t.equal(arr[0], 'three four', 'found-match') + t.end() +}) + +test('match-from-array :', function(t) { + let m = nlp('spencer is really cool').match(['spencer']) + t.equal(m.out('normal'), 'spencer', 'just-spencer') + t.equal(m.length, 1, 'one-result') + + m = nlp('spencer is really cool').match([]) + t.equal(m.out('normal'), '', 'empty match') + t.equal(m.length, 0, 'zero-results') + + m = nlp('spencer is really cool') + let r = m.match(['spencer', 'really']).toUpperCase() + t.equal(r.out('text'), 'SPENCER REALLY', 'match-spencer-really') + t.equal(r.length, 2, 'two-results') + + t.equal(m.out('text'), 'SPENCER is REALLY cool', 'match-spencer-really') + t.equal(m.length, 1, 'still-one-result') + t.end() +}) + +test('greedy-capture', function(t) { + let m = nlp('so ralf and really eats the glue').match('* [eats] the') + t.equal(m.out('normal'), 'eats', 'one-captures') + + m = nlp('so ralf really, really eats the glue').match('[#Adverb+] eats the') + t.equal(m.out('normal'), 'really, really', 'greedy-capture') + + m = nlp('so ralf and really eats the glue').match('* [eats the]') + t.equal(m.out('normal'), 'eats the', 'two-captures') + + m = nlp('so ralf really eats the glue').match('really [eats the] *') + t.equal(m.out('normal'), 'eats 
the', 'astrix after') + + m = nlp('so ralf really eats the glue').match('really * [eats the]') + t.equal(m.out('normal'), 'eats the', 'astrix is not necessary') + t.end() +}) diff --git a/tests/match/has.test.js b/tests/match/has.test.js new file mode 100644 index 000000000..ee0218d66 --- /dev/null +++ b/tests/match/has.test.js @@ -0,0 +1,44 @@ +const test = require('tape') +const nlp = require('../_lib') + +let arr = [ + ['five hundred feet', 'Value'], + ['50 square feet', 'Value'], + ['90 hertz', 'Value'], + ['two books', 'Value'], + ['two hundred', 'Value'], + ['4 hundred and ten', 'Value'], + ['4 and a half million', 'Value'], + ['499 thousand', 'Value'], + ['499', 'Value'], + ['4,899', 'Value'], + ['John Smith', 'Person'], + ['dr. John Smith', 'Person'], + ['John Smith jr.', 'Person'], + ['John Jacob Smith', 'Person'], + ['Jani K. Smith', 'Person'], + ['asdfefs', 'Noun'], + ['octopus', 'Noun'], + ['tree', 'Noun'], + ['i', 'Noun'], + + ['FBI', 'Organization'], + ['F.B.I.', 'Organization'], + ['Fun ltd.', 'Organization'], + ['Fun co', 'Organization'], + ['Smith & Rogers', 'Organization'], + ['Google', 'Organization'], + ['tuesday', 'Date'], + ['february', 'Date'], + ['february fifth', 'Date'], + ['tuesday march 5th', 'Date'], + ['tuesday march 5th, 2015', 'Date'], +] + +test('has-test', function(t) { + arr.forEach(function(a) { + let doc = nlp(a[0]) + t.equal(doc.has('#' + a[1]), true, a[0]) + }) + t.end() +}) diff --git a/tests/match/if.test.js b/tests/match/if.test.js new file mode 100644 index 000000000..d6648b08a --- /dev/null +++ b/tests/match/if.test.js @@ -0,0 +1,32 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('if-basic:', function(t) { + let r = nlp('spencer is here') + let m = r.if('asdf') + t.equal(m.out('text'), '', 'if-negative') + + m = r.if('spencer') + t.equal(m.out('text'), 'spencer is here', 'if-positive') + + r = nlp('spencer is here. 
john was here.') + m = r.if('is') + t.equal(m.out('normal'), 'spencer is here.', 'if-partial') + + t.end() +}) + +test('ifNo:', function(t) { + let r = nlp('spencer is here') + let m = r.ifNo('spencer') + t.equal(m.out('text'), '', 'ifNo-positive') + + m = r.ifNo('asdf') + t.equal(m.out('text'), 'spencer is here', 'ifNo-negative') + + r = nlp('spencer is here. john was here.') + m = r.ifNo('is') + t.equal(m.out('normal'), 'john was here.', 'if-no-partial') + + t.end() +}) diff --git a/tests/match/insert.test.js b/tests/match/insert.test.js new file mode 100644 index 000000000..a64627d46 --- /dev/null +++ b/tests/match/insert.test.js @@ -0,0 +1,44 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('insert-basic :', function(t) { + let m = nlp('the dog sat').insertBefore('and') + t.equal(m.out('text'), 'and the dog sat', 'and-dog') + + m = nlp('the dog sat').insertAfter('patiently') + t.equal(m.out('text'), 'the dog sat patiently', 'sat-patiently') + + m = nlp('the dog sat') + m.match('dog').insertBefore('nice') + t.equal(m.out('text'), 'the nice dog sat', 'nice-dog') + + m = nlp('a dog sat') + m.match('sat').insertAfter('quickly') + t.equal(m.out('text'), 'a dog sat quickly', 'sat-quickly') + + m = nlp('a dog sat') + m.match('a dog sat').insertAfter('quickly') + t.equal(m.out('text'), 'a dog sat quickly', 'multi-match-quickly') + + m = nlp('a dog sat') + m.match('asdf').insertAfter('no no no') + t.equal(m.out('text'), 'a dog sat', 'no no no no') + + t.end() +}) + +test('insert-subset-include :', function(t) { + let m = nlp('the dog is nice') + let sub = m.match('is') + sub.insertAfter('really') + t.equal(sub.out('normal'), 'is really', 'is-really') + t.equal(m.out('normal'), 'the dog is really nice', 'dog-is-really-nice') + + m = nlp('the dog climbed the fence') + sub = m.match('climbed') + sub.insertBefore('really') + t.equal(sub.out('normal'), 'really climbed', 'really-quickly') + t.equal(m.out('normal'), 'the dog really climbed the fence', 
'dog-really-climbed') + + t.end() +}) diff --git a/tests/match/match-method.test.js b/tests/match/match-method.test.js new file mode 100644 index 000000000..0ba2ce969 --- /dev/null +++ b/tests/match/match-method.test.js @@ -0,0 +1,48 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('match-methods:', function(t) { + let arr = [ + ['@hasComma bravery', 'truth, bravery'], + ['spencer @hasPeriod', 'spencer did.'], + ['spencer @hasExclamation', 'spencer did!'], + ['spencer @hasQuestionMark', 'spencer did?'], + ['spencer @hasEllipses', 'spencer did...'], + ['no @hasSemicolon i said', 'no fair; i said'], + ['hurricane', 'tornado/hurricane'], + ['tornado', 'tornado/hurricane'], + ['@hasSlash', 'tornado/hurricane'], + ['a @hasSlash', 'rock you like a tornado/hurricane'], + ['he @hasContraction', "he isn't going"], + ['@isAcronym', 'FIFA'], + ['@isKnown', 'spencer'], + ] + arr.forEach(a => { + let doc = nlp(a[1]) + t.equal(doc.has(a[0]), true, a[0]) + }) + t.end() +}) + +test('match @functions', function(t) { + let doc = nlp(`jamie's much, much better.`) + + let m = doc.match('@hasComma') + t.equal(m.text(), 'much', 'hasComma') + + m = doc.match('(@hasPeriod|cool)') + t.equal(m.text(), 'better', 'hasPeriod') + + m = doc.match('(@hasSemicolon|better)') + t.equal(m.text(), 'better', 'false-positive') + + doc = nlp(`i am much, much better and faster`) + m = doc.match('!@hasComma') + t.equal(m.text(), 'i am much better and faster', 'negative function') + + doc = nlp(`i am much, much better and faster`) + m = doc.match('(foo|!@hasComma)') + t.equal(m.text(), 'i am much better and faster', 'negative in optional function') + + t.end() +}) diff --git a/tests/match/match-tricky.test.js b/tests/match/match-tricky.test.js new file mode 100644 index 000000000..8987b664c --- /dev/null +++ b/tests/match/match-tricky.test.js @@ -0,0 +1,154 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('fancy match', function(t) { + let arr = [ + //misc + 
['doug is good', '', 0], + ['doug is good', '.', 3], + ['doug is good', '.?', 3], + ['doug is good', '.+', 3], + + //contractions + ["he's nice", 'he is', 2], + ["he's nice", 'is nice', 2], + ["he's nice", "he's", 1], + ["he's nice", "he's nice", 3], + ["he's nice", 'nice', 1], + + //over/under + ['he is nice', 'is nice and good', 0], + ['is nice', 'he is nice', 0], + + //dot + ['doug is good', 'doug is good', 3], + ['doug is good', 'doug . good', 3], + ['doug is good', 'doug is .', 3], + ['doug is good', '. is .', 3], + ['doug is good', '. . .', 3], + ['doug is good', '. . . .', 0], + + //optional miss + ['doug is good', 'doug is really? good', 3], + ['doug is good', 'doug is good? good', 3], + ['doug is good', 'doug is .? good', 3], //tricky 'anything optional' bug + ['doug is good', 'doug is #Adverb? good', 3], + //optional has + ['doug is really good', 'doug is really? good', 4], + ['doug is really good', 'doug is .? good', 4], + ['doug is really good', 'doug is #Adverb? good', 4], + //asterix empty + ['doug is good', 'doug *', 3], + ['doug is good', 'doug is *', 3], + ['doug is good', '*', 3], + //asterix positive + ['doug is good', 'doug * good', 3], + ['doug is really good', 'doug * good', 4], + ['doug is really so very good', 'doug * good', 6], + ['doug is really so very good at stuff', 'doug * good', 6], + ['we think doug is really so very good at stuff', 'doug * good', 6], + //asterix negative + ['doug is good', 'doug * bad', 0], + ['doug is good', 'spencer * bad', 0], + ['doug is good', 'spencer *', 0], + ['doug is good', 'doug * is', 2], //another tricky 'greedy optional' bug + ['cool, fun, great, nice', '#Adjective+ great', 3], + + // + ['Dr. Spencer Smith says hi', 'dr', 1], + ['Dr. Spencer Smith says hi', 'dr spencer', 2], + ['Dr. Spencer Smith says hi', 'dr spencer smith', 3], + ['Dr. Spencer Smith says hi', 'dr spencer smith says', 4], + ['Lately, Dr. 
Spencer Smith says hi', 'lately dr spencer smith', 4], + //start ^ + ['in toronto', '^toronto', 0], + ['toronto', '^toronto', 1], + ['in toronto', '^in toronto', 2], + ['in toronto', 'in ^toronto', 0], + //end $ + ['it snows', 'it snows', 2], + ['it snows', 'snows$', 1], + ['it snows', 'it snows$', 2], + ['it snows', 'it$ snows', 0], + ['it snows', 'foo$', 0], + //negative ! + ['it really snows', 'it #adverb snows', 3], + ['it really snows', 'it !#adverb snows', 0], + ['it really snows. it goes.', 'it !#adverb', 2], + ['it is nice.', '!#adverb', 3], + //max/min {} + ['if it goes really well', 'if .{1,2} well', 0], + ['if it goes really well', 'if .{1,6} well', 5], + ['so i said that spencer is nice', '^.{1,3} spencer', 0], + ['so i said that spencer is nice', '^.{1,6} spencer', 5], + ['one two three four five', 'one two{1,2}', 2], + ['one two three four five', 'one two{1,3}', 2], + ['one two three four five', 'one two{0,3}', 2], + ['one two three four five', 'one .{0,3} two', 2], + ['one two three four five', 'one .{0,3} three', 3], + ['one two three four five', 'one .{1,3} two', 0], + ['one two three four five six seven', 'one .{0,4} six seven', 7], + //optional/consecutive + ['is really walking', 'is #Adverb+? walking', 3], + ['is walking', 'is #Adverb+? walking', 2], + ['is really really walking', 'is #Adverb+? walking', 4], + ['is really not walking', 'is (#Adverb|not)+? walking', 4], + ['is really not quickly walking', 'is (#Adverb|not)+? walking', 5], + ['is walking', 'is (#Adverb|not)+? 
walking', 2], + ['Phoenix AZ', '#City #Region', 2], + //this isn't working + ['the canadian senate', 'the (united states|canadian) senate', 3], + ['the canadian senate', '(canadian|united states|british)', 1], + ] + arr.forEach(function(a) { + const r = + nlp(a[0]) + .match(a[1]) + .terms() || [] + const msg = "'" + a[0] + "' - - - '" + a[1] + "' - - got:" + r.length + ' want:' + a[2] + t.equal(r.length, a[2], msg) + }) + t.end() +}) + +test('consecutive greedy cases', function(t) { + let doc = nlp('one two') + let m = doc.match('#Value #Value') + t.equal(m.length, 1, 'consecutive-found one') + t.equal(m.eq(0).text(), 'one two', 'consecutive-found both') + + m = doc.match('#Value+ #Value') + t.equal(m.length, 1, 'plus-found one') + t.equal(m.eq(0).text(), 'one two', 'plus-found both') + + m = doc.match('#Value* #Value') + t.equal(m.length, 1, 'astrix-found one') + t.equal(m.eq(0).text(), 'one two', 'astrix-found both') + + m = doc.match('#Value? #Value') + t.equal(m.length, 1, 'optional-found one') + t.equal(m.eq(0).text(), 'one two', 'optional-found both') + + m = nlp.tokenize('one one').match('one? one') + t.equal(m.length, 1, 'optional-double') + m = nlp.tokenize('one one two').match('one? 
one two') + t.equal(m.text(), 'one one two', 'found all three terms') + + t.end() +}) + +test('tricky-case', function(t) { + t.equal(nlp('Number II').has('Number II'), true, 'uppercase-match') + t.equal(nlp('Number I').has('Number I'), true, 'uppercase-match') + t.end() +}) + +test('text-as-input', function(t) { + const doc = nlp('he is from Phoenix AZ') + const m = doc.match('#City') + const matchWith = doc.match(m).out('normal') + const without = doc.not(m).out('text') + t.equal(matchWith, 'phoenix', 'text-as-match') + t.equal(without, 'he is from AZ', 'text-as-not') + t.end() +}) diff --git a/tests/match/match.test.js b/tests/match/match.test.js new file mode 100644 index 000000000..e94d2088e --- /dev/null +++ b/tests/match/match.test.js @@ -0,0 +1,93 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('term-match :', function(t) { + let arr = [ + ['quick', 'quick', true], + ['Quick', 'Quick', true], + ['quick', 's', false], + ['quick', '#Adjective', true], + ['quick', '#Noun', false], + ['quick', '(fun|nice|quick|cool)', true], + ['quick', '(fun|nice|good)', false], + ] + arr.forEach(function(a) { + const m = nlp(a[0]).match(a[1]) + const msg = a[0] + ' matches ' + a[1] + ' ' + a[2] + t.equal(m.found, a[2], msg) + }) + t.end() +}) + +test('sentence-match:', function(t) { + let arr = [ + ['the dog played', 'the dog', 'the dog'], + ['the dog played', 'the dog played', 'the dog played'], + ['the dog played', 'the #Noun', 'the dog'], + ['the dog played', 'the #Noun played', 'the dog played'], + ['the dog played', 'the cat played', ''], + ['the dog played', 'the #Adjective played', ''], + ['the dog played', 'the (cat|dog|piano) played', 'the dog played'], + ['the dog played', 'the (cat|piano) played', ''], + ['the dog played', 'the . played', 'the dog played'], + //optional + ['the dog played', 'the dog quickly? played', 'the dog played'], + ['the dog played', 'the dog #Adverb? 
played', 'the dog played'], + ['the dog quickly played', 'the dog #Adverb? played', 'the dog quickly played'], + ['the dog quickly played', 'the dog #Adverb played', 'the dog quickly played'], + ['the dog quickly played', 'the dog . played', 'the dog quickly played'], + ['the dog quickly played', 'the dog .? played', 'the dog quickly played'], + // ['the dog played', 'the dog .? played', 'the dog played'], + + //leading/trailing logic + ['the dog played', 'the dog played$', 'the dog played'], + ['the dog played', 'the dog', 'the dog'], + ['the dog played', 'the dog$', ''], + ['the dog played', 'the dog$ played', ''], + ['the dog played', '^the dog', 'the dog'], + ['the dog played', 'dog played', 'dog played'], + ['the dog played', '^dog played', ''], + ['the dog played', '^played', ''], + ['the dog played', '^the', 'the'], + + ['john eats glue', 'john eats glue', 'john eats glue'], + ['john eats glue', 'john eats', 'john eats'], + ['john eats glue', 'eats glue', 'eats glue'], + ['john eats glue', 'eats glue all day', ''], + + //test contractions + // [`if you don't mind`, `you don't mind`, `you don't mind`], + [`if you don't mind`, `you don't care`, ``], + // [`if you don't mind`, `you don't`, `you don't`], + // [`if you don't mind`, `don't mind`, `don't mind`], + + // [`if you didn't care`, `didn't`, `didn't`], //TODO:support me + + // [`if you wouldn't care, i'll eat here`, `i'll eat`, `i'll eat`], //omg hard one + + // [`don't go`, `do not`, `don't`], + [`do not go`, `do not`, `do not`], + // [`i dunno`, `do not`, `dunno`], + //bugs + // [`really remind me to buy`, '#Adverb? 
#Infinitive (me|us) (to|for)', `really remind me to`], + ] + arr.forEach(function(a) { + const m = nlp(a[0]).match(a[1]) + if (!m.found) { + t.equal(a[2], '', 'no-match: ' + a[0] + ' - -' + a[1]) + } else { + const msg = "'" + a[0] + "' - " + a[1] + " - - have : '" + m.out('normal') + "'" + t.equal(m.out('normal'), a[2], msg) + } + }) + t.end() +}) + +test('tag-match-tag :', function(t) { + const m = nlp('apple is cool') + m.match(['apple', 'susan']).tag('Person') + const p = m.match('#Person') + t.equal(p.out('normal'), 'apple', 'apple-tagged') + t.equal(m.length, 1, 'one-result') + t.end() +}) diff --git a/tests/match/min-max.test.js b/tests/match/min-max.test.js new file mode 100644 index 000000000..bf2d84866 --- /dev/null +++ b/tests/match/min-max.test.js @@ -0,0 +1,24 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('match min-max', function(t) { + let doc = nlp('hello1 one hello2').match('#Value{7,9}') + t.equal(doc.out(), '', 'match was too short') + + doc = nlp('hello1 one two three four five hello2').match('#Value{3}') + t.equal(doc.out(), 'one two three', 'exactly three') + + doc = nlp('hello1 one two three four five hello2').match('#Value{3,3}') + t.equal(doc.out(), 'one two three', 'still exactly three') + + doc = nlp('hello1 one two three four five hello2').match('#Value{3,}') + t.equal(doc.out(), 'one two three four five', 'minimum three') + + doc = nlp('hello1 one two three four five hello2').match('hello1 .{3}') + t.equal(doc.out(), 'hello1 one two three', 'unspecific greedy exact length') + + doc = nlp('hello1 one two').match('hello1 .{3}') + t.equal(doc.out(), '', 'unspecific greedy not long enough') + + t.end() +}) diff --git a/tests/match/not.test.js b/tests/match/not.test.js new file mode 100644 index 000000000..98fd3c725 --- /dev/null +++ b/tests/match/not.test.js @@ -0,0 +1,49 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('not-basic :', function(t) { + let m = nlp('spencer is really 
cool').not('brown') + t.equal(m.out('text'), 'spencer is really cool', 'missing-not') + t.equal(m.length, 1, 'one-result') + + m = nlp('spencer is really cool').not('#Adverb') + t.equal(m.out('text'), 'spencer is cool', 'one-not') + t.equal(m.length, 2, 'two-results') + + m = nlp('spencer is really cool').not('#Adverb+') + t.equal(m.out('text'), 'spencer is cool', 'still-one-not') + t.equal(m.length, 2, 'two-results-2') + + m = nlp('spencer is really cool').not('#Adverb+') + t.equal(m.out('text'), 'spencer is cool', 'two-not') + t.equal(m.length, 2, 'two-results-3') + + m = nlp('spencer is really cool').not('is #Adverb+') + t.equal(m.out('text'), 'spencer cool', 'three-not') + t.equal(m.length, 2, 'two-results-4') + + m = nlp('spencer is really cool. John is really nice').not('#Adverb') + t.equal(m.out('text'), 'spencer is cool. John is nice', 'two-terms-matches') + t.equal(m.length, 4, 'four-results') + + m = nlp('spencer is really cool. John is really nice.').not('pardon me, #Adverb') + t.equal(m.out('text'), 'spencer is really cool. 
John is really nice.', 'tricky-no-match') + t.equal(m.length, 2, 'two-original-results') + + t.end() +}) + +test('not-from-array :', function(t) { + let m = nlp('spencer is really cool').not(['spencer']) + t.equal(m.out('normal'), 'is really cool', 'not-spencer') + t.equal(m.length, 1, 'one-results') + + m = nlp('spencer is really cool').not(['']) + t.equal(m.out('normal'), 'spencer is really cool', 'not-spencer') + t.equal(m.length, 1, 'one-results-2') + + m = nlp('spencer is really cool').not(['spencer', 'really']) + t.equal(m.out('normal'), 'is cool', 'not-spencer-really') + t.equal(m.length, 2, 'two-results-arr') + t.end() +}) diff --git a/tests/match/or-operand.test.js b/tests/match/or-operand.test.js new file mode 100644 index 000000000..ebdb8d45c --- /dev/null +++ b/tests/match/or-operand.test.js @@ -0,0 +1,22 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('or-match-basic', function(t) { + let doc = nlp('toronto and montreal. Sydney and Paris') + let m = doc.match('(#Place | and )') + t.equal(m.out(), 'toronto and montreal. Sydney and Paris', 'whitespace-or') + + m = doc.match('(#Place | nonono no no| and )') + t.equal(m.out(), 'toronto and montreal. Sydney and Paris', 'whitespace-or-multi1') + + m = doc.match('( nonono no no|| . )') + t.equal(m.out(), 'toronto and montreal. Sydney and Paris', 'whitespace-or-dot') + t.end() +}) + +// test('or-match-multi', function(t) { +// let doc = nlp('toronto and montreal. 
Sydney and Paris') +// let m = doc.match('(#Place and montreal )') +// t.equal(m.out(), 'toronto and montreal', 'whitespace-or') +// t.end() +// }) diff --git a/tests/match/prefix.test.js b/tests/match/prefix.test.js new file mode 100644 index 000000000..bf73428f9 --- /dev/null +++ b/tests/match/prefix.test.js @@ -0,0 +1,37 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('prefix/infix/suffix basic', function(t) { + let r = nlp('it is funny and weird') + let m = r.match('/nny$/', true) + t.equal(m.out('normal'), 'funny', 'suffix-match') + m = r.match('/^fu/', true) + t.equal(m.out('normal'), 'funny', 'prefix_match') + m = r.match('/nn/', true) + t.equal(m.out('normal'), 'funny', 'infix-match') + + m = r.match('/ff$/', true) + t.equal(m.out('normal'), '', 'no-false-suffix') + m = r.match('/^ff/', true) + t.equal(m.out('normal'), '', 'no-false-prefix') + m = r.match('/ff/', true) + t.equal(m.out('normal'), '', 'no-false-infix') + + m = r.match('_', true) + t.equal(m.out('normal'), '', 'no-throw1') + m = r.match(' _ ', true) + t.equal(m.out('normal'), '', 'no-throw2') + m = r.match(' __ ', true) + t.equal(m.out('normal'), '', 'no-throw3') + m = r.match(' _ _ ', true) + t.equal(m.out('normal'), '', 'no-throw4') + + m = r.match('/^w/', true) + t.equal(m.out('normal'), 'weird', 'one-char-one-word') + m = r.match('/r/', true) + t.equal(m.out('normal'), 'weird', 'one-char-one-word2') + m = r.match('/y$/', true) + t.equal(m.out('normal'), 'funny', 'one-char-one-word3') + + t.end() +}) diff --git a/tests/match/regex.test.js b/tests/match/regex.test.js new file mode 100644 index 000000000..1ea81af72 --- /dev/null +++ b/tests/match/regex.test.js @@ -0,0 +1,19 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('regex-match:', function(t) { + let doc = nlp('it is waaaay cool') + let m = doc.match('/aaa/') + t.equal(m.out('normal'), 'waaaay', 'basic-match') + + m = doc.match('/[ao]{2}/') + t.equal(m.out('array').length, 2, 
'trickier-match') + + m = doc.match('is /aaam?/ .') + t.equal(m.out('normal'), 'is waaaay cool', 'trickier-match') + + m = doc.match('#Copula /a+/ /ool$/') + t.equal(m.out('normal'), 'is waaaay cool', 'even-trickier-match') + + t.end() +}) diff --git a/tests/match/remove.test.js b/tests/match/remove.test.js new file mode 100644 index 000000000..5b64e391e --- /dev/null +++ b/tests/match/remove.test.js @@ -0,0 +1,68 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('remove-basic :', function(t) { + let m = nlp('the brown cat played') + .match('brown') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'brown-cat') + + m = nlp('the nice brown cat played') + .match('nice brown') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'nice-brown') + + m = nlp('the nice brown cat played') + .match('#Adjective') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'adj-each') + + m = nlp('the nice brown cat played') + .match('#Adjective+') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'adj-consecutive') + + t.end() +}) + +test('remove-match :', function(t) { + let m = nlp('the brown cat played').delete('brown') + t.equal(m.out('text'), 'the cat played', 'brown-cat') + + m = nlp('the brown cat played. The brown dog sat down.').delete('brown') + t.equal(m.out('text'), 'the cat played. The dog sat down.', 'brown-cat') + + m = nlp('the nice brown cat played. The nice dog waited.').delete('nice brown') + t.equal(m.out('text'), 'the cat played. The nice dog waited.', 'nice-brown') + + m = nlp('the nice brown cat played. The cute dogs ate.').delete('#Adjective') + t.equal(m.out('text'), 'the cat played. The dogs ate.', 'adj-each') + + m = nlp('the nice brown cat played. The cute dogs ate.').delete('#Adjective+') + t.equal(m.out('text'), 'the cat played. 
The dogs ate.', 'adj-consecutive') + + t.end() +}) + +test('remove-logic :', function(t) { + let m = nlp('spencer kelly is here') + .match('spencer kelly') + .delete('spencer') + t.equal(m.out('normal'), 'kelly', 'remove(reg) returns this') + + m = nlp('spencer kelly is here') + .match('spencer kelly') + .delete() + .all() + t.equal(m.out('normal'), 'is here', 'remove() returns parent') + + m = nlp('spencer kelly is here') + .match('spencer kelly') + .delete('notfound') + t.equal(m.out('normal'), 'spencer kelly', 'remove(notfound) returns this') + t.end() +}) diff --git a/tests/match/replace.test.js b/tests/match/replace.test.js new file mode 100644 index 000000000..c93cb09af --- /dev/null +++ b/tests/match/replace.test.js @@ -0,0 +1,126 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('replace-basic :', function(t) { + let m = nlp('the dog played') + .match('dog') + .replace('cat') + .all() + t.equal(m.out('text'), 'the cat played', 'dog-cat') + + m = nlp('the dog played') + .match('the dog') + .replace('a cat') + .all() + t.equal(m.out('text'), 'a cat played', 'a-cat') + + m = nlp('the dog played') + .match('#Noun') + .replace('snake') + .all() + t.equal(m.out('text'), 'the snake played', 'snake') + + m = nlp('the pit bull played') + .match('#Noun+') + .replace('snake') + .all() + t.equal(m.out('text'), 'the snake played', 'pit bull') + + m = nlp('the pit bull dog played') + .match('#Noun+') + .replace('grey snake') + .all() + t.equal(m.out('text'), 'the grey snake played', 'pit bull dog') + + t.end() +}) + +test('match-replace :', function(t) { + ;[ + ['the dog played', 'the dog', 'the cat', 'the cat played'], + ['the dog played', 'the #Noun', 'the cat', 'the cat played'], + ['the dog played', 'the (dog|hamster|pet-snake)', 'the cat', 'the cat played'], + ['the boy and the girl', 'the #Noun', 'the house', 'the house and the house'], + ['the boy and the girl', 'the cat', 'the house', 'the boy and the girl'], + ].forEach(function(a) { + 
const str = nlp(a[0]) + .replace(a[1], a[2]) + .out('text') + const msg = str + ' -- ' + a[3] + t.equal(str, a[3], msg) + }) + + t.end() +}) + +test('replace-with-punctuation', function(t) { + const doc = nlp('Simon, how is Pamela and Jason?') + const str = doc + .match('#Person') + .replace('PERSON') + .all() + .out() + t.equal(str, 'PERSON, how is PERSON and PERSON?', 'replace-with-punctuation') + t.end() +}) + +test('structured-object-replace :', function(t) { + let r = nlp('fun times in cool town') + const term = r.match('times') + r.replace(term, 'day') + t.equal(r.out(), 'fun day in cool town', 'structured-replace') + + r = nlp('fun times in cool town') + const terms = r.match('cool town') + r.replace(terms, 'shitsville') + t.equal(r.out(), 'fun times in shitsville', 'structured-replace-multi') + t.end() +}) + +test('replace-keep some punctuation', function(t) { + let doc = nlp('two weeks').tag('Cool') + doc.replace('two', '2', true) + t.equal(doc.match('#Cool+').text(), '2 weeks', 'replace-keep-tags') + + doc = nlp('first sentence. 
I am trying it out.') + doc.match('#Gerund').tag('HashTag') + doc.match('trying').replaceWith('working', true) + t.equal(doc.match('#HashTag+').text(), 'working', 'replacewith-keep-tags') + t.end() +}) + +test('replace over implict', function(t) { + let doc = nlp("i'm good") + doc.match('am').replaceWith('was') + t.equal(doc.text(), 'i was good', 'replace over implicit') + t.end() +}) + +test('replace-with-Doc', function(t) { + let b = nlp('sneaks').tag('Cool') + + let doc = nlp(`john walks quickly`) + + doc.match('walks').replaceWith(b) + t.equal(doc.text(), 'john sneaks quickly') + t.equal(doc.has('#Cool'), true) + t.end() +}) + +test('replace-with-function', function(t) { + const repl = p => { + if (p.has('john')) { + return 'johnny' + } + return 'nancy' + } + let doc = nlp('spencer and John').replace('#Person', repl, true, true) + t.equal(doc.text(), 'nancy and Johnny', 'replace function') + + doc = nlp('Thurs, Feb 2nd, 2016') + doc.match('feb').replaceWith(m => { + return m.text({ trim: true }) + '!' + }) + t.equal(doc.text(), 'Thurs, Feb! 
2nd, 2016', 'replaceWith function') + t.end() +}) diff --git a/tests/match/soft-match.test.js b/tests/match/soft-match.test.js new file mode 100644 index 000000000..33b19b7f4 --- /dev/null +++ b/tests/match/soft-match.test.js @@ -0,0 +1,15 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('soft-match', function(t) { + let doc = nlp(`a priest walked into the bars`) + doc.cache({ root: true }) + t.equal(doc.match('bars').found, true, 'found bars') + t.equal(doc.match('bar').found, false, 'missed bar without ~') + t.equal(doc.match('~bars~').found, true, 'found ~ bars') + t.equal(doc.match('~bar~').found, true, 'found ~ bar') + t.equal(doc.match('~walk~ into').found, true, 'found infinitive') + t.equal(doc.match('~bar~').found, true, 'found singular') + t.equal(doc.text('root'), 'a priest walk into the bar', 'root-output') + t.end() +}) diff --git a/tests/match/syntax.test.js b/tests/match/syntax.test.js new file mode 100644 index 000000000..0e02798dc --- /dev/null +++ b/tests/match/syntax.test.js @@ -0,0 +1,39 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('negative parentheses', function(t) { + let doc = nlp.tokenize('if he does. 
does he?') + let m = doc.if('!^(if|cool)') + t.equals(m.out('normal'), 'does he?', 'negative-start') + + m = doc.if('^!(if|cool)') + t.equals(m.out('normal'), 'does he?', 'start-negative') + + doc = nlp.tokenize('spencer other') + t.equals(doc.match('(cool|spencer)').text(), 'spencer', 'optional-true') + t.equals(doc.match('!(cool|spencer)').text(), 'other', 'outside-negative') + t.equals(doc.match('!(foobar)').text(), 'spencer other', 'has-everthing') + t.equals(doc.match('(!spencer)').text(), 'other', 'has-other') + t.equals(doc.match('!(spencer)').text(), 'other', 'has-other-outside') + t.equals(doc.match('(!other|!spencer)').text(), 'spencer other', 'tricky-negative-swap') + t.equals(doc.match('!(!other|!spencer)').text(), '', 'double-tricky') + t.end() +}) + +test('start-end parentheses', function(t) { + let doc = nlp("matt does but matthew doesn't") + let m = doc.match('^(/matt/|frank) .') + t.equals(m.out('normal'), 'matt does', 'choice-start') + + m = doc.match('(^#Person|#Person$)') + t.equals(m.out('normal'), 'matt', 'matt-start') + + doc = nlp("now matt doesn't but yes for matthew") + m = doc.match('(^#Person|#Person$)') + t.equals(m.out('normal'), 'matthew', 'matthew-end') + + doc = nlp("now matt doesn't but yes for matthew") + m = doc.match('(woo|#Person)$') + t.equals(m.out('normal'), 'matthew', 'matthew-end-outside') + t.end() +}) diff --git a/tests/misc.test.js b/tests/misc.test.js new file mode 100644 index 000000000..690fc864b --- /dev/null +++ b/tests/misc.test.js @@ -0,0 +1,34 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('tag-multiples:', function(t) { + const r = nlp('twas brillig in the doofgafoof.') + r.match('brillig').tag(['Foo', 'Barr']) + t.ok(r.match('#Foo').found, 'tagged-foo') + t.ok(r.match('#Barr').found, 'tagged-barr') + t.end() +}) + +// ----- + +test('root-text vs match-text', function(t) { + let str = ` paper, scissors, rock. 
I run with scissors.` + let doc = nlp(str) + .match('*') + .all() + t.equal(doc.text(), str, 'perfect-root-text') + + let m = doc.match('scissors') + t.equal(m.text(), 'scissors, scissors', 'match-text') + t.end() +}) + +test('barely a term', function(t) { + let str = '.(' + let doc = nlp(str) + t.equal(doc.out(), str, 'barely-term-no-space') + str = '.( ' + doc = nlp(str) + t.equal(doc.out(), str, 'barely-term-with-space') + t.end() +}) diff --git a/tests/money.test.js b/tests/money.test.js new file mode 100644 index 000000000..be2761173 --- /dev/null +++ b/tests/money.test.js @@ -0,0 +1,17 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('money tests', function(t) { + let doc = nlp('i paid 5 USD for the thing, and got $2.50 back.') + let m = doc.money() + t.equal(m.length, 2, 'both money forms') + t.equal(m.eq(0).text(), '5 USD', 'val-currency') + t.equal(m.eq(1).text(), '$2.50', 'sybol-val') + + doc = nlp('i got 1 peso and £30.') + m = doc.money() + t.equal(m.length, 2, 'both intl money forms') + t.equal(m.eq(0).text(), '1 peso', 'val-currency-2') + t.equal(m.eq(1).text(), '£30', 'sybol-val-2') + t.end() +}) diff --git a/tests/multiTag.test.js b/tests/multiTag.test.js new file mode 100644 index 000000000..93ccc46ba --- /dev/null +++ b/tests/multiTag.test.js @@ -0,0 +1,40 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('tag-sequence:', function(t) { + const doc = nlp('it was cold') + doc.tag('#One #Two #Three') + t.equal(doc.match('#One').text(), 'it', 'one') + t.equal(doc.match('#Two').text(), 'was', 'two') + t.equal(doc.match('#Three').text(), 'cold', 'three') + t.end() +}) + +test('multiple-tags:', function(t) { + const doc = nlp('it was cold') + doc.tag(['#One', '#Two', '#Three']) + t.equal(doc.match('#One').text(), 'it was cold', 'multi- all have #One') + t.equal(doc.match('#Two').text(), 'it was cold', 'multi- all have #Two') + t.equal(doc.match('#Three').text(), 'it was cold', 'multi- all have #Three') + 
t.end() +}) + +test('tag-sequence-skip:', function(t) { + const doc = nlp('it was cold') + doc.tag('#One . #Three') + t.equal(doc.match('#One').text(), 'it', 'one') + t.equal(doc.match('#Two').text(), '', 'no-two') + t.equal(doc.match('#.').text(), '', 'no-dot') + t.equal(doc.match('#Three').text(), 'cold', 'three') + t.end() +}) + +test('multiple-tags-skip:', function(t) { + const doc = nlp('it was cold') + doc.tag(['.', '#Two', '.']) + t.equal(doc.match('#One').found, false, 'skip - none have #One') + t.equal(doc.match('#Two').text(), 'it was cold', 'skip - all have #Two') + t.equal(doc.match('#Three').found, false, 'skip - none have #Three') + t.equal(doc.match('#.').found, false, 'skip - none have #dot') + t.end() +}) diff --git a/tests/nouns/article.test.js b/tests/nouns/article.test.js new file mode 100644 index 000000000..a1ad077f9 --- /dev/null +++ b/tests/nouns/article.test.js @@ -0,0 +1,28 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('.article():', function(t) { + let arr = [ + ['duck', 'a'], + ['eavesdropper', 'an'], + ['alligator', 'an'], + // ['hour', 'an'], + ['NDA', 'an'], + ['F.B.I', 'an'], + ['N.D.A.', 'an'], + ['eulogy', 'a'], + ['ukalele', 'a'], + ['skateboards', 'the'], + ['John Smith', ''], + ['Tony Danza', ''], + ] + arr.forEach(function(a) { + const o = nlp(a[0]) + .tag('Noun') + .nouns() + .json()[0] + const msg = a[0] + ' -> ' + o.article + t.equal(o.article, a[1], msg) + }) + t.end() +}) diff --git a/tests/nouns/inflect.test.js b/tests/nouns/inflect.test.js new file mode 100644 index 000000000..cb1e0b04c --- /dev/null +++ b/tests/nouns/inflect.test.js @@ -0,0 +1,210 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('isPlural:', function(t) { + let arr = [ + ['octopus', false], + ['tree', false], + ['trees', true], + // ['i', false], + ['mayor of chicago', false], + ['mayors of chicago', true], + ['octopus', false], + ['octopi', true], + ['eyebrow', false], + ['eyebrows', true], + ['child', 
false], + ['children', true], + ["spencer's", false], + ["toronto's", false], + ['circus', false], + ['circuses', true], + // ['simpsons\'', false], + // ['she\'s', false], + ] + arr.forEach(function(a) { + const r = nlp(a[0]).nouns() + const msg = a[0] + t.equal(r.isPlural().found, a[1], msg) + }) + t.end() +}) + +test('toSingular:', function(t) { + let arr = [ + // ["Joneses", "Jones"], + ['children', 'child'], + ['women', 'woman'], + ['men', 'man'], + ['people', 'person'], + ['geese', 'goose'], + ['mice', 'mouse'], + ['barracks', 'barracks'], + ['deer', 'deer'], + ['nuclei', 'nucleus'], + ['syllabi', 'syllabus'], + ['fungi', 'fungus'], + ['cacti', 'cactus'], + ['theses', 'thesis'], + ['crises', 'crisis'], + ['phenomena', 'phenomenon'], + ['embryos', 'embryo'], + ['frescos', 'fresco'], + ['ghettos', 'ghetto'], + ['halos', 'halo'], + ['mangos', 'mango'], + ['mementos', 'memento'], + ['mottos', 'motto'], + ['tornados', 'tornado'], + ['tuxedos', 'tuxedo'], + ['volcanos', 'volcano'], + ['crises', 'crisis'], + ['analyses', 'analysis'], + ['aircraft', 'aircraft'], + ['bass', 'bass'], + ['bison', 'bison'], + ['fish', 'fish'], + ['fowl', 'fowl'], + ['kilos', 'kilo'], + ['kimonos', 'kimono'], + ['logos', 'logo'], + ['memos', 'memo'], + ['ponchos', 'poncho'], + ['photos', 'photo'], + ['pimentos', 'pimento'], + ['pros', 'pro'], + ['sombreros', 'sombrero'], + ['tacos', 'taco'], + ['memos', 'memo'], + ['torsos', 'torso'], + ['xylophones', 'xylophone'], + ['quintuplets', 'quintuplet'], + ['worrywarts', 'worrywart'], + ['nerds', 'nerd'], + ['lollipops', 'lollipop'], + ['eyebrows', 'eyebrow'], + // ['mayors of chicago', 'mayor of chicago'], + //test that sungular.singularize()==singular.. 
+ ['mango', 'mango'], + ['memento', 'memento'], + ['motto', 'motto'], + ['tornado', 'tornado'], + ['person', 'person'], + ['goose', 'goose'], + ['mouse', 'mouse'], + ['calves', 'calf'], + ['olives', 'olive'], + ['loaves', 'loaf'], + ['oafs', 'oaf'], + ['wives', 'wife'], + ['roofs', 'roof'], + ['hooves', 'hoof'], + ['buses', 'bus'], + ['tosses', 'toss'], + ['wishes', 'wish'], + ['geniouses', 'genious'], + ['prognoses', 'prognosis'], + ['analyses', 'analysis'], + ['synopses', 'synopsis'], + ['parentheses', 'parenthesis'], + ['theses', 'thesis'], + ['bases', 'base'], + ] + arr.forEach(function(a) { + const r = nlp(a[0]) + .tag('Noun') + .nouns() + const str = r.toSingular().out('normal') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('toPlural:', function(t) { + let arr = [ + ['snake', 'snakes'], + ['ski', 'skis'], + // ["Barrymore", "Barrymores"], + ['witch', 'witches'], + ['box', 'boxes'], + ['gas', 'gases'], + ['kiss', 'kisses'], + ['index', 'indices'], + ['appendix', 'appendices'], + ['criterion', 'criteria'], + ['berry', 'berries'], + ['activity', 'activities'], + ['daisy', 'daisies'], + ['church', 'churches'], + ['fox', 'foxes'], + ['stomach', 'stomachs'], + ['epoch', 'epochs'], + ['knife', 'knives'], + ['half', 'halves'], + ['scarf', 'scarves'], + ['chief', 'chiefs'], + ['spoof', 'spoofs'], + ['cafe', 'cafes'], + ['gulf', 'gulfs'], + ['alternative', 'alternatives'], + ['solo', 'solos'], + // ['zero', 'zeros'], + ['avocado', 'avocados'], + ['studio', 'studios'], + ['zoo', 'zoos'], + ['embryo', 'embryos'], + ['hero', 'heroes'], + ['banjo', 'banjos'], + ['cargo', 'cargos'], + ['flamingo', 'flamingos'], + ['fresco', 'frescos'], + ['ghetto', 'ghettos'], + ['halo', 'halos'], + ['mango', 'mangos'], + ['memento', 'mementos'], + ['motto', 'mottos'], + ['tornado', 'tornados'], + ['tuxedo', 'tuxedos'], + ['volcano', 'volcanos'], + ['bus', 'buses'], + ['crisis', 'crises'], + ['analysis', 'analyses'], + ['neurosis', 'neuroses'], + ['aircraft', 'aircraft'], + 
['halibut', 'halibut'], + ['moose', 'moose'], + ['salmon', 'salmon'], + ['sheep', 'sheep'], + ['spacecraft', 'spacecraft'], + ['tuna', 'tuna'], + ['trout', 'trout'], + ['armadillo', 'armadillos'], + ['auto', 'autos'], + ['bravo', 'bravos'], + ['bronco', 'broncos'], + ['casino', 'casinos'], + ['combo', 'combos'], + ['gazebo', 'gazebos'], + //test that plural.pluralize()==plural.. + ['snakes', 'snakes'], + ['skis', 'skis'], + // ['mayor of chicago', 'mayors of chicago'], + // ["Barrymores", "Barrymores"], + ['witches', 'witches'], + ['boxes', 'boxes'], + ['gases', 'gases'], + ['spoofs', 'spoofs'], + ['solos', 'solos'], + ['avocados', 'avocados'], + ['studios', 'studios'], + ['zoos', 'zoos'], + ] + arr.forEach(function(a) { + const r = nlp(a[0]) + .tag('Noun') + .nouns() + const str = r.toPlural().out('normal') + t.equal(str, a[1], a[0]) + }) + t.end() +}) diff --git a/tests/nouns/misc.test.js b/tests/nouns/misc.test.js new file mode 100644 index 000000000..9c23ffb71 --- /dev/null +++ b/tests/nouns/misc.test.js @@ -0,0 +1,11 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('misc nouns', function(t) { + let doc = nlp(`quickly, suddenly`) + t.equal(doc.nouns().length, 0, 'found no nouns') + + doc = nlp(`john smith, and then Google Inc in Flordia`) + t.equal(doc.nouns().length, 3, 'found three nouns') + t.end() +}) diff --git a/test/unit/subset/noun/nounPhrase.test.js b/tests/nouns/nounPhrase.test.js similarity index 86% rename from test/unit/subset/noun/nounPhrase.test.js rename to tests/nouns/nounPhrase.test.js index 63067ece4..af157ee30 100644 --- a/test/unit/subset/noun/nounPhrase.test.js +++ b/tests/nouns/nounPhrase.test.js @@ -1,5 +1,5 @@ -var test = require('tape'); -var nlp = require('../../lib/nlp'); +const test = require('tape') +const nlp = require('../_lib') //(from https://brenocon.com/JustesonKatz1995.pdf) // AN: linear function; lexical ambiguity; mobile phase @@ -13,7 +13,7 @@ var nlp = require('../../lib/nlp'); // NPN: degrees of 
freedom; [no example]; energy of adsorption test('noun-phrases', function(t) { - [ + ;[ ['he was a person of interest in the case', 'person of interest'], ['he was the mayor of chicago', 'mayor of chicago'], ['he was the captain of the football team', 'captain of the football team'], @@ -28,14 +28,16 @@ test('noun-phrases', function(t) { ['the 5-person chairlift', '5-person chairlift'], ['he had the right of way', 'right of way'], ['my retail bank sucks', 'retail bank'], - ['my activation code isn\'t working', 'activation code'], + ["my activation code isn't working", 'activation code'], ['my speech recognition system', 'speech recognition system'], ['a typical machine learning documentary film', 'machine learning documentary film'], ['every cold war re-enactment is boring', 'cold war reenactment'], ['two slices of cranberry', 'slices of cranberry'], ].forEach(function(a) { - var str = nlp(a[0]).nouns(0).out('normal'); - t.equal(str, a[1], a[0] + ' -> "' + str + '"'); - }); - t.end(); -}); + const str = nlp(a[0]) + .nouns(0) + .out('normal') + t.equal(str, a[1], a[0] + ' -> "' + str + '"') + }) + t.end() +}) diff --git a/tests/nouns/possessive.test.js b/tests/nouns/possessive.test.js new file mode 100644 index 000000000..4ec3a0977 --- /dev/null +++ b/tests/nouns/possessive.test.js @@ -0,0 +1,24 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('.toPossessive():', function(t) { + let arr = [ + ['duck', `duck's`], + ['eavesdropper', `eavesdropper's`], + ['John', `John's`], + ['hour', `hour's`], + ['F.B.I', `F.B.I's`], + ['John Smith', `John Smith's`], + ['skateboards', `skateboards'`], + ['Flanders', `Flanders'`], + // ['she', 'hers'], + ['peaches', `peaches'`], + ] + arr.forEach(function(a) { + const doc = nlp(a[0]) + .nouns() + .toPossessive() + t.equal(doc.out(), a[1], a[0]) + }) + t.end() +}) diff --git a/tests/output/json.test.js b/tests/output/json.test.js new file mode 100644 index 000000000..6ee306684 --- /dev/null +++ 
b/tests/output/json.test.js @@ -0,0 +1,102 @@ +const test = require('tape') +const nlp = require('../_lib') + +const hasTag = function(term, want) { + if (!term || !term.tags) { + return false + } + return term.tags.some(tag => tag === want) +} + +test('json out default', function(t) { + let doc = nlp('who are you? what is this?') + let json = doc.json({ terms: true }) + t.equal(json.length, 2, 'json-len') + t.equal(json[1].text, 'what is this?', 'json-text') + t.equal(json[1].terms.length, 3, 'json-three-terms') + t.equal(hasTag(json[1].terms[1], 'Copula'), true, 'json-has-tag') + t.end() +}) + +test('json out trim', function(t) { + let doc = nlp('who are you? what is this?') + let json = doc.json({ trim: false, terms: false }) + t.equal(json.length, 2, 'json-len') + t.equal(json[0].text, 'who are you? ', 'json-text') + t.equal(json[1].text, 'what is this?', 'json-text') + t.equal(json[1].terms, undefined, 'json-no-terms') + t.end() +}) + +test('json out implicit', function(t) { + let str = `he isn't` + let doc = nlp(str) + let json = doc.json() + t.equal(json.length, 1, 'json-len') + t.equal(json[0].text, str, 'json-text') + t.equal(json[0].terms.length, 3, 'json-three-terms') + let t0 = json[0].terms[0] || {} + t.equal(t0.implicit, undefined, 'has-no-implicit') + let t1 = json[0].terms[1] || {} + t.equal(hasTag(t1, 'Copula'), true, 'json-has-tag') + t.equal(t1.implicit, 'is', 'has-implicit1') + t.equal(hasTag(json[0].terms[2], 'Negative'), true, 'json-has-tag2') + let t2 = json[0].terms[2] || {} + t.equal(t2.implicit, 'not', 'has-implicit2') + t.end() +}) + +test('json terms out', function(t) { + let doc = nlp(`she is not`) + let json = doc.json({ text: false, terms: { clean: true, id: true, bestTag: true, whitespace: true } }) + t.equal(json.length, 1, 'json-len') + t.equal(json[0].text, undefined, 'json-text') + t.equal(json[0].terms.length, 3, 'json-terms-length') + let t0 = json[0].terms[0] || {} + t.equal(t0.bestTag, 'Pronoun', 'json-terms-bestTag') + let 
t1 = json[0].terms[1] || {} + t.equal(t1.bestTag, 'Copula', 'json-terms-bestTag1') + let t2 = json[0].terms[2] || {} + t.equal(t2.bestTag, 'Negative', 'json-terms-bestTag2') + + t.equal(t1.pre, '', 'json-terms-whitespace-pre') + t.equal(t1.post, ' ', 'json-terms-whitespace-post') + t.end() +}) + +test('json-index:', function(t) { + let doc = nlp(`john is not really walking`) + let obj = doc.match('really').json({ index: true })[0] + t.equal(obj.terms[0].index, 3, 'index:3') + t.equal(obj.index, 3, 'phrase-index:3') + t.end() +}) + +test('json-unique:', function(t) { + let doc = nlp(`a b c b a`) + let arr = doc.terms().json({ unique: true, terms: false, count: true }) + t.equal(arr.length, 3, 'no duplicates') + t.end() +}) + +test('out-custom:', function(t) { + const doc = nlp('The competent drum work of Don Brewer?') + const arr = doc.json({ + terms: { + text: true, + normal: false, + tags: true, + sdf: true, + root: true, + }, + })[0].terms + t.equal(arr[0].text, 'The', 'has text') + // t.equal(arr[5].root, 'don', 'has root') + t.equal(arr[5].sdf, undefined, 'has no sdf') + t.equal( + arr[0].tags.some(tag => tag === 'Determiner'), + true, + 'has tags' + ) + t.end() +}) diff --git a/tests/output/out.test.js b/tests/output/out.test.js new file mode 100644 index 000000000..696923fad --- /dev/null +++ b/tests/output/out.test.js @@ -0,0 +1,19 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('out-tags', function(t) { + let out = nlp(`he's cool.`).out('tags') + t.equal(out.length, 1, 'one sentence') + t.equal(out[0].he.indexOf('Pronoun') !== -1, true, 'has he:Pronoun') + t.equal(out[0].is.indexOf('Copula') !== -1, true, 'has is:Copula') + t.equal(out[0].cool.indexOf('Adjective') !== -1, true, 'has cool:Adjective') + t.end() +}) +test('out-topk', function(t) { + let doc = nlp(`What'd I say? Monorail. What's it called? 
Monorail.`) + let out = doc.out('freq') + t.equal(out.length, 3, 'three results') + t.equal(out[0].reduced, 'monorail', 'top-result is most-frequent') + t.equal(out[0].count, 2, 'got count') + t.end() +}) diff --git a/tests/output/text.test.js b/tests/output/text.test.js new file mode 100644 index 000000000..6a51f6adc --- /dev/null +++ b/tests/output/text.test.js @@ -0,0 +1,84 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('text-formats', function(t) { + let doc = nlp(`Toronto's citizens LOVE toronto! they come here for food.`) + t.equal(doc.text('normal'), `toronto's citizens love toronto! they come here for food.`, 'normal') + t.end() +}) + +test('text(normal):', function(t) { + let arr = [ + ['he is good', 'he is good'], + ['Jack and Jill went up the hill.', 'jack and jill went up the hill.'], + ['Mr. Clinton did so.', 'mr clinton did so.'], + ['he is good', 'he is good'], + ['Jack and Jill went up the hill. She got water.', 'jack and jill went up the hill. she got water.'], + ['Joe', 'joe'], + ['just-right', 'just right'], + ['camel', 'camel'], + ['4', '4'], + ['four', 'four'], + ['john smith', 'john smith'], + ['Dr. John Smith-McDonald', 'dr john smith mcdonald'], + ['Contains no fruit juice. \n\n All rights reserved', 'contains no fruit juice. all rights reserved'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]).text('normal') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('text-text', function(t) { + const str = `My dog LOVES pizza, and grapes!!` + let doc = nlp(str) + + t.equal(doc.json({ text: true })[0].text, str, 'json(text)') + + t.equal(doc.text('text'), str, 'text(text): ') + + t.end() +}) + +test('text-normal', function(t) { + let doc = nlp(`My dog LOVES pizza, and grapes!!`) + const str = 'my dog loves pizza, and grapes!' 
+ + t.equal(doc.json({ normal: true })[0].normal, str, 'json(normal)') + + t.equal(doc.text('normal'), str, 'text(normal): ') + + // doc.normalize() + // t.equal(doc.text('text'), str, 'normalize(): ') + + t.end() +}) + +test('text-reduced', function(t) { + let doc = nlp(`My dog LOVES pizza, and grapes!!`) + const str = 'my dog loves pizza and grapes' + + t.equal(doc.json({ reduced: true })[0].reduced, str, 'json(reduced)') + + t.equal(doc.text('reduced'), str, 'text(reduced): ') + + doc.normalize('reduced') + t.equal(doc.text('reduced'), str, 'normalize(reduced): ') + + t.end() +}) + +test('text-root', function(t) { + let doc = nlp(`My dog LOVES pizza, and grapes!!`) + const str = 'my dog love pizza and grape' + + t.equal(doc.json({ root: true })[0].root, str, 'json(root)') + + t.equal(doc.text('root'), str, 'text(root): ') + + doc.normalize('root') + t.equal(doc.text('root'), str, 'normalize(root): ') + + t.end() +}) diff --git a/tests/people.test.js b/tests/people.test.js new file mode 100644 index 000000000..7b08e8661 --- /dev/null +++ b/tests/people.test.js @@ -0,0 +1,52 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('people:', function(t) { + let doc = nlp('Mary is in the boat. Nancy is in the boat. Fred is in the boat. Jack is too.') + let arr = doc + .people() + .toLowerCase() + .out('array') + t.deepEqual(arr, ['mary', 'nancy', 'fred', 'jack'], 'people-easy') + + doc = nlp('jean jacket. jean Slkje') + arr = doc + .people() + .toLowerCase() + .out('array') + t.deepEqual(arr, ['jean slkje'], 'people-context') + + doc = nlp('The Bill was passed by James MacCarthur') + arr = doc + .people() + .toLowerCase() + .out('array') + t.deepEqual(arr, ['james maccarthur'], 'the-bill') + + doc = nlp('Rod MacDonald bought a Rod') + arr = doc + .people() + .toLowerCase() + .out('array') + t.deepEqual(arr, ['rod macdonald'], 'the-rod-1') + + doc = nlp('Rod L. 
MacDonald bought a lightening rod') + arr = doc + .people() + .toLowerCase() + .out('text') + t.deepEqual(arr, 'rod l. macdonald', 'the-rod-2') + + doc = nlp("Matt 'the doctor' Smith lasted three seasons.") + arr = doc + .people() + .toLowerCase() + .out() + t.deepEqual(arr, "matt 'the doctor' smith", 'nickname-1') + + doc = nlp("Randal Kieth Orton and Dwayne 'the rock' Johnson had a really funny fight.") + t.equal(doc.people(0).out('normal'), 'randal kieth orton', 'nickname-2a') + t.equal(doc.people(1).out('normal'), 'dwayne the rock johnson', 'nickname-2b') + + t.end() +}) diff --git a/tests/places.test.js b/tests/places.test.js new file mode 100644 index 000000000..a7cec26c7 --- /dev/null +++ b/tests/places.test.js @@ -0,0 +1,42 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('known-regions:', function(t) { + let arr = [ + ['i want to go to Ohio to see George Harrison', 'ohio'], + ['we are visiting Gloucestershire, before we leave', 'gloucestershire'], + ['manitoba is nice this time of year', 'manitoba'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .match('#Region') + .text('normal') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('unknown-places:', function(t) { + let arr = [ + ['live in the Rekcjd Province', 'rekcjd province'], + ['live in the Lekfjs District', 'lekfjs district'], + ['visiting Tojbs Kjeh Region', 'tojbs kjeh region'], + ['visiting the State of Lkjfhe', 'state of lkjfhe'], + ['see you in West Nunavut', 'west nunavut'], + ['see you in western Hunan', 'western hunan'], + ['see you in Northern Hunan province', 'northern hunan province'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .places(0) + .text('normal') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('mixed continents-places:', function(t) { + const doc = nlp('in north africa, eastern asia, guatemala, europe, north america, and japan') + t.equal(doc.places().length, 6, '6-places') + t.end() +}) diff --git 
a/tests/plugin/addTags.test.js b/tests/plugin/addTags.test.js new file mode 100644 index 000000000..d23ee83d5 --- /dev/null +++ b/tests/plugin/addTags.test.js @@ -0,0 +1,103 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('tagset-change-isA-basic', function(t) { + nlp.extend((Doc, world) => { + world.addTags({ + Doctor: { + isA: 'Person', + }, + }) + world.addWords({ + surgeon: 'Doctor', + 'surgeon general': 'Doctor', + }) + }) + let doc = nlp('the surgeon operated') + + //basic isA + t.equal(doc.match('#Doctor').out('normal'), 'surgeon', 'surgeon is a doctor') + t.equal(doc.match('#Person+').length, 1, 'doctor is a person') + + doc = nlp('lkjsdf').tag('#Person') + t.equal(doc.match('#Doctor').length, 0, 'person isnt a doctor, necessarily') + + doc = nlp('lkjsdf').tag('#Doctor') + t.equal(doc.match('#Person').length, 1, 'post-hoc tags work, too') + + //multi-word + doc = nlp('the surgeon general operated') + t.equal(doc.match('#Doctor').out('normal'), 'surgeon general', 'multi-word') + t.equal(doc.match('#Person').out('normal'), 'surgeon general', 'multi-word-isA') + t.end() +}) + +test('tagset-change-isA', function(t) { + nlp.extend((Doc, world) => { + world.addTags({ + Doctor: { + isA: 'Person', + notA: ['Foo'], + }, + }) + world.addWords({ + lkjj: 'Foo', + }) + }) + let doc = nlp('he is lkjj') + t.equal(doc.match('#Foo').out('normal'), 'lkjj', 'init-there') + doc.match('lkjj').tag('#Doctor') + + t.equal(doc.match('#Doctor').out('normal'), 'lkjj', 'doctor-tag-there') + t.equal(doc.match('#Foo').out('normal'), '', 'foo-is-gone') + + t.end() +}) + +test('tagset-remove-downward', function(t) { + nlp.extend((Doc, world) => { + world.addTags({ + Doctor: { + isA: 'Person', + }, + Surgeon: { + isA: 'Doctor', + }, + }) + }) + let doc = nlp('george is a person.') + doc.match('george').tag('Surgeon') + + t.ok(doc.has('#Surgeon'), 'Surgeon-tag-there') + t.ok(doc.has('#Doctor'), 'doctor-tag-there') + t.ok(doc.has('#Person'), 'person-tag-there') + + 
//remove one in the middle.. + doc.match('george').unTag('Person') + t.ok(doc.has('#Person') === false, 'person-tag-gone') + t.ok(doc.has('#Doctor') === false, 'doctor-tag-gone1') + t.ok(doc.has('#Surgeon') === false, 'Surgeon-tag-gone') + t.end() +}) + +test('tagset-remove-half-downward', function(t) { + nlp.extend((Doc, world) => { + world.addTags({ + Doctor: { + isA: 'Person', + }, + Surgeon: { + isA: 'Doctor', + }, + }) + }) + let doc = nlp('george is a person.') + doc.match('george').tag('Surgeon') + + //remove one just under the top.. + doc.match('george').unTag('Doctor') + t.ok(doc.has('#Person') === true, 'person-tag-there') + t.ok(doc.has('#Doctor') === false, 'doctor-tag-gone2') + t.ok(doc.has('#Surgeon') === false, 'Surgeon-tag-gone') + t.end() +}) diff --git a/tests/plugin/addWords.test.js b/tests/plugin/addWords.test.js new file mode 100644 index 000000000..ac5857e26 --- /dev/null +++ b/tests/plugin/addWords.test.js @@ -0,0 +1,29 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('persistent-lexicon-change', function(t) { + let nlp2 = nlp.clone() + let doc = nlp('he is marko') + t.equal(doc.match('#Place+').length, 0, 'default-no-place') + t.equal(doc.match('#Person+').length, 1, 'default-one-person') + + nlp2.extend((Doc, world) => { + world.addWords({ + marko: 'Place', + }) + }) + doc = nlp2('he is marko') + t.equal(doc.match('#Place+').length, 1, 'now-one-place') + t.equal(doc.match('#Person+').length, 0, 'now-no-person') + + nlp2.extend((Doc, world) => { + world.addWords({ + foo: 'Place', + }) + }) + doc = nlp2('he is marko') + t.equal(doc.match('#Place+').length, 1, 'still-one-place') + t.equal(doc.match('#Person+').length, 0, 'still-no-person') + + t.end() +}) diff --git a/tests/plugin/clone.test.js b/tests/plugin/clone.test.js new file mode 100644 index 000000000..73e989783 --- /dev/null +++ b/tests/plugin/clone.test.js @@ -0,0 +1,33 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('clone:', 
function(t) { + const arr = [ + 'he eats the alligator', + 'Jumanji is the best move. He eats cheese.', + 'Uperman is wayyyy better than batman!', + ] + arr.forEach(function(str) { + let m = nlp(str) + t.equal(m.out(), str, 'equals input - ' + m.out()) + + let up = m.clone().toUpperCase() + t.notEqual(str, up.out(), 'neg not equal - ' + str) + + let adv = m + .clone() + .match('#Verb') + .append('really') + t.notEqual(str, adv.out(), 'adv not equal - ' + str) + + let rm = m + .clone() + .match('#Verb') + .delete('#Verb') + t.notEqual(str, rm.out(), 'rm not equal - ' + str) + + let tag = m.clone().tag('#Verb') + t.notEqual(m.match('#Verb').text(), tag.match('#Verb').text(), 'rm not equal - ' + str) + }) + t.end() +}) diff --git a/tests/plugin/plugin.test.js b/tests/plugin/plugin.test.js new file mode 100644 index 000000000..a1c265e66 --- /dev/null +++ b/tests/plugin/plugin.test.js @@ -0,0 +1,129 @@ +const test = require('tape') +const nlp = require('../_lib') + +const myPlugin = function(Doc, world) { + /** add a method */ + Doc.prototype.beNice = function() { + this.match('#Infinitive').prepend('kindly') + return this + } + /** add some tags */ + world.addTags({ + Character: { + isA: 'Person', + notA: 'Adjective', + }, + }) + /** add some words */ + world.addWords({ + gonzo: 'MaleName', + kermit: 'Frog', + 'minnie mouse': 'Character', + }) + /** post-process tagger */ + world.postProcess(doc => { + doc.match('light the lights').tag('#Verb . #Plural') + }) +} + +nlp.extend(myPlugin) + +//TODO: not sure why this doesn't pass when running all-tests +// must be a race-condition? 
+// test('plugin post-process tagger', function(t) { +// let doc = nlp(`it's time to light the lights.`) +// t.equal(doc.has('#Verb the #Plural'), true, 'post-tagger ran') +// t.end() +// }) + +test('plugin adds a method', function(t) { + let doc = nlp(`wash the floor`) + doc.beNice() + t.equal(doc.text(), 'kindly wash the floor', 'beNice method worked') + t.end() +}) + +test('plugin adds a tag', function(t) { + let doc = nlp(`goofy`) + t.equal(doc.has('#Adjective'), true, 'starts adjective') + // random unknown tag + doc.tag('FooBar') + // our tag + doc.tag('Character') + t.equal(doc.has('#Character'), true, 'has given tag') + t.equal(doc.has('#Person'), true, 'has implied tag') + t.equal(doc.has('#FooBar'), true, 'has unrelated tag') + t.equal(doc.has('#Adjective'), false, 'missing notA tag') + t.end() +}) + +test('plugin adds words', function(t) { + let doc = nlp(`gonzo, minnie mouse and kermit the frog`) + t.equal(doc.match('gonzo').has('#MaleName'), true, 'new word existing tag') + t.equal(doc.match('gonzo').has('#Person'), true, 'new word implied tag') + + t.equal(doc.match('#Frog').text('normal'), 'kermit', 'new word new tag') + + let m = doc.match('minnie mouse') + t.equal(m.has('#Character #Character'), true, 'multi word given tag') + t.equal(m.has('#Person #Person'), true, 'multi word implied tag') + t.end() +}) + +// test('extend-tagset-flat', function(t) { +// const tagSet = { +// Color: {}, +// } +// const lexicon = { +// 'mother of pearl': 'Color', +// } +// nlp.addTags(tagSet) +// var m = nlp('it is mother of pearl', lexicon).match('#Color+') +// t.equal(m.out('normal'), 'mother of pearl', 'text found') +// t.ok(m.has('#Noun'), 'it does not get in the way of the tagger') +// t.end() +// }) + +test('extend-tagset-nested', function(t) { + const tagSet = { + Color: {}, + OffWhite: { + isA: 'Color', + }, + } + nlp.extend((Doc, world) => { + world.addTags(tagSet) + }) + const lexicon = { + 'mother of pearl': 'OffWhite', + } + const m = nlp('it is mother 
of pearl', lexicon).match('#OffWhite') + t.equal(m.out('normal'), 'mother of pearl', 'text found') + // t.equal(m.has('#Noun'), true, 'it does not get in the way of the tagger') + t.equal(m.has('#Color'), true, 'has isA tag, too') + t.end() +}) + +test('basic-plugin', function(t) { + nlp.extend((Doc, world) => { + world.addWords({ + trex: 'Dinosaur', + }) + world.addTags({ + Dinosaur: { + isA: 'Animal', + }, + Animal: { + isA: 'Noun', + }, + }) + world.postProcess(d => { + d.match('/uuu/').tag('Exaggeration') + }) + }) + const doc = nlp('i saw a HUUUUGE trex') + t.equal(doc.match('#Exaggeration').out('normal'), 'huuuuge', 'regex-works') + t.equal(doc.match('#Dinosaur').out('normal'), 'trex', 'lexicon-works') + t.equal(doc.match('#Animal').out('normal'), 'trex', 'tagset-works') + t.end() +}) diff --git a/tests/possessives.test.js b/tests/possessives.test.js new file mode 100644 index 000000000..f8712d660 --- /dev/null +++ b/tests/possessives.test.js @@ -0,0 +1,31 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('possessives tagger', function(t) { + const arr = [`Spencer's`, `Spencer Kelly's`, `Spencer C. Kelly's`, `Union Corp's`, `Los Angeles's`] + arr.forEach(a => { + const doc = nlp(a) + const m = doc.possessives() + t.equal(m.length, 1, 'one possessive -' + a) + t.equal(m.out(), a, 'possessive match -' + a) + }) + t.end() +}) + +test('possessives strip', function(t) { + const arr = [ + [`Spencer's`, 'Spencer'], + [`Corey Hart's`, 'Corey Hart'], + [`Corey M. Hart's`, 'Corey M. Hart'], + [`Spencer C. Kelly's`, 'Spencer C. 
Kelly'], + [`Agility Inc's`, 'Agility Inc'], + [`University of Wisconsin's`, 'University of Wisconsin'], + [`Los Angeles's`, 'Los Angeles'], + ] + arr.forEach(a => { + const doc = nlp(a[0]) + doc.possessives().strip() + t.equal(doc.out('text'), a[1], a[0]) + }) + t.end() +}) diff --git a/tests/punctuation.test.js b/tests/punctuation.test.js new file mode 100644 index 000000000..c695546fa --- /dev/null +++ b/tests/punctuation.test.js @@ -0,0 +1,55 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('normalize elipses', function(t) { + const str = `[hello] spencęr…` + const doc = nlp(str) + t.equal(doc.text(), str, 'text out-1') + t.equal(doc.text('normal'), 'hello spencer...', 'normal out-1') + t.end() +}) + +test('normalize question mark', function(t) { + const str = `hello, Spencęr???` + const doc = nlp(str) + t.equal(doc.text(), str, 'text out-2') + t.equal(doc.text('normal'), 'hello, spencer?', 'normal out-2') + t.end() +}) + +test('hyphenated', function(t) { + let doc = nlp('and check this out! a walk-in microwave.') + doc.hyphenated().deHyphenate() + t.equal(doc.text(), 'and check this out! 
a walk in microwave.', 'dehyphenate') + t.end() +}) + +test('normalize unicode', function(t) { + // const str = `• Spencęr & JOhn™ ⟨lmt⟩.` + const str = ` Spencęr & JOhn™ ⟨lmt⟩.` + const doc = nlp(str) + t.equal(doc.text(), str, 'text out') + t.equal(doc.text('reduced'), 'spencer & john lmt', 'normal out') + t.end() +}) + +test('normalize quotes ', function(t) { + const str = `،one’ «two» ‘three’ “four” 'five' "six."` + const doc = nlp(str) + t.equal(doc.text(), str, 'text out-3') + t.equal(doc.text('clean'), 'one two three four five six.', 'normal out-3') + t.end() +}) + +test('toParentheses', function(t) { + let doc = nlp(`you could still go to McGill, the Harvard of Canada!`) + doc.match('the harvard of #Place').toParentheses() + t.equal(doc.text(), 'you could still go to McGill, (the Harvard of Canada)!', 'toparentheses') + t.end() +}) +test('toQuotation', function(t) { + let doc = nlp(`you could still go to McGill, the Harvard of Canada!`) + doc.match('harvard of #Place').toQuotation() + t.equal(doc.text(), 'you could still go to McGill, the "Harvard of Canada"!', 'toparentheses') + t.end() +}) diff --git a/tests/quotation-tag.test.js b/tests/quotation-tag.test.js new file mode 100644 index 000000000..5e513c5aa --- /dev/null +++ b/tests/quotation-tag.test.js @@ -0,0 +1,224 @@ +const test = require('tape') +const nlp = require('./_lib') + +function testAllQuotes(a, t) { + const str = nlp(a[0]) + .quotations() + .out('normal') + t.equal(str, a[1], a[0]) +} + +test('quotation test:', function(t) { + ;[ + [`he is "really good"`, `really good`], + [`he is "really good" i guess`, `really good`], + [`he is "good" i guess`, `good`], + [`he is "completely and utterly great" i guess`, `completely and utterly great`], + [`“quote”`, `quote`], + [`“quote is here”`, `quote is here`], + ].forEach(function(a) { + const str = nlp(a[0]) + .quotations() + .out('normal') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('Quotations - U+0022 to U+0022', function(t) { + ;[ + 
['he is \u0022really good\u0022', 'really good'], + ['he is \u0022really good\u0022 i guess', 'really good'], + ['he is not \u0022good\u0022 at all :/', 'good'], + ['\u0022quote\u0022', 'quote'], + ['\u0022a quote here\u0022', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+FF02 to U+FF02', function(t) { + ;[ + ['he is \uFF02really good\uFF02', 'really good'], + ['he is \uFF02really good\uFF02 i guess', 'really good'], + ['he is not \uFF02good\uFF02 at all :/', 'good'], + ['\uFF02quote\uFF02', 'quote'], + ['\uFF02a quote here\uFF02', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+0027 to U+0027', function(t) { + ;[ + ['he is \u0027really good\u0027', 'really good'], + ['he is \u0027really good\u0027 i guess', 'really good'], + ['he is not \u0027good\u0027 at all :/', 'good'], + ['\u0027quote\u0027', 'quote'], + ['\u0027a quote here\u0027', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) +// +test('Quotations - U+201C to U+201D', function(t) { + ;[ + ['he is \u201Creally good\u201D', 'really good'], + ['he is \u201Creally good\u201D i guess', 'really good'], + ['he is not \u201Cgood\u201D at all :/', 'good'], + ['\u201Cquote\u201D', 'quote'], + ['\u201Ca quote here\u201D', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+2018 to U+2019', function(t) { + ;[ + ['he is \u2018really good\u2019', 'really good'], + ['he is \u2018really good\u2019 i guess', 'really good'], + ['he is not \u2018good\u2019 at all :/', 'good'], + ['\u2018quote\u2019', 'quote'], + ['\u2018a quote here\u2019', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+201F to U+201D', function(t) { + ;[ + ['he is \u201Freally good\u201D', 'really good'], + ['he is \u201Freally good\u201D i guess', 'really good'], + ['he is not \u201Fgood\u201D at all :/', 'good'], + ['\u201Fquote\u201D', 'quote'], + ['\u201Fa 
quote here\u201D', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+201B to U+2019', function(t) { + ;[ + ['he is \u201Breally good\u2019', 'really good'], + ['he is \u201Breally good\u2019 i guess', 'really good'], + ['he is not \u201Bgood\u2019 at all :/', 'good'], + ['\u201Bquote\u2019', 'quote'], + ['\u201Ba quote here\u2019', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+201E to U+201D', function(t) { + ;[ + ['he is \u201Ereally good\u201D', 'really good'], + ['he is \u201Ereally good\u201D i guess', 'really good'], + ['he is not \u201Egood\u201D at all :/', 'good'], + ['\u201Equote\u201D', 'quote'], + ['\u201Ea quote here\u201D', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+2E42 to U+201D', function(t) { + ;[ + ['he is \u2E42really good\u201D', 'really good'], + ['he is \u2E42really good\u201D i guess', 'really good'], + ['he is not \u2E42good\u201D at all :/', 'good'], + ['\u2E42quote\u201D', 'quote'], + ['\u2E42a quote here\u201D', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+201A to U+2019', function(t) { + ;[ + ['he is \u201Areally good\u2019', 'really good'], + ['he is \u201Areally good\u2019 i guess', 'really good'], + ['he is not \u201Agood\u2019 at all :/', 'good'], + ['\u201Aquote\u2019', 'quote'], + ['\u201Aa quote here\u2019', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+00AB to U+00BB', function(t) { + ;[ + ['he is \u00ABreally good\u00BB', 'really good'], + ['he is \u00ABreally good\u00BB i guess', 'really good'], + ['he is not \u00ABgood\u00BB at all :/', 'good'], + ['\u00ABquote\u00BB', 'quote'], + ['\u00ABa quote here\u00BB', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+2039 to U+203A', function(t) { + ;[ + ['he is \u2039really good\u203A', 'really good'], + ['he is 
\u2039really good\u203A i guess', 'really good'], + ['he is not \u2039good\u203A at all :/', 'good'], + ['\u2039quote\u203A', 'quote'], + ['\u2039a quote here\u203A', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+2035 to U+2032', function(t) { + ;[ + ['he is \u2035really good\u2032', 'really good'], + ['he is \u2035really good\u2032 i guess', 'really good'], + ['he is not \u2035good\u2032 at all :/', 'good'], + ['\u2035quote\u2032', 'quote'], + ['\u2035a quote here\u2032', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+2036 to U+2033', function(t) { + ;[ + ['he is \u2036really good\u2033', 'really good'], + ['he is \u2036really good\u2033 i guess', 'really good'], + ['he is not \u2036good\u2033 at all :/', 'good'], + ['\u2036quote\u2033', 'quote'], + ['\u2036a quote here\u2033', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+2037 to U+2034', function(t) { + ;[ + ['he is \u2037really good\u2034', 'really good'], + ['he is \u2037really good\u2034 i guess', 'really good'], + ['he is not \u2037good\u2034 at all :/', 'good'], + ['\u2037quote\u2034', 'quote'], + ['\u2037a quote here\u2034', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+301D to U+301E', function(t) { + ;[ + ['he is \u301Dreally good\u301E', 'really good'], + ['he is \u301Dreally good\u301E i guess', 'really good'], + ['he is not \u301Dgood\u301E at all :/', 'good'], + ['\u301Dquote\u301E', 'quote'], + ['\u301Da quote here\u301E', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+0060 to U+00B4', function(t) { + ;[ + ['he is \u0060really good\u00B4', 'really good'], + ['he is \u0060really good\u00B4 i guess', 'really good'], + ['he is not \u0060good\u00B4 at all :/', 'good'], + ['\u0060quote\u00B4', 'quote'], + ['\u0060a quote here\u00B4', 'a quote here'], + ].forEach(a => 
testAllQuotes(a, t)) + t.end() +}) + +test('Quotations - U+301F to U+301E', function(t) { + ;[ + ['he is \u301Freally good\u301E', 'really good'], + ['he is \u301Freally good\u301E i guess', 'really good'], + ['he is not \u301Fgood\u301E at all :/', 'good'], + ['\u301Fquote\u301E', 'quote'], + ['\u301Fa quote here\u301E', 'a quote here'], + ].forEach(a => testAllQuotes(a, t)) + t.end() +}) diff --git a/tests/quotations.test.js b/tests/quotations.test.js new file mode 100644 index 000000000..3bc781a7d --- /dev/null +++ b/tests/quotations.test.js @@ -0,0 +1,46 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('quotation test', function(t) { + const arr = [ + ['so I said "nah forget it"', 'nah forget it'], + ['so I said "nah, forget it" go home to bel-air!', 'nah, forget it'], + ["so I said 'nah, forget it' go home to bel-air!", 'nah, forget it'], + ['so I said "nah" go home to bel-air!', 'nah'], + ["so 'as if' i said", 'as if'], + ["the 'truthiness' i said", 'truthiness'], + ['yeah, “fun” and stuff', 'fun'], + ['“Fun” and stuff', 'fun'], + //dangling start/end + ["'twas good cookin", ''], + ["twas good cookin'", ''], + // ["twas 'good cookin'", 'good cookin'], + // ["'twas 'good cookin'", 'twas good cookin'], + [`and "Dig Your own grave and Save".`, 'dig your own grave and save'], + ] + arr.forEach(function(a) { + const r = nlp(a[0]) + const str = r.quotations().out('normal') + const msg = a[0] + ' - ' + str + t.equal(str, a[1], msg) + }) + t.end() +}) + +// test('multiple quotation test', function(t) { +// const arr = [ +// [`My "String" "with many" adjacent "nested" 'quotes'`, ['string', 'with many', 'nested', 'quotes']], +// [`My "String 'with manys' adjacent" "nested" 'quotes'`, ['string with manys adjacent', 'nested', 'quotes']], +// [ +// `"May's" 'third day' 'will be a "really cold" day' "in a" 'really cold "month"'`, +// ["may's", 'third day', 'will be a really cold day', 'in a', 'really cold month'], +// ], +// ] +// 
arr.forEach(function(a) { +// const r = nlp(a[0]) +// const str = r.quotations().out('array') +// const msg = a[0] + ' - ' + str +// t.deepEqual(str, a[1], msg) +// }) +// t.end() +// }) diff --git a/tests/random.test.js b/tests/random.test.js new file mode 100644 index 000000000..4e58b5e61 --- /dev/null +++ b/tests/random.test.js @@ -0,0 +1,71 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('random', function(t) { + const r = nlp('one two three four five six') + let arr = r + .terms() + .random() + .out('array') + t.equal(arr.length, 1, 'default is size 1') + + arr = r + .terms() + .random(2) + .out('array') + t.equal(arr.length, 2, 'size 2') + + arr = r + .terms() + .random(3) + .out('array') + t.equal(arr.length, 3, 'size 3') + + arr = r + .terms() + .random(4) + .out('array') + t.equal(arr.length, 4, 'size 4') + + arr = r + .terms() + .random(5) + .out('array') + t.equal(arr.length, 5, 'size 5') + + arr = r + .terms() + .random(6) + .out('array') + t.equal(arr.length, 6, 'size 6') + + arr = r + .terms() + .random(7) + .out('array') + t.equal(arr.length, 6, '7 is too big') + + arr = r + .terms() + .random(17) + .out('array') + t.equal(arr.length, 6, '17 is too big') + + t.end() +}) + +test('random-null', function(t) { + const r = nlp('toronto') + let arr = r + .match('#Person') + .random(5) + .out('array') + t.equal(arr.length, 0, 'random can be empty') + + arr = r + .match('#Place+') + .random(5) + .out('array') + t.equal(arr.length, 1, 'random can be full-match') + t.end() +}) diff --git a/tests/reduced.test.js b/tests/reduced.test.js new file mode 100644 index 000000000..a708c3069 --- /dev/null +++ b/tests/reduced.test.js @@ -0,0 +1,12 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('reduced matches', function(t) { + let doc = nlp(`the donkey's hotel`) + t.equals(doc.has('donkey'), true, 'apostrophe-s') + + doc = nlp(`the donkeys' hotel`) + t.equals(doc.has('donkeys'), true, 's-apostrophe') + + t.end() +}) diff 
--git a/tests/remove.test.js b/tests/remove.test.js new file mode 100644 index 000000000..2d25adf54 --- /dev/null +++ b/tests/remove.test.js @@ -0,0 +1,34 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('remove-everything-basic', function(t) { + let doc = nlp(`2pm`) + doc.remove('#Time') + t.equal(doc.text(), '', 'empty-text') + t.equal(doc.length, 0, '0-length') + t.equal(doc.found, false, 'not-found') + + doc.remove('.') + t.equal(doc.found, false, 'still-not-found') + t.end() +}) + +test('remove-everything-nested', function(t) { + let doc = nlp(`see term. term. term after.`) + t.equal(doc.length, 3, 'start-3') + + doc.remove('term') + t.equal(doc.length, 2, 'only-2 now') + + doc.remove('after') + t.equal(doc.length, 1, 'only-1 now') + + doc.remove('.') + t.equal(doc.length, 0, '0 now') + t.equal(doc.found, false, 'not-found') + + doc.remove('.') + t.equal(doc.found, false, 'still-not-found') + + t.end() +}) diff --git a/tests/reservedwords.test.js b/tests/reservedwords.test.js new file mode 100644 index 000000000..a808359db --- /dev/null +++ b/tests/reservedwords.test.js @@ -0,0 +1,115 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('reserved words:', function(t) { + const reserved = [ + 'abstract', + 'boolean', + 'break', + 'byte', + 'case', + 'catch', + 'char', + 'class', + 'const', + 'constructor', + 'continue', + 'debugger', + 'default', + 'delete', + 'do', + 'double', + 'else', + 'enum', + 'export', + 'extends', + 'false', + 'final', + 'finally', + 'float', + 'for', + 'function', + 'goto', + 'if', + 'implements', + 'import', + 'in', + 'instanceof', + 'int', + 'interface', + 'let', + 'long', + 'native', + 'new', + 'null', + 'package', + 'private', + 'protected', + 'prototype', + 'public', + 'return', + 'short', + 'static', + 'super', + 'switch', + 'synchronized', + 'this', + 'throw', + 'throws', + 'transient', + 'true', + 'try', + 'typeof', + 'var', + 'void', + 'volatile', + 'while', + 'with', + 'yeild', + 
'__prototype__', + '&&', + '||', + '|', + "'", + '&', + 'Math.PI', + 12e34, + '#§$%', + 'π', + 'привет', + // 'hasOwnProperty', + 'café', + '$$$', + 1e2, + '{}', + '[]', + 'constructor', + 'prototype', + ')&@)^', + ' -@%@', + '-constructor', + '#!^@#$', + '..(', + ] + const str = reserved.join(' ') + const r = nlp(str) + t.equal(r.out('text'), str, 'reserved-words-are-printed') + t.equal(r.terms().length, reserved.length, 'reserved-length') + t.ok(r.contractions().data(), 'runs contractions subset') + t.ok(r.parentheses().data(), 'runs parentheses subset') + t.ok(r.lists().data(), 'runs lists subset') + t.ok(r.terms().data(), 'runs terms subset') + t.ok(r.pronouns().data(), 'runs pronouns subset') + t.end() +}) + +test('co-erce reserved words', function(t) { + const r = nlp('constructor prototype') + r.tag('Verb') + t.ok(r.match('#Verb').data(), 'runs tag/match') + r.tag('Adjective') + t.ok(r.match('#Noun').data(), 'runs untag') + t.equal(r.terms().slice(0, 2).length, 2, 'runs slice') + t.ok(r.append('constructor').text(), 'runs append') + t.end() +}) diff --git a/tests/setTag.test.js b/tests/setTag.test.js new file mode 100644 index 000000000..4cc076e2a --- /dev/null +++ b/tests/setTag.test.js @@ -0,0 +1,41 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('custom-tags-persist', function(t) { + let r = nlp('i am two years older now') + let two = r.match('#Value').tag('#FunTag') + two.replaceWith('never') + t.equal(two.has('#FunTag'), false, 'custom tag is forgotten') + + r = nlp('i am two years older now') + two = r.match('#Value').tag('#FunTag') + two.toUpperCase().trim() + t.equal(two.text(), 'TWO', 'term transformed') + t.equal(two.has('#Value'), true, 'original tag stays over transformations') + t.equal(two.has('#FunTag'), true, 'custom tag stays over transformations') + + // r = nlp('i am two years older now') + // two = r.match('#Value').tag('#FunTag') + // two.toUpperCase() + // two.values().toNumber() + // t.equal(two.has('#FunTag'), 
true, 'custom tag stays over transformations') + + // r = nlp('june 1999') + // r.values().toNumber() + // const year = r.match('#Year') + // t.equal(year.out('normal'), '1999', 'year-stays-a-year') + + //not sure if these should pass.. + // r = nlp('i am two years older now') + // r.match('am').tag('#FunTag') + // r = r.sentences().toFutureTense().toPresentTense().toPastTense() + // const verb = r.match('#FunTag') + // t.equal(verb.out('normal'), 'was', 'tag stays over sentence-change') + + // r = nlp('walked').tag('#FunTag'); + // r = r.verbs().toFutureTense().toPresentTense().toPastTense(); + // verb = r.match('#FunTag'); + // t.equal(verb.out('normal'), 'walked', 'tag stays over verb-change'); + + t.end() +}) diff --git a/tests/slash.test.js b/tests/slash.test.js new file mode 100644 index 000000000..7611c16f4 --- /dev/null +++ b/tests/slash.test.js @@ -0,0 +1,45 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('slash whitespace', function(t) { + let str = 'left his / her backpack ' + let doc = nlp(str) + t.equal(doc.out(), str, 'slash with normal spaces') + + str = 'left his/her backpack ' + doc = nlp(str) + t.equal(doc.out(), str, 'slash with no spaces') + + str = 'left his / her backpack' + doc = nlp(str) + t.equal(doc.out(), str, 'slash with lots of spaces') + t.end() +}) + +test('slash match', function(t) { + let str = 'left his / her backpack ' + let doc = nlp(str) + t.equal(doc.has('his'), true, 'slash with normal spaces - his') + t.equal(doc.has('her'), true, 'slash with normal spaces - her') + // t.equal(doc.has('his / her'), true, 'slash with normal spaces - his / her') + + str = 'left his/her backpack ' + doc = nlp(str) + t.equal(doc.has('his'), true, 'slash with no spaces - his') + t.equal(doc.has('her'), true, 'slash with no spaces - her') + t.equal(doc.has('his/her'), true, 'slash with no spaces - his/her') + + str = 'left his / her backpack' + doc = nlp(str) + t.equal(doc.has('his'), true, 'slash with lots of spaces') + 
t.equal(doc.has('her'), true, 'slash with lots of spaces') + + str = 'left his/her/their backpack ' + doc = nlp(str) + t.equal(doc.has('his'), true, 'three-slash - his') + t.equal(doc.has('her'), true, 'three-slash - her') + t.equal(doc.has('their'), true, 'three-slash - their') + t.equal(doc.has('his/her/their'), true, 'three-slash - his/her/their ') + + t.end() +}) diff --git a/tests/slashes.test.js b/tests/slashes.test.js new file mode 100644 index 000000000..119f54fee --- /dev/null +++ b/tests/slashes.test.js @@ -0,0 +1,23 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('slashes-basic', function(t) { + let doc = nlp(`spencer is/was trying.`) + t.equal(doc.terms().length, 3, 'three terms') + t.equal(doc.match('#Person #Verb trying').found, true, 'verb trying') + // t.equal(doc.match('#Person is trying').found, true, 'is trying') + t.end() +}) + +test('slashes-complex', function(t) { + // doc = nlp(`spencer is/was trying`) + // 1. doc.has('#PresentTense') == true (choose first) + // 2. doc.has('#Verb') (only common tags) + // 3. doc.has('#PastTense') && doc.has('#PresentTense') :/ + + // doc = nlp(`spencer is/was trying`) + // 1b. doc.has('is') == true (choose first) + // 2b. doc.has('was') == true (find both) + // 3b. 
doc.has('is') == false (find none) + t.end() +}) diff --git a/tests/sort.test.js b/tests/sort.test.js new file mode 100644 index 000000000..ba7e32286 --- /dev/null +++ b/tests/sort.test.js @@ -0,0 +1,95 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('sortAlpha:', function(t) { + const str = 'John xoo, John fredman, John davis, John fredman,' + let r = nlp(str) + r = r.split('@hasComma') + r.sort('alpha') + const want = ['John davis,', 'John fredman,', 'John fredman,', 'John xoo,'] + t.deepEqual(r.out('array'), want, 'sort-alpha') + t.end() +}) + +test('sortSequential:', function(t) { + const str = 'John xoo, John fredman, John davis' + let r = nlp(str) + r = r.split('@hasComma') + r.sort('alphabetical') + r.sort('seq') + const want = ['John xoo,', 'John fredman,', 'John davis'] + t.deepEqual(r.out('array'), want, 'sort-chron') + t.end() +}) + +test('reverse:', function(t) { + const str = 'John xoo, John fredman, John davis' + let r = nlp(str) + r = r.split('@hasComma') + r.sort('alphabetical') + r = r.reverse() + const want = ['John xoo,', 'John fredman,', 'John davis'] + t.deepEqual(r.out('array'), want, 'alpha-reverse') + t.end() +}) + +test('length:', function(t) { + const str = 'Amy, John Fredman, Dr. Bill, Alexis Smithsonian' + let r = nlp(str) + r = r.split('@hasComma') + r.sort('length') + r = r.reverse() + const want = ['Amy,', 'Dr. Bill,', 'John Fredman,', 'Alexis Smithsonian'] + t.deepEqual(r.out('array'), want, 'sort length') + t.end() +}) + +test('wordCount:', function(t) { + const str = 'John Fredman, Amy, Dr. Bill G. Gates' + let r = nlp(str) + r = r.split('@hasComma') + r.sort('wordCount') + r.reverse() + const want = ['Dr. Bill G. 
Gates', 'John Fredman,', 'Amy,'] + t.deepEqual(r.out('array'), want, 'sort-wordcount') + t.end() +}) + +test('unique:', function(t) { + const str = 'John xoo, John fredman, john xoo, John davis' + let r = nlp(str) + r = r.split('@hasComma') + r = r.unique() + const want = ['John xoo,', 'John fredman,', 'John davis'] + t.deepEqual(r.out('array'), want, 'sort-unique') + t.end() +}) + +test('custom-sort:', function(t) { + let doc = nlp('Eeny, meeny, miny, moe') + let terms = doc.terms() + terms.sort((a, b) => { + a = a.text('normal') + b = b.text('normal') + if (a.length > b.length) { + return -1 + } + if (a.length < b.length) { + return 1 + } + return 0 + }) + let arr = terms.map(d => d.text('normal')) + t.deepEqual(arr, ['meeny, ', 'eeny, ', 'miny, ', 'moe'], 'custom sort output') + t.end() +}) + +test('frequency:', function(t) { + const str = 'John xoo, John fredman, john xoo, John davis' + let r = nlp(str) + r = r.split('@hasComma') + const a = r.out('frequency') + t.equal(a[0].reduced, 'john xoo', 'topk is sorted') + t.equal(a[0].count, 2, 'topk finds two') + t.end() +}) diff --git a/tests/tagger/inline.test.js b/tests/tagger/inline.test.js new file mode 100644 index 000000000..03ccbfc75 --- /dev/null +++ b/tests/tagger/inline.test.js @@ -0,0 +1,29 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('inline tagging linear:', function(t) { + let r = nlp('one two three four') + + r.match('one two three').tag('. 
#Person .') + let found = r.match('#Person').out('normal') + t.equal(found, 'two', 'skip-tag-skip') + + r.match('one two three').tag('#FooBar .') + found = r.match('#FooBar').out('normal') + t.equal(found, 'one', 'tag-skip-null') + + r.match('two three').tag('#Two #Three #Four') + t.equal(r.match('#Two').out('normal'), 'two', 'two-is-two') + t.equal(r.match('#Three').out('normal'), 'three', 'three-is-three') + t.equal(r.match('#Four').out('normal'), '', 'four is ignored') + + t.end() +}) + +test('compound tags from lexicon:', function(t) { + const doc = nlp('it was cold') + const arr = doc.match('#Verb+') + t.equal(arr.length, 1, 'one verb') + t.equal(arr.has('#PastTense'), true, 'past-tense') + t.end() +}) diff --git a/tests/tagger/lexicon.test.js b/tests/tagger/lexicon.test.js new file mode 100644 index 000000000..eba60544f --- /dev/null +++ b/tests/tagger/lexicon.test.js @@ -0,0 +1,105 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('default lexicon:', function(t) { + let arr = [ + ['great', 'Adjective'], + ['walked', 'PastTense'], + ['singing', 'Gerund'], + ['funniest', 'Superlative'], + ['sillier', 'Comparative'], + ['the', 'Determiner'], + ['iraqi', 'Demonym'], + ['december', 'Date'], + ['fifth', 'Value'], + ['suddenly', 'Adverb'], + ['shanghai', 'City'], + ['google', 'Organization'], + ] + arr.forEach(function(a) { + const doc = nlp(a[0]) + t.equal(doc.has('#' + a[1]), true, a[0]) + }) + t.end() +}) + +test('root-in-lexicon:', function(t) { + let arr = [ + ['wash', 'Infinitive'], + ['rewash', 'Infinitive'], + ['re-wash', 'Infinitive'], + ['re-washed', 'PastTense'], + ['rewashed', 'PastTense'], + ['rewashes', 'PresentTense'], + ['rewashing', 'Gerund'], + + ['repurchase', 'Infinitive'], + ['re-purchase', 'Infinitive'], + ['unpurchase', 'Infinitive'], + ['purchased', 'PastTense'], + ['unpurchasing', 'Gerund'], + ['unpurchases', 'PresentTense'], + ['resolve', 'Infinitive'], + ['restructure', 'Infinitive'], + ['reconcile', 'Infinitive'], 
+ ['repeat', 'Infinitive'], + ] + arr.forEach(function(a) { + const doc = nlp(a[0]) + t.equal(doc.has('#' + a[1]), true, a[0]) + }) + t.end() +}) + +test('adjusted lexicon:', function(t) { + //place new words + let lexicon = { + geneva: 'Person', + lkjj: 'Adjective', + 'donkey kong': 'City', + } + + const arr = [ + ['geneva is nice', '#Person #Copula #Adjective'], + ['he is lkjj', '#Pronoun #Copula #Adjective'], + ['donkey kong wins the award', '#City #City #Verb #Determiner #Noun'], + ] + arr.forEach(function(a) { + const doc = nlp(a[0], lexicon) + t.equal(doc.has(a[1]), true, a[0]) + }) + // + //set gender from lexicon + const doc = nlp('Kelly', lexicon) + t.equal(doc.has('#FemaleName'), true, 'kelly-female') + //set as male: + lexicon = { + kelly: 'MaleName', + } + const doc2 = nlp('Kelly', lexicon) + t.equal(doc2.has('#MaleName'), true, 'kelly-male') + + //gender follows lumping + const doc3 = nlp('Kelly Gruber', lexicon) + t.equal(doc3.has('#MaleName #LastName'), true, 'kelly-gruber') + + t.end() +}) + +test('tricky lexicon:', function(t) { + let lexicon = { + 'bed bath and beyond': 'Organization', + } + let r = nlp('shopping at Bed Bath and Beyond, the store', lexicon) + let str = r.match('#Organization+').out('normal') + t.equal(str, 'bed bath and beyond', 'four-word') + + r = nlp('shopping at Bed Bath and-beyond the store', lexicon) + str = r.match('#Organization+').out('normal') + t.equal(str, 'bed bath and beyond', 'partially-hyphenated-word') + + r = nlp('shopping at Bed-bath and-beyond the store', lexicon) + str = r.match('#Organization+').out('normal') + t.equal(str, 'bed bath and beyond', 'many-hyphenated-word') + t.end() +}) diff --git a/tests/tagger/multi.test.js b/tests/tagger/multi.test.js new file mode 100644 index 000000000..336896972 --- /dev/null +++ b/tests/tagger/multi.test.js @@ -0,0 +1,44 @@ +const test = require('tape') +const nlp = require('../_lib') + +const lexicon = { + 'Jardas al Abid': 'Place', + 'Umm Ar Rizam': 'Place', + Tobruk: 
'Place', +} + +test('user-lex-with-hyphenation:', function(t) { + const sentence = + 'A suicide attack hit the centre of Jardas-al-Abid killing one person (and the attacker) and injuring more than twenty.' + const found = nlp(sentence, lexicon).match('#Place+') + t.equal('jardas al abid', found.eq(0).text('normal'), 'found-place1') + t.equal(lexicon, lexicon, 'lexicon-unchanged') + t.end() +}) + +test('user-lex-with-possessive form:', function(t) { + const sentence = + "A suicide attack hit Jardas al Abid's center killing one person (and the attacker) and injuring more than twenty." + const found = nlp(sentence, lexicon).match('#Place+') + t.equal("jardas al abid's", found.eq(0).text('normal'), 'found-place2') + t.equal(lexicon, lexicon, 'lexicon-unchanged') + t.end() +}) + +test('user-lex-with-proper name in front:', function(t) { + const sentence = + "A suicide attack hit Lybia's Jardas al Abid city killing one person (and the attacker) and injuring more than twenty." + const found = nlp(sentence, lexicon).match('#Place+') + t.equal('jardas al abid', found.eq(0).text('normal'), 'found-place3') + t.equal(lexicon, lexicon, 'lexicon-unchanged') + t.end() +}) + +test('user-lex-with-punctuation:', function(t) { + const sentence = + 'A suicide attack hit Jardas al Abid, which killed one person (and the attacker) and injured more than twenty.' 
+ const found = nlp(sentence, lexicon).match('#Place+') + t.equal('jardas al abid', found.eq(0).text('normal'), 'found-place4') + t.equal(lexicon, lexicon, 'lexicon-unchanged') + t.end() +}) diff --git a/tests/tagger/organization.test.js b/tests/tagger/organization.test.js new file mode 100644 index 000000000..472e1b617 --- /dev/null +++ b/tests/tagger/organization.test.js @@ -0,0 +1,25 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('organization test', function(t) { + const arr = [ + 'google', + 'google inc', + 'Capital One', + 'HSBC', + 'NASA', + '7-eleven', + 'al qaeda', + 'FBI', + 'monsanto', + 'Johnson & Johnson', + // 'Johnson & Johnson LLC', + ] + arr.forEach(function(str) { + const r = nlp(str) + const orgs = r.match('#Organization+') + const msg = orgs.out('text') + ' - ' + str + t.equal(orgs.out('text'), str, msg) + }) + t.end() +}) diff --git a/tests/tagger/penn.test.js b/tests/tagger/penn.test.js new file mode 100644 index 000000000..f76be0f46 --- /dev/null +++ b/tests/tagger/penn.test.js @@ -0,0 +1,61 @@ +const test = require('tape') +const nlp = require('../_lib') +const penn = require('../_pennSample') + +const softMapping = { + CC: 'Conjunction', + CD: 'Cardinal', + DT: 'Determiner', + FW: 'Expression', + IN: 'Preposition', + JJ: 'Adjective', + JJR: 'Comparative', + JJS: 'Superlative', + MD: 'Verb', + NN: 'Noun', + NNS: 'Noun', + NNP: 'Noun', + NNPS: 'Noun', + POS: 'Possessive', + PRP: 'Pronoun', + PRP$: 'Pronoun', + RB: 'Adverb', + RBR: 'Comparative', + RBS: 'Superlative', + TO: 'Conjunction', + UH: 'Expression', + VB: 'Verb', + VBD: 'Verb', + VBG: 'Gerund', + VBN: 'Verb', // past participle + VBP: 'Verb', // non-3rd person singular present + VBZ: 'Verb', // 3rd person singular present + WDT: 'Determiner', + WP: 'Pronoun', + WP$: 'Noun', + WRB: 'Adverb', +} + +test('pennTreebank-test:', function(t) { + penn.forEach((sentence, index) => { + sentence.tags = sentence.tags.split(', ') + + let doc = nlp(sentence.text) + 
t.equal(doc.length, 1, 'one sentence #' + index) + let terms = doc.json(0).terms + t.equal(terms.length, sentence.tags.length, 'tokenize#' + index) + + for (let i = 0; i < sentence.tags.length; i++) { + const want = softMapping[sentence.tags[i]] + if (!terms[i]) { + t.ok(false, sentence.text) + return + } + let found = terms[i].tags.some(tag => tag === want) + let msg = `'` + sentence.text.substr(0, 20) + `'.. - ` + msg += `'${terms[i].text}' missing #${want}` + t.equal(found, true, msg) + } + }) + t.end() +}) diff --git a/tests/tagger/swears.test.js b/tests/tagger/swears.test.js new file mode 100644 index 000000000..7ef5198f9 --- /dev/null +++ b/tests/tagger/swears.test.js @@ -0,0 +1,28 @@ +const test = require('tape') +const nlp = require('../_lib') +//nsfw! + +test('swears:', function(t) { + let m = nlp('shit, i am tired').match('^#Expression') + t.ok(m.found, 'swear-1') + + m = nlp('the shit keeps piling up').match('the #Noun') + t.ok(m.found, 'swear-2') + + m = nlp('damn them all').match('^#Verb') + t.ok(m.found, 'swear-3') + + m = nlp('fuck the government').match('^#Verb') + t.ok(m.found, 'swear-4') + + // m = nlp('when hell freezes over').match('^when #Noun'); + // t.ok(m.found, 'swear-5'); + + // m = nlp('he fucked up').match('he #Verb #Particle'); + // t.ok(m.found, 'swear-6'); + + m = nlp('it is fucked up').match('is #Adjective #Adjective') + t.ok(m.found, 'swear-7') + + t.end() +}) diff --git a/tests/tagger/tagWord.test.js b/tests/tagger/tagWord.test.js new file mode 100644 index 000000000..4fbbe9735 --- /dev/null +++ b/tests/tagger/tagWord.test.js @@ -0,0 +1,75 @@ +const test = require('tape') +const nlp = require('../_lib') + +//test a word from each file in ./data/** +test('pos from-lexicon', function(t) { + const arr = [ + ['toronto', 'City'], + ['mexico', 'Country'], + ['Jamaica', 'Country'], + ['legendary', 'Adjective'], + ['above', 'Adjective'], + ['moderate', 'Adjective'], + ['extreme', 'Adjective'], + ['august', 'Month'], + ['saturday', 
'WeekDay'], + ['really', 'Adverb'], + ['each', 'Determiner'], + ['voila', 'Expression'], + ['new england', 'Place'], + ['hers', 'Possessive'], + ['onto', 'Preposition'], + ['blvd', 'Place'], + ['belgian', 'Demonym'], + ['cactus', 'Singular'], + ['cacti', 'Plural'], + ['economy', 'Noun'], + ['engineer', 'Noun'], + ['clothing', 'Noun'], + ['duran duran', 'Organization'], + ['american express', 'Organization'], + ['brotherhood', 'Noun'], + ['oakland athletics', 'SportsTeam'], + ['jamie', 'Person'], + ['claire', 'FemaleName'], + ['arthur', 'MaleName'], + ['¥', 'Currency'], + ['pence', 'Currency'], + ['seven', 'Value'], + ['seventeen', 'Value'], + ['twenty', 'Value'], + ['thousand', 'Value'], + ['eighteenth', 'Value'], + ['tbsp', 'Unit'], + ['wrote', 'PastTense'], + ['write', 'Verb'], + ['survive', 'Verb'], + ['attempt', 'Verb'], + ["mc'adams", 'LastName'], + ['Müller', 'LastName'], + ['muller', 'LastName'], + ['425-1231', 'PhoneNumber'], + ['823-425-1231', 'PhoneNumber'], + ['823 425-1231', 'PhoneNumber'], + ['(823) 425-1231', 'PhoneNumber'], + ['invest', 'Verb'], + ['investing', 'Verb'], + [`wallys'`, 'Possessive'], + // [`JDI University'`, 'Organization'], + ['ocean', 'Noun'], + ['shiver', 'Verb'], + [`flanders'`, 'Possessive'], + [`chillin'`, 'Gerund'], + [`'cool'`, 'Adjective'], + ['MMMCMXXIII', 'RomanNumeral'], + ['MIMMCMXXIII', 'Acronym'], //invalid roman numeral + ['c.e.o', 'Acronym'], + ['MDMA', 'Acronym'], + ['unless', 'Condition'], + ] + arr.forEach(function(a) { + const term = nlp(a[0]).termList()[0] + t.equal(term.tags[a[1]], true, a[0]) + }) + t.end() +}) diff --git a/tests/tagger/tagger.test.js b/tests/tagger/tagger.test.js new file mode 100644 index 000000000..e94d7bb71 --- /dev/null +++ b/tests/tagger/tagger.test.js @@ -0,0 +1,128 @@ +const test = require('tape') +const nlp = require('../_lib') +// const pos_test = require('../_lib').pos_test + +test('pos-basic-tag:', function(t) { + ;[ + ['John is pretty', ['Person', 'Copula', 'Adjective']], + ['John 
was lofty', ['Person', 'Copula', 'Adjective']], + ['John Smith was lofty', ['FirstName', 'LastName', 'Copula', 'Adjective']], + ['asdfes was lofty', ['Noun', 'Copula', 'Adjective']], + ['asdfes lksejfj was lofty', ['Noun', 'Noun', 'Copula', 'Adjective']], + ['Spencer Kelly is in Canada', ['Person', 'Person', 'Copula', 'Preposition', 'Place']], + ['He is in Canada', ['Pronoun', 'Copula', 'Preposition', 'Place']], + ['5 red roses', ['Value', 'Adjective', 'Noun']], + ['3 trains', ['Value', 'Noun']], + ['3 trainers', ['Value', 'Noun']], + ['5 buses', ['Value', 'Noun']], + ['101010101010101010101010101010101010101010', ['NumericValue']], + + ['walk the walk', ['Verb', 'Determiner', 'Noun']], + ['Peter the man', ['Person', 'Determiner', 'Noun']], + // ['book the flight', ['Verb', 'Determiner', 'Noun']], + + //slang, contractions + ['u r nice', ['Pronoun', 'Copula', 'Adjective']], + ['canadian bacon', ['Demonym', 'Noun']], + ['canadian dollar', ['Currency', 'Currency']], + + //possessive rules + ["john lkjsdf's", ['Person', 'Possessive']], + ["john lkjsdf's house", ['Person', 'Possessive', 'Noun']], + ["john Lkjsdf's house", ['Person', 'Possessive', 'Noun']], + ["john Lkjsdf's House", ['Person', 'Possessive', 'Noun']], + ["mark's question mark", ['Possessive', 'Noun', 'Noun']], + + //question-words + ['who is good?', ['QuestionWord', 'Copula', 'Adjective']], + ['which is good?', ['QuestionWord', 'Copula', 'Adjective']], + // ['bacon which is good', ['Noun', 'Pronoun', 'Copula', 'Adjective']], + // ['bacon which really is good', ['Noun', 'Pronoun', 'Adverb', 'Copula', 'Adjective']], + // ['Douglas who really is good', ['Person', 'Pronoun', 'Adverb', 'Copula', 'Adjective']], + + //web text things + ['lkj@fun.com', ['Email']], + ['j@f.ti', ['Email']], + ['j@ti', ['Noun']], + ['@ti', ['AtMention']], + ['#funtimes', ['HashTag']], + ['http://fun.com/cool?fun=yes', ['Url']], + ['#cool fun.com @cooman', ['HashTag', 'Url', 'AtMention']], + + //determiner-corrections + ['this rocks 
dude', ['Determiner', 'Verb', 'Noun']], + ['that rocks dude', ['Determiner', 'Verb', 'Noun']], + ['the rocks dude', ['Determiner', 'Plural', 'Noun']], + ['these rocks dude', ['Determiner', 'Plural', 'Noun']], + ['those rocks dude', ['Determiner', 'Plural', 'Noun']], + ['the test string', ['Determiner', 'Noun', 'Noun']], + + //people + ['John swim', ['Person', 'Verb']], + ['John, John', ['Person', 'Person']], + ['John, you', ['FirstName', 'Pronoun']], + ['John you', ['MaleName', 'Pronoun']], + ['you John you', ['Pronoun', 'Person', 'Pronoun']], + // ['10 + 9', ['Value', 'Symbol', 'Value']], + // ['2 * 90 = 180', ['Value', 'Symbol', 'Value', 'Symbol', 'Value']], + // ['one - seventy-six', ['Value', 'Symbol', 'Value']], + ['The stream runs', ['Determiner', 'Noun', 'Verb']], + ['The stream really runs', ['Determiner', 'Noun', 'Adverb', 'Verb']], + ['The nice stream really runs', ['Determiner', 'Adjective', 'Noun', 'Adverb', 'Verb']], + + ['he is walking', ['Pronoun', 'Copula', 'Gerund']], + ['walking is fun', ['Activity', 'Copula', 'Adjective']], + ["walking's great", ['Activity', 'Copula', 'Adjective']], + ['jack cheered', ['Person', 'PastTense']], + ['jack guarded', ['Person', 'PastTense']], + ['jack is guarded', ['Person', 'Copula', 'Adjective']], + ['jack seems guarded', ['Person', 'Verb', 'Adjective']], + //more + ['there are reasons', ['Noun', 'Copula', 'Plural']], + ['there were many walks', ['Noun', 'Copula', 'Adjective', 'Plural']], + ['there were the walks', ['Noun', 'Copula', 'Determiner', 'Noun']], + + ['it was fixed', ['Noun', 'Copula', 'PastTense']], + ['it will be boxed', ['Noun', 'Verb', 'Verb', 'PastTense']], + //ambiguous adverbs + ['it was pretty', ['Noun', 'Copula', 'Adjective']], + ['it was pretty cool', ['Noun', 'Copula', 'Adverb', 'Adjective']], + // ['it was really pretty cool', ['Noun', 'Copula', 'Adverb', 'Adverb', 'Adjective']], + ['it was just', ['Noun', 'Copula', 'Adjective']], + ['it was just gorgeous', ['Noun', 'Copula', 'Adverb', 
'Adjective']], + + ['N.V.,', ['Noun']], + ['16.125', ['Cardinal']], + ['$19', ['Money']], + ['butterfly', ['Singular']], + ['he blamed the girl', ['Pronoun', 'PastTense', 'Determiner', 'Singular']], + ['his fine', ['Possessive', 'Noun']], + ['contracted AIDS', ['PastTense', 'Acronym']], + ['city/town', ['Noun', 'Noun']], + ['boyfriend to Jane', ['Noun', 'Conjunction', 'Person']], + // ['boyfriend of Jane', ['Noun', 'Conjunction', 'Person']], + ['his fines', ['Possessive', 'Noun']], + ['100+ rumours', ['Value', 'Plural']], + ['John & John,', ['Noun', 'Noun', 'Noun']], + + //abbreviations + [ + 'col. Patrick said march and feb. etc.', + ['Abbreviation', 'Person', 'PastTense', 'Month', 'Conjunction', 'Abbreviation', 'Abbreviation'], + ], + //dates + + ['germans are nice', ['Demonym', 'Verb', 'Adjective']], + ['Iraqis are nice', ['Plural', 'Copula', 'Adjective']], + ['canadians are nice', ['ProperNoun', 'Verb', 'Adjective']], + ['thom is smart', ['ProperNoun', 'Verb', 'Adjective']], + ].forEach(function(a) { + let terms = nlp(a[0]).json(0).terms + terms.forEach((term, i) => { + let tag = a[1][i] + let found = term.tags.some(tg => tg === tag) + t.equal(found, true, term.text + ' ' + tag) + }) + }) + t.end() +}) diff --git a/tests/tagger/topics.test.js b/tests/tagger/topics.test.js new file mode 100644 index 000000000..cdda8c892 --- /dev/null +++ b/tests/tagger/topics.test.js @@ -0,0 +1,29 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('proper-nouns', function(t) { + const arr = [ + ['I met John Smith in Toronto', ['john smith', 'toronto']], + ['Toronto and Vancouver Canada', ['toronto', 'vancouver canada']], + // ['we ate shellfish at 23 Main st.', []], + ['google is suing motorola inc', ['google', 'motorola inc']], + ['the doctor and his brother see the mayor of france', ['france']], + ] + arr.forEach(a => { + const out = nlp(a[0]) + .match('#ProperNoun+') + .toLowerCase() + .out('array') + t.deepEqual(out, a[1], a[0]) + }) + t.end() +}) + 
+//after we change pos, untag propernoun +test('remove-proper-nouns', function(t) { + const doc = nlp('do what Theresa May') + t.equal(doc.match('may').has('#ProperNoun'), true, 'propernoun-init') + doc.match('may').tag('Verb') + t.equal(doc.match('may').has('#ProperNoun'), false, 'propernoun-missing') + t.end() +}) diff --git a/tests/tagger/untag.test.js b/tests/tagger/untag.test.js new file mode 100644 index 000000000..3d8a115c5 --- /dev/null +++ b/tests/tagger/untag.test.js @@ -0,0 +1,72 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('tag inference:', function(t) { + let m = nlp('aasdf2') + .unTag('Noun') + .unTag('NounPhrase') + let term = m.list[0].terms(0) + t.equal(Object.keys(term.tags).length, 0, 'aasdf2 has no tags') + //give it a specific tag- + m.tag('SportsTeam') + term = m.list[0].terms(0) + t.equal(term.tags.Noun, true, 'aasdf2 now has Noun') + t.equal(term.tags.Organization, true, 'aasdf2 now has Organization(inferred)') + //give it a redundant tag- + m.tag('Organization') + term = m.list[0].terms(0) + t.equal(term.tags.Noun, true, 'aasdf2 still has Noun') + t.equal(term.tags.Organization, true, 'aasdf2 still has Organization') + t.end() +}) + +test('untag inference:', function(t) { + let m = nlp('aasdf') + m.tag('FemaleName') + let term = m.list[0].terms(0) + t.equal(term.tags.FemaleName, true, 'aasdf first has FemaleName') + t.equal(term.tags.Person, true, 'aasdf first has person') + t.equal(term.tags.Noun, true, 'aasdf first has noun') + //remove the assumption.. 
+ m.unTag('Noun') + t.equal(term.tags.Noun, undefined, 'aasdf now has no noun') + t.equal(term.tags.Person, undefined, 'aasdf now has no person(inferred)') + t.equal(term.tags.FemaleName, undefined, 'aasdf now has no FemaleName(inferred)') + t.end() +}) + +test('tag idempodence:', function(t) { + const m = nlp('walk').tag('Verb') + const term = m.list[0].terms(0) + t.equal(term.tags.Verb, true, 'walk has Verb') + t.equal(term.tags.Value, undefined, 'walk has no Value') + //untag irrelevant stuff + m.unTag('Value') + m.unTag('Determiner') + m.unTag('Country') + m.unTag('Place') + t.equal(term.tags.Verb, true, 'walk has Verb after') + t.equal(term.tags.Value, undefined, 'walk has no Value after') + t.end() +}) + +test('tags are self-removing', function(t) { + const terms = ['Person', 'Place', 'PastTense', 'FemaleName', 'Infinitive', 'HashTag', 'Month'] + terms.forEach(function(tag) { + const m = nlp('aasdf') + .tag(tag) + .unTag(tag) + const t0 = m.list[0].terms(0) + t.equal(t0.tags[tag], undefined, 'tag removes self ' + tag) + }) + t.end() +}) + +test('untag wildcard', function(t) { + const r = nlp('we live in Toronto Canada and it is cold') + r.match('#Place+').unTag('*') + t.equal(r.match('#Place').found, false, 'place-tag-is-gone') + const term = r.list[0].terms(3) || {} + t.equal(Object.keys(term.tags || {}).length, 0, 'toronto-has-no-tags-now') + t.end() +}) diff --git a/tests/tagger/web.test.js b/tests/tagger/web.test.js new file mode 100644 index 000000000..502e95e86 --- /dev/null +++ b/tests/tagger/web.test.js @@ -0,0 +1,66 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('is-email:', function(t) { + ;[ + [`s@s.com`, true], + [`sasdf@sasdf.com`, true], + [`sasdf@sasdf.ti`, true], + [`sasdf@sasdf.t`], + [`sasdf@sasdft`], + [`sasdfsasdft.com`], + [`@sasdft.com`], + [`_@_.com`, true], + [`_@_._`], + [`sas df@sasdf.com`], + [`sasdf@sa sdf.com`], + ].forEach(function(a) { + const term = nlp(a[0]).list[0].terms(0) + const msg = a[0] + ' is 
email: ' + a[1] + t.equal(term.tags['Email'], a[1], msg) + }) + t.end() +}) + +test('is-hashtag:', function(t) { + ;[ + [`#lkjsdf`, true], + [`#ll`, true], + [`#22ll`, true], + [`#_22ll`, true], + // [`#l`,], + [`# l`], + [`l#l`], + ].forEach(function(a) { + const term = nlp(a[0]).list[0].terms(0) + const msg = a[0] + ' is hashtag: ' + a[1] + t.equal(term.tags['HashTag'], a[1], msg) + }) + t.end() +}) + +test('is-url:', function(t) { + ;[ + [`http://cool.com/fun`, true], + [`https://cool.com`, true], + [`https://cool.com/`, true], + [`https://www.cool.com/`, true], + [`http://subdomain.cool.com/`, true], + [`www.fun.com/`, true], + [`www.fun.com`, true], + [`www.fun.com/foobar/fun`, true], + [`www.subdomain.cool.com/`, true], + [`wwwsubdomain.cool.com/`, true], + [`woo.br`, true], + [`woohoo.biz`, true], + [`woop.org/news`, true], + [`http://woop.org/news?foo=bar`, true], + [`http:subdomain.cool.com/`], + [`coolcom`], + ].forEach(function(a) { + const term = nlp(a[0]).list[0].terms(0) + const msg = a[0] + ' is url: ' + a[1] + t.equal(term.tags['Url'], a[1], msg) + }) + t.end() +}) diff --git a/tests/tokenize.test.js b/tests/tokenize.test.js new file mode 100644 index 000000000..b5dc1575f --- /dev/null +++ b/tests/tokenize.test.js @@ -0,0 +1,37 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('em-dash, en-dash', function(t) { + // '-': //dash + // '–': //en-dash + // '—': //em-dash + let doc = nlp('fun-time') + t.equal(doc.terms().length, 2, 'dash') + doc = nlp('fun–time') + t.equal(doc.terms().length, 2, 'en-dash') + doc = nlp('fun—time') + t.equal(doc.terms().length, 2, 'em-dash') + + //not a full word, either + doc = nlp('fun - time') + t.equal(doc.terms().length, 2, 'dash-word') + doc = nlp('fun – time') + t.equal(doc.terms().length, 2, 'en-dash-word') + doc = nlp('fun — time') + t.equal(doc.terms().length, 2, 'em-dash-word') + + //numeric forms are split, but contractions too + doc = nlp('20-20') + t.equal(doc.terms().length, 3, 
'dash-num') + doc = nlp('20–20') + t.equal(doc.terms().length, 3, 'en-dash-num') + doc = nlp('20—20') + t.equal(doc.terms().length, 3, 'em-dash-num') + t.end() +}) + +test('emoji-only sentence', function(t) { + let doc = nlp('good night! 💋') + t.equal(doc.length, 2, 'boemojith sentence') + t.end() +}) diff --git a/tests/topics.test.js b/tests/topics.test.js new file mode 100644 index 000000000..1ea074283 --- /dev/null +++ b/tests/topics.test.js @@ -0,0 +1,70 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('topics:', function(t) { + let list = [ + ['Tony Hawk lives in Toronto. Tony Hawk is cool.', 'tony hawk'], + ['I live Toronto. I think Toronto is cool.', 'toronto'], + ['The EACD united in 1972. EACD must follow regulations.', 'eacd'], + // ['The Elkjsdflkjsdf sells hamburgers. I think the Elkjsdflkjsdf eats turky.', 'elkjsdflkjsdf'], + ["Toronto's citizens love toronto!", 'toronto'], + ] + list.forEach(function(a) { + const arr = nlp(a[0]) + .topics() + .out('freq') + t.equal(arr[0].reduced, a[1], a[0]) + }) + t.end() +}) + +test('topics-false-positives:', function(t) { + const arr = [ + 'somone ate her lunch', + 'everybody is dancing all night', + "a man and a woman ate her son's breakfast", + 'my brother walks to school', + `She's coming by`, + `if she doesn't like something about us she can keep us off`, + ` She's it! She could be a soap opera.`, + `she's a little dare-devil!`, + ] + arr.forEach(function(str, i) { + const doc = nlp(str).topics() + t.equal(doc.length, 0, 'topics #' + i + ' -> ' + doc.out()) + }) + t.end() +}) + +test('topics-basic', function(t) { + let doc = nlp('i went to Gloop University in Paris, France, with John H. Smith') + let arr = doc.topics().out('array') + t.deepEqual(arr, ['Gloop University', 'Paris, France,', 'John H. 
Smith'], 'found all three topics') + t.end() +}) + +test('misc entities', function(t) { + let doc = nlp('The Children are right to laugh at you, Ralph') + let m = doc.people() + t.equal(m.length, 1, 'one person') + + m = doc.places() + t.equal(m.length, 0, 'no places') + + m = doc.organizations() + t.equal(m.length, 0, 'no organizations') + + m = doc.entities() + t.equal(m.length, 1, 'one entity') + t.end() +}) + +test('topics concat:', function(t) { + const things = nlp('spencer and danny are in Paris France and germany for Google Inc and IBM') + .topics() + .json({ normal: true, trim: true }) + .map(o => o.normal) + const want = ['spencer', 'danny', 'paris france', 'germany', 'google inc', 'ibm'] + t.equal(things.join(', '), want.join(', '), 'found right things') + t.end() +}) diff --git a/tests/transform/delete.test.js b/tests/transform/delete.test.js new file mode 100644 index 000000000..5b64e391e --- /dev/null +++ b/tests/transform/delete.test.js @@ -0,0 +1,68 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('remove-basic :', function(t) { + let m = nlp('the brown cat played') + .match('brown') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'brown-cat') + + m = nlp('the nice brown cat played') + .match('nice brown') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'nice-brown') + + m = nlp('the nice brown cat played') + .match('#Adjective') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'adj-each') + + m = nlp('the nice brown cat played') + .match('#Adjective+') + .delete() + .all() + t.equal(m.out('text'), 'the cat played', 'adj-consecutive') + + t.end() +}) + +test('remove-match :', function(t) { + let m = nlp('the brown cat played').delete('brown') + t.equal(m.out('text'), 'the cat played', 'brown-cat') + + m = nlp('the brown cat played. The brown dog sat down.').delete('brown') + t.equal(m.out('text'), 'the cat played. 
The dog sat down.', 'brown-cat') + + m = nlp('the nice brown cat played. The nice dog waited.').delete('nice brown') + t.equal(m.out('text'), 'the cat played. The nice dog waited.', 'nice-brown') + + m = nlp('the nice brown cat played. The cute dogs ate.').delete('#Adjective') + t.equal(m.out('text'), 'the cat played. The dogs ate.', 'adj-each') + + m = nlp('the nice brown cat played. The cute dogs ate.').delete('#Adjective+') + t.equal(m.out('text'), 'the cat played. The dogs ate.', 'adj-consecutive') + + t.end() +}) + +test('remove-logic :', function(t) { + let m = nlp('spencer kelly is here') + .match('spencer kelly') + .delete('spencer') + t.equal(m.out('normal'), 'kelly', 'remove(reg) returns this') + + m = nlp('spencer kelly is here') + .match('spencer kelly') + .delete() + .all() + t.equal(m.out('normal'), 'is here', 'remove() returns parent') + + m = nlp('spencer kelly is here') + .match('spencer kelly') + .delete('notfound') + t.equal(m.out('normal'), 'spencer kelly', 'remove(notfound) returns this') + t.end() +}) diff --git a/tests/transform/join.test.js b/tests/transform/join.test.js new file mode 100644 index 000000000..793640f09 --- /dev/null +++ b/tests/transform/join.test.js @@ -0,0 +1,26 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('join-basic', function(t) { + let str = `What's with these homies dissin' my girl? Why do they gotta front? + + What did we ever do to these guys that made them so violent? + + ` + let doc = nlp(str).join() + t.equal(doc.length, 1, 'one phrase') + doc = doc.splitOn('we ever') + t.equal(doc.length, 3, 'three phrases now') + // t.equal(doc.text(), str, 'original text unchanged') //TODO:fix me + t.end() +}) + +test('join-parents', function(t) { + let str = `left side. middle part one. two middle part two. 
right side.` + let doc = nlp(str) + doc.if('middle').join() + t.equal(doc.length, 3, 'three parts now') + t.equal(doc.all().length, 3, 'three sentences now') + t.equal(doc.text(), str, 'original text unchanged') + t.end() +}) diff --git a/tests/transform/normalize-light.test.js b/tests/transform/normalize-light.test.js new file mode 100644 index 000000000..4ac3c2cd0 --- /dev/null +++ b/tests/transform/normalize-light.test.js @@ -0,0 +1,63 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('normalize - light', function(t) { + let arr = [ + [ + ' so... you like donuts? have all the donuts in the world!!!', + 'so you like donuts? have all the donuts in the world!', + ], + // ['This is a test. .', 'this is a test.'], + ['This is a test?!', 'This is a test?'], + ['Björk, the singer-songwriter...', 'Bjork the singer songwriter'], + // ['the so-called “fascist dictator”', 'the so called "fascist dictator"'], + // ['the so-called ❛singer-songwriter❜', 'the so called \'singer songwriter\''], + // ['the so-called ❛group of seven❜', 'the so called \'group of 7\''], + ['Director of the F.B.I.', 'Director of the FBI.'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .normalize() + .out('text') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('normalize - medium', function(t) { + let arr = [ + [ + ' so... you like DONUTS? have all the donuts in the WORLD!!!', + 'so you like donuts? have all the donuts in the world!', + ], + ['This is a test?!', 'this is a test?'], + ['Björk, the singer-songwriter...', 'bjork the singer songwriter'], + ['Director of the F.B.I.', 'director of the fbi.'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .normalize('medium') + .out('text') + t.equal(str, a[1], a[0]) + }) + t.end() +}) + +test('normalize - heavy', function(t) { + let arr = [ + [ + ' so... you like DONUTS? have all the donuts in the WORLD!!!', + 'so you like donut? 
have all the donut in the world!', + ], + // ['This is a test?!', 'this be a test?'], + ['Björk, the singer-songwriter...', 'bjork the singer songwriter'], + ['Director of the F.B.I.', 'director of the fbi.'], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .normalize('heavy') + .out('text') + t.equal(str, a[1], a[0]) + }) + t.end() +}) diff --git a/tests/transform/normalize-more.test.js b/tests/transform/normalize-more.test.js new file mode 100644 index 000000000..6918f8acd --- /dev/null +++ b/tests/transform/normalize-more.test.js @@ -0,0 +1,133 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('possessives', function(t) { + let doc = nlp(`Corey Hart's pudding and Google's advertising`) + doc = doc.normalize({ + possessives: true, + case: false, + }) + t.equal(doc.out(), 'Corey Hart pudding and Google advertising', 'normalize possessives') + t.end() +}) + +test('optional params', function(t) { + const doc = nlp(`John Smith bought automobiles (for us)`).normalize({ + case: true, + possessives: true, + parentheses: true, + // plurals: true, + verbs: true, + }) + t.equal(doc.out(), 'john smith buy automobiles for us', 'many-on') + t.end() +}) + +test('optional param - verbs and plurals together', function(t) { + const plurals = [['batmobiles', 'batmobile']] + const verbs = [['I was walking', 'I walk']] + + // good + plurals.forEach(a => { + const doc = nlp(a[0]) + const pluralsOn = doc.normalize({ + plurals: true, + }) + t.equal(pluralsOn.out(), a[1], a[0]) + }) + + // good + verbs.forEach(a => { + const doc = nlp(a[0]) + const verbsOn = doc.normalize({ + verbs: true, + }) + t.equal(verbsOn.out(), a[1], a[0]) + }) + + // bad + plurals.concat(verbs).forEach(a => { + const doc = nlp(a[0]) + const bothOn = doc.normalize({ + plurals: true, + verbs: true, + }) + t.equal(bothOn.out(), a[1], a[0]) + }) + + t.end() +}) + +test('honorifics', function(t) { + const tests = [ + ['rear admiral Smith', 'smith'], + ['Lieutenant John Smith', 'john 
smith'], + // ['Admiral Davis Jr', 'davis jr'], + ['Field marshal Herring', 'herring'], + ['General Lou Gobbells of the US air force', 'lou gobbells of the us air force'], + ['Rear admiral John', 'john'], + ['Lieutenant general James Baker', 'james baker'], + ['Lieutenant colonel Bing Crosby', 'bing crosby'], + ['Major Tom', 'tom'], + ['major effort by President Xi', 'major effort by xi'], + ['Corporal John Herring', 'john herring'], + ['sergeant major Harold', 'harold'], + ['Second lieutenant Semore Hirthman', 'semore hirthman'], + ['first lady Michelle obama', 'michelle obama'], + ['prime minister Stephen Hawking', 'stephen hawking'], + //no names + // ['first lieutenant', '1st lieutenant'], + // ['Sergeant', 'sergeant'], + ] + tests.forEach(a => { + let doc = nlp(a[0]) + doc = doc.normalize({ + honorifics: true, + case: true, + }) + t.equal(doc.out('normal'), a[1], a[0]) + }) + t.end() +}) + +test('hyphen-whitespace:', function(t) { + let doc = nlp(`the so-called “fascist dictator”`) + doc.normalize({ whitespace: true, punctuation: false }) + t.equal(doc.text(), `the so-called “fascist dictator”`, 'keep hyphen') + t.end() +}) + +test('dash-whitespace:', function(t) { + let str = `a dash seperates words - like that` + let doc = nlp(str) + doc.normalize({ whitespace: true, punctuation: false }) + t.equal(doc.text(), str, 'keep the dash') + t.end() +}) + +test('elipses-whitespace:', function(t) { + let doc = nlp('about this ...').normalize() + t.equal(doc.out('text'), 'about this', 'normalize seperate elipses') + + doc = nlp('about this ...').toLowerCase() + t.equal(doc.out('text'), 'about this ...', 'lowercase elipses') + + doc = nlp('about this...').normalize() + t.equal(doc.out('text'), 'about this', 'normalize attatched elipses') + t.end() +}) + +test('more-normalize:', function(t) { + let doc = nlp(`i saw first lady michelle obama`) + doc.normalize({ + honorifics: true, + }) + t.equal(doc.out('text'), 'i saw michelle obama', 'normalize honorifics') + + doc = 
nlp(`google's tax return`) + doc.normalize({ + possessives: true, + }) + t.equal(doc.out('text'), 'google tax return', 'normalize possessives') + t.end() +}) diff --git a/tests/transform/normalize-one.test.js b/tests/transform/normalize-one.test.js new file mode 100644 index 000000000..83fdfa783 --- /dev/null +++ b/tests/transform/normalize-one.test.js @@ -0,0 +1,60 @@ +const test = require('tape') +const nlp = require('../_lib') + +const only = function(options) { + const none = { + case: false, + whitespace: false, + unicode: false, + punctuation: false, + contraction: false, + } + return Object.assign({}, none, options) +} + +test('normalize defaults', function(t) { + let doc = nlp.tokenize(` it's coöl, (i think) . He is cool; i said .`) + doc.normalize() + t.equal(doc.text(), `it's cool (i think). He is cool i said.`, 'normalize-defaults') + t.end() +}) + +test('normalize unicode', function(t) { + let doc = nlp.tokenize(` it's coöl, (i think) . He is cool; i said .`) + let options = only({ unicode: true }) + doc.normalize(options) + t.equal(doc.text(), ` it's cool, (i think) . He is cool; i said .`, 'normalize-unicode') + t.end() +}) + +test('normalize punctuation', function(t) { + let doc = nlp.tokenize(` it's coöl, (i think) . He is cool; i said .`) + let options = only({ punctuation: true }) + doc.normalize(options) + t.equal(doc.text(), ` it's coöl (i think) . He is cool i said .`, 'normalize-punct') + t.end() +}) + +test('normalize whitespace', function(t) { + let doc = nlp.tokenize(` it's coöl, (i think) . He is cool; i said .`) + let options = only({ whitespace: true }) + doc.normalize(options) + t.equal(doc.text(), `it's coöl, (i think). He is cool; i said.`, 'normalize-whitespace') + t.end() +}) + +test('normalize parentheses', function(t) { + let doc = nlp(` it's coöl, (i think) . He is cool; i said .`) + let options = only({ parentheses: true }) + doc.normalize(options) + t.equal(doc.text(), ` it's coöl, i think . 
He is cool; i said .`, 'normalize-parentheses') + t.end() +}) + +test('normalize contractions', function(t) { + let doc = nlp(` it's coöl, (i think) . He is cool; i said .`) + let options = only({ contractions: true }) + doc.normalize(options) + t.equal(doc.text(), ` it is coöl, (i think) . He is cool; i said .`, 'normalize-contractions') + t.end() +}) diff --git a/tests/transform/prepend.test.js b/tests/transform/prepend.test.js new file mode 100644 index 000000000..328d9d2a5 --- /dev/null +++ b/tests/transform/prepend.test.js @@ -0,0 +1,55 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('prepend parent start', function(t) { + let doc = nlp(`one two three`) + doc.prepend('zero') + t.equal(doc.text(), 'zero one two three', 'prepended in parent') + t.end() +}) + +test('prepend middle', function(t) { + let doc = nlp(`one two four five`) + let m = doc.match('four').prepend('three') + t.equal(m.text().trim(), 'three four', 'prepended in child') + t.equal(doc.text(), 'one two three four five', 'prepended in parent') + t.end() +}) + +test('prepend multi', function(t) { + let doc = nlp('one two. three four') + doc.prepend('oooo') + t.equal(doc.text(), 'oooo one two. 
oooo three four') + t.end() +}) + +test('prepend children', function(t) { + let doc = nlp(`one four five six.`) + let m1 = doc.match('one four') + let m2 = m1.match('four') + m2.prepend('two three') + + t.equal(m1.text(), 'one two three four', 'prepended in child 1') + t.equal(m2.text(), 'two three four', 'prepended in child 2') + t.equal(doc.text(), 'one two three four five six.', 'prepended in parent') + t.end() +}) + +test('prepend start child', function(t) { + let doc = nlp(`one two three four`) + doc.match('one').prepend('zero') + t.equal(doc.text(), 'zero one two three four', 'prepended in parent') + t.end() +}) + +test('prepend many children', function(t) { + let doc = nlp(`one two three four`) + doc + .match('one two three') + .match('one two') + .match('.') + .match('one') + .prepend('zero') + t.equal(doc.text(), 'zero one two three four', 'prepended in parent') + t.end() +}) diff --git a/tests/transform/split.test.js b/tests/transform/split.test.js new file mode 100644 index 000000000..21cd838ee --- /dev/null +++ b/tests/transform/split.test.js @@ -0,0 +1,101 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('splitAfter', function(t) { + ;[ + ['doug and nancy', 'and', ['doug and', 'nancy']], + ['doug and also nancy', 'and also', ['doug and also', 'nancy']], + ['doug and definetly nancy', 'and #Adverb', ['doug and definetly', 'nancy']], + ['maybe doug but possibly nancy', 'but', ['maybe doug but', 'possibly nancy']], + + ['a x b x c', 'x', ['a x', 'b x', 'c']], + ['a b x c x', 'x', ['a b x', 'c x']], + ['x a b x c', 'x', ['x', 'a b x', 'c']], + ['x x a b c', 'x', ['x', 'x', 'a b c']], + ['a x b x', 'x', ['a x', 'b x']], + ['a x b c x', 'x', ['a x', 'b c x']], + ['x x a b c', 'x', ['x', 'x', 'a b c']], + + ['john paul george ringo', '.', ['john', 'paul', 'george', 'ringo']], + ['doug is really nice', 'is', ['doug is', 'really nice']], + ].forEach(function(a) { + const want = a[2] + const got = nlp(a[0]) + .splitAfter(a[1]) + 
.out('array') + t.deepEqual(got, want, a[0]) + }) + t.end() +}) + +test('splitOn', function(t) { + ;[ + ['doug and nancy', 'and', ['doug', 'and', 'nancy']], + ['doug and also nancy', 'and also', ['doug', 'and also', 'nancy']], + ['doug and definetly nancy', 'and #Adverb', ['doug', 'and definetly', 'nancy']], + ['maybe doug but possibly nancy', 'but', ['maybe doug', 'but', 'possibly nancy']], + ['doug is really nice', 'is', ['doug', 'is', 'really nice']], + + ['a x b x c', 'x', ['a', 'x', 'b', 'x', 'c']], + ['a b x x c', 'x', ['a b', 'x', 'x', 'c']], + ['x a b x c', 'x', ['x', 'a b', 'x', 'c']], + ['x x a b c', 'x', ['x', 'x', 'a b c']], + ['a x b x', 'x', ['a', 'x', 'b', 'x']], + ].forEach(function(a) { + const want = a[2] + const got = nlp(a[0]) + .splitOn(a[1]) + .out('array') + t.deepEqual(got, want, a[0]) + }) + t.end() +}) + +test('splitBefore', function(t) { + ;[ + ['doug and nancy', 'and', ['doug', 'and nancy']], + ['doug and also nancy', 'and also', ['doug', 'and also nancy']], + ['doug and definetly nancy', 'and #Adverb', ['doug', 'and definetly nancy']], + ['maybe doug but possibly nancy', 'but', ['maybe doug', 'but possibly nancy']], + ['doug is really nice', 'is', ['doug', 'is really nice']], + + ['a x b x c', 'x', ['a', 'x b', 'x c']], + ['a b x x c', 'x', ['a b', 'x', 'x c']], + ['x a b x c', 'x', ['x a b', 'x c']], + ['x x a b c', 'x', ['x', 'x a b c']], + ['a x b x', 'x', ['a', 'x b', 'x']], + ].forEach(function(a) { + const want = a[2] + const got = nlp(a[0]) + .splitBefore(a[1]) + .out('array') + t.deepEqual(got, want, a[0]) + }) + t.end() +}) + +test('multi splitBefore, multi sentence', function(t) { + let doc = nlp('before before match1, match2 after after. then a match3 over here. 
none found') + let m = doc.splitBefore('/^match/') + t.equal(m.length, 6, 'found 6') + t.equal(m.get(0).out('normal'), 'before before', 'found before') + t.equal(m.get(1).out('normal'), 'match1', 'found match1') + t.equal(m.get(2).out('normal'), 'match2 after after', 'found match2') + t.equal(m.get(3).out('normal'), 'then a', 'next sentence') + t.equal(m.get(4).out('normal'), 'match3 over here', 'next sentence match') + t.equal(m.get(5).out('normal'), 'none found', 'unfound') + t.end() +}) + +test('multi splitAfter, multi sentence', function(t) { + let doc = nlp('before before match1, match2 after after. then a match3 over here. none found') + let m = doc.splitAfter('/^match/') + t.equal(m.length, 6, 'found 6') + t.equal(m.get(0).out('normal'), 'before before match1', 'found match1') + t.equal(m.get(1).out('normal'), 'match2', 'found match2') + t.equal(m.get(2).out('normal'), 'after after', 'after') + t.equal(m.get(3).out('normal'), 'then a match3', 'next sentence match') + t.equal(m.get(4).out('normal'), 'over here', 'next sentence after') + t.equal(m.get(5).out('normal'), 'none found', 'unfound') + t.end() +}) diff --git a/tests/transform/splitOn.test.js b/tests/transform/splitOn.test.js new file mode 100644 index 000000000..2f93f2763 --- /dev/null +++ b/tests/transform/splitOn.test.js @@ -0,0 +1,100 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('one split, one sentence', function(t) { + let doc = nlp('before before match, after after.') + let m = doc.splitOn('@hasComma') + t.equal(m.length, 3, 'found 3') + t.equal(m.get(0).out('normal'), 'before before', 'found before') + t.equal(m.get(1).out('normal'), 'match', 'found match') + t.equal(m.get(2).out('normal'), 'after after', 'found after') + t.end() +}) + +test('multi split, one sentence', function(t) { + let doc = nlp('before before match, then a match, after after.') + let m = doc.splitOn('@hasComma') + t.equal(m.length, 5, 'found 5') + t.equal(m.get(0).out('normal'), 'before 
before', 'found before') + t.equal(m.get(1).out('normal'), 'match', 'found match') + t.equal(m.get(2).out('normal'), 'then a', 'found between') + t.equal(m.get(3).out('normal'), 'match', 'found match2') + t.equal(m.get(4).out('normal'), 'after after', 'found after') + t.end() +}) + +test('one split, multi sentence', function(t) { + let doc = nlp('before before match, after after. then over here') + let m = doc.splitOn('match') + t.equal(m.length, 4, 'found 4') + t.equal(m.get(0).out('normal'), 'before before', 'found before') + t.equal(m.get(1).out('normal'), 'match', 'found match') + t.equal(m.get(2).out('normal'), 'after after', 'found after') + t.equal(m.get(3).out('normal'), 'then over here', 'next sentence') + t.end() +}) + +test('multi split, multi sentence', function(t) { + let doc = nlp('before before match1, match2 after after. then a match3 over here') + let m = doc.splitOn('/^match/') + t.equal(m.length, 7, 'found 7') + t.equal(m.get(0).out('normal'), 'before before', 'found before') + t.equal(m.get(1).out('normal'), 'match1', 'found match1') + t.equal(m.get(2).out('normal'), 'match2', 'found match2') + t.equal(m.get(3).out('normal'), 'after after', 'found after') + t.equal(m.get(4).out('normal'), 'then a', 'next sentence') + t.equal(m.get(5).out('normal'), 'match3', 'next sentence match') + t.equal(m.get(6).out('normal'), 'over here', 'next sentence after') + t.end() +}) + +test('greedy split', function(t) { + let doc = nlp('match match middle middle match. then over here') + let m = doc.splitOn('match+') + t.equal(m.length, 4, 'found 4') + t.equal(m.get(0).out('normal'), 'match match', 'found two') + t.equal(m.get(1).out('normal'), 'middle middle', 'found middles') + t.equal(m.get(2).out('normal'), 'match', 'found one') + t.equal(m.get(3).out('normal'), 'then over here', 'next sentence') + t.end() +}) + +test('split skip sentence', function(t) { + let doc = nlp('before match. nothing found here. 
two match after') + let m = doc.splitOn('match') + t.equal(m.length, 6, 'found 6') + t.equal(m.get(0).out('normal'), 'before', 'found before') + t.equal(m.get(1).out('normal'), 'match', 'found match') + t.equal(m.get(2).out('normal'), 'nothing found here.', 'no-match sentence') + t.equal(m.get(3).out('normal'), 'two', 'found before2') + t.equal(m.get(4).out('normal'), 'match', 'found match2') + t.equal(m.get(5).out('normal'), 'after', 'found after') + t.end() +}) + +test('no match split', function(t) { + let doc = nlp('nothing found here. none here either') + let m = doc.splitOn('match') + t.equal(m.length, 2, 'found 2') + t.equal(m.get(0).out('normal'), 'nothing found here.', 'not found 1') + t.equal(m.get(1).out('normal'), 'none here either', 'not found 2') + t.end() +}) + +test('split-parent', function(t) { + let doc = nlp('if so, he is the best, that i see. he is the greatest in the world') + t.equal(doc.length, 2, 'init parent is 2 sentence') + + let m = doc.match('he is').splitOn() + t.equal(m.length, 5, 'splitOn parent into 5') + + m = doc.match('he is').splitAfter() + t.equal(m.length, 4, 'splitAfter parent into 4') + + m = doc.match('he is').splitBefore() + t.equal(m.length, 3, 'splitBefore parent into 3') + + t.equal(doc.length, 2, 'parent is still 2 sentence') + + t.end() +}) diff --git a/tests/unique.test.js b/tests/unique.test.js new file mode 100644 index 000000000..2789863ce --- /dev/null +++ b/tests/unique.test.js @@ -0,0 +1,30 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('term-unique', function(t) { + let doc = nlp(`him and her and him`) + let m = doc.terms().unique() + t.equal(m.text(), 'him and her', 'terms-unique') + t.equal(doc.text(), `him and her and him`, 'original-has-duplicates') + t.end() +}) + +test('sentence-unique', function(t) { + let str = `him and her. in toronto. him and her. him.` + let doc = nlp(str) + let uniq = doc.unique() + t.equal(uniq.text(), 'him and her. in toronto. 
him.', 'remove dup sentences') + t.equal(doc.text(), str, 'keep dup sentences') + t.end() +}) + +test('unique-normalize', function(t) { + let doc = nlp(`SPENCER's house (spencer)`) + doc = doc.terms().unique() + t.equal(doc.text(), "SPENCER's house", 'normalize-posessive') + + doc = nlp(`is not isn't`) + doc = doc.terms().unique() + t.equal(doc.text(), 'is not', 'normalize-contraction') + t.end() +}) diff --git a/tests/verbs/conjugate.test.js b/tests/verbs/conjugate.test.js new file mode 100644 index 000000000..696849f83 --- /dev/null +++ b/tests/verbs/conjugate.test.js @@ -0,0 +1,255 @@ +const test = require('tape') +const nlp = require('../_lib') + +const arr = [ + { + Infinitive: 'convolute', + PresentTense: 'convolutes', + Gerund: 'convoluting', + PastTense: 'convoluted', + }, + { + PresentTense: 'presents', + Gerund: 'presenting', + PastTense: 'presented', + Infinitive: 'present', + }, + { + PresentTense: 'angulates', + Gerund: 'angulating', + PastTense: 'angulated', + Infinitive: 'angulate', + }, + { + PresentTense: 'conjures', + Gerund: 'conjuring', + PastTense: 'conjured', + Infinitive: 'conjure', + }, + { + PresentTense: 'denounces', + Gerund: 'denouncing', + PastTense: 'denounced', + Infinitive: 'denounce', + }, + { + PresentTense: 'watches', + Gerund: 'watching', + PastTense: 'watched', + Infinitive: 'watch', + }, + { + PresentTense: 'tingles', + Gerund: 'tingling', + PastTense: 'tingled', + Infinitive: 'tingle', + }, + { + PresentTense: 'mortises', + Gerund: 'mortising', + PastTense: 'mortised', + Infinitive: 'mortise', + }, + { + PresentTense: 'disguises', + Gerund: 'disguising', + PastTense: 'disguised', + Infinitive: 'disguise', + }, + { + Infinitive: 'effect', + Gerund: 'effecting', + PastTense: 'effected', + PresentTense: 'effects', + }, + { + Infinitive: 'want', + Gerund: 'wanting', + PastTense: 'wanted', + PresentTense: 'wants', + }, + { + Infinitive: 'power', + Gerund: 'powering', + PastTense: 'powered', + PresentTense: 'powers', + }, + { + 
Infinitive: 'overcompensate', + PresentTense: 'overcompensates', + PastTense: 'overcompensated', + Gerund: 'overcompensating', + }, + { + Infinitive: 'ice', + PresentTense: 'ices', + PastTense: 'iced', + Gerund: 'icing', + }, + { + Infinitive: 'buy', + PresentTense: 'buys', + PastTense: 'bought', + Gerund: 'buying', + }, + { + Infinitive: 'flower', + PresentTense: 'flowers', + PastTense: 'flowered', + Gerund: 'flowering', + }, + { + Infinitive: 'rage', + PresentTense: 'rages', + PastTense: 'raged', + Gerund: 'raging', + }, + { + Infinitive: 'drive', + PresentTense: 'drives', + PastTense: 'drove', + Gerund: 'driving', + }, + { + Infinitive: 'foul', + PresentTense: 'fouls', + PastTense: 'fouled', + Gerund: 'fouling', + }, + { + Infinitive: 'overthrow', + PresentTense: 'overthrows', + Gerund: 'overthrowing', + PastTense: 'overthrew', + }, + { + Infinitive: 'aim', + PresentTense: 'aims', + PastTense: 'aimed', + Gerund: 'aiming', + }, + { + PresentTense: 'unifies', + Gerund: 'unifying', + PastTense: 'unified', + Infinitive: 'unify', + }, + { + PresentTense: 'addresses', + Gerund: 'addressing', + PastTense: 'addressed', + Infinitive: 'address', + }, + { + Infinitive: 'bumble', + PresentTense: 'bumbles', + PastTense: 'bumbled', + Gerund: 'bumbling', + }, + { + Infinitive: 'snipe', + PresentTense: 'snipes', + PastTense: 'sniped', + Gerund: 'sniping', + }, + { + PresentTense: 'relishes', + Gerund: 'relishing', + PastTense: 'relished', + Infinitive: 'relish', + }, + { + Infinitive: 'lengthen', + Gerund: 'lengthening', + PastTense: 'lengthened', + PresentTense: 'lengthens', + }, + { + Infinitive: 'farm', + PresentTense: 'farms', + PastTense: 'farmed', + Gerund: 'farming', + }, + { + Infinitive: 'develop', + PresentTense: 'develops', + PastTense: 'developed', + Gerund: 'developing', + }, + { + Infinitive: 'study', + PresentTense: 'studies', + PastTense: 'studied', + Gerund: 'studying', + }, + { + Infinitive: 'criticise', + PresentTense: 'criticises', + PastTense: 'criticised', 
+ Gerund: 'criticising', + }, + { + Infinitive: 'speak', + PresentTense: 'speaks', + PastTense: 'spoke', + Gerund: 'speaking', + }, + { + Infinitive: 'fuzz', + PresentTense: 'fuzzes', + PastTense: 'fuzzed', + Gerund: 'fuzzing', + }, + { + Infinitive: 'invest', + PresentTense: 'invests', + PastTense: 'invested', + Gerund: 'investing', + }, + { + Infinitive: 'age', + PresentTense: 'ages', + PastTense: 'aged', + Gerund: 'ageing', + }, + { + Infinitive: 'shed', + PresentTense: 'sheds', + PastTense: 'shed', + Gerund: 'shedding', + }, + { + Infinitive: 'ace', + PresentTense: 'aces', + PastTense: 'aced', + Gerund: 'acing', + }, + { + Infinitive: 'egg', + PresentTense: 'eggs', + PastTense: 'egged', + Gerund: 'egging', + }, +] +test('conjugation:', function(t) { + const test_conjugation = function(inf, o, form, original) { + const msg = 'from ' + original + ' to ' + form + ': [' + o[original] + '] -> [' + inf[form] + ']' + t.equal(inf[form], o[form], msg) + } + + arr.forEach(function(o) { + const forms = ['Infinitive', 'PastTense', 'PresentTense', 'Gerund'] + for (let i = 0; i < forms.length; i++) { + const from = forms[i] + const inf = nlp(o[from]) + .tag('Verb') + .verbs() + .conjugate()[0] + test_conjugation(inf, o, 'Infinitive', from) + test_conjugation(inf, o, 'PastTense', from) + test_conjugation(inf, o, 'PresentTense', from) + test_conjugation(inf, o, 'Gerund', from) + } + }) + t.end() +}) diff --git a/tests/verbs/misc.test.js b/tests/verbs/misc.test.js new file mode 100644 index 000000000..42ea337f8 --- /dev/null +++ b/tests/verbs/misc.test.js @@ -0,0 +1,18 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('verbs.adverbs', function(t) { + let doc = nlp('spencer is really great! Spencer really really was superb.') + doc + .verbs() + .adverbs() + .delete() + t.equal(doc.out(), 'spencer is great! 
Spencer was superb.', 'no-adverbs') + + doc = nlp('spencer truly would really run quickly') + .verbs() + .adverbs() + t.equal(doc.length, 3, 'found all three adverbs') + t.equal(doc.text('reduced'), 'truly really quickly', 'found adverbs in order') + t.end() +}) diff --git a/tests/verbs/modal.test.js b/tests/verbs/modal.test.js new file mode 100644 index 000000000..2e07bbccf --- /dev/null +++ b/tests/verbs/modal.test.js @@ -0,0 +1,37 @@ +const test = require('tape') +const nlp = require('../_lib') + +//ignore some modals during conjugation, i guess +test('ignore-would-behaviour', t => { + let doc = nlp('he would walk') + doc.verbs().toPastTense() + t.equal(doc.text(), 'he walked', 'would-past') + + doc = nlp('he would walk') + doc.verbs().toFutureTense() + t.equal(doc.out(), 'he will walk', 'would-future') + + doc = nlp('he would walk') + doc.verbs().toPresentTense() + t.equal(doc.out(), 'he walks', 'would-present') + + // str = nlp('he would walk') + // .verbs() + // .toContinuous() + // .out() + // t.equal(str, 'he is walking', 'would-continuous') + + t.end() +}) + +// test('ignore-would-behaviour', t => { +// const doc = nlp(`best look after`).verbs() +// const out = doc.conjugation() +// t.equal(doc.length, 1, 'one-verb') +// t.ok(out, 'no-error') +// t.end() +// }) + +//can/could +//might +//should diff --git a/tests/verbs/negate.test.js b/tests/verbs/negate.test.js new file mode 100644 index 000000000..b34a134a6 --- /dev/null +++ b/tests/verbs/negate.test.js @@ -0,0 +1,34 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('verb negate:', function(t) { + let arr = [ + ['is', 'is not'], + ['will', 'will not'], + ['will be', 'will not be'], + ['was', 'was not'], + + ['walks', 'does not walk'], + ['walked', 'did not walk'], + // ['walking', 'not walking'], + // ['walk', 'do not walk'], + ['will walk', 'will not walk'], + ['will have walked', 'will not have walked'], + + // ['corrupted', 'did not corrupt'], + ['jumped', 'did not jump'], + 
['stunk up', 'did not stink up'], + + [`would study`, `would not study`], + [`could study`, `could not study`], + [`should study`, `should not study`], + ] + arr.forEach(function(a) { + const str = nlp(a[0]) + .verbs() + .toNegative() + .out('normal') + t.equal(str, a[1], a[1] + ' --- ' + str) + }) + t.end() +}) diff --git a/tests/verbs/parts.test.js b/tests/verbs/parts.test.js new file mode 100644 index 000000000..a53f520d7 --- /dev/null +++ b/tests/verbs/parts.test.js @@ -0,0 +1,114 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('verb-parts:', function(t) { + const tests = [ + ['john is walking', '', 'is', ''], + ['john was walking', '', 'was', ''], + ['john will be walking', '', 'will be', ''], + ['john has been walking', '', 'has been', ''], + ['john had been walking', '', 'had been', ''], + ['john would have had been walking', '', 'would have had been', ''], + //negatives + ['john is not walking', 'not', 'is', ''], + ['john was not walking', 'not', 'was', ''], + ['john will not be walking', 'not', 'will be', ''], + ['john will be not walking', 'not', 'will be', ''], + ['john has not been walking', 'not', 'has been', ''], + ['john has been not walking', 'not', 'has been', ''], + ['john had not been walking', 'not', 'had been', ''], + ['john had been not walking', 'not', 'had been', ''], + ['john would be walking', '', 'would be', ''], + ['john would not be walking', 'not', 'would be', ''], + ['john would be not walking', 'not', 'would be', ''], + ['john would not have had been walking', 'not', 'would have had been', ''], + ['john would have not had been walking', 'not', 'would have had been', ''], + ['john would have had not been walking', 'not', 'would have had been', ''], + ['john would have had been not walking', 'not', 'would have had been', ''], + //adverbs + negatives combinations + ['john is really walking', '', 'is', 'really'], + ['john really is walking', '', 'is', ''], + ['john is walking really', '', 'is', ''], + ['john is 
not really walking', 'not', 'is', 'really'], + ['john is really not walking', 'not', 'is', 'really'], + ['john really is not walking', 'not', 'is', ''], + ['john is not walking really', 'not', 'is', ''], + ['john has really been not walking', 'not', 'has been', 'really'], + ['john has been really not walking', 'not', 'has been', 'really'], + ['john has been not really walking', 'not', 'has been', 'really'], + ['john has been not walking really', 'not', 'has been', ''], + ['john really would not have had been walking', 'not', 'would have had been', ''], + ['john would really not have had been walking', 'not', 'would have had been', 'really'], + ['john would not really have had been walking', 'not', 'would have had been', 'really'], + ['john would not have really had been walking', 'not', 'would have had been', 'really'], + ['john would not have had really been walking', 'not', 'would have had been', 'really'], + ['john would not have had been really walking', 'not', 'would have had been', 'really'], + ['john would not have had been walking really', 'not', 'would have had been', ''], + ] + tests.forEach(function(a) { + const arr = nlp(a[0]) + .verbs() + .json() + t.equal(arr.length, 1, '#verbs - ' + arr.length) + t.equal(arr[0].parts.negative || '', a[1], "neg-test - '" + a[0] + "'") + t.equal(arr[0].parts.auxiliary || '', a[2], "aux-test - '" + a[0] + "'") + t.equal(arr[0].parts.verb || '', 'walking', "verb-test - '" + a[0] + "'") + t.equal(arr[0].parts.adverb || '', a[3], "adverb-test - '" + a[0] + "'") + }) + t.end() +}) + +//dont take it too-far +test('verb-greedy:', function(t) { + let arr = nlp('he would be, had he survived') + .verbs() + .json() + t.equal(arr.length, 2, 'split-on-clause') + + arr = nlp('we walked, talked, and sang') + .verbs() + .json() + t.equal(arr.length, 3, 'split-on-list') + + arr = nlp('we walked, talked, and quickly sang') + .verbs() + .json() + t.equal(arr.length, 3, 'split-on-list2') + + arr = nlp('we suddenly walked, talked, and 
abruptly sang') + .verbs() + .json() + t.equal(arr.length, 3, 'split-on-list3') + + arr = nlp('we really') + .verbs() + .json() + t.equal(arr.length, 0, 'adverb-isnt-a-verb') + + arr = nlp('we really really') + .verbs() + .json() + t.equal(arr.length, 0, 'two-adverbs-isnt-a-verb') + + arr = nlp('not good') + .verbs() + .json() + t.equal(arr.length, 0, 'not-isnt-a-verb') + + let str = nlp('we must not') + .verbs() + .out('normal') + t.equal(str, 'must not', 'verb-not') + + str = nlp('we must really') + .verbs() + .out('normal') + t.equal(str, 'must', 'verb-adverb') + + str = nlp('we must really not') + .verbs() + .out('normal') + t.equal(str, 'must really not', 'verb-adverb-not') + + t.end() +}) diff --git a/tests/verbs/phrasal.test.js b/tests/verbs/phrasal.test.js new file mode 100644 index 000000000..8087e613f --- /dev/null +++ b/tests/verbs/phrasal.test.js @@ -0,0 +1,22 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('phrasal-verbs:', function(t) { + ;[ + [`he is really good`, ['he', 'is', 'really', 'good']], + [`he is upset about it`, ['he', 'is', 'upset', 'about', 'it']], + [`he will mess about with it`, ['he', 'will', 'mess about', 'with', 'it']], + + [`come forward`, ['come forward']], + [`come together`, ['come together']], + [`come apart`, ['come apart']], + + [`frighten back`, ['frighten', 'back']], + [`frighten away`, ['frighten away']], + ].forEach(function(a) { + const terms = nlp(a[0]).out('array') + const msg = terms.join(' ') + ' -- ' + a[1].join(' ') + t.equal(terms.join(' '), a[1].join(' '), msg) + }) + t.end() +}) diff --git a/tests/verbs/plural.test.js b/tests/verbs/plural.test.js new file mode 100644 index 000000000..fec91ee84 --- /dev/null +++ b/tests/verbs/plural.test.js @@ -0,0 +1,34 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('plural-verbs:', function(t) { + let r = nlp('i look at') + let len = r.verbs().isPlural().length + t.equal(len, 0, 'i singular') + + r = nlp('we look at it. 
They report on it') + len = r.verbs().isPlural().length + t.equal(len, 2, 'they plural') + + r = nlp('lkjsdf are cool') + let str = r + .verbs() + .isPlural() + .out('normal') + t.equal(str, 'are', 'are plural') + + r = nlp('lkjsdf does eat bugs') + str = r + .verbs() + .isPlural() + .out('normal') + t.equal(str, 'does eat', 'does plural') + + r = nlp('lkjsdf is cool') + str = r + .verbs() + .isPlural() + .out('normal') + t.equal(str, '', 'is singular') + t.end() +}) diff --git a/tests/verbs/toGerund.test.js b/tests/verbs/toGerund.test.js new file mode 100644 index 000000000..710dd5a3e --- /dev/null +++ b/tests/verbs/toGerund.test.js @@ -0,0 +1,22 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('verb-to-gerund:', function(t) { + ;[ + ['walk', 'walking'], + ['sing', 'singing'], + ['win', 'winning'], + ['will convert', 'converting'], + ['see', 'seeing'], + ['is', 'being'], + ['was', 'being'], + ['am', 'being'], + ].forEach(function(a) { + const str = nlp(a[0]) + .verbs() + .toGerund() + .out('normal') + t.equal(str, a[1], str + ' -> ' + a[1]) + }) + t.end() +}) diff --git a/tests/verbs/toNegative.test.js b/tests/verbs/toNegative.test.js new file mode 100644 index 000000000..049a9ce49 --- /dev/null +++ b/tests/verbs/toNegative.test.js @@ -0,0 +1,49 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('verb-to-negative:', function(t) { + let arr = [ + [`he is nice`, 'is not'], + [`she was nice`, 'was not'], + + [`she has walked`, 'has not walked'], + [`she had walked`, 'had not walked'], + [`we have had problems`, 'have not had'], + [`we would walk`, 'would not walk'], + [`we would have walked`, 'would not have walked'], + + //conjugations + [`she walked`, 'did not walk'], + [`it all came apart`, 'did not come apart'], + + //phrasals + [`he would come forward`, 'would not come forward'], + [`we come together`, 'do not come together'], + [`he was frightened`, 'was not'], + [`i didn't want to`, "didn't want"], + + 
//===singular + // pastTense - + ['john played', 'did not play'], + // presentTense - + ['john plays', 'does not play'], + // futureTense - + ['john will play', 'will not play'], + + ///===plural + // pastTense - + ['we played', 'did not play'], + // presentTense - + ['we play', 'do not play'], + // futureTense - + ['we will play', 'will not play'], + ] + arr.forEach(function(a) { + const vb = nlp(a[0]) + .verbs() + .toNegative() + const str = vb.out('text') + t.equal(str, a[1], "'" + str + "' - - want: " + a[1]) + }) + t.end() +}) diff --git a/tests/verbs/verb-contractions.test.js b/tests/verbs/verb-contractions.test.js new file mode 100644 index 000000000..d8730dc00 --- /dev/null +++ b/tests/verbs/verb-contractions.test.js @@ -0,0 +1,20 @@ +const test = require('tape') +const nlp = require('../_lib') + +test('conjugate-contractions:', t => { + let arr = [ + [`i'm good`, 'i was good'], + [`they're good`, 'they were good'], + //TODO: missing auxillary + [`we've said`, 'we said'], //or 'we have said' + [`they'd said`, 'they said'], //or 'they have said' + // (ambiguous) + [`he's good`, 'he was good'], + ] + arr.forEach(a => { + const doc = nlp(a[0]) + doc.verbs().toPastTense() + t.equal(doc.out(), a[1], a[1]) + }) + t.end() +}) diff --git a/tests/whitespace-out.test.js b/tests/whitespace-out.test.js new file mode 100644 index 000000000..7a5ca1ed0 --- /dev/null +++ b/tests/whitespace-out.test.js @@ -0,0 +1,25 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('whitespace-out', function(t) { + let str = 'one, two three. One, two, four?' 
+ const doc = nlp(str) + + t.equal(doc.out(), str, 'original-okay') // ✅ + + // some phrases, but full-phrases + t.equal(doc.eq(0).text(), 'one, two three.', '.eq(0) okay') // ✅ + t.equal(doc.eq(1).text(), 'One, two, four?', '.eq(1) okay') // ✅ + + t.equal(doc.match('four').text(), 'four', 'one match') // ✅ + + t.equal(doc.match('two').text(), 'two two', 'two single matches') // ✅ + t.equal(doc.match('one').text(), 'one, One', 'two more single matches') // ✅ + + t.equal(doc.match('one two').text(), 'one, two One, two', 'two multi-matches') // ✅ + + // t.equal(doc.not('two').out(), 'one, three. One, four?', '.not() okay') // ❌ + + // t.equal(doc.match('.').out(), str, 'every word') // ❌ + t.end() +}) diff --git a/tests/wordcount.test.js b/tests/wordcount.test.js new file mode 100644 index 000000000..f9f0b7c7a --- /dev/null +++ b/tests/wordcount.test.js @@ -0,0 +1,33 @@ +const test = require('tape') +const nlp = require('./_lib') + +test('==WordCount==', function(t) { + let arr = [ + ['he is good', 3], + ['jack and jill went up the hill.', 7], + ['Mr. Clinton did so.', 4], + ['Bill Clinton ate cheese.', 4], + ['5kb of data.', 3], + ['it was five hundred and seventy two.', 7], + ['jack and jill went up the hill. They got water.', 10], + ['Bill Clinton went walking', 4], + ['Bill Clinton will go walking', 5], + [`is not isn't. it sure is.`, 6], + ] + arr.forEach(function(a) { + const doc = nlp(a[0]) + t.equal(doc.wordCount(), a[1], a[0]) + }) + t.end() +}) + +test('match-wordcount', function(t) { + let doc = nlp("he is cool. she is nice. it isn't here.") + t.equal(doc.eq(1).wordCount(), 3, 'middle-sentence') + t.equal(doc.match('(he|she)').wordCount(), 2, 'he/she match') + t.equal(doc.match('is').wordCount(), 3, 'is-contraction match') + //i guess!? 
+ t.equal(doc.match('not').wordCount(), 0, 'not-contraction match') + t.equal(doc.match('not').length, 1, 'length-vs-wordCount') + t.end() +}) diff --git a/types/index.d.ts b/types/index.d.ts index 5d7676201..b5802a7c0 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -1,189 +1,233 @@ -declare function compromise(text: string, lexicon?: { [word: string]: string }): compromise.Text; -declare namespace compromise { - const version: string; - function verbose(str: any): void; - function tokenize(str: string): any; - function plugin(obj: any): any; - function clone(): any; - function unpack(...args: any[]): void; - function addWords(...args: any[]): void; - function addTags(...args: any[]): void; - function addRegex(...args: any[]): void; - function addPatterns(...args: any[]): void; - function addPlurals(...args: any[]): void; - function addConjugations(conj: { [verb: string]: Conjugation }): void; - - interface Conjugation { - Gerund?: string; - PresentTense?: string; - PastTense?: string; - PerfectTense?: string; - PluPerfectTense?: string; - FuturePerfect?: string; - Actor?: string; - } - - interface Text { - /** did it find anything? */ - readonly found: boolean; - /** just a handy wrap */ - readonly parent: Text; - /** how many Texts are there? 
*/ - readonly length: number; - /** nicer than constructor.call.name or whatever */ - readonly isA: 'Text'; - /** the whitespace before and after this match */ - readonly whitespace: { - before(str: string): Text, - after(str: string): Text - }; - - acronyms(...args: any[]): any; - adjectives(...args: any[]): any; - adverbs(...args: any[]): any; - contractions(...args: any[]): any; - dates(...args: any[]): any; - nouns(...args: any[]): any; - people(...args: any[]): any; - sentences(...args: any[]): any; - terms(...args: any[]): any; - possessives(...args: any[]): any; - values(...args: any[]): any; - verbs(...args: any[]): any; - ngrams(...args: any[]): any; - startGrams(...args: any[]): any; - endGrams(...args: any[]): any; - words(...args: any[]): any; - - // misc - all(...args: any[]): any; - index(...args: any[]): any; - wordCount(...args: any[]): any; - data(...args: any[]): any; - /* javascript array loop-wrappers */ - map(...args: any[]): any; - forEach(...args: any[]): any; - filter(...args: any[]): any; - reduce(...args: any[]): any; - find(...args: any[]): any; - /** copy data properly so later transformations will have no effect */ - clone(...args: any[]): any; - - /** get the nth term of each result */ - term(...args: any[]): any; - firstTerm(...args: any[]): any; - lastTerm(...args: any[]): any; - - /** grab a subset of the results */ - slice(...args: any[]): any; - - /** use only the nth result */ - get(...args: any[]): any; - /** use only the first result */ - first(...args: any[]): any; - /** use only the last result */ - last(...args: any[]): any; - - concat(...args: any[]): any; - - /** make it into one sentence/termlist */ - flatten(...args: any[]): any; - - /** see if these terms can become this tag */ - canBe(...args: any[]): any; - - /** sample part of the array */ - random(...args: any[]): any; - setPunctuation(...args: any[]): any; - getPunctuation(...args: any[]): any; - // jquery-like api aliases - offset(...args: any[]): any; - 
text(...args: any[]): any; - eq(...args: any[]): any; - join(...args: any[]): any; - - // loops - toTitleCase(...args: any[]): any; - toUpperCase(...args: any[]): any; - toLowerCase(...args: any[]): any; - toCamelCase(...args: any[]): any; - - hyphenate(...args: any[]): any; - dehyphenate(...args: any[]): any; - trim(...args: any[]): any; - - insertBefore(...args: any[]): any; - insertAfter(...args: any[]): any; - insertAt(...args: any[]): any; - - replace(...args: any[]): any; - replaceWith(...args: any[]): any; - - delete(...args: any[]): any; - lump(...args: any[]): any; - - tagger(...args: any[]): any; - - tag(...args: any[]): any; - unTag(...args: any[]): any; - - // match - /** do a regex-like search through terms and return a subset */ - match(...args: any[]): any; - - not(...args: any[]): any; - - if(...args: any[]): any; - - ifNo(...args: any[]): any; - - has(...args: any[]): any; - - /** find a match and return everything in front of it */ - before(...args: any[]): any; - - /** find a match and return everything after it */ - after(...args: any[]): any; - // alias 'and' - and(...args: any[]): any; - notIf(...args: any[]): any; - only(...args: any[]): any; - onlyIf(...args: any[]): any; - - // out - out(...args: any[]): any; - debug(...args: any[]): any; - - // sort - /** reorder result.list alphabetically */ - sort(...args: any[]): any; - /** reverse the order of result.list */ - reverse(...args: any[]): any; - unique(...args: any[]): any; - - // split - /** turn result into two seperate results */ - splitAfter(...args: any[]): any; - /** turn result into two seperate results */ - splitBefore(...args: any[]): any; - /** turn result into two seperate results */ - splitOn(...args: any[]): any; - - // normalize - normalize(...args: any[]): any; - - // subsets - clauses(...args: any[]): any; - hashTags(...args: any[]): any; - organizations(...args: any[]): any; - phoneNumbers(...args: any[]): any; - places(...args: any[]): any; - quotations(...args: any[]): 
any; - topics(...args: any[]): any; - urls(...args: any[]): any; - questions(...args: any[]): any; - statements(...args: any[]): any; - parentheses(...args: any[]): any; +export as namespace nlp + +declare function nlp(text: string): nlp.Document + +// Constructor +declare module nlp { + /** parse text into a compromise object, without running POS-tagging */ + export function tokenize(text: string): Document + /** mix in a compromise-plugin */ + export function extend(plugin: any): Document + /** re-generate a Doc object from .json() results */ + export function load(json: any): Document + /** log our decision-making for debugging */ + export function verbose(bool: boolean): Document + /** current semver version of the library */ + export const version: Document + + class Document { + // Utils + /** return the whole original document ('zoom out') */ + all(): Document + /** is this document empty? */ + found: Boolean + /** return the previous result */ + parent(): Document + /** return all of the previous results */ + parents(): Document[] + /** (re)run the part-of-speech tagger on this document */ + tagger(): Document + /** count the # of terms in each match */ + wordCount(): Number + /** count the # of characters of each match */ + length(): Number + /** deep-copy the document, so that no references remain */ + clone(shallow?: Boolean): Document + /** freeze the current state of the document, for speed-purposes */ + cache(options?: Object): Document + /** un-freezes the current state of the document, so it may be transformed */ + uncache(options?: Object): Document + + // Accessors + /** use only the first result(s) */ + first(n?: Number): Document + /** use only the last result(s) */ + last(n?: Number): Document + /** grab a subset of the results */ + slice(start: Number, end?: Number): Document + /** use only the nth result */ + eq(n: Number): Document + /** get the first word in each match */ + firstTerm(): Document + /** get the end word in each match */ + 
lastTerm(): Document + /** return a flat list of all Term objects in match */ + termList(): any + + // Match + /** return a new Doc, with this one as a parent */ + match(match: String | Document): Document + /** return all results except for this */ + not(match: String | Document): Document + /** return only the first match */ + matchOne(match: String | Document): Document + /** return each current phrase, only if it contains this match */ + if(match: String | Document): Document + /** Filter-out any current phrases that have this match */ + ifNo(match: String | Document): Document + /** Return a boolean if this match exists */ + has(match: String | Document): Document + /** search through earlier terms, in the sentence */ + lookBehind(match: String | Document): Document + /** search through following terms, in the sentence */ + lookAhead(match: String | Document): Document + /** return the terms before each match */ + before(match: String | Document): Document + /** return the terms after each match */ + after(match: String | Document): Document + /** quick find for an array of string matches */ + lookup(matches: String[]): Document + + // Case + /** turn every letter of every term to lower-cse */ + toLowerCase(): Document + /** turn every letter of every term to upper case */ + toUpperCase(): Document + /** upper-case the first letter of each term */ + toTitleCase(): Document + /** remove whitespace and title-case each term */ + toCamelCase(): Document + + // Whitespace + /** add this punctuation or whitespace before each match */ + pre(str: String): Document + /** add this punctuation or whitespace after each match */ + post(str: String): Document + /** remove start and end whitespace */ + trim(): Document + /** connect words with hyphen, and remove whitespace */ + hyphenate(): Document + /** remove hyphens between words, and set whitespace */ + dehyphenate(): Document + + // Tag + /** Give all terms the given tag */ + tag(tag: String, reason?: String): Document 
+ /** Only apply tag to terms if it is consistent with current tags */ + tagSafe(tag: String, reason?: String): Document + /** Remove this term from the given terms */ + unTag(tag: String, reason?: String): Document + /** return only the terms that can be this tag */ + canBe(tag: String): Document + + // Loops + /** run each phrase through a function, and create a new document */ + map(fn: Function): Document | [] + /** run a function on each phrase, as an individual document */ + forEach(fn: Function): Document + /** return only the phrases that return true */ + filter(fn: Function): Document + /** return a document with only the first phrase that matches */ + find(fn: Function): Document | undefined + /** return true or false if there is one matching phrase */ + some(fn: Function): Document + /** sample a subset of the results */ + random(n?: Number): Document + + // Insert + /** substitute-in new content */ + replaceWith(text: String | Function, keepTags?: Boolean, keepCase?: Boolean): Document + /** search and replace match with new content */ + replace(match: String, text?: String | Function, keepTags?: Boolean, keepCase?: Boolean): Document + /** fully remove these terms from the document */ + delete(match: String): Document + /** add these new terms to the end (insertAfter) */ + append(text: String): Document + /** add these new terms to the front (insertBefore) */ + prepend(text: String): Document + /** add these new things to the end */ + concat(text: String): Document + + // transform + /**re-arrange the order of the matches (in place) */ + sort(method?: String | Function): Document + /**reverse the order of the matches, but not the words */ + reverse(): Document + /** clean-up the document, in various ways */ + normalize(options?: String | Object): String + /** remove any duplicate matches */ + unique(): Document + /** return a Document with three parts for every match ('splitOn') */ + split(match?: String): Document + /** separate everything after the 
match as a new phrase */ + splitBefore(match?: String): Document + /** separate everything before the word, as a new phrase */ + splitAfter(match?: String): Document + /** split a document into labeled sections */ + segment(regs: Object, options?: Object): Document + /** make all phrases into one phrase */ + join(str?: String): Document + + // Output + /** return the document as text */ + text(options?: String | Object): String + /** pull out desired metadata from the document */ + json(options?: String | Object): any + /** some named output formats */ + out(format?: string): String + /** pretty-print the current document and its tags */ + debug(): Document + /** store a parsed document for later use */ + export(): any + + // Selections + /** split-up results by each individual term */ + terms(n?: Number): Document + /** split-up results into multi-term phrases */ + clauses(n?: Number): Document + /** return all terms connected with a hyphen or dash like `'wash-out'`*/ + hyphenated(n?: Number): Document + /** add quoation marks around each match */ + toQuoations(start?: String, end?: String): Document + /** add brackets around each match */ + toParentheses(start?: String, end?: String): Document + /** return things like `'(939) 555-0113'` */ + phoneNumbers(n?: Number): Document + /** return things like `'#nlp'` */ + hashTags(n?: Number): Document + /** return things like `'hi@compromise.cool'` */ + emails(n?: Number): Document + /** return things like `:)` */ + emoticons(n?: Number): Document + /** return athings like `💋` */ + emoji(n?: Number): Document + /** return things like `'@nlp_compromise'`*/ + atMentions(n?: Number): Document + /** return things like `'compromise.cool'` */ + urls(n?: Number): Document + /** return things like `'quickly'` */ + adverbs(n?: Number): Document + /** return things like `'he'` */ + pronouns(n?: Number): Document + /** return things like `'but'`*/ + conjunctions(n?: Number): Document + /** return things like `'of'`*/ + 
prepositions(n?: Number): Document + /** return things like `'Mrs.'`*/ + abbreviations(n?: Number): Document + + // Subsets + /** alias for .all(), until plugin overloading */ + sentences(): Document + /** return any multi-word terms, like "didn't" */ + contractions(n?: Number): Document + /** contract words that can combine, like "did not" */ + contract(): Document + /** return anything inside (parentheses) */ + parentheses(n?: Number): Document + /** return things like "Spencer's" */ + possessives(n?: Number): Document + /** return any terms inside 'quotation marks' */ + quotations(n?: Number): Document + /** return things like `'FBI'` */ + acronyms(n?: Number): Document + /** return things like `'eats, shoots, and leaves'` */ + lists(n?: Number): Document + /** return any subsequent terms tagged as a Noun */ + nouns(n?: Number): Document + /** return any subsequent terms tagged as a Verb */ + verbs(n?: Number): Document } } -export = compromise; + +export default nlp diff --git a/types/test.ts b/types/test.ts deleted file mode 100644 index 25a82afd6..000000000 --- a/types/test.ts +++ /dev/null @@ -1,16 +0,0 @@ -// tslint:disable:no-relative-import-in-test - -// importing via absolute path appears to be broken because this package -// indirectly imports itself (compromise -> compromise-plugin -> compromise), -// which brings in an older, broken version of its typings that typescript -// always seems to prefer over the typings in the repo proper. 
-import * as compromise from '.'; - -// $ExpectType Text -const r = compromise('he is fun', {}); - -r.verbs().out(); -r.debug(); - -// $ExpectType void -compromise.addConjugations({ eat: { Actor: "eater" } }); diff --git a/types/tsconfig.json b/types/tsconfig.json index 981f19be5..dd1bf1f5b 100644 --- a/types/tsconfig.json +++ b/types/tsconfig.json @@ -1,14 +1,23 @@ { "compilerOptions": { + "target": "es5", "module": "commonjs", - "lib": [ - "es6" - ], + "lib": [], + "checkJs": true, + "allowJs": true, + "sourceMap": false, + "importHelpers": true, + "downlevelIteration": true, + "strict": true, "noImplicitAny": true, - "noImplicitThis": true, "strictNullChecks": true, - "strictFunctionTypes": true, - "noEmit": true, - "baseUrl": "." + "noImplicitThis": true, + "alwaysStrict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noImplicitReturns": true, + "noFallthroughCasesInSwitch": true, + "moduleResolution": "node", + "allowSyntheticDefaultImports": true } } diff --git a/types/types.test.ts b/types/types.test.ts new file mode 100644 index 000000000..0848f7ae6 --- /dev/null +++ b/types/types.test.ts @@ -0,0 +1,15 @@ +// a smoke-test for our typescipt typings +// to run: +// npm install -g typescript +// npm install -g ts-node +// npm install --no-save @types/tape @types/node +// npm run test:types + +const test = require('tape') +import nlp from '../' + +test('typefile smoketest', (t: any) => { + const doc = nlp('hello world') + t.equal(doc.text(), 'hello world', 'basic-smoketest') + t.end() +})running ...
-