Merge pull request #363 from nlp-compromise/dev

v10
spencermountain · Apr 21, 2017 · 5c70a16 · 5c70a16
2 parents a3e4cd7 + a387880
commit 5c70a16
Show file tree

Hide file tree

Showing 103 changed files with 3,522 additions and 1,926 deletions.
diff --git a/builds/compromise.es6.js b/builds/compromise.es6.js
diff --git a/builds/compromise.es6.min.js b/builds/compromise.es6.min.js
diff --git a/builds/compromise.js b/builds/compromise.js
diff --git a/builds/compromise.min.js b/builds/compromise.min.js
diff --git a/changelog.md b/changelog.md
@@ -1,8 +1,21 @@
 `compromise` uses semver, with occasional releases to npm and bower.
 here, 'Major' is considered an api change, while 'Minor' is considered a performance change.
 
-
-### v.9  &nbsp;
+### v10
+- cleanup & rename some `.value()` methods
+- change lumping behaviour of lexicon terms with multiple words
+- keep more former tags after a term replace method
+- new `.random()` method
+- new `.lessThan()`, `.greaterThan()`, `.equalTo()` methods
+- new prefix/suffix/infix matches with `_ffix` syntax
+- `tag()` supports a sequence of tags for a sequence of terms
+- .match 'range' queries now use a real match - `#Adverb{2,4}`
+- new `.before()` and `.after()` match methods
+- removes the `nlp.lexicon()` method, in favour of a many-lexicons concept
+- changes params of `.replaceWith()` method to a 'keyTags' boolean
+- improved .debug() and logging on clientside
+
+### v9  &nbsp;
 * 9.0.0
 - rename `Term.tag` object to `Term.tags` so the `.tag()` method can work throughout more-consistently
 - fix 'Auxillary' tag typo to 'Auxiliary'
@@ -12,28 +25,28 @@ here, 'Major' is considered an api change, while 'Minor' is considered a perform
 - new `.has()` method for faster lookups
 * 9.1.0 - pretty-real filesize reduction by swapping es6 classes for es5 inheritance
 
-### v.8  &nbsp;
+### v8  &nbsp;
 * 8.0.0 - less-ambitious date-parsing of nl-date forms
 *       - filesize reduction using [efrt](https://github.com/nlp-compromise/efrt) data structure (254k -> 214k)
 * 8.1.0 - add `nlp.tokenize()` method for disabling pos-tagging of input
 * 8.2.0 - add `nlp.out('index')` method, 12 bugs
 
-### v.7  &nbsp; :postal_horn:
+### v7  &nbsp; :postal_horn:
 * 7.0.0 - weee! [big change!](https://github.com/nlp-compromise/compromise/wiki/v7-Upgrade,-welcome) *npm package rename*
 * 7.0.15 - fix for IE9
 
-### v.6  &nbsp;  
+### v6  &nbsp;  
 * 6.5.0 - builds now using browserify + derequire()
 * 6.4.0 - re-written term-lumper logic
 * 6.3.0 - new nlp.lexicon({word:'POS'}) flow
 * 6.0.0 - be consistent with `text.normal()`, `term.all_forms()`, `text.word_count()`. `text.normal()` includes sentence-terminators, like periods etc.
 
-### v.5  &nbsp;  
+### v5  &nbsp;  
 * 5.2.0 - airport codes support, helper methods for specific POS
 * 5.1.0 - newlines split sentences
 * 5.0.0 - Text methods now return this, instead of array of sentences
 
-### v.4  &nbsp;  
+### v4  &nbsp;  
 * 4.12.0 - more-sensible responses for invalid, non-string inputs
 * 4.11.0 - 14 PRs, with fixes for currencies, pluralization, conjugation
 * 4.10.0 - Value.to_text() new method, fix "Posessive" POS typo
@@ -46,12 +59,12 @@ here, 'Major' is considered an api change, while 'Minor' is considered a perform
 * 4.1.0 - hyphenated words are normalized into spaces
 * 4.0.0 - grammar-aware match & replace functions
 
-### v.3  &nbsp;  **(Breaking)**
+### v3  &nbsp;  **(Breaking)**
 * 3.0.2 - Statement & Question classes
 * v3.0.0 - Feb 2016
   * split ngram, locale, and syllables into plugins in a separate repo
 
-### v.2
+### v2
 * v2.0.0 - Nov 2015 &nbsp;  **(Breaking)**
   * es6 classes, babel building
   * better test coverage
@@ -66,15 +79,15 @@ here, 'Major' is considered an api change, while 'Minor' is considered a perform
   * lexicon can be augmented third-party
   * date parsing results are different
 
-### v.1
+### v1
 * v1.1.0 - May 2015
 smarter handling of ambiguous contractions ("he's" -> ["he is", "he has"])
 
 * v1.0.0 - May 2015
 added name genders and beginning of co-reference resolution ('Tony' -> 'he') API.
 small breaking change on ```Noun.is_plural``` and ```Noun.is_entity```, affording significant pos() speedup. Bumped Major version for these changes.
 
-### v.0
+### v0
 
 * v0.5.2 - May 2015
 Phrasal verbs ('step up'), firstnames and .people()

diff --git a/demo/keypress/index.html b/demo/keypress/index.html
@@ -29,7 +29,8 @@ <h3>verb.to_past()</h3>
       <div id="past-verb-result"></div>
     </div>
 
-    <script type="text/javascript" src="https://unpkg.com/compromise@latest/builds/compromise.min.js"></script>
+    <!-- <script type="text/javascript" src="https://unpkg.com/compromise@latest/builds/compromise.min.js"></script> -->
+    <script type="text/javascript" src="../../builds/compromise.min.js"></script>
     <script type="text/javascript" language="javascript">
     var nlp = window.nlp;
     function toPast() {

diff --git a/docs/generic.js b/docs/generic.js
@@ -69,6 +69,11 @@ module.exports = {
       desc: 'turn a list of results into one result',
       returns: 'Text',
       example: 'nlp(\'sex cauldron? I thought they closed that place down.\').flatten().length()\n//1'
+    },
+    random: {
+      desc: 'sample a random section of n matches',
+      returns: 'Text',
+      example: 'nlp(\'one two three four\').terms().random(2).out(\'array\')\n//[\'four\',\'one\']'
     }
   },
 
@@ -133,12 +138,12 @@ module.exports = {
 
   replace: {
     replaceWith: {
-      desc: 'turn the current selection into something else. Essentially just delete() -> insertAt().',
+      desc: 'turn the current selection into something else. Essentially just delete() -> insertAt(). The second param says whether to keep original tags around.',
       returns: 'Text',
-      example: 'nlp(\'it was the worst of times\').match(\'worst\').replaceWith(\'blurst\').all().out()\n//it was the blurst of times'
+      example: 'nlp(\'it was the worst of times\').match(\'worst\').replaceWith(\'blurst\', true).all().out()\n//it was the blurst of times'
     },
     replace: {
-      desc: 'turn a new selection into something else. Essentially just match() -> delete() -> insertAt().',
+      desc: 'turn a new selection into something else. Essentially just match() -> delete() -> insertAt(). Third optional param keeps original tags around.',
       returns: 'Text',
       example: 'nlp(\'trust me folks, big league.\').replace(\'big league\',\'bigly\').all().out()\n//trust me folks, bigly.'
     },
@@ -174,6 +179,16 @@ module.exports = {
       desc: 'quick check to see if this match is found',
       returns: 'Boolean',
       example: 'nlp(\'I am the very model of a modern Major-General\').has(\'#Pronoun\')\n//true,'
+    },
+    before: {
+      desc: 'find a match, and return everything in front of it',
+      returns: 'Text',
+      example: 'nlp(\'one two three four five\').before(\'three\').out()\n//one two'
+    },
+    after: {
+      desc: 'find a match, and return everything following it',
+      returns: 'Text',
+      example: 'nlp(\'one two three four five\').after(\'three\').out()\n//four five'
     }
   },
 

diff --git a/docs/subsets/values.js b/docs/subsets/values.js
@@ -16,15 +16,15 @@ module.exports = {
     returns: 'Text',
     example: 'nlp(\'ten things i hate about you\').values().toNumber().all().out()\n//10 things i hate about you'
   },
-  toTextValue: {
+  toText: {
     desc: 'turn a number like `5,500` into its textual form, like `five thousand five hundred`',
     returns: 'Text',
-    example: 'nlp(\'10 things i hate about you\').values().toTextValue().all().out()\n//ten things i hate about you'
+    example: 'nlp(\'10 things i hate about you\').values().toText().all().out()\n//ten things i hate about you'
   },
-  toNiceNumber: {
+  toNice: {
     desc: 'turn a number into numerical form, but with nice commas, like `5,500`',
     returns: 'Text',
-    example: 'nlp(\'five hundred sixty two thousand, four hundred and seven\').values().toTextValue().all().out()\n//\'562,407\''
+    example: 'nlp(\'five hundred sixty two thousand, four hundred and seven\').values().toNice().all().out()\n//\'562,407\''
   },
   toCardinal: {
     desc: 'turn `fifth` into `five`, and `5th` into `5`',
@@ -40,5 +40,20 @@ module.exports = {
     desc: 'return the actual javascript integers (or floats)',
     returns: 'Array',
     example: 'nlp(\'at the seven eleven\').values().numbers()\n// [7, 11]'
+  },
+  greaterThan: {
+    desc: 'return only the values larger than a given value',
+    returns: 'Text',
+    example: 'nlp(\'seven peaches weigh ten kilograms\').values().greaterThan(7).out(\'array\')\n// [\'10\']'
+  },
+  lessThan: {
+    desc: 'return only the values smaller than a given value',
+    returns: 'Text',
+    example: 'nlp(\'he is 7 years old\').values().lessThan(\'ten\').out(\'array\')\n// [\'7\']'
+  },
+  isEqual: {
+    desc: 'return only the values equal to a given value',
+    returns: 'Text',
+    example: 'nlp(\'it\'s his 7th birthday\').values().isEqual(7).out(\'array\')\n// [\'7th\']'
   }
 };
diff --git a/open_bugs.js b/open_bugs.js
@@ -24,4 +24,13 @@
 //nlp('parent should not be uppercase').clone().toUpperCase().parent.debug()
 
 //bug 7. - adds 'a' when making singular
-nlp('men go').sentences().toPastTense().nouns().toSingular().debug();
+// nlp('men go').sentences().toPastTense().nouns().toSingular().debug();
+
+//bug 8
+// console.log(nlp('I\'m going to the shops').sentences().toPastTense().out());
+
+//bug 9
+// r = nlp('he walks');
+// r.match('walks').tag('Foo');
+// r.verbs().toPastTense();
+// r.debug();
diff --git a/package.json b/package.json
@@ -2,7 +2,7 @@
   "author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
   "name": "compromise",
   "description": "natural language processing in the browser",
-  "version": "9.1.0",
+  "version": "10.0.0",
   "main": "./builds/compromise.js",
   "repository": {
     "type": "git",

diff --git a/scratch.js b/scratch.js
@@ -1,38 +1,14 @@
 // 'use strict';
-//this file is not included in the build.
-//use it for messing around.
 var nlp = require('./src/index');
 // nlp.verbose('tagger');
-// const corpus = require('nlp-corpus');
-// let sotu = corpus.sotu.parsed()[23];
-const fresh = require('./test/unit/lib/freshPrince.js');
+const corpus = require('nlp-corpus');
+let text = corpus.sotu.parsed()[0];
+// const fresh = require('./test/unit/lib/freshPrince.js');
 
-// bug.1
-//  .? vs *
 
-// nlp('is this').sentences(0).toNegative().debug();
+// console.log(nlp('I\'m going to the shops').sentences().toPastTense().out());
 
-// nlp('I\'m going to the shops').sentences().toPastTense().debug();
-
-
-// let r = nlp.tokenize('5th - 7th').tag('NumberRange').debug();
-// r.contractions().debug();
-
-//===timer
-// console.time('parse');
-// let r = nlp(fresh);
-// console.timeEnd('parse');
-//
-// console.time('match');
-// r.match('#Determiner (story|thing|#Adjective)', true);
-// console.timeEnd('match');
-//
-// console.time('tag');
-// r.tag('#Person');
-// console.timeEnd('tag');
-
-let str = 'hello there. I work for the F.B.I. in ft. Mede. hello there!';
-let r = nlp(str);
-let o = r.sentences(1).out('offsets')[0];
-console.log(o);
-console.log(str.substring(o.wordStart, o.wordEnd));
+// r = nlp('he walks');
+// r.match('walks').tag('Foo');
+// r.verbs().toPastTense();
+// r.debug();
diff --git a/scripts/prepublish/linter.js b/scripts/prepublish/linter.js
@@ -8,3 +8,4 @@ const cmd = eslint + ' -c .eslintrc --color ' + path.join(__dirname, '../../src/
 exec(cmd, {
   async: true
 });
+console.log(' - done.');
diff --git a/src/data/adjectives/superlatives.js b/src/data/adjectives/superlatives.js
@@ -196,4 +196,3 @@ let arr = [
 ];
 
 module.exports = fns.uncompress_suffixes(arr, compressed);
-// console.log(JSON.stringify(module.exports.sort(), null, 2));
diff --git a/src/data/nouns/irregular_plurals.js b/src/data/nouns/irregular_plurals.js
@@ -58,7 +58,8 @@ let main = [
   ['radius', 'radii'],
   ['referendum', 'referenda'],
   ['thief', 'thieves'],
-  ['tooth', 'teeth']
+  ['tooth', 'teeth'],
+  ['modulus', 'moduli']
 ];
 //decompress it
 main = main.map(function(a) {

diff --git a/src/fns.js b/src/fns.js
@@ -95,7 +95,7 @@ exports.printTerm = function(t) {
   for(let i = 0; i < tags.length; i++) {
     if (tagset[tags[i]]) {
       const color = tagset[tags[i]].color || 'black';
-      return exports[color](t.plaintext);
+      return exports[color](t.out('text'));
     }
   }
   return c.reset + t.plaintext + c.reset;

diff --git a/src/index.js b/src/index.js
@@ -5,6 +5,7 @@ const log = require('./log');
 
 //the main thing
 const nlp = function (str, lexicon) {
+  // this.tagset = tagset;
   let r = buildResult(str, lexicon);
   r.tagger();
   return r;
@@ -15,15 +16,10 @@ nlp.tokenize = function(str) {
   return buildResult(str);
 };
 
-//this is handy
+//this is useful
 nlp.version = pkg.version;
 
-//so handy at times
-nlp.lexicon = function() {
-  return require('./data/lexicon');
-};
-
-//also this is much handy
+//turn-on some debugging
 nlp.verbose = function(str) {
   log.enable(str);
 };

diff --git a/src/log/client.js b/src/log/client.js
@@ -0,0 +1,35 @@
+'use strict';
+const fns = require('../fns');
+
+// const colors = {
+//   'Person': '#6393b9',
+//   'Pronoun': '#81acce',
+//   'Noun': 'steelblue',
+//   'Verb': 'palevioletred',
+//   'Adverb': '#f39c73',
+//   'Adjective': '#b3d3c6',
+//   'Determiner': '#d3c0b3',
+//   'Preposition': '#9794a8',
+//   'Conjunction': '#c8c9cf',
+//   'Value': 'palegoldenrod',
+//   'Expression': '#b3d3c6'
+// };
+
+const tag = (t, pos, reason) => {
+  let title = t.normal || '[' + t.silent_term + ']';
+  title = fns.leftPad('\'' + title + '\'', 12);
+  title += '  ->   ' + pos;
+  title += fns.rightPad((reason || ''), 15);
+  console.log('%c' + title, ' color: #a2c99c');
+};
+const untag = (t, pos, reason) => {
+  let title = t.normal || '[' + t.silent_term + ']';
+  title = fns.leftPad('\'' + title + '\'', 12);
+  title += '  ~*   ' + pos;
+  title += '    ' + (reason || '');
+  console.log('%c' + title, ' color: #b66a6a');
+};
+module.exports = {
+  tag: tag,
+  untag: untag,
+};
Original file line number	Diff line number	Diff line change
Expand Up		@@ -196,4 +196,3 @@ let arr = [
		];

		module.exports = fns.uncompress_suffixes(arr, compressed);
		// console.log(JSON.stringify(module.exports.sort(), null, 2));