From 6af45391393622ad4427365873bf12264dc1491a Mon Sep 17 00:00:00 2001 From: Spencer Kelly Date: Mon, 18 May 2015 21:39:34 -0400 Subject: [PATCH] bump to release 1.0.0, document 2 new functions --- README.md | 35 ++++++++--- bower.json | 2 +- changelog.md | 7 +++ client_side/nlp.js | 3 +- contributing.md | 3 +- index.js | 1 + package.json | 2 +- tests/pos_test/latest.js | 119 ++++++++++++++----------------------- tests/pos_test/pos_test.js | 4 +- 9 files changed, 88 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 9763a069f..079788a16 100755 --- a/README.md +++ b/README.md @@ -103,6 +103,11 @@ nlp.syllables("hamburger") s.values() //[] ```` +as sugar, these methods can be called on multiple sentences from the nlp.pos() object too, like: +```javascript +nlp.pos("Tony is cool. Jen is happy.").people() +//[{text:"Tony"}, {text:"Jen"}] +``` ###Noun methods: ```javascript @@ -112,21 +117,34 @@ nlp.noun("earthquakes").singularize() nlp.noun("earthquake").pluralize() //earthquakes -nlp.noun('veggie burger').is_plural +nlp.noun('veggie burger').is_plural() //false -nlp.noun('tony danza').is_person +nlp.noun('tony danza').is_person() //true -nlp.noun('Tony J. Danza elementary school').is_person +nlp.noun('Tony J. Danza elementary school').is_person() //false -nlp.noun('SS Tony danza').is_person +nlp.noun('SS Tony danza').is_person() //false nlp.noun('hour').article() //an -nlp.inflect('mayors of toronto')) +nlp.noun('mayors of toronto').conjugate() //{ plural: 'mayors of toronto', singular: 'mayor of toronto' } + +nlp.noun("tooth").pronoun() +//it +nlp.noun("teeth").pronoun() +//they +nlp.noun("Tony Hawk").pronoun() +//"he" +nlp.noun("Nancy Hawk").pronoun() +//"she" + +var he = nlp.pos("Tony Danza is great. 
He lives in L.A.").sentences[1].tokens[0] +he.analysis.reference_to() +//{text:"Tony Danza"...} ``` ###Verb methods: @@ -244,6 +262,7 @@ nlp.denormalize("The quick brown fox jumps over the lazy dog", {percentage:50}) "VBN" : "past-participle verb (eaten)" "VBP" : "infinitive verb (eat)" "VBZ" : "present-tense verb (eats, swims)" + "VBF" : "future-tense verb (will eat)" "CP" : "copula (is, was, were)" "VBG" : "gerund verb (eating,winning)" "adjective": @@ -284,7 +303,7 @@ Because the library can conjugate all sorts of forms, it only needs to store one The lexicon was built using the [American National Corpus](http://www.americannationalcorpus.org/), then intersected with the regex rule-list. For example, it lists only 300 verbs, then blasts-out their 1200+ derived forms. ####Contractions -Unlike other nlp toolkits, this library puts a 'silent token' into the phrase for contractions. Otherwise something would be neglected. +It puts a 'silent token' into the phrase for contractions. Otherwise a meaningful part-of-speech could be neglected. ```javascript nlp.pos("i'm good.") [{ @@ -304,7 +323,7 @@ nlp.pos("i'm good.") }] ``` ####Tokenization -Neighbouring words with the same part of speech are merged together, unless there is punctuation, different capitalisation, or special cases. +Neighbouring words with the same part of speech are merged together, unless there is punctuation, different capitalisation, or some special cases. ```javascript nlp.pos("tony hawk won").tags() //tony hawk NN @@ -317,6 +336,8 @@ nlp.pos("tony hawk won", {dont_combine:true}).tags() //hawk NN //won VB ``` +####Phrasal Verbs +'beef up' is one verb, and not some direction of beefing. 
## Licence MIT diff --git a/bower.json b/bower.json index 62f950538..03672ce7b 100644 --- a/bower.json +++ b/bower.json @@ -1,7 +1,7 @@ { "name": "nlp_compromise", "main": "./client_side/nlp.js", - "version": "0.5.2", + "version": "1.0.0", "homepage": "https://github.com/spencermountain/nlp_compromise", "authors": [ "Spencer Kelly " diff --git a/changelog.md b/changelog.md index 40cdb258c..26c7819a6 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,13 @@ Uses semvar, with casual releases to npm and bower. 'Major' is considered an api change, while 'Minor' is considered a performance change. +#v1.0.0 - May 2015 +added name genders and beginning of co-reference resolution ('Tony' -> 'he') API. +small breaking change on ```Noun.is_plural``` and ```Noun.is_entity```, affording significant pos() speedup. Bumped Major version for these changes. + +#v0.5.2 - May 2015 +Phrasal verbs ('step up'), firstnames and .people() + #v0.4.0 - May 2015 Major file-size reduction through refactoring diff --git a/client_side/nlp.js b/client_side/nlp.js index fbd1443b0..0cc7adbb4 100644 --- a/client_side/nlp.js +++ b/client_side/nlp.js @@ -1,4 +1,4 @@ -/*! nlp_compromise 0.5.2 by @spencermountain 2015-05-18 MIT */ +/*! nlp_compromise 1.0.0 by @spencermountain 2015-05-18 MIT */ var nlp = (function() { var verb_irregulars = (function() { var types = [ @@ -9432,6 +9432,7 @@ if (typeof module !== "undefined" && module.exports) { // console.log( nlp.pos('she sells seashells by the seashore').sentences[0].negate().text() ) // console.log( nlp.pos('i will slouch')); // console.log( nlp.pos('Sally Davidson sells seashells by the seashore. Joe Biden said so.').people() ) +// console.log(nlp.pos("Tony Danza is great. 
He works in the bank.").sentences[1].tokens[0].analysis.reference_to()) return nlp; })() \ No newline at end of file diff --git a/contributing.md b/contributing.md index 8bd8f9719..0a1fb0fae 100644 --- a/contributing.md +++ b/contributing.md @@ -55,9 +55,10 @@ of minimized clientside build: * May 1st - 103kb * May 8th - 79kb * May 10th - 88kb +* May 18th - 99kb #speed -benchmarked on backend +benchmarked unit tests on backend-frontend. * May 2015 - 60ms boot have a nice day diff --git a/index.js b/index.js index f89728e2f..df4994613 100644 --- a/index.js +++ b/index.js @@ -52,3 +52,4 @@ if (typeof module !== "undefined" && module.exports) { // console.log( nlp.pos('she sells seashells by the seashore').sentences[0].negate().text() ) // console.log( nlp.pos('i will slouch')); // console.log( nlp.pos('Sally Davidson sells seashells by the seashore. Joe Biden said so.').people() ) +// console.log(nlp.pos("Tony Danza is great. He works in the bank.").sentences[1].tokens[0].analysis.reference_to()) diff --git a/package.json b/package.json index af0704f7d..32d633085 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "author": "Spencer Kelly (http://spencermounta.in)", "name": "nlp_compromise", "description": "natural language processing in the browser", - "version": "0.5.2", + "version": "1.0.0", "repository": { "type": "git", "url": "git://github.com/spencermountain/nlp_compromise.git" diff --git a/tests/pos_test/latest.js b/tests/pos_test/latest.js index 75197d2cf..384c1643f 100644 --- a/tests/pos_test/latest.js +++ b/tests/pos_test/latest.js @@ -665,12 +665,8 @@ module.exports={ "NN", "IN", "DT", - "NN" - ], - [ - "NNP" - ], - [ + "NN", + "NN", "NN" ], [ @@ -1753,7 +1749,6 @@ module.exports={ "NN", "VB", "IN", - "NNP", "CD", "CC", "DT", @@ -1858,7 +1853,6 @@ module.exports={ "VBP", "NN", "IN", - "NNP", "CD", "CC", "MD", @@ -2058,9 +2052,7 @@ module.exports={ "NN", "NN", "CC", - "NN" - ], - [ + "NN", "NN", "JJ", "IN", @@ -3139,12 +3131,8 @@ module.exports={ 
"IN", "NN", "NN", - "NN" - ], - [ - "NNP" - ], - [ + "NN", + "NN", "NN", "NN", "CC", @@ -4439,7 +4427,7 @@ module.exports={ "NN", "IN", "JJ", - "CD", + "NNP", "IN", "DT", "NNP" @@ -4521,7 +4509,7 @@ module.exports={ "VBP", "IN", "JJ", - "NNP", + "CD", "CC", "CP", "CD", @@ -4754,12 +4742,8 @@ module.exports={ "NN", "CC", "DT", - "NN" - ], - [ - "NNP" - ], - [ + "NN", + "NN", "NN", "NN" ], @@ -4955,9 +4939,7 @@ module.exports={ "DT", "NN", "CC", - "NN" - ], - [ + "NN", "NN", "VB", "NN", @@ -5295,7 +5277,9 @@ module.exports={ "DT", "NN", "IN", - "CD" + "CD", + "CC", + "NNP" ], [ "IN", @@ -5436,6 +5420,7 @@ module.exports={ "NN", "CC", "IN", + "NNP", "CD", "PRP", "CP", @@ -5477,6 +5462,7 @@ module.exports={ "CD", "NN", "IN", + "NNP", "CD" ], [ @@ -6955,10 +6941,6 @@ module.exports={ "IN", "DT", "NN", - "NN" - ], - [ - "DT", "NN", "DT", "VBZ", @@ -7098,20 +7080,14 @@ module.exports={ "CC", "NN", "NN", - "NN" - ], - [ "NN", - "NN" - ], - [ "NN", "NN", "NN", "NN", - "NN" - ], - [ + "NN", + "NN", + "NN", "NN", "NN", "NN", @@ -8511,6 +8487,7 @@ module.exports={ "RB", "JJ", "IN", + "NNP", "CD" ], [ @@ -11565,7 +11542,7 @@ module.exports={ "NN", "VBP", "DT", - "NNP", + "NN", "VBG", "CC", "IN", @@ -11703,7 +11680,7 @@ module.exports={ "NN" ], [ - "NNP", + "NN", "CP", "JJ", "RB", @@ -12025,7 +12002,6 @@ module.exports={ "NN", "NN", "VBD", - "NNP", "CD", "IN", "NN", @@ -12472,6 +12448,7 @@ module.exports={ "DT", "NN", "IN", + "NNP", "CD", "VBD", "DT", @@ -13550,9 +13527,8 @@ module.exports={ "NN", "PP", "IN", - "NNP", "CD", - "VB", + "NN", "JJ", "NN", "IN", @@ -13627,7 +13603,7 @@ module.exports={ "NN", "MD", "CC", - "VB", + "VBP", "IN", "VBP", "VBD", @@ -14742,7 +14718,6 @@ module.exports={ "DT", "NN", "IN", - "NNP", "CD", "CC", "JJ", @@ -14950,7 +14925,6 @@ module.exports={ ], [ "IN", - "NNP", "CD", "IN", "NN", @@ -15287,6 +15261,7 @@ module.exports={ ], [ "IN", + "NNP", "CD", "IN", "DT", @@ -15334,8 +15309,7 @@ module.exports={ "NNP", "CC", "CD", - "NN", - "NN" + "NNP" ], [ 
"DT", @@ -15522,7 +15496,7 @@ module.exports={ ], [ "PRP", - "VB", + "VBP", "NN", "VBP", "DT" @@ -19844,7 +19818,7 @@ module.exports={ "VBD", "IN", "PP", - "NN", + "NNP", "IN", "JJ", "CC", @@ -20099,7 +20073,6 @@ module.exports={ "VB", "PP", "IN", - "NNP", "CD", "RB", "VB", @@ -20113,7 +20086,7 @@ module.exports={ "NN" ], [ - "NN", + "NNP", "VB", "IN", "DT", @@ -20949,6 +20922,7 @@ module.exports={ "VBP", "NN", "IN", + "NNP", "CD" ], [ @@ -22074,6 +22048,7 @@ module.exports={ "PP", "NN", "IN", + "NNP", "CD" ], [ @@ -22092,7 +22067,7 @@ module.exports={ "NN" ], [ - "NN", + "NNP", "VB", "DT", "NNA", @@ -22151,7 +22126,7 @@ module.exports={ ], [ "IN", - "NNP", + "CD", "PRP", "CP", "JJ", @@ -23089,6 +23064,7 @@ module.exports={ "DT", "NN", "IN", + "NNP", "CD", "IN", "CD" @@ -23125,6 +23101,7 @@ module.exports={ ], [ "IN", + "NNP", "CD", "PRP", "CP", @@ -24401,13 +24378,11 @@ module.exports={ "CC", "JJ", "CC", - "NNP" + "NN" ], [ "IN", "CD", - "NNP", - "CD", "DT", "NN", "IN", @@ -24937,7 +24912,6 @@ module.exports={ [ "IN", "CD", - "NNP", "DT", "NN", "CC", @@ -24992,7 +24966,7 @@ module.exports={ "NN" ], [ - "NN", + "NNP", "RB", "VBD", "DT", @@ -25206,7 +25180,6 @@ module.exports={ ], [ "DT", - "NNP", "CD", "NN", "VB", @@ -25383,7 +25356,7 @@ module.exports={ "NN" ], [ - "NN", + "NNP", "JJR", "VBD", "DT", @@ -25428,6 +25401,7 @@ module.exports={ "IN", "NN", "IN", + "NNP", "CD", "NN", "VBD", @@ -25641,7 +25615,7 @@ module.exports={ "NN" ], [ - "NN", + "NNP", "VBD", "VBG", "PRP", @@ -25671,7 +25645,7 @@ module.exports={ "PRP" ], [ - "NNP", + "NN", "VBD", "DT", "PRP", @@ -25879,6 +25853,7 @@ module.exports={ ], [ "IN", + "NNP", "CD", "VBG", "DT", @@ -26157,8 +26132,6 @@ module.exports={ [ "IN", "CD", - "NNP", - "CD", "PRP", "CP", "JJ", @@ -27483,7 +27456,7 @@ module.exports={ ], [ "NN", - "NNP", + "NN", "CD", "NN", "VBD", @@ -27511,7 +27484,6 @@ module.exports={ ], [ "IN", - "NNP", "CD", "CC", "VBG", @@ -28248,9 +28220,7 @@ module.exports={ "CC", "VB", "IN", - "NN" - ], - 
[ + "NN", "NN", "IN", "NN", @@ -28335,7 +28305,6 @@ module.exports={ "NN", "NN", "IN", - "NNP", "CD", "DT", "NN", diff --git a/tests/pos_test/pos_test.js b/tests/pos_test/pos_test.js index fea292cd9..68094e085 100644 --- a/tests/pos_test/pos_test.js +++ b/tests/pos_test/pos_test.js @@ -16,12 +16,12 @@ function compare_to_latest() { }) latest[k].forEach(function(l, i) { //make sure their tokens line-up - if (l.length != results[i][0].length) { + if ( results[i] && l.length != results[i][0].length) { console.log(" "+i+") --tokenization change-- \"" + results[i][1].slice(0, 10).join(' ') + "\"..") return } l.forEach(function(pos, i2) { - if (pos != results[i][0][i2]) { + if ( results[i] && pos != results[i][0][i2]) { //we found a discrepency console.log(" "+i+") " + pos + " -> " + results[i][0][i2] + ' - "' + results[i][1][i2] + '" - '+results[i][2]) }