From 39590adf96ef895a119fe84ec710af93bfb3a458 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Thu, 4 Feb 2021 13:30:15 -0500 Subject: [PATCH] fix for implicit contractions output --- changelog.md | 4 ++++ scratch.js | 16 ++++++++++++++-- src/Doc/methods/transform/01-sort.js | 2 +- src/Phrase/methods/02-text.js | 12 ++++++++++++ src/Phrase/methods/05-json.js | 3 +++ tests/match/match-contraction.test.js | 4 ++-- tests/output/text.test.js | 8 ++++++++ tests/unique.test.js | 1 + 8 files changed, 45 insertions(+), 5 deletions(-) diff --git a/changelog.md b/changelog.md index 5aa76e582..3d09cb511 100644 --- a/changelog.md +++ b/changelog.md @@ -11,6 +11,10 @@ While all _Major_ releases should be reviewed, our only two _large_ releases are +#### 13.9.1 [Feb 2021] +- **[fix]** - matches over a contraction +- **[new]** - add 'implicit' text output + #### 13.9.0 [Feb 2021] - **[new]** - World.addConjugations() method - **[new]** - World.addPlurals() method diff --git a/scratch.js b/scratch.js index 26874eb78..b6a355dfe 100644 --- a/scratch.js +++ b/scratch.js @@ -17,8 +17,20 @@ nlp.extend(require('./plugins/penn-tags/src')) done - false */ -let doc = nlp(`haven't done`) -doc.match(`have done`).debug() +// let doc = nlp(`haven't done`) +// doc.match(`have done`).debug() + +// let doc = nlp(`i haven't done it`) +// let m = doc.match(`not done`) +// console.log(`|${m.text()}|`) +// m.debug() + +let doc = nlp(`is not foobar isn't`) +doc = doc.terms().unique().debug() +// console.log('|' + nlp(`isn't`).text('implicit') + '|') + +// console.log(`|${doc.text()}|`) + // doc.termList().forEach(t => { // console.log(t.text, t.isImplicit()) // }) diff --git a/src/Doc/methods/transform/01-sort.js b/src/Doc/methods/transform/01-sort.js index 75a7f48c9..161f3e75e 100644 --- a/src/Doc/methods/transform/01-sort.js +++ b/src/Doc/methods/transform/01-sort.js @@ -131,7 +131,7 @@ exports.unique = function () { let list = [].concat(this.list) let obj = {} list = list.filter(p => { - let str = p.text('reduced').trim() + let str = p.text('reduced').trim() || p.text('implicit').trim() if (obj.hasOwnProperty(str) === true) { return false } diff --git a/src/Phrase/methods/02-text.js b/src/Phrase/methods/02-text.js index 27f72adcc..52fce6af3 100644 --- a/src/Phrase/methods/02-text.js +++ b/src/Phrase/methods/02-text.js @@ -36,6 +36,13 @@ exports.text = function (options = {}, isFirst, isLast) { implicit: true, reduced: true, } + } else if (options === 'implicit') { + options = { + punctuation: true, + implicit: true, + whitespace: true, + trim: true, + } } else if (options === 'root') { options = { titlecase: false, @@ -57,6 +64,10 @@ exports.text = function (options = {}, isFirst, isLast) { isFull = true } let text = terms.reduce((str, t, i) => { + // don't output intro space for a contraction-match i'm good => "[am] good" + if (i === 0 && t.text === '' && t.implicit !== null && !options.implicit) { + return str + } options.last = isLast && i === terms.length - 1 let showPre = true let showPost = true @@ -71,6 +82,7 @@ exports.text = function (options = {}, isFirst, isLast) { } } let txt = t.textOut(options, showPre, showPost) + // console.log(terms) // if (options.titlecase && i === 0) { // txt = titleCase(txt) // } diff --git a/src/Phrase/methods/05-json.js b/src/Phrase/methods/05-json.js index afad27b9e..53496b6ef 100644 --- a/src/Phrase/methods/05-json.js +++ b/src/Phrase/methods/05-json.js @@ -14,6 +14,9 @@ exports.json = function (options = {}, world) { if (options.reduced) { res.reduced = this.text('reduced') } + if (options.implicit) { + res.implicit = this.text('implicit') + } if (options.root) { res.root = this.text('root') } diff --git a/tests/match/match-contraction.test.js b/tests/match/match-contraction.test.js index 01f87460f..f5395f589 100644 --- a/tests/match/match-contraction.test.js +++ b/tests/match/match-contraction.test.js @@ -10,7 +10,7 @@ test('match-contractions', function (t) { t.equal(m.text(), `haven't`, 'first-half-found') m = doc.match(`not done`) - t.equal(m.text(), ` done`, 'second-half-found') + t.equal(m.text(), `done`, 'second-half-found') m = doc.match(`haven't`) t.equal(m.text(), `haven't`, 'match-contraction') @@ -80,7 +80,7 @@ test('contraction-optional', function (t) { t.equal(m.text(), `i'm`, `i am?`) m = doc.match(`am glad?`) - t.equal(m.text(), ` glad`, `am glad?`) + t.equal(m.text(), `glad`, `am glad?`) m = doc.match(`i am? glad`) t.equal(m.text(), `i'm glad`, `i am? glad`) diff --git a/tests/output/text.test.js b/tests/output/text.test.js index e3d51a971..51c68bf39 100644 --- a/tests/output/text.test.js +++ b/tests/output/text.test.js @@ -71,6 +71,14 @@ test('text-reduced', function (t) { t.end() }) +test('text-implicit', function (t) { + let doc = nlp(`My dog isn't good, he's the best!`) + const str = 'My dog is not good, he is the best!' + t.equal(doc.json({ implicit: true })[0].implicit, str, 'json(implicit)') + t.equal(doc.text('implicit'), str, 'text(implicit): ') + t.end() +}) + test('text-root', function (t) { let doc = nlp(`My dog LOVES pizza, and grapes!!`) const str = 'my dog love pizza and grape' diff --git a/tests/unique.test.js b/tests/unique.test.js index 941e60cb7..5a582e796 100644 --- a/tests/unique.test.js +++ b/tests/unique.test.js @@ -26,5 +26,6 @@ test('unique-normalize', function (t) { doc = nlp(`is not isn't`) doc = doc.terms().unique() t.equal(doc.text(), 'is not', 'normalize-contraction') + t.equal(doc.length, 2, 'implicit words are uniqued') t.end() })