From c52c1c3106b0447915b66bdb5b8c712f68f6f8eb Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Fri, 7 Aug 2020 11:51:53 -0400 Subject: [PATCH] fix for #767 --- changelog.md | 1 + scratch.js | 4 ++-- src/Phrase/match/03-tryMatch.js | 12 ++++++----- tests/match/greedy-capture.test.js | 33 +++++++++++++++++------------- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/changelog.md b/changelog.md index a16bd49d6..5bf13f39e 100644 --- a/changelog.md +++ b/changelog.md @@ -15,6 +15,7 @@ While all _Major_ releases should be reviewed, our only two _large_ releases are #### 13.3.2 - fix for offset issue #771 +- fix for `{min,max}` syntax #767 - typescript fixes - update deps diff --git a/scratch.js b/scratch.js index 01aa857b2..7a18bb879 100644 --- a/scratch.js +++ b/scratch.js @@ -1,5 +1,5 @@ const nlp = require('./src/index') -nlp.verbose(true) +// nlp.verbose(true) // let txt = require('./scripts/test/speed/_sotu-text.js') nlp.extend(require('./plugins/numbers/src')) nlp.extend(require('./plugins/dates/src')) @@ -13,4 +13,4 @@ nlp.extend(require('./plugins/dates/src')) // doc.sentences().toPastTense().debug() // console.log(nlp('next week').dates().json()) -nlp('you are John, Lisa, and Fred').debug() +nlp('you are John, Lisa, Fred').match('#FirstName{1,2}').debug() diff --git a/src/Phrase/match/03-tryMatch.js b/src/Phrase/match/03-tryMatch.js index 43dfa057f..0db947ace 100644 --- a/src/Phrase/match/03-tryMatch.js +++ b/src/Phrase/match/03-tryMatch.js @@ -2,7 +2,7 @@ const makeId = require('../../Term/_id') // i formally apologize for how complicated this is. //found a match? it's greedy? keep going! -const getGreedy = function(terms, t, reg, until, index, length) { +const getGreedy = function (terms, t, reg, until, index, length) { let start = t for (; t < terms.length; t += 1) { //stop for next-reg match @@ -27,7 +27,7 @@ const getGreedy = function(terms, t, reg, until, index, length) { } //'unspecific greedy' is a weird situation. -const greedyTo = function(terms, t, nextReg, index, length) { +const greedyTo = function (terms, t, nextReg, index, length) { //if there's no next one, just go off the end! if (!nextReg) { return terms.length @@ -43,7 +43,7 @@ const greedyTo = function(terms, t, nextReg, index, length) { } // get or create named group -const getOrCreateGroup = function(namedGroups, namedGroupId, terms, startIndex, group) { +const getOrCreateGroup = function (namedGroups, namedGroupId, terms, startIndex, group) { const g = namedGroups[namedGroupId] if (g) { @@ -62,7 +62,7 @@ const getOrCreateGroup = function(namedGroups, namedGroupId, terms, startIndex, } /** tries to match a sequence of terms, starting from here */ -const tryHere = function(terms, regs, index, length) { +const tryHere = function (terms, regs, index, length) { const namedGroups = {} let previousGroupId = null let t = 0 @@ -125,7 +125,6 @@ const tryHere = function(terms, regs, index, length) { continue } - //if it looks like a match, continue //we have a special case where an end-anchored greedy match may need to //start matching before the actual end; we do this by (temporarily!) @@ -172,6 +171,9 @@ const tryHere = function(terms, regs, index, length) { if (t === null) { return [false, null] //greedy was too short } + if (reg.min && reg.min > t) { + return [false, null] //greedy was too short + } // if this was also an end-anchor match, check to see we really // reached the end if (reg.end === true && index + t !== length) { diff --git a/tests/match/greedy-capture.test.js b/tests/match/greedy-capture.test.js index 92eb68c34..91aece5de 100644 --- a/tests/match/greedy-capture.test.js +++ b/tests/match/greedy-capture.test.js @@ -6,33 +6,25 @@ const nlp = require('../_lib') * https://github.com/spencermountain/compromise/issues/654 */ -test('issue-654: named greedy capture', function(t) { +test('issue-654: named greedy capture', function (t) { let m - m = nlp('ralf eats the glue') - .match('ralf eats [*]') - .groups('target') + m = nlp('ralf eats the glue').match('ralf eats [*]').groups('target') t.equal(m.out('normal'), 'the glue', 'wildcard capture at the end') - m = nlp('ralf eats the glue') - .match('ralf eats [*] glue') - .groups('target') + m = nlp('ralf eats the glue').match('ralf eats [*] glue').groups('target') t.equal(m.out('normal'), 'the', 'wildcard capture in the middle') - m = nlp('ralf eats the glue') - .match('ralf eats [.+]') - .groups('target') + m = nlp('ralf eats the glue').match('ralf eats [.+]').groups('target') t.equal(m.out('normal'), 'the glue', 'wildcard capture at the end') - m = nlp('ralf eats the glue') - .match('ralf eats [.+] glue') - .groups('target') + m = nlp('ralf eats the glue').match('ralf eats [.+] glue').groups('target') t.equal(m.out('normal'), 'the', 'wildcard capture in the middle') t.end() }) -test('issue-654: greedy capture', function(t) { +test('issue-654: greedy capture', function (t) { let m m = nlp('ralf eats the glue').match('ralf eats [*]', 0) @@ -49,3 +41,16 @@ test('issue-654: greedy capture', function(t) { t.end() }) + +test('test greedy min/max', function (t) { + let doc = nlp('hello John, Lisa, Fred').match('#FirstName{3,6}') + t.equal(doc.text(), 'John, Lisa, Fred', 'min met') + + doc = nlp('hello John, Lisa, Fred').match('#FirstName{4,6}') + t.equal(doc.found, false, 'min not met') + + doc = nlp('hello John, Lisa, Fred').match('#FirstName{1,2}') + t.equal(doc.eq(0).text(), 'John, Lisa', 'max-match') + t.equal(doc.eq(1).text(), 'Fred', 'max-over-run') + t.end() +})