Skip to content

Commit

Permalink
Merge pull request #927 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain authored Jun 3, 2022
2 parents 536c4e8 + d9abd3c commit f78f38f
Show file tree
Hide file tree
Showing 23 changed files with 151 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:

strategy:
matrix:
node-version: [12.x, 14.x, 15.x, 16.x]
node-version: [12.x, 14.x, 18.x]
os: [macos-latest, ubuntu-latest, windows-latest]

steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ jobs:
${{ runner.os }}-node-
- run: npm ci
- run: npm i c8 codecov
- run: npm i -g c8 codecov
- run: c8 -r lcov -n 'src/**/*' -n 'plugins/**/*' npm run test && codecov -t 15039ad1-b495-48cd-b4a0-bcf124c9b318
# - run: npm run codecov
4 changes: 2 additions & 2 deletions builds/compromise.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/one/compromise-one.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/one/compromise-one.mjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/three/compromise-three.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/three/compromise-three.mjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/two/compromise-two.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/two/compromise-two.mjs

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ While all _Major_ releases should be reviewed, our only two _large_ releases are
-->

#### 14.2.1 [June 2022]
- **[fix]** - double-contraction issue #935
- **[fix]** - .not() memleak #926

#### 14.2.0 [June 2022]
- **[fix]** - speed improvements
- **[fix]** - bug with fast-or possessive matches
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
"name": "compromise",
"description": "modest natural language processing",
"version": "14.2.0",
"version": "14.2.1",
"main": "./src/three.js",
"unpkg": "./builds/compromise.js",
"type": "module",
Expand Down
26 changes: 20 additions & 6 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,28 @@ nlp.plugin(plg)
// // b.debug()
// console.log(b)

// nlp(` from malnutrition, chest diseases, cardiovascular disorders, skin problems, infectious diseases and the aftereffects of assaults and rape.`).debug()
//

let txt = ''

let doc = nlp(`i'm good`)
doc.verbs().toPresent()
doc.debug()
// let txt = `
// Test
// ***
// book's plane's farm's field's`
// let txt = `book's plane's`
let txt = `he's foo she's`
let doc = nlp(txt)
console.log(doc.docs[0].map(t => t.index))
doc.terms().debug()

// console.log(usedWords)
// let doc = nlp('Maris Piper potatoes')
// doc.nouns().toSingular()
// console.log(doc.text())

// nlp("Anna's eating lunch.").debug()

// let doc = nlp(`i'm good`)
// doc.verbs().toPresent()
// doc.debug()



Expand Down
7 changes: 4 additions & 3 deletions src/1-one/change/compute/uuid.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Base 36 (numbers+ascii)
TTT|NNN|II|R
TTT -> 46 seconds since load
TTT -> 46 terms since load
NNN -> 46 thousand sentences (>1 inf-jest)
II -> 1,200 words in a sentence (nuts)
R -> 1-36 random number
Expand All @@ -28,7 +28,7 @@ collisions are more-likely after
after 46-thousand sentences
*/
const start = new Date().getTime()
let start = 0

const pad3 = (str) => {
str = str.length < 3 ? '0' + str : str
Expand All @@ -37,7 +37,8 @@ const pad3 = (str) => {

const toId = function (term) {
let [n, i] = term.index || [0, 0]
var now = new Date().getTime() - start;
start += 1
var now = start;
now = parseInt(now, 10)

//don't overflow time
Expand Down
3 changes: 2 additions & 1 deletion src/1-one/contraction-one/compute/contractions/_splice.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ const insertContraction = function (document, point, words) {
if (!words || words.length === 0) {
return
}
words = words.map((word) => {
words = words.map((word, i) => {
word.implicit = word.text
word.machine = word.text
word.pre = ''
word.post = ''
word.text = ''
word.normal = ''
word.index = [n, w + i]
return word
})
if (words[0]) {
Expand Down
6 changes: 4 additions & 2 deletions src/1-one/contraction-one/compute/contractions/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const reTag = function (terms, view, start, len) {
end += 1
}
tmp.ptrs = [[0, start, end]]
tmp.compute(['lexicon', 'preTagger', 'index'])
tmp.compute(['lexicon', 'preTagger'])
}

const byEnd = {
Expand Down Expand Up @@ -61,7 +61,9 @@ const knownOnes = function (list, term, before, after) {
}

const toDocs = function (words, view) {
return view.fromText(words.join(' ')).docs[0]
let doc = view.fromText(words.join(' '))
doc.compute('id')
return doc.docs[0]
}

//really easy ones
Expand Down
3 changes: 2 additions & 1 deletion src/2-two/contraction-two/compute/_splice.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ const insertContraction = function (document, point, words) {
if (!words || words.length === 0) {
return
}
words = words.map((word) => {
words = words.map((word, i) => {
word.implicit = word.text
word.machine = word.text
word.pre = ''
word.post = ''
word.text = ''
word.normal = ''
word.index = [n, w + i]
return word
})
if (words[0]) {
Expand Down
20 changes: 17 additions & 3 deletions src/2-two/contraction-two/compute/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ import isPossessive from './isPossessive.js'

const byApostrophe = /'/

// cheap renumbering of term positions, limited to the given sentence
const reIndex = function (terms) {
  // overwrite the second slot of an existing index pair with the term's position
  const setPosition = (term, pos) => {
    // terms that carry no index are left untouched
    if (!term.index) {
      return
    }
    term.index[1] = pos
  }
  terms.forEach(setPosition)
}

// run tagger on our new implicit terms
const reTag = function (terms, view, start, len) {
let tmp = view.update()
Expand All @@ -19,7 +28,9 @@ const reTag = function (terms, view, start, len) {
end += 1
}
tmp.ptrs = [[0, start, end]]
tmp.compute(['lexicon', 'preTagger', 'index'])
tmp.compute(['lexicon', 'preTagger'])
// don't force a reindex of the whole document
reIndex(terms)
}

const byEnd = {
Expand All @@ -38,9 +49,12 @@ const byEnd = {
}

const toDocs = function (words, view) {
return view.fromText(words.join(' ')).docs[0]
let doc = view.fromText(words.join(' '))
doc.compute('id')
return doc.docs[0]
}


//really easy ones
const contractionTwo = (view) => {
let { world, document } = view
Expand All @@ -66,7 +80,7 @@ const contractionTwo = (view) => {
if (words) {
words = toDocs(words, view)
splice(document, [n, i], words)
reTag(document[n], view)
reTag(document[n], view, i, words.length)
continue
}
}
Expand Down
8 changes: 7 additions & 1 deletion src/API/methods/compute.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ const fns = {
}
// allow a list of methods
else if (isArray(input)) {
input.forEach(name => world.compute.hasOwnProperty(name) && compute[name](this))
// run each requested compute method, warning about any name that is not registered
input.forEach(name => {
  if (world.compute.hasOwnProperty(name)) {
    compute[name](this)
  } else {
    // report the specific missing method, not the whole requested list
    console.warn('no compute:', name) // eslint-disable-line
  }
})
}
// allow a custom compute function
else if (typeof input === 'function') {
Expand Down
17 changes: 9 additions & 8 deletions src/API/methods/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ const utils = {
},
/** return individual terms*/
terms: function (n) {
let m = this.match('.')
// this is a bit faster than .match('.')
let ptrs = []
this.docs.forEach((terms) => {
terms.forEach((term) => {
let [y, x] = term.index || []
ptrs.push([y, x, x + 1])
})
})
let m = this.update(ptrs)
// let ptrs = []
// this.docs.forEach((terms) => {
// terms.forEach((term) => {
// let [y, x] = term.index || []
// ptrs.push([y, x, x + 1])
// })
// })
// let m = this.update(ptrs)
return typeof n === 'number' ? m.eq(n) : m
},

Expand Down
2 changes: 1 addition & 1 deletion src/_version.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export default '14.2.0'
export default '14.2.1'
14 changes: 14 additions & 0 deletions tests/two/contractions/contract.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,17 @@ is`

t.end()
})

// several contractions in one sentence must each expand, in order, without clobbering each other
test('multiple contraction in sentence', function (t) {
  let doc = nlp(`he's foo she's`)
  t.equal(doc.terms().length, 5, here + 'multi-contraction-count')
  t.equal(doc.has('he is foo she is'), true, here + 'multi-contraction-order')

  // back-to-back contractions
  doc = nlp(`he's she's`)
  t.equal(doc.terms().length, 4, here + 'multi-contraction-count-2')
  t.equal(doc.has('he is she is'), true, here + 'multi-contraction-order-2')

  // the same contraction twice - both expansions should be matchable
  doc = nlp(`he's dead, he's dead`)
  // distinct label, so a failure here is not confused with the first count assertion
  t.equal(doc.match('he is dead').length, 2, here + 'multi-contraction-match-count')
  t.end()
})
49 changes: 49 additions & 0 deletions tests/two/misc/term-ids.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import test from 'tape'
import nlp from '../_lib.js'
const here = '[two/term-ids] '

let txt = `The hours have passed like stones being pushed up a mountain. For all of the luxury that surrounds us, I can't shake this feeling of unease that's slowly creeping in through the back of my mind. I can tell that Johna and Temmy have noticed it as well—it's just something about the air here that makes me uneasy. Joanna feigns disinterest but behind her shades she's studying the surroundings like the seasoned detective she is.`

// every term gets an id, ids do not collide, and .terms() returns one pointer per term
test('term-id validation', function (t) {
  // Build the input locally. Reassigning the module-level `txt` here would
  // leave every later test in this file with a 4x-larger input, depending on run order.
  const text = txt.repeat(4)
  const doc = nlp(text)
  const badTerm = []
  const seen = new Set()
  let words = 0
  // ensure they all have ids
  doc.docs.forEach(terms => {
    terms.forEach(term => {
      words += 1
      if (!term.id) {
        badTerm.push(term)
      }
      // collisions should be very unlikely
      if (seen.has(term.id)) {
        badTerm.push(term)
      }
      seen.add(term.id)
    })
  })
  // if (badTerm.length) {
  //   console.log('dupe terms:', badTerm)
  // }
  t.equal(badTerm.length, 0, here + 'terms have unique-ids')
  // the view returned by .terms() is compared against the raw term count
  const terms = doc.terms()
  t.equal(terms.length, words, here + 'right term count')

  t.equal(terms.ptrs.length, words, here + 'right pointer count')
  t.end()
})

// chained .not() calls on a large document must shrink the result, not grow it
// (titled separately from the id-validation test so reports can tell them apart)
test('not() memleak', function (t) {
  const text = (txt + '\n').repeat(50)
  const doc = nlp(text)
  let m = doc.terms()
  // term count before any filtering
  const max = m.length
  m = m.not('#Pronoun')
  m = m.not('#Preposition')
  m = m.not('#Conjunction')
  m = m.not('#Determiner')
  t.equal(m.length < max, true, here + ' no .not() memleak')
  t.end()
})
2 changes: 1 addition & 1 deletion tests/two/transform/clone.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ test('clone does not leak', function (t) {
m = m.not('foo')
m = m.if('.')
m = m.eq(0).tag('Yeah')
m.compute(['normal', 'tagger', 'foo'])
m.compute(['normal', 'preTagger', 'postTagger', 'contractions'])
m = m.all()
}
// is it still unchanged?
Expand Down

0 comments on commit f78f38f

Please sign in to comment.