Skip to content

Commit

Permalink
Merge pull request #930 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain authored Jun 15, 2022
2 parents f3d66e7 + c7e38a7 commit d123804
Show file tree
Hide file tree
Showing 54 changed files with 469 additions and 333 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ nlp.extend({
| [Tokenization](https://observablehq.com/@spencermountain/compromise-tokenization) | [Text](https://observablehq.com/@spencermountain/compromise-text) | [Strict](https://observablehq.com/@spencermountain/compromise-strict) |
| [Named-Entities](https://observablehq.com/@spencermountain/topics-named-entity-recognition) | [Utils](https://observablehq.com/@spencermountain/compromise-utils) | [Penn-tags](https://observablehq.com/@spencermountain/compromise-penn-tags) |
| [Whitespace](https://observablehq.com/@spencermountain/compromise-whitespace) | [Verbs](https://observablehq.com/@spencermountain/verbs) | [Typeahead](https://observablehq.com/@spencermountain/compromise/compromise-typeahead) |
| [World data](https://observablehq.com/@spencermountain/compromise-world) | [Normalization](https://observablehq.com/@spencermountain/compromise-normalization) | |
| [World data](https://observablehq.com/@spencermountain/compromise-world) | [Normalization](https://observablehq.com/@spencermountain/compromise-normalization) | [Sweep](https://observablehq.com/@spencermountain/compromise-sweep) |
| [Fuzzy-matching](https://observablehq.com/@spencermountain/compromise-fuzzy-matching) | [Typescript](https://observablehq.com/@spencermountain/compromise-typescript) | [Mutation](https://observablehq.com/@spencermountain/compromise-mutation) |


Expand Down
4 changes: 2 additions & 2 deletions builds/compromise.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/one/compromise-one.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/one/compromise-one.mjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/three/compromise-three.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/three/compromise-three.mjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/two/compromise-two.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions builds/two/compromise-two.mjs

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ While all _Major_ releases should be reviewed, our only _large_ releases are **v
<!-- #### [Unreleased]
-->

#### 14.3.1 [June 2022]
- **[fix]** - missed caches in .sweep()
- **[new]** - .out('hash') and `.json({hash:true})`

#### 14.3.0 [June 2022]
- **[fix]** - unwanted logging in compromise/one
- **[fix]** - dependency export path for react-native builds #928
Expand Down
1 change: 0 additions & 1 deletion data/lexicon/adjectives/comparables.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ export default [
'icy',
'ill',
'insecure',
'instant',
'intellegent',
'intermediate',
'intimate',
Expand Down
3 changes: 3 additions & 0 deletions data/lexicon/nouns/singulars.js
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,9 @@ export default [
'incentive',
'parish',
'tv',

'stone',
'tributary',
]


2 changes: 2 additions & 0 deletions data/lexicon/switches/adj-noun.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export default [
'incumbent',
'individual',
'innocent',
'instant',
'juvenile',
'justice',
'latter',
Expand Down Expand Up @@ -99,6 +100,7 @@ export default [
'token',
'top',
'total',
'trial',
'undergraduate',
'underground',
'upstairs',
Expand Down
1 change: 1 addition & 0 deletions data/lexicon/switches/noun-verb.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export default [
'breach',
'break',
'bridge',
'bribe',
'broadcast',
'bubble',
'buckle',
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
"name": "compromise",
"description": "modest natural language processing",
"version": "14.3.0",
"version": "14.3.1",
"main": "./src/three.js",
"unpkg": "./builds/compromise.js",
"type": "module",
Expand Down Expand Up @@ -114,4 +114,4 @@
"_tests/**"
],
"license": "MIT"
}
}
92 changes: 77 additions & 15 deletions scratch.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,86 @@
/* eslint-disable no-console, no-unused-vars */
import nlp from './src/one.js'
import plg from './plugins/dates/src/plugin.js'
nlp.plugin(plg)
import nlp from './src/two.js'
// import plg from './plugins/dates/src/plugin.js'
// nlp.plugin(plg)

// nlp.verbose('tagger')

// let doc = nlp('one match two three')
// let a = doc.match('match two')
// let b = a.remove('two')
// console.log(a)
// // a.debug()
// // b.debug()
// console.log(b)
let txt = ''
// tagging/root issues June 10
// "Okay, okay, okay should I be scared?"
// "This is when I started to get scared."

// "A 40-year-old man called me swell."
// "that's just not swell"

txt = "Let’s get you into wardrobe for a fitting."
// txt = "I was an expert"
// txt = "definitely worth a rental."
// txt = "keeping the matter a secret"

let doc = nlp(`let's not`)
// txt = "My pants don't even fit right"
// txt = "In a baseball hat fit for a queen"
// txt = "Srinath will be fit in three weeks"
// txt = "does the different part fit together"

// txt = "License fee for beach vendors hiked"
// txt = "TTC to hike fares by 10 cents in March"

// txt = "He deserted from the Dragoons at"
// txt = "banks wear deserted look"

// txt = "CBI catches DD acting director taking bribe"
// txt = "How do I keep kissing you, and catch my breath?"

// txt = " throw stones, Dick, said Jaqueline."
// txt = "Loblaws reducing food prices at Toronto stores"
// txt = "Stock prices closed higher in Stockholm"

// txt = `Upload documents required to verify your eligibility`

txt = `It sure seemed that way.`
txt = `I am not sure when to take.`
txt = `Pretty sure my arm is broke`
txt = `Not sure about the details.`
txt = `Sure you don't wanna pretzel?`
txt = `You, you sure you need shoes?`
txt = `Sure enough, no one was there.`
txt = `Are you sure you wanna do this?`
txt = `make sure that it's truly lost.`
txt = `You sure this is what you want?`

// let txt = `There are no open wounds `

// let matches = [
// { match: m }
// ]
// let net = nlp.buildNet(matches)





// banks wear
// %Plural|Verb% %Noun|Verb%

// let doc = nlp(`he will have been walking`).debug()
// let doc = nlp(`Bob has handled`).debug()
// doc.match('have').tag('Auxiliary')
// doc.verbs().toPresent()
// console.log(doc.has('he has really walked'))
// let net = nlp.buildNet(matches)
// console.log(net)
// console.log(net.hooks)
// let doc = nlp(txt)
// let doc = nlp(`To sit on my throne as the Prince of Bel Air`)
// let m = doc.match(net).debug()
// doc.compute('root')
// console.log(doc.text('root'))
// doc.debug()
// doc.verbs().toFutureTense()
doc.debug()
// doc.match(net).debug()
// console.log(doc.has(net))
// doc.debug()
// console.log(doc.has('re-purpose'))

// let txt = `follow-up`
Expand Down Expand Up @@ -78,14 +140,14 @@ doc.debug()
// doc.match('#Person').debug()

// let net = nlp.buildNet([
// { match: 'every single #Noun' },
// { match: 'not (a|one) #Singular' },
// { match: 'every single #Noun' },
// { match: 'not (a|one) #Singular' },
// ])
// let doc = nlp('i saw every single house. i met none. ')
// doc.match(net).debug()
// let m = nlp([['first.', 'foo bar']]).debug()
// let matches = [
// { match: 'third' },
// { match: 'third' },
// ]
// let net = nlp.buildNet(matches)
// let doc = nlp(`first. second. third`)
Expand Down
3 changes: 3 additions & 0 deletions src/1-one/cache/methods/cacheDoc.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ const createCache = function (document) {
if (term.machine) {
stuff.add(term.machine)
}
if (term.root) {
stuff.add(term.root)
}
// cache slashes words, etc
if (term.alias) {
term.alias.forEach(str => stuff.add(str))
Expand Down
11 changes: 7 additions & 4 deletions src/1-one/output/api/json.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { textFromTerms } from './_text.js'
import { textFromTerms } from './lib/_text.js'
import fmts from './_fmts.js'
import hash from './lib/hash.js'


const defaults = {
text: true,
terms: true,
Expand All @@ -12,15 +15,15 @@ const merge = function (a, b) {
}

const fns = {
text: (terms) => {
return textFromTerms(terms, { keepPunct: true }, false)
},
text: (terms) => textFromTerms(terms, { keepPunct: true }, false),
normal: (terms) => textFromTerms(terms, merge(fmts.normal, { keepPunct: true }), false),
implicit: (terms) => textFromTerms(terms, merge(fmts.implicit, { keepPunct: true }), false),

machine: (terms) => textFromTerms(terms, opts, false),
root: (terms) => textFromTerms(terms, merge(opts, { form: 'root' }), false),

hash: (terms) => hash(textFromTerms(terms, { keepPunct: true }, false)),

offset: (terms) => {
let len = fns.text(terms).length
return {
Expand Down
File renamed without changes.
73 changes: 73 additions & 0 deletions src/1-one/output/api/lib/hash.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/* eslint-disable no-bitwise */
/* eslint-disable no-mixed-operators */
/* eslint-disable no-multi-assign */

// https://github.com/jbt/tiny-hashes/
// Per-step additive constants from the MD5 spec: floor(2^32 * |sin(n + 1)|) for n = 0..63.
// The index `k[n]` is resolved before `++n` runs, so slot n receives the value for n + 1.
// `x % Math.PI` folds the angle into [0, PI), where sine is never negative, so no abs()
// is needed; `0 | ...` wraps the product into a signed 32-bit integer.
const k = [];
for (let n = 0; n < 64;) {
  k[n] = 0 | Math.sin(++n % Math.PI) * 4294967296;
}

// Re-encode a string as a "byte string": one character (code 0-255) per UTF-8 byte.
// encodeURI() percent-escapes '%' and every non-ASCII character as its UTF-8 bytes;
// the replace() turns each %XX escape back into the single character with that code.
// (A decodeURI(encodeURI(s)) round-trip would hand back the original UTF-16 string, and
// char codes above 255 would then spill into neighbouring byte lanes when packed below.)
const toByteString = function (str) {
  return encodeURI(str).replace(/%([0-9A-F]{2})/g, (_, hex) => String.fromCharCode(parseInt(hex, 16)));
};

/**
 * Compute the MD5 digest of a string.
 *
 * Compact implementation adapted from tiny-hashes. The text is hashed as its UTF-8
 * byte sequence, so the result matches standard MD5 tools for non-ASCII input too.
 * Not suitable for security purposes - MD5 is only used here as a content fingerprint.
 *
 * @param {string} s - text to hash (non-strings are coerced by encodeURI)
 * @returns {string} 32-character lowercase hexadecimal digest
 * @throws {URIError} if `s` contains a lone surrogate (raised by encodeURI)
 */
export default function md5(s) {
  // b, c, d: working registers; i: block offset (local, so calls never touch module state)
  // h: running state A, B, C, D - the last two are the bitwise complements of the first two
  // j: message bytes plus the mandatory 0x80 padding byte; later reused as the step counter
  // a: starts as a length/index, later reused as the per-block working state
  let b, c, d, i,
    h = [b = 0x67452301, c = 0xEFCDAB89, ~b, ~c],
    words = [],
    j = toByteString(s) + '\x80',
    a = j.length;

  // `a` becomes the message length in bytes; `s` is reused as the padded word count,
  // rounded up so that it ends on index 15 of a 16-word block
  s = (--a / 4 + 2) | 15;

  // message length in bits goes in the second-to-last word; the last word (high half of
  // the 64-bit length) is left unset and reads as 0 through `~~` below
  words[--s] = a * 8;

  // pack bytes into little-endian 32-bit words, walking from the 0x80 byte down to index 0
  // (`~a` is 0 only when a === -1; shift counts are taken modulo 32, so 8 * a acts as 8 * (a % 4))
  for (; ~a;) {
    words[a >> 2] |= j.charCodeAt(a) << 8 * a--;
  }

  // process the message one 16-word block at a time
  for (i = j = 0; i < s; i += 16) {
    a = h;

    // 64 steps per block. The loop body loads b and c from the current state; the update
    // expression then computes the next state [d, newB, b, c]:
    //  - `a = j >> 4` picks the round (0-3), which selects the mixing function,
    //    the message-word schedule and the row of rotation amounts
    //  - `x << n | x >>> -n` is a 32-bit left-rotate by n
    for (; j < 64;
      a = [
        d = a[3],
        (
          b +
          ((d =
            a[0] +
            [
              b & c | ~b & d,
              d & b | ~d & c,
              b ^ c ^ d,
              c ^ (b | ~d)
            ][a = j >> 4] +
            k[j] +
            ~~words[i | [
              j,
              5 * j + 1,
              3 * j + 5,
              7 * j
            ][a] & 15]
          ) << (a = [
            7, 12, 17, 22,
            5, 9, 14, 20,
            4, 11, 16, 23,
            6, 10, 15, 21
          ][4 * a + j++ % 4]) | d >>> -a)
        ),
        b,
        c
      ]
    ) {
      b = a[1] | 0;
      c = a[2];
    }
    // fold this block's result into the running state; leaves j at 0 for the next block
    for (j = 4; j;) h[--j] += a[j];
  }

  // serialise the four state words as little-endian hex; `1 ^ j` swaps the two nibbles
  // of each byte so the high nibble is written first
  for (s = ''; j < 32;) {
    s += ((h[j >> 3] >> ((1 ^ j++) * 4)) & 15).toString(16);
  }

  return s;
}

// console.log(md5('food-safety'))
5 changes: 4 additions & 1 deletion src/1-one/output/api/out.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import debug from './debug/index.js'
import wrap from './wrap.js'

import hash from './lib/hash.js'

const isObject = val => {
return Object.prototype.toString.call(val) === '[object Object]'
Expand Down Expand Up @@ -35,6 +35,9 @@ const out = function (method) {
if (method === 'machine' || method === 'reduced') {
return this.text('machine')
}
if (method === 'hash' || method === 'md5') {
return hash(this.text())
}

// json data formats
if (method === 'json') {
Expand Down
2 changes: 1 addition & 1 deletion src/1-one/output/api/text.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { textFromDoc } from './_text.js'
import { textFromDoc } from './lib/_text.js'
import fmts from './_fmts.js'

const isObject = val => {
Expand Down
6 changes: 6 additions & 0 deletions src/1-one/output/plugin.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import api from './api/index.js'
import hash from './api/lib/hash.js'

// Plugin descriptor for the output module: bundles the imported `api` with the
// hash helper, which is exposed under `methods.one.hash`.
const methods = { one: { hash } }

export default { api, methods }
9 changes: 3 additions & 6 deletions src/1-one/sweep/lib.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ export default {
// compile a list of matches into a match-net
buildNet: function (matches) {
const methods = this.methods()
let { index, always } = methods.one.buildNet(matches, this.world())
return {
isNet: true,
index,
always
}
let net = methods.one.buildNet(matches, this.world())
net.isNet = true
return net
}
}
Loading

0 comments on commit d123804

Please sign in to comment.