Skip to content

Commit

Permalink
all tests passing
Browse files Browse the repository at this point in the history
  • Loading branch information
spencermountain committed Apr 4, 2017
1 parent 8a2dd23 commit afc2ff3
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 47 deletions.
7 changes: 4 additions & 3 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ var nlp = require('./src/index');
// nlp.verbose('tagger');
// const corpus = require('nlp-corpus');
// let sotu = corpus.sotu.parsed()[23];
const fresh = require('./test/unit/lib/freshPrince.js');
// const fresh = require('./test/unit/lib/freshPrince.js');

// bug.1
// .? vs *
Expand All @@ -31,5 +31,6 @@ const fresh = require('./test/unit/lib/freshPrince.js');
// r.tag('#Person');
// console.timeEnd('tag');

let r = nlp('work with F.B.I.').nouns();
r.debug();
let r = nlp('the F.B.I.');
// console.log(r.list[0].terms[1].normal);
console.log(r.out('normal'));
16 changes: 13 additions & 3 deletions src/term/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
const fns = require('./paths').fns;
const build_whitespace = require('./whitespace');
const makeUID = require('./makeUID');
//normalization
const addNormal = require('./methods/normalize/normalize').addNormal;
const addRoot = require('./methods/normalize/root');

const Term = function(str) {
this._text = fns.ensureString(str);
Expand All @@ -10,12 +13,13 @@ const Term = function(str) {
let parsed = build_whitespace(this._text);
this.whitespace = parsed.whitespace;
this._text = parsed.text;
// console.log(this.whitespace, this._text);
this.parent = null;
this.silent_term = '';
//normalize the _text
addNormal(this);
addRoot(this);
//has this term been modified
this.dirty = false;
this.normalize();
//make a unique id for this term
this.uid = makeUID(this.normal);

Expand All @@ -42,6 +46,12 @@ const Term = function(str) {
});
};

//run each time a new text is set
/** re-derive this term's normalized forms: calls addNormal(this) and then addRoot(this) (both required from ./methods/normalize), and returns this same term */
Term.prototype.normalize = function() {
addNormal(this);
addRoot(this);
return this;
};

/** where in the sentence is it? zero-based. */
Term.prototype.index = function() {
Expand All @@ -60,7 +70,7 @@ Term.prototype.clone = function() {
return term;
};

require('./methods/normalize')(Term);
// require('./methods/normalize')(Term);
require('./methods/misc')(Term);
require('./methods/out')(Term);
require('./methods/tag')(Term);
Expand Down
23 changes: 4 additions & 19 deletions src/term/methods/misc.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
'use strict';
const bestTag = require('./bestTag');
const isAcronym = require('./normalize/isAcronym');


//regs-
const periodAcronym = /([A-Z]\.)+[A-Z]?$/;
const oneLetterAcronym = /^[A-Z]\.$/;
const noPeriodAcronym = /[A-Z]{3}$/;
const hasVowel = /[aeiouy]/i;
const hasLetter = /[a-z]/;
const hasNumber = /[0-9]/;
Expand All @@ -17,24 +16,10 @@ const addMethods = (Term) => {
bestTag: function () {
return bestTag(this);
},

/** does it appear to be an acronym, like FBI or M.L.B. */
/** is this term like F.B.I. or NBA */
isAcronym: function () {
//like N.D.A
if (periodAcronym.test(this.text) === true) {
return true;
}
//like 'F.'
if (oneLetterAcronym.test(this.text) === true) {
return true;
}
//like NDA
if (noPeriodAcronym.test(this.text) === true) {
return true;
}
return false;
return isAcronym(this._text);
},

/** check if it is word-like in english */
isWord: function () {
let t = this;
Expand Down
21 changes: 0 additions & 21 deletions src/term/methods/normalize/index.js

This file was deleted.

23 changes: 23 additions & 0 deletions src/term/methods/normalize/isAcronym.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
'use strict';
//regs - a string counts as an acronym if any one of these matches
const acronymPatterns = [
  //ends with period-separated capitals, like N.D.A
  /([A-Z]\.)+[A-Z]?$/,
  //exactly one capital and a period, like 'F.'
  /^[A-Z]\.$/,
  //ends with three capitals in a row, like NDA
  /[A-Z]{3}$/
];

/**
 * does the string look like an acronym, such as FBI or M.L.B.
 * @param {string} str - the text to check
 * @returns {boolean} true when at least one acronym pattern matches
 */
const isAcronym = function (str) {
  return acronymPatterns.some((pattern) => pattern.test(str));
};
module.exports = isAcronym;
4 changes: 3 additions & 1 deletion src/term/methods/normalize/normalize.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict';
const killUnicode = require('./unicode');
const isAcronym = require('./isAcronym');


//some basic operations on a string to reduce noise
exports.normalize = function(str) {
Expand Down Expand Up @@ -32,7 +34,7 @@ exports.addNormal = function (term) {
let str = term._text || '';
str = exports.normalize(str);
//compact acronyms
if (term.isAcronym()) {
if (isAcronym(term._text)) {
str = str.replace(/\./g, '');
}
//nice-numbers
Expand Down

0 comments on commit afc2ff3

Please sign in to comment.