diff --git a/client_side/nlp.js b/client_side/nlp.js index 367002bd1..fbd1443b0 100644 --- a/client_side/nlp.js +++ b/client_side/nlp.js @@ -5427,7 +5427,7 @@ var date_extractor = (function() { // console.log(date_extractor("1999")) //wrapper for value's methods -var Value = function(str, next, last, token) { +var Value = function(str, sentence, word_i) { var the = this the.word = str || ''; @@ -5932,11 +5932,14 @@ var inflect = (function() { // console.log(inflect.singularize('mayors of chicago')=="mayor of chicago") //wrapper for noun's methods -var Noun = function(str, next, last, token) { +var Noun = function(str, sentence, word_i) { var the = this + var token, next; + if(sentence!==undefined && word_i!==undefined){ + token=sentence.tokens[word_i] + next=sentence.tokens[word_i+i] + } the.word = str || ''; - the.next = next - the.last = last if (typeof module !== "undefined" && module.exports) { parts_of_speech = require("../../data/parts_of_speech") @@ -6089,7 +6092,11 @@ var Noun = function(str, next, last, token) { "ss", "of", "the", - "for" + "for", + "and", + "&", + "co", + "sons" ] l= blacklist.length for (i = 0; i < l; i++) { @@ -6104,15 +6111,18 @@ var Noun = function(str, next, last, token) { return true } } - //see if noun has a first-name - var names = Object.keys(firstnames) - l = names.length - var firstname=the.word.split(' ')[0].toLowerCase() - for (i = 0; i < l; i++) { - if (names[i]===firstname) { - return true - } + //see if noun has a known first-name + var names=the.word.split(' ').map(function(a){ + return a.toLowerCase() + }) + if(firstnames[names[0]]){ + return true + } + //(test middle name too, if there's one) + if(names.length> 2 && firstnames[names[1]]){ + return true } + //if it has an initial between two words if(the.word.match(/[a-z]{3,20} [a-z]\.? [a-z]{3,20}/i)){ return true @@ -6120,6 +6130,100 @@ var Noun = function(str, next, last, token) { return false } + //decides if it deserves a he, she, they, or it + the.pronoun=function(){ + + //if it's a person try to classify male/female + if(the.is_person()){ + var names=the.word.split(' ').map(function(a){ + return a.toLowerCase() + }) + if(firstnames[names[0]]==="m" || firstnames[names[1]]=="m"){ + return "he" + } + if(firstnames[names[0]]==="f" || firstnames[names[1]]=="f" ){ + return "she" + } + //test some honourifics + if(the.word.match(/^(mrs|miss|ms|misses|mme|mlle)\.? /,'i')){ + return "she" + } + if(the.word.match(/\b(mr|mister|sr|jr)\b/,'i')){ + return "he" + } + //if it's a known unisex name, don't try guess it. be safe. + if(firstnames[names[0]]==="a" || firstnames[names[1]]=="a" ){ + return "they" + } + //if we think it's a person, but still don't know the gender, do a little guessing + if(names[0].match(/[aeiy]$/)){//if it ends in a 'ee or ah', female + return "she" + } + if(names[0].match(/[ou]$/)){//if it ends in a 'oh or uh', male + return "he" + } + if(names[0].match(/(nn|ll|tt)/)){//if it has double-consonants, female + return "she" + } + //fallback to 'singular-they' + return "they" + } + + //not a person + if(the.is_plural()){ + return "they" + } + + return "it" + } + + //tokens that refer to the same thing. "[obama] is cool, [he] is nice." + the.referenced_by = function() { + //if it's named-noun, look forward for the pronouns pointing to it -> '... he' + if(token && token.pos.tag!=="PRP"){ + var prp=the.pronoun() + //look at rest of sentence + var interested=sentence.tokens.slice(word_i+1, sentence.tokens.length) + //add next sentence too, could go further.. + if(sentence.next){ + interested=interested.concat(sentence.next.tokens) + } + //find the matching pronouns, and break if another noun overwrites it + var matches=[] + for(var i=0; i entities //most of this logic is in ./parents/noun diff --git a/client_side/nlp.min.js b/client_side/nlp.min.js index 330bf2cd6..243b46a52 100644 --- a/client_side/nlp.min.js +++ b/client_side/nlp.min.js @@ -1,5 +1,5 @@ /*nlp_comprimise by @spencermountain in 2015*/ var nlp=function(){var a=function(){var a=["infinitive","gerund","past","present","doer","future"],b=[["arise","arising","arose","_s","_r"],["babysit","_ting","babysat","_s","_ter"],["be","_ing","was","is",""],["beat","_ing","_","_s","_er"],["become","becoming","became","_s","_r"],["bend","_ing","bent","_s","_er"],["begin","_ning","began","_s","_ner"],["bet","_ting","_","_s","_ter"],["bind","_ing","bound","_s","_er"],["bite","biting","bit","_s","_r"],["bleed","_ing","bled","_s","_er"],["blow","_ing","blew","_s","_er"],["break","_ing","broke","_s","_er"],["breed","_ing","bred","_s","_er"],["bring","_ing","brought","_s","_er"],["broadcast","_ing","_","_s","_er"],["build","_ing","built","_s","_er"],["buy","_ing","bought","_s","_er"],["catch","_ing","caught","_es","_er"],["choose","choosing","chose","_s","_r"],["come","coming","came","_s","_r"],["cost","_ing","_","_s","_er"],["cut","_ting","_","_s","_ter"],["deal","_ing","_t","_s","_er"],["dig","_ging","dug","_s","_ger"],["do","_ing","did","_es","_er"],["draw","_ing","drew","_s","_er"],["drink","_ing","drank","_s","_er"],["drive","driving","drove","_s","_r"],["eat","_ing","ate","_s","_er"],["fall","_ing","fell","_s","_er"],["feed","_ing","fed","_s","_er"],["feel","_ing","felt","_s","_er"],["fight","_ing","fought","_s","_er"],["find","_ing","found","_s","_er"],["fly","_ing","flew","_s","flier"],["forbid","_ing","forbade","_s"],["forget","_ing","forgot","_s","_er"],["forgive","forgiving","forgave","_s","_r"],["freeze","freezing","froze","_s","_r"],["get","_ting","got","_s","_ter"],["give","giving","gave","_s","_r"],["go","_ing","went","_es","_er"],["grow","_ing","grew","_s","_er"],["hang","_ing","hung","_s","_er"],["have","having","had","has"],["hear","_ing","_d","_s","_er"],["hide","hiding","hid","_s","_r"],["hit","_ting","_","_s","_ter"],["hold","_ing","held","_s","_er"],["hurt","_ing","_","_s","_er"],["know","_ing","knew","_s","_er"],["relay","_ing","_ed","_s","_er"],["lay","_ing","laid","_s","_er"],["lead","_ing","led","_s","_er"],["leave","leaving","left","_s","_r"],["lend","_ing","lent","_s","_er"],["let","_ting","_","_s","_ter"],["lie","lying","lay","_s","_r"],["light","_ing","lit","_s","_er"],["lose","losing","lost","_s","_r"],["make","making","made","_s","_r"],["mean","_ing","_t","_s","_er"],["meet","_ing","met","_s","_er"],["pay","_ing","paid","_s","_er"],["put","_ting","_","_s","_ter"],["quit","_ting","_","_s","_ter"],["read","_ing","_","_s","_er"],["ride","riding","rode","_s","_r"],["ring","_ing","rang","_s","_er"],["rise","rising","rose","_s","_r"],["run","_ning","ran","_s","_ner"],["say","_ing","said","_s"],["see","_ing","saw","_s","_r"],["sell","_ing","sold","_s","_er"],["send","_ing","sent","_s","_er"],["set","_ting","_","_s","_ter"],["shake","shaking","shook","_s","_r"],["shine","shining","shone","_s","_r"],["shoot","_ing","shot","_s","_er"],["show","_ing","_ed","_s","_er"],["shut","_ting","_","_s","_ter"],["sing","_ing","sang","_s","_er"],["sink","_ing","sank","_s","_er"],["sit","_ting","sat","_s","_ter"],["slide","sliding","slid","_s","_r"],["speak","_ing","spoke","_s","_er"],["spend","_ing","spent","_s","_er"],["spin","_ning","spun","_s","_ner"],["spread","_ing","_","_s","_er"],["stand","_ing","stood","_s","_er"],["steal","_ing","stole","_s","_er"],["stick","_ing","stuck","_s","_er"],["sting","_ing","stung","_s","_er"],["strike","striking","struck","_s","_r"],["swear","_ing","swore","_s","_er"],["swim","_ing","swam","_s","_mer"],["swing","_ing","swung","_s","_er"],["take","taking","took","_s","_r"],["teach","_ing","taught","_s","_er"],["tear","_ing","tore","_s","_er"],["tell","_ing","told","_s","_er"],["think","_ing","thought","_s","_er"],["throw","_ing","threw","_s","_er"],["understand","_ing","understood","_s"],["wake","waking","woke","_s","_r"],["wear","_ing","wore","_s","_er"],["win","_ning","won","_s","_ner"],["withdraw","_ing","withdrew","_s","_er"],["write","writing","wrote","_s","_r"],["tie","tying","_d","_s","_r"],["obey","_ing","_ed","_s","_er"],["ski","_ing","_ied","_s","_er"],["boil","_ing","_ed","_s","_er"],["miss","_ing","_ed","_","_er"],["act","_ing","_ed","_s","_or"],["compete","competing","_d","_s","competitor"],["being","are","were","are"],["imply","_ing","implied","implies","implier"],["ice","icing","_d","_s","_r"],["develop","_ing","_","_s","_er"],["wait","_ing","_ed","_s","_er"],["aim","_ing","_ed","_s","_er"],["spill","_ing","spilt","_s","_er"],["drop","_ping","_ped","_s","_per"],["head","_ing","_ed","_s","_er"],["log","_ging","_ged","_s","_ger"],["rub","_bing","_bed","_s","_ber"],["smash","_ing","_ed","_es","_er"],["add","_ing","_ed","_s","_er"],["word","_ing","_ed","_s","_er"],["suit","_ing","_ed","_s","_er"],["be","am","was","am",""]],c=b.map(function(b){for(var c={},d=0;da;a++)for(b=g[e[a]].split(","),c=0;ca;a++)for(b=h[e[a]].split(","),c=0;ca;a+=1)f[i[a]]="a";return"undefined"!=typeof module&&module.exports&&(module.exports=f),f}(),m=function(a){"undefined"!=typeof module&&module.exports&&(k=require("../../data/lexicon/abbreviations"));var b=[],c=a.split(/(\S.+?[.\?!])(?=\s+|$|")/g);k=k.concat(["jan","feb","mar","apr","jun","jul","aug","sep","oct","nov","dec","sept","sep"]);var d=new RegExp("\\b("+k.join("|")+")[.!?] ?$","i"),e=new RegExp("[ |.][A-Z].?$","i"),f=new RegExp("\\.\\.\\.*$"),g=c.length; -for(i=0;i0&&(b.push(c[i]),c[i]=""));return 0===b.length?[a]:b};"undefined"!=typeof module&&module.exports&&(exports.sentences=m);var n=function(){var a=function(a,b){b=b||{};var c,d,e,f,g,h=b.min_count||1,i=b.max_size||5,j=/[^a-zA-Z'\-]+/g,k=[null],l=[];for(i++,c=1;i>=c;c++)k.push({});for(a=a.replace(j," ").replace(/^\s+/,"").replace(/\s+$/,""),a=a.toLowerCase(),a=a.split(/\s+/),c=0,f=a.length;f>c;c++)for(g=a[c],k[1][g]=(k[1][g]||0)+1,d=2;i>=d&&f>=c+d;d++)g+=" "+a[c+d-1],k[d][g]=(k[d][g]||0)+1;for(c=void 0,e=1;i>=e;e++){l[e]=[];var m=k[e];for(c in m)m.hasOwnProperty(c)&&m[c]>=h&&l[e].push({word:c,count:m[c],size:e})}return l=l.filter(function(a){return null!==a}),l=l.map(function(a){return a=a.sort(function(a,b){return b.count-a.count})})};return"undefined"!=typeof module&&module.exports&&(exports.ngram=a),a}(),o=function(){"undefined"!=typeof module&&module.exports&&(m=require("./sentence").sentences,d=require("../../data/lexicon/multiples"));var a=Object.keys(d).map(function(a){return a.split(" ")}),b=function(a){return a?(a=a.toLowerCase(),a=a.replace(/[,\.!:;\?\(\)]/,""),a=a.replace(/’/g,"'"),a=a.replace(/"/g,""),a.match(/[a-z0-9]/i)?a:""):""},c=function(a){return a.match(/\?$/)?"interrogative":a.match(/\!$/)?"exclamative":"declarative"},e=function(c){for(var d=[],e=c.map(function(a){return b(a)}),f=0;f0&&null!==a.match(/^[A-Z][a-z]/),punctuated:null!==a.match(/[,;:\(\)"]/)||void 0,end:c===d.length-1||void 0,start:0===c||void 0}});return{sentence:a,tokens:f,type:c(a)}})};return"undefined"!=typeof module&&module.exports&&(exports.tokenize=f),f}(),p=function(){var a={2:"²ƻ",3:"³ƷƸƹƺǮǯЗҘҙӞӟӠӡȜȝ",5:"Ƽƽ",8:"Ȣȣ","!":"¡","?":"¿Ɂɂ",a:"ªÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАДадѦѧӐӑӒӓƛɅ",b:"ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬбвъьѢѣҌҍҔҕƥƾ",c:"¢©ÇçĆćĈĉĊċČčƆƇƈȻȼͻͼͽϲϹϽϾϿЄСсєҀҁҪҫ",d:"ÐĎďĐđƉƊȡƋƌǷ",e:"ÈÉÊËèéêëĒēĔĕĖėĘęĚěƎƏƐǝȄȅȆȇȨȩɆɇΈΕΞΣέεξϱϵ϶ЀЁЕЭеѐёҼҽҾҿӖӗӘәӚӛӬӭ",f:"ƑƒϜϝӺӻ",g:"ĜĝĞğĠġĢģƓǤǥǦǧǴǵ",h:"ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ",I:"ÌÍÎÏ",i:"ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії",j:"ĴĵǰȷɈɉϳЈј",k:"ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ",l:"ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ",m:"ΜϺϻМмӍӎ",n:"ÑñŃńŅņŇňʼnŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ",o:"ÒÓÔÕÖØðòóôõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟΦΩδθοσόϕϘϙϬϭϴОФоѲѳѺѻѼѽӦӧӨөӪӫ¤ƍΏ",p:"ƤƿΡρϷϸϼРрҎҏÞ",q:"Ɋɋ",r:"ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґҒғӶӷſ",s:"ŚśŜŝŞşŠšƧƨȘșȿςϚϛϟϨϩЅѕ",t:"ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮϯТт҂Ҭҭ",u:"µÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰμυϋύϑЏЦЧцџҴҵҶҷҸҹӋӌӇӈ",v:"ƔνѴѵѶѷ",w:"ŴŵƜωώϖϢϣШЩшщѡѿ",x:"×ΧχϗϰХхҲҳӼӽӾӿ",y:"¥ÝýÿŶŷŸƳƴȲȳɎɏΎΥΨΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ",z:"ŹźŻżŽžƩƵƶȤȥɀΖζ"},b=[];Object.keys(a).forEach(function(c){a[c].split("").forEach(function(a){b.push([a,c])})});var c={},d={};b.forEach(function(a){c[a[0]]=a[1],d[a[1]]=a[0]});var e=function(a,b){b=b||{},b.percentage=b.percentage||50;var d=a.split("").map(function(a){var d=100*Math.random();return c[a]&&d2)return a;var b=[/^[^aeiou]?ion/,/^[^aeiou]?ised/,/^[^aeiou]?iled/],c=a.length;if(c>1)for(var d=a[c-2]+a[c-1],e=0;en){j+=n*q,q=.1*q,p=0;continue}if(100>n){j+=.1*n*q,q=.01*q,p=0;continue}}if(m.match(/^[0-9]\.[0-9]$/))p+=parseFloat(m);else if(parseInt(m,10)!=m)if(void 0===a[m])if(b[m]){if(f)return null;if(g)return null;if(h)return null;g=!0,p+=b[m]}else if(c[m]){if(f)return null;if(g)return null;if(h)return null;h=!0,p+=c[m]}else{if(!d[m])return null;if(i[m])return null;i[m]=!0,f=!1,g=!1,h=!1,0===p?(j=j||1,j*=d[m]):(p*=d[m],j+=p),p=0}else{if(f)return null;if(g)return null;f=!0,p+=a[m]}else p+=parseInt(m,10)}else{if(r)return null;r=!0,j+=p,p=0,f=!1,q=.1}return p&&(j+=(p||1)*q),j*=k};return"undefined"!=typeof module&&module.exports&&(module.exports=e),e}(),x=function(){var a="(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|aug|sept|oct|nov|dec),?",b="([0-9]{1,2}),?",c="([12][0-9]{3})",d=function(a,b){return Object.keys(b).reduce(function(c,d){return c[d]=a[b[d]],c},{})},e=[{reg:String(a)+" "+String(b)+"-"+String(b)+" "+String(c),example:"March 7th-11th 1987",process:function(a){a||(a=[]);var b={month:1,day:2,to_day:3,year:4};return d(a,b)}},{reg:String(b)+" of "+String(a)+" to "+String(b)+" of "+String(a)+" "+String(c),example:"28th of September to 5th of October 2008",process:function(a){a||(a=[]);var b={day:1,month:2,to_day:3,to_month:4,to_year:5};return d(a,b)}},{reg:String(a)+" "+String(b)+" to "+String(a)+" "+String(b)+" "+String(c),example:"March 7th to june 11th 1987",process:function(a){a||(a=[]);var b={month:1,day:2,to_month:3,to_day:4,year:5,to_year:5};return d(a,b)}},{reg:"between "+String(b)+" "+String(a)+" and "+String(b)+" "+String(a)+" "+String(c),example:"between 13 February and 15 February 1945",process:function(a){a||(a=[]);var b={day:1,month:2,to_day:3,to_month:4,year:5,to_year:5};return d(a,b)}},{reg:"between "+String(a)+" "+String(b)+" and "+String(a)+" "+String(b)+" "+String(c),example:"between March 7th and june 11th 1987",process:function(a){a||(a=[]);var b={month:1,day:2,to_month:3,to_day:4,year:5,to_year:5};return d(a,b)}},{reg:String(a)+" "+String(b)+" "+String(c),example:"March 1st 1987",process:function(a){a||(a=[]);var b={month:1,day:2,year:3};return d(a,b)}},{reg:String(b)+" - "+String(b)+" of "+String(a)+" "+String(c),example:"3rd - 5th of March 1969",process:function(a){a||(a=[]);var b={day:1,to_day:2,month:3,year:4};return d(a,b)}},{reg:String(b)+" of "+String(a)+" "+String(c),example:"3rd of March 1969",process:function(a){a||(a=[]);var b={day:1,month:2,year:3};return d(a,b)}},{reg:String(a)+" "+c+",? to "+String(a)+" "+String(c),example:"September 1939 to April 1945",process:function(a){a||(a=[]);var b={month:1,year:2,to_month:3,to_year:4};return d(a,b)}},{reg:String(a)+" "+String(c),example:"March 1969",process:function(a){a||(a=[]);var b={month:1,year:2};return d(a,b)}},{reg:String(a)+" "+b,example:"March 18th",process:function(a){a||(a=[]);var b={month:1,day:2};return d(a,b)}},{reg:String(b)+" of "+a,example:"18th of March",process:function(a){a||(a=[]);var b={month:2,day:1};return d(a,b)}},{reg:c+" ?- ?"+String(c),example:"1997-1998",process:function(a){a||(a=[]);var b={year:1,to_year:2};return d(a,b)}},{reg:c,example:"1998",process:function(a){a||(a=[]);var b={year:1};return d(a,b)}}].map(function(a){return a.reg=new RegExp(a.reg,"g"),a}),f={january:0,february:1,march:2,april:3,may:4,june:5,july:6,august:7,september:8,october:9,november:10,december:11,jan:0,feb:1,mar:2,apr:3,aug:7,sept:8,oct:9,nov:10,dec:11},g=[31,29,31,30,31,30,31,31,30,31,30,31],h=function(a){return a=a.toLowerCase(),a=a.replace(/([0-9])(th|rd|st)/g,"$1")},i=function(a,b){var c;return c=new Date,b=b||{},a.year=parseInt(a.year,10)||void 0,a.day=parseInt(a.day,10)||void 0,a.to_day=parseInt(a.to_day,10)||void 0,a.to_year=parseInt(a.to_year,10)||void 0,a.month=f[a.month],a.to_month=f[a.to_month],void 0!==a.to_month&&void 0===a.month&&(a.month=a.to_month),void 0===a.to_month&&void 0!==a.month&&(a.to_month=a.month),a.to_year&&!a.year&&(a.year=a.to_year),!a.to_year&&a.year&&void 0!==a.to_month&&(a.to_year=a.year),b.assume_year&&!a.year&&(a.year=c.getFullYear()),void 0!==a.day&&(a.day>31||void 0!==a.month&&a.day>g[a.month])&&(a.day=void 0),void 0!==a.to_month&&a.to_month2090||a.year<1200)&&(a.year=void 0,a.to_year=void 0),a={day:a.day,month:a.month,year:a.year,to:{day:a.to_day,month:a.to_month,year:a.to_year}},a.year&&a.day&&void 0!==a.month&&(a.date_object=new Date,a.date_object.setYear(a.year),a.date_object.setMonth(a.month),a.date_object.setDate(a.day)),a.to.year&&a.to.day&&void 0!==a.to.month&&(a.to.date_object=new Date,a.to.date_object.setYear(a.to.year),a.to.date_object.setMonth(a.to.month),a.to.date_object.setDate(a.to.day)),a.year||void 0!==a.month?a:{})},j=function(a,b){b=b||{},a=h(a);for(var c,d,f,g,j=e.length,k=0;j>k;k+=1)if(g=e[k],a.match(g.reg))return f=new RegExp(g.reg.source,"i"),c=f.exec(a),d=g.process(c),i(d,b)};return"undefined"!=typeof module&&module.exports&&(module.exports=j),j}(),y=function(a,b,c,d){var e=this;return e.word=a||"","undefined"!=typeof module&&module.exports&&(w=require("./to_number"),x=require("./date_extractor"),v=require("../../data/parts_of_speech")),e.date=function(a){return a=a||{},x(e.word,a)},e.is_date=function(){var a=/(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|aug|sept|oct|nov|dec)/i,b=/1?[0-9]:[0-9]{2}/,c=/\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday|mon|tues|wed|thurs|fri|sat|sun)\b/i;return e.word.match(a)||e.word.match(b)||e.word.match(c)?!0:!1},e.number=function(){return e.is_date()?null:w(e.word)},e.which=function(){return e.date()?v.DA:e.number()?v.NU:v.CD}(),e};"undefined"!=typeof module&&module.exports&&(module.exports=y);var z=function(){var a=function(a){if(!a)return null;var b={hour:"an",heir:"an",heirloom:"an",honest:"an",honour:"an",honor:"an",uber:"an"},c=function(a){return a.length<=5&&a.match(/^[A-Z]*$/)?!0:a.length>=4&&a.match(/^([A-Z]\.)*$/)?!0:!1},d={A:!0,E:!0,F:!0,H:!0,I:!0,L:!0,M:!0,N:!0,O:!0,R:!0,S:!0,X:!0},e=[/^onc?e/i,/^u[bcfhjkqrstn][aeiou]/i,/^eul/i];if(b.hasOwnProperty(a))return b[a];if(c(a)&&d.hasOwnProperty(a.substr(0,1)))return"an";for(var f=0;f3?!0:!1},i=function(a){return uncountable_nouns[a]?{plural:a,singular:a}:h(a)?{plural:a,singular:f(a)}:{singular:a,plural:d(a)}},j={inflect:i,is_plural:h,singularize:f,pluralize:d};return"undefined"!=typeof module&&module.exports&&(module.exports=j),j}(),B=function(a,b,c,d){var e=this;e.word=a||"",e.next=b,e.last=c,"undefined"!=typeof module&&module.exports&&(v=require("../../data/parts_of_speech"),l=require("../../data/lexicon/firstnames"),f=require("../../data/lexicon/honourifics"),A=require("./conjugate/inflect"),z=require("./indefinite_article"));var g={it:"PRP",they:"PRP",i:"PRP",them:"PRP",you:"PRP",she:"PRP",me:"PRP",he:"PRP",him:"PRP",her:"PRP",us:"PRP",we:"PRP",thou:"PRP"},h={itself:1,west:1,western:1,east:1,eastern:1,north:1,northern:1,south:1,southern:1,the:1,one:1,your:1,my:1,today:1,yesterday:1,tomorrow:1,era:1,century:1,it:1};return e.is_acronym=function(){var a=e.word;return a.length<=5&&a.match(/^[A-Z]*$/)?!0:a.length>=4&&a.match(/^([A-Z]\.)*$/)?!0:!1},e.is_entity=function(){if(!d)return!1;if(d.normalised.length<3||!d.normalised.match(/[a-z]/i))return!1;if(g[d.normalised])return!1;if(h[d.normalised])return!1;if(d.pos){if("NNA"==d.pos.tag)return!1;if("NNO"==d.pos.tag)return!1;if("NNG"==d.pos.tag)return!1;if("NNP"==d.pos.tag)return!0}return d.noun_capital?!0:d.normalised.match(/ /)?!0:d.normalised.match(/\./)?!0:d.normalised.length<5&&d.text.match(/^[A-Z]*$/)?!0:e.is_acronym()?!0:!1},e.conjugate=function(){return A.inflect(e.word)},e.is_plural=function(){return A.is_plural(e.word)},e.article=function(){return z(e.word)},e.pluralize=function(){return A.pluralize(e.word)},e.singularize=function(){return A.singularize(e.word)},e.is_person=function(){var a,b,c=["center","centre","memorial","school","bridge","university","house","college","square","park","foundation","institute","ss","of","the","for"];for(b=c.length,a=0;b>a;a++)if(e.word.match(new RegExp("\\b"+c[a]+"\\b","i")))return!1;for(b=f.length,a=0;b>a;a++)if(e.word.match(new RegExp("\\b"+f[a]+"\\.?\\b","i")))return!0;var d=Object.keys(l);b=d.length;var g=e.word.split(" ")[0].toLowerCase();for(a=0;b>a;a++)if(d[a]===g)return!0;return e.word.match(/[a-z]{3,20} [a-z]\.? [a-z]{3,20}/i)?!0:!1},e.which=function(){return e.word.match(/'s$/)?v.NNO:v.NN}(),e};"undefined"!=typeof module&&module.exports&&(module.exports=B);var C=function(){var a=function(a){var b={idly:"idle",sporadically:"sporadic",basically:"basic",grammatically:"grammatical",alphabetically:"alphabetical",economically:"economical",conically:"conical",politically:"political",vertically:"vertical",practically:"practical",theoretically:"theoretical",critically:"critical",fantastically:"fantastic",mystically:"mystical",pornographically:"pornographic",fully:"full",jolly:"jolly",wholly:"whole"},c=[{reg:/bly$/i,repl:"ble"},{reg:/gically$/i,repl:"gical"},{reg:/([rsdh])ically$/i,repl:"$1ical"},{reg:/ically$/i,repl:"ic"},{reg:/uly$/i,repl:"ue"},{reg:/ily$/i,repl:"y"},{reg:/(.{3})ly$/i,repl:"$1"}];if(b.hasOwnProperty(a))return b[a];for(var d=0;db;b++){a=c[e[b]].length;for(var g=0;a>g;g++)d[c[e[b]][g]]=e[b]}return"undefined"!=typeof module&&module.exports&&(module.exports=d),d}(),F={infinitive:[["(eed)$",{pr:"$1s",g:"$1ing",pa:"$1ed","do":"$1er"}],["(e)(ep)$",{pr:"$1$2s",g:"$1$2ing",pa:"$1pt","do":"$1$2er"}],["(a[tg]|i[zn]|ur|nc|gl|is)e$",{pr:"$1es",g:"$1ing",pa:"$1ed"}],["([i|f|rr])y$",{pr:"$1ies",g:"$1ying",pa:"$1ied"}],["([td]er)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}],["([bd]l)e$",{pr:"$1es",g:"$1ing",pa:"$1ed"}],["(ish|tch|ess)$",{pr:"$1es",g:"$1ing",pa:"$1ed"}],["(ion|end|e[nc]t)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}],["(om)e$",{pr:"$1es",g:"$1ing",pa:"ame"}],["([aeiu])([pt])$",{pr:"$1$2s",g:"$1$2$2ing",pa:"$1$2"}],["(er)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}],["(en)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}]],present:[["(ies)$",{"in":"y",g:"ying",pa:"ied"}],["(tch|sh)es$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["(ss)es$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["([tzlshicgrvdnkmu])es$",{"in":"$1e",g:"$1ing", -pa:"$1ed"}],["(n[dtk]|c[kt]|[eo]n|i[nl]|er|a[ytrl])s$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["(ow)s$",{"in":"$1",g:"$1ing",pa:"ew"}],["(op)s$",{"in":"$1",g:"$1ping",pa:"$1ped"}],["([eirs])ts$",{"in":"$1t",g:"$1tting",pa:"$1tted"}],["(ll)s$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["(el)s$",{"in":"$1",g:"$1ling",pa:"$1led"}],["(ip)es$",{"in":"$1e",g:"$1ing",pa:"$1ed"}],["ss$",{"in":"ss",g:"ssing",pa:"ssed"}],["s$",{"in":"",g:"ing",pa:"ed"}]],gerund:[["pping$",{"in":"p",pr:"ps",pa:"pped"}],["lling$",{"in":"ll",pr:"lls",pa:"lled"}],["tting$",{"in":"t",pr:"ts",pa:"t"}],["ssing$",{"in":"ss",pr:"sses",pa:"ssed"}],["gging$",{"in":"g",pr:"gs",pa:"gged"}],["([^aeiou])ying$",{"in":"$1y",pr:"$1ies",pa:"$1ied","do":"$1ier"}],["(i.)ing$",{"in":"$1e",pr:"$1es",pa:"$1ed"}],["(u[rtcb]|[bdtpkg]l|n[cg]|a[gdkvtc]|[ua]s|[dr]g|yz|o[rlsp]|cre)ing$",{"in":"$1e",pr:"$1es",pa:"$1ed"}],["(ch|sh)ing$",{"in":"$1",pr:"$1es",pa:"$1ed"}],["(..)ing$",{"in":"$1",pr:"$1s",pa:"$1ed"}]],past:[["(ued)$",{pr:"ues",g:"uing",pa:"ued","do":"uer"}],["(e|i)lled$",{pr:"$1lls",g:"$1lling",pa:"$1lled","do":"$1ller"}],["(sh|ch)ed$",{"in":"$1",pr:"$1es",g:"$1ing","do":"$1er"}],["(tl|gl)ed$",{"in":"$1e",pr:"$1es",g:"$1ing","do":"$1er"}],["(ss)ed$",{"in":"$1",pr:"$1es",g:"$1ing","do":"$1er"}],["pped$",{"in":"p",pr:"ps",g:"pping","do":"pper"}],["tted$",{"in":"t",pr:"ts",g:"tting","do":"tter"}],["gged$",{"in":"g",pr:"gs",g:"gging","do":"gger"}],["(h|ion|n[dt]|ai.|[cs]t|pp|all|ss|tt|int|ail|ld|en|oo.|er|k|pp|w|ou.|rt|ght|rm)ed$",{"in":"$1",pr:"$1s",g:"$1ing","do":"$1er"}],["(..[^aeiou])ed$",{"in":"$1e",pr:"$1es",g:"$1ing","do":"$1er"}],["ied$",{"in":"y",pr:"ies",g:"ying","do":"ier"}],["(.o)ed$",{"in":"$1o",pr:"$1os",g:"$1oing","do":"$1oer"}],["(.i)ed$",{"in":"$1",pr:"$1s",g:"$1ing","do":"$1er"}],["([rl])ew$",{"in":"$1ow",pr:"$1ows",g:"$1owing"}],["([pl])t$",{"in":"$1t",pr:"$1ts",g:"$1ting"}]]};F=Object.keys(F).reduce(function(a,b){return a[b]=F[b].map(function(a){var b={reg:new RegExp(a[0],"i"),repl:{infinitive:a[1]["in"],present:a[1].pr,past:a[1].pa,gerund:a[1].g}};return a[1]["do"]&&(b.repl.doer=a[1]["do"]),b}),a},{}),"undefined"!=typeof module&&module.exports&&(module.exports=F);var G=function(){var a=function(a){a=a||"";var b={tie:"tier",dream:"dreamer",sail:"sailer",run:"runner",rub:"rubber",begin:"beginner",win:"winner",claim:"claimant",deal:"dealer",spin:"spinner"},c={aid:1,fail:1,appear:1,happen:1,seem:1,"try":1,say:1,marry:1,be:1,forbid:1,understand:1,bet:1},d=[{reg:/e$/i,repl:"er"},{reg:/([aeiou])([mlgp])$/i,repl:"$1$2$2er"},{reg:/([rlf])y$/i,repl:"$1ier"},{reg:/^(.?.[aeiou])t$/i,repl:"$1tter"}];if(c.hasOwnProperty(a))return null;if(b.hasOwnProperty(a))return b[a];for(var e=0;e4?a.replace(/ed$/,""):a.replace(/d$/,"");var c,d,e,f;return a.match(/[^aeiou]$/)?(e=a+"ing",d=a+"ed",c=a.match(/ss$/)?a+"es":a+"s",f=G(b)):(e=a.replace(/[aeiou]$/,"ing"),d=a.replace(/[aeiou]$/,"ed"),c=a.replace(/[aeiou]$/,"es"),f=G(b)),{infinitive:b,present:c,past:d,gerund:e,doer:f,future:"will "+b}},d=function(a,b){return a.infinitive?(a.gerund||(a.gerund=a.infinitive+"ing"),a.doer||(a.doer=G(a.infinitive)),a.present||(a.present=a.infinitive+"s"),a.past||(a.past=a.infinitive+"ed"),b&&Object.keys(a).forEach(function(c){a[c]=b+a[c]}),a.future||(a.future="will "+a.infinitive),a.perfect||(a.perfect="have "+a.past),a.pluperfect||(a.pluperfect="had "+a.past),a.future_perfect||(a.future_perfect="will have "+a.past),a):a},e=function(f){if(void 0===f)return{};var g=new RegExp("^(.*?) (in|out|on|off|behind|way|with|of|do|away|across|ahead|back|over|under|together|apart|up|upon|aback|down|about|before|after|around|to|forth|round|through|along|onto)$","i");if(f.match(" ")&&f.match(g)){var h=f.match(g,""),i=h[1],j=h[2],k=e(i);return delete k.doer,Object.keys(k).forEach(function(a){k[a]&&(k[a]+=" "+j)}),k}f.match(/^had [a-z]/i)&&(f=f.replace(/^had /i,"")),f.match(/^have [a-z]/i)&&(f=f.replace(/^have /i,"")),f.match(/^will have [a-z]/i)&&(f=f.replace(/^will have /i,"")),f=f.replace(/^will /i,"");var l,m,n=(f.match(/^(over|under|re|anti|full)\-?/i)||[])[0],o=f.replace(/^(over|under|re|anti|full)\-?/i,""),p={},q=a.length;for(m=0;q>m;m++)if(l=a[m],o===l.present||o===l.gerund||o===l.past||o===l.infinitive)return p=JSON.parse(JSON.stringify(a[m])),d(p,n);var r=b(f)||"infinitive";q=F[r].length;var s;for(m=0;q>m;m++)if(s=F[r][m],f.match(s.reg))return p[r]=f,Object.keys(s.repl).forEach(function(a){a===r?p[a]=f:p[a]=f.replace(s.reg,s.repl[a])}),d(p);return c(f)};return"undefined"!=typeof module&&module.exports&&(module.exports=e),e}(),I=function(a,b,c,d){var e=this;e.word=a||"",e.next=b,e.last=c,"undefined"!=typeof module&&module.exports&&(H=require("./conjugate/conjugate"),v=require("../../data/parts_of_speech"));var f={is:"CP","will be":"CP",will:"CP",are:"CP",was:"CP",were:"CP"},g={can:"MD",may:"MD",could:"MD",might:"MD",will:"MD","ought to":"MD",would:"MD",must:"MD",shall:"MD",should:"MD"},h={past:"VBD",participle:"VBN",infinitive:"VBP",present:"VBZ",gerund:"VBG"};return e.conjugate=function(){return H(e.word)},e.to_past=function(){return"gerund"===e.form?e.word:H(e.word).past},e.to_present=function(){return H(e.word).present},e.to_future=function(){return"will "+H(e.word).infinitive},e.form=function(){for(var a=["past","present","gerund","infinitive"],b=H(e.word),c=0;ci;i++)for(m=o[p[i]],i2=0;i2i;i++)n[p[i]]="CD";for(q=h.length,i=0;q>i;i++)n[h[i]]="JJ";for(q=k.length,i=0;q>i;i++)n[k[i]]="NNAB";for(q=f.length,i=0;q>i;i++)n[f[i]]="NNAB";for(q=g.length,i=0;q>i;i++)n[g[i]]="NN";Object.keys(l).forEach(function(a){n[a]="NNP"}),Object.keys(d).forEach(function(a){n[a]=d[a]}),Object.keys(P).forEach(function(a){n[a]=P[a]});var r;for(q=e.length,i=0;q>i;i++)r=H(e[i]),n[r.infinitive]=n[r.infinitive]||"VBP",n[r.past]=n[r.past]||"VBD",n[r.gerund]=n[r.gerund]||"VBG",n[r.present]=n[r.present]||"VBZ",r.doer&&(n[r.doer]=n[r.doer]||"NNA"),r.participle&&(n[r.participle]=n[r.participle]||"VBN");for(q=a.length,i=0;q>i;i++)r=a[i],n[r.infinitive]=n[r.infinitive]||"VBP",n[r.gerund]=n[r.gerund]||"VBG",n[r.past]=n[r.past]||"VBD",n[r.present]=n[r.present]||"VBZ",r.doer&&(n[r.doer]=n[r.doer]||"NNA"),r.participle&&(n[r.future]=n[r.future]||"VB");var s,t;for(q=c.length,i=0;q>i;i++)n[c[i]]="JJ";for(p=Object.keys(b),q=p.length,i=0;q>i;i++)t=p[i],n[t]="JJ",s=M(t),s&&s!==t&&!n[s]&&(n[s]=n[s]||"RB"),s=K(t),!s||s.match(/^more ./)||s===t||n[s]||(n[s]=n[s]||"JJR"),s=L(t),!s||s.match(/^most ./)||s===t||n[s]||(n[s]=n[s]||"JJS");return"undefined"!=typeof module&&module.exports&&(module.exports=n),n}(),R=function(a){var b=this;b.tokens=a||[];var c=function(a){return a.charAt(0).toUpperCase()+a.slice(1)};return b.tense=function(){var a=b.tokens.filter(function(a){return"verb"===a.pos.parent});return a.map(function(a){return a.analysis.tense})},b.to_past=function(){return b.tokens=b.tokens.map(function(a){return"verb"===a.pos.parent&&(a.text=a.analysis.to_past(),a.normalised=a.text),a}),b},b.to_present=function(){return b.tokens=b.tokens.map(function(a){return"verb"===a.pos.parent&&(a.text=a.analysis.to_present(),a.normalised=a.text),a}),b},b.to_future=function(){return b.tokens=b.tokens.map(function(a){return"verb"===a.pos.parent&&(a.text=a.analysis.to_future(),a.normalised=a.text),a}),b},b.insert=function(a,c){c&&a&&b.tokens.splice(c,0,a)},b.negate=function(){for(var a={everyone:"no one",everybody:"nobody",someone:"no one",somebody:"nobody",always:"never",is:"isn't",are:"aren't",was:"wasn't",will:"won't","didn't":"did","wouldn't":"would","couldn't":"could","shouldn't":"should","can't":"can","won't":"will","mustn't":"must","shan't":"shall",shant:"shall",did:"didn't",would:"wouldn't",could:"couldn't",should:"shouldn't",can:"can't",must:"mustn't"},d=0;d4&&a.match(t[b].reg))return v[t[b].pos]},g=function(a,b,c){var d=c.tokens[b-1],e=c.tokens[b+1],f={the:1,a:1,an:1};return"march"!=a.normalised&&"april"!=a.normalised&&"may"!=a.normalised||!(e&&"CD"==e.pos.tag||d&&"CD"==d.pos.tag)||(a.pos=v.CD,a.pos_reason="may_is_date"),e&&"noun"!==a.pos.parent&&"glue"!==a.pos.parent&&"MD"===e.pos.tag&&(a.pos=v.NN,a.pos_reason="before_modal"),d&&"will"==d.normalised&&!d.punctuated&&"noun"==a.pos.parent&&"PRP"!==a.pos.tag&&(a.pos=v.VB,a.pos_reason="after_will"),d&&"i"==d.normalised&&!d.punctuated&&"noun"==a.pos.parent&&(a.pos=v.VB,a.pos_reason="after_i"),d&&"noun"===a.pos.parent&&"PRP"!==a.pos.tag&&"RB"===d.pos.tag&&!d.start&&(a.pos=v.VB,a.pos_reason="after_adverb"),e&&"adjective"===a.pos.parent&&"adjective"===e.pos.parent&&!a.punctuated&&(a.pos=v.RB,a.pos_reason="consecutive_adjectives"),d&&"verb"===a.pos.parent&&f[d.pos.normalised]&&"CP"!=a.pos.tag&&(a.pos=v.NN,a.pos_reason="determiner-verb"),d&&"CP"===d.pos.tag&&"DT"!==a.pos.tag&&"RB"!==a.pos.tag&&"adjective"!==a.pos.parent&&"value"!==a.pos.parent&&(a.pos=v.JJ,a.pos_reason="copula-adjective"),d&&e&&"CP"===d.pos.tag&&"RB"===a.pos.tag&&"verb"===e.pos.parent&&(c.tokens[b+1].pos=v.JJ,c.tokens[b+1].pos_reason="copula-adverb-adjective"),e&&"PRP"==e.pos.tag&&"noun"==a.pos.parent&&!a.punctuated&&(a.pos=v.VB,a.pos_reason="before_[him|her|it]"),d&&e&&"DT"===d.pos.tag&&"noun"===e.pos.parent&&"verb"===a.pos.parent&&(a.pos=v.JJ,a.pos_reason="determiner-adjective-noun"),a},h=function(a){for(var b,c,d,e={"i'd":["i","would"],"she'd":["she","would"],"he'd":["he","would"],"they'd":["they","would"],"we'd":["we","would"],"i'll":["i","will"],"she'll":["she","will"],"he'll":["he","will"],"they'll":["they","will"],"we'll":["we","will"],"i've":["i","have"],"they've":["they","have"],"we've":["we","have"],"should've":["should","have"],"would've":["would","have"],"could've":["could","have"],"must've":["must","have"],"i'm":["i","am"],"he's":["he","is"],"she's":["she","is"],"we're":["we","are"],"they're":["they","are"],cannot:["can","not"]},f=0;f4){var d=a.normalised.substr(c-4,c-1);if(u.hasOwnProperty(d))return a.pos=v[u[d]],a.pos_reason="wordnet suffix",a}var g=f(a.normalised);return g?(a.pos=g,a.pos_reason="regex suffix",a):parseFloat(a.normalised)?(a.pos=v.CD,a.pos_reason="parsefloat",a):a}),a.tokens=a.tokens.map(function(a,b){return"lexicon"!==a.pos_reason&&a.normalised.match(/.ed$/)&&(a.pos=v.VB,a.pos_reason="ed"),a});var c=null,d="";a.tokens=a.tokens.map(function(b,e){var f=a.tokens[e+1];if(b.pos){if("the"==b.normalised||"a"==b.normalised||"an"==b.normalised||"PP"===b.pos.tag)return c="noun",d=b.pos.name,b;if("PRP"===b.pos.tag||"MD"===b.pos.tag)return c="verb",d=b.pos.name,b}return b.pos&&("verb"==c&&"noun"==b.pos.parent&&(!f||f.pos&&"noun"!=f.pos.parent)&&(f&&f.pos&&f.pos.parent==c||(b.pos=v.VB,b.pos_reason="signal from "+d,c=null)),"noun"==c&&"verb"==b.pos.parent&&(!f||f.pos&&"verb"!=f.pos.parent)&&(f&&f.pos&&f.pos.parent==c||(b.pos=v.NN,b.pos_reason="signal from "+d,c=null))),c&&!b.pos&&(f&&f.pos&&f.pos.parent==c||(b.pos=v[c],b.pos_reason="signal from "+d,c=null)),"verb"===c&&b.pos&&"verb"===b.pos.parent&&(c=null),"noun"===c&&b.pos&&"noun"===b.pos.parent&&(c=null),b});var i={};a.tokens.forEach(function(a){a.pos&&(i[a.pos.parent]=!0)}),a.tokens=a.tokens.map(function(a,b){if(!a.pos){if(i.adjective&&i.noun&&!i.verb)return a.pos=v.VB,a.pos_reason="need one verb",i.verb=!0,a;a.pos=v.NN,a.pos_reason="noun fallback"}return a}),a.tokens=a.tokens.map(function(b,c){return g(b,c,a)}),a.tokens=a.tokens.map(function(b,c){return g(b,c,a)})}),b.dont_combine||(i=i.map(function(a){return c(a)}),i=i.map(function(a){return d(a)})),i=i.map(function(a){return a.tokens=a.tokens.map(function(b,c){var d=a.tokens[c-1]||null,e=a.tokens[c+1]||null;return b.analysis=O[b.pos.parent](b.normalised,e,d,b),b}),a}),i=i.map(function(a){var b=new R(a.tokens);return b.type=a.type,b}),new S(i)};return"undefined"!=typeof module&&module.exports&&(module.exports=i), -i}(),U=function(){"undefined"!=typeof module&&module.exports&&(T=require("./pos"));var a=function(a,b){b=b||{};var c=T(a,b).sentences,d=c.reduce(function(a,c){return a.concat(c.entities(b))},[]),e={};return d=d.filter(function(a){return a.analysis.is_person()&&a.normalised.split(" ").forEach(function(a){e[a]=!0}),e[a.normalised]?!1:!0})};return"undefined"!=typeof module&&module.exports&&(module.exports=a),a}();if("undefined"!=typeof module&&module.exports)var O=require("./src/parents/parents"),m=require("./src/methods/tokenization/sentence").sentences,o=require("./src/methods/tokenization/tokenize").tokenize,n=require("./src/methods/tokenization/ngram").ngram,p=require("./src/methods/transliteration/unicode_normalisation"),q=require("./src/methods/syllables/syllable"),V=require("./src/methods/localization/britishize"),s=V.americanize,r=V.britishize,T=require("./src/pos"),U=require("./src/spot");var W={noun:O.noun,adjective:O.adjective,verb:O.verb,adverb:O.adverb,value:O.value,sentences:m,ngram:n,tokenize:o,americanize:s,britishize:r,syllables:q,normalize:p.normalize,denormalize:p.denormalize,pos:T,spot:U};return"undefined"!=typeof module&&module.exports&&(module.exports=W),W}(); \ No newline at end of file +for(i=0;i0&&(b.push(c[i]),c[i]=""));return 0===b.length?[a]:b};"undefined"!=typeof module&&module.exports&&(exports.sentences=m);var n=function(){var a=function(a,b){b=b||{};var c,d,e,f,g,h=b.min_count||1,i=b.max_size||5,j=/[^a-zA-Z'\-]+/g,k=[null],l=[];for(i++,c=1;i>=c;c++)k.push({});for(a=a.replace(j," ").replace(/^\s+/,"").replace(/\s+$/,""),a=a.toLowerCase(),a=a.split(/\s+/),c=0,f=a.length;f>c;c++)for(g=a[c],k[1][g]=(k[1][g]||0)+1,d=2;i>=d&&f>=c+d;d++)g+=" "+a[c+d-1],k[d][g]=(k[d][g]||0)+1;for(c=void 0,e=1;i>=e;e++){l[e]=[];var m=k[e];for(c in m)m.hasOwnProperty(c)&&m[c]>=h&&l[e].push({word:c,count:m[c],size:e})}return l=l.filter(function(a){return null!==a}),l=l.map(function(a){return a=a.sort(function(a,b){return b.count-a.count})})};return"undefined"!=typeof module&&module.exports&&(exports.ngram=a),a}(),o=function(){"undefined"!=typeof module&&module.exports&&(m=require("./sentence").sentences,d=require("../../data/lexicon/multiples"));var a=Object.keys(d).map(function(a){return a.split(" ")}),b=function(a){return a?(a=a.toLowerCase(),a=a.replace(/[,\.!:;\?\(\)]/,""),a=a.replace(/’/g,"'"),a=a.replace(/"/g,""),a.match(/[a-z0-9]/i)?a:""):""},c=function(a){return a.match(/\?$/)?"interrogative":a.match(/\!$/)?"exclamative":"declarative"},e=function(c){for(var d=[],e=c.map(function(a){return b(a)}),f=0;f0&&null!==a.match(/^[A-Z][a-z]/),punctuated:null!==a.match(/[,;:\(\)"]/)||void 0,end:c===d.length-1||void 0,start:0===c||void 0}});return{sentence:a,tokens:f,type:c(a)}})};return"undefined"!=typeof module&&module.exports&&(exports.tokenize=f),f}(),p=function(){var a={2:"²ƻ",3:"³ƷƸƹƺǮǯЗҘҙӞӟӠӡȜȝ",5:"Ƽƽ",8:"Ȣȣ","!":"¡","?":"¿Ɂɂ",a:"ªÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАДадѦѧӐӑӒӓƛɅ",b:"ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬбвъьѢѣҌҍҔҕƥƾ",c:"¢©ÇçĆćĈĉĊċČčƆƇƈȻȼͻͼͽϲϹϽϾϿЄСсєҀҁҪҫ",d:"ÐĎďĐđƉƊȡƋƌǷ",e:"ÈÉÊËèéêëĒēĔĕĖėĘęĚěƎƏƐǝȄȅȆȇȨȩɆɇΈΕΞΣέεξϱϵ϶ЀЁЕЭеѐёҼҽҾҿӖӗӘәӚӛӬӭ",f:"ƑƒϜϝӺӻ",g:"ĜĝĞğĠġĢģƓǤǥǦǧǴǵ",h:"ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ",I:"ÌÍÎÏ",i:"ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії",j:"ĴĵǰȷɈɉϳЈј",k:"ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ",l:"ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ",m:"ΜϺϻМмӍӎ",n:"ÑñŃńŅņŇňʼnŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ",o:"ÒÓÔÕÖØðòóôõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟΦΩδθοσόϕϘϙϬϭϴОФоѲѳѺѻѼѽӦӧӨөӪӫ¤ƍΏ",p:"ƤƿΡρϷϸϼРрҎҏÞ",q:"Ɋɋ",r:"ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґҒғӶӷſ",s:"ŚśŜŝŞşŠšƧƨȘșȿςϚϛϟϨϩЅѕ",t:"ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮϯТт҂Ҭҭ",u:"µÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰμυϋύϑЏЦЧцџҴҵҶҷҸҹӋӌӇӈ",v:"ƔνѴѵѶѷ",w:"ŴŵƜωώϖϢϣШЩшщѡѿ",x:"×ΧχϗϰХхҲҳӼӽӾӿ",y:"¥ÝýÿŶŷŸƳƴȲȳɎɏΎΥΨΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ",z:"ŹźŻżŽžƩƵƶȤȥɀΖζ"},b=[];Object.keys(a).forEach(function(c){a[c].split("").forEach(function(a){b.push([a,c])})});var c={},d={};b.forEach(function(a){c[a[0]]=a[1],d[a[1]]=a[0]});var e=function(a,b){b=b||{},b.percentage=b.percentage||50;var d=a.split("").map(function(a){var d=100*Math.random();return c[a]&&d2)return a;var b=[/^[^aeiou]?ion/,/^[^aeiou]?ised/,/^[^aeiou]?iled/],c=a.length;if(c>1)for(var d=a[c-2]+a[c-1],e=0;en){j+=n*q,q=.1*q,p=0;continue}if(100>n){j+=.1*n*q,q=.01*q,p=0;continue}}if(m.match(/^[0-9]\.[0-9]$/))p+=parseFloat(m);else if(parseInt(m,10)!=m)if(void 0===a[m])if(b[m]){if(f)return null;if(g)return null;if(h)return null;g=!0,p+=b[m]}else if(c[m]){if(f)return null;if(g)return null;if(h)return null;h=!0,p+=c[m]}else{if(!d[m])return null;if(i[m])return null;i[m]=!0,f=!1,g=!1,h=!1,0===p?(j=j||1,j*=d[m]):(p*=d[m],j+=p),p=0}else{if(f)return null;if(g)return null;f=!0,p+=a[m]}else p+=parseInt(m,10)}else{if(r)return null;r=!0,j+=p,p=0,f=!1,q=.1}return p&&(j+=(p||1)*q),j*=k};return"undefined"!=typeof module&&module.exports&&(module.exports=e),e}(),x=function(){var a="(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|aug|sept|oct|nov|dec),?",b="([0-9]{1,2}),?",c="([12][0-9]{3})",d=function(a,b){return Object.keys(b).reduce(function(c,d){return c[d]=a[b[d]],c},{})},e=[{reg:String(a)+" "+String(b)+"-"+String(b)+" "+String(c),example:"March 7th-11th 1987",process:function(a){a||(a=[]);var b={month:1,day:2,to_day:3,year:4};return d(a,b)}},{reg:String(b)+" of "+String(a)+" to "+String(b)+" of "+String(a)+" "+String(c),example:"28th of September to 5th of October 2008",process:function(a){a||(a=[]);var b={day:1,month:2,to_day:3,to_month:4,to_year:5};return d(a,b)}},{reg:String(a)+" "+String(b)+" to "+String(a)+" "+String(b)+" "+String(c),example:"March 7th to june 11th 1987",process:function(a){a||(a=[]);var b={month:1,day:2,to_month:3,to_day:4,year:5,to_year:5};return d(a,b)}},{reg:"between "+String(b)+" "+String(a)+" and "+String(b)+" "+String(a)+" "+String(c),example:"between 13 February and 15 February 1945",process:function(a){a||(a=[]);var b={day:1,month:2,to_day:3,to_month:4,year:5,to_year:5};return d(a,b)}},{reg:"between "+String(a)+" "+String(b)+" and "+String(a)+" "+String(b)+" "+String(c),example:"between March 7th and june 11th 1987",process:function(a){a||(a=[]);var b={month:1,day:2,to_month:3,to_day:4,year:5,to_year:5};return d(a,b)}},{reg:String(a)+" "+String(b)+" "+String(c),example:"March 1st 1987",process:function(a){a||(a=[]);var b={month:1,day:2,year:3};return d(a,b)}},{reg:String(b)+" - "+String(b)+" of "+String(a)+" "+String(c),example:"3rd - 5th of March 1969",process:function(a){a||(a=[]);var b={day:1,to_day:2,month:3,year:4};return d(a,b)}},{reg:String(b)+" of "+String(a)+" "+String(c),example:"3rd of March 1969",process:function(a){a||(a=[]);var b={day:1,month:2,year:3};return d(a,b)}},{reg:String(a)+" "+c+",? to "+String(a)+" "+String(c),example:"September 1939 to April 1945",process:function(a){a||(a=[]);var b={month:1,year:2,to_month:3,to_year:4};return d(a,b)}},{reg:String(a)+" "+String(c),example:"March 1969",process:function(a){a||(a=[]);var b={month:1,year:2};return d(a,b)}},{reg:String(a)+" "+b,example:"March 18th",process:function(a){a||(a=[]);var b={month:1,day:2};return d(a,b)}},{reg:String(b)+" of "+a,example:"18th of March",process:function(a){a||(a=[]);var b={month:2,day:1};return d(a,b)}},{reg:c+" ?- ?"+String(c),example:"1997-1998",process:function(a){a||(a=[]);var b={year:1,to_year:2};return d(a,b)}},{reg:c,example:"1998",process:function(a){a||(a=[]);var b={year:1};return d(a,b)}}].map(function(a){return a.reg=new RegExp(a.reg,"g"),a}),f={january:0,february:1,march:2,april:3,may:4,june:5,july:6,august:7,september:8,october:9,november:10,december:11,jan:0,feb:1,mar:2,apr:3,aug:7,sept:8,oct:9,nov:10,dec:11},g=[31,29,31,30,31,30,31,31,30,31,30,31],h=function(a){return a=a.toLowerCase(),a=a.replace(/([0-9])(th|rd|st)/g,"$1")},i=function(a,b){var c;return c=new Date,b=b||{},a.year=parseInt(a.year,10)||void 0,a.day=parseInt(a.day,10)||void 0,a.to_day=parseInt(a.to_day,10)||void 0,a.to_year=parseInt(a.to_year,10)||void 0,a.month=f[a.month],a.to_month=f[a.to_month],void 0!==a.to_month&&void 0===a.month&&(a.month=a.to_month),void 0===a.to_month&&void 0!==a.month&&(a.to_month=a.month),a.to_year&&!a.year&&(a.year=a.to_year),!a.to_year&&a.year&&void 0!==a.to_month&&(a.to_year=a.year),b.assume_year&&!a.year&&(a.year=c.getFullYear()),void 0!==a.day&&(a.day>31||void 0!==a.month&&a.day>g[a.month])&&(a.day=void 0),void 0!==a.to_month&&a.to_month2090||a.year<1200)&&(a.year=void 0,a.to_year=void 0),a={day:a.day,month:a.month,year:a.year,to:{day:a.to_day,month:a.to_month,year:a.to_year}},a.year&&a.day&&void 0!==a.month&&(a.date_object=new Date,a.date_object.setYear(a.year),a.date_object.setMonth(a.month),a.date_object.setDate(a.day)),a.to.year&&a.to.day&&void 0!==a.to.month&&(a.to.date_object=new Date,a.to.date_object.setYear(a.to.year),a.to.date_object.setMonth(a.to.month),a.to.date_object.setDate(a.to.day)),a.year||void 0!==a.month?a:{})},j=function(a,b){b=b||{},a=h(a);for(var c,d,f,g,j=e.length,k=0;j>k;k+=1)if(g=e[k],a.match(g.reg))return f=new RegExp(g.reg.source,"i"),c=f.exec(a),d=g.process(c),i(d,b)};return"undefined"!=typeof module&&module.exports&&(module.exports=j),j}(),y=function(a,b,c){var d=this;return d.word=a||"","undefined"!=typeof module&&module.exports&&(w=require("./to_number"),x=require("./date_extractor"),v=require("../../data/parts_of_speech")),d.date=function(a){return a=a||{},x(d.word,a)},d.is_date=function(){var a=/(january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|aug|sept|oct|nov|dec)/i,b=/1?[0-9]:[0-9]{2}/,c=/\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday|mon|tues|wed|thurs|fri|sat|sun)\b/i;return d.word.match(a)||d.word.match(b)||d.word.match(c)?!0:!1},d.number=function(){return d.is_date()?null:w(d.word)},d.which=function(){return d.date()?v.DA:d.number()?v.NU:v.CD}(),d};"undefined"!=typeof module&&module.exports&&(module.exports=y);var z=function(){var a=function(a){if(!a)return null;var b={hour:"an",heir:"an",heirloom:"an",honest:"an",honour:"an",honor:"an",uber:"an"},c=function(a){return a.length<=5&&a.match(/^[A-Z]*$/)?!0:a.length>=4&&a.match(/^([A-Z]\.)*$/)?!0:!1},d={A:!0,E:!0,F:!0,H:!0,I:!0,L:!0,M:!0,N:!0,O:!0,R:!0,S:!0,X:!0},e=[/^onc?e/i,/^u[bcfhjkqrstn][aeiou]/i,/^eul/i];if(b.hasOwnProperty(a))return b[a];if(c(a)&&d.hasOwnProperty(a.substr(0,1)))return"an";for(var f=0;f3?!0:!1},i=function(a){return uncountable_nouns[a]?{plural:a,singular:a}:h(a)?{plural:a,singular:f(a)}:{singular:a,plural:d(a)}},j={inflect:i,is_plural:h,singularize:f,pluralize:d};return"undefined"!=typeof module&&module.exports&&(module.exports=j),j}(),B=function(a,b,c){var d,e,g=this;void 0!==b&&void 0!==c&&(d=b.tokens[c],e=b.tokens[c+i]),g.word=a||"","undefined"!=typeof module&&module.exports&&(v=require("../../data/parts_of_speech"),l=require("../../data/lexicon/firstnames"),f=require("../../data/lexicon/honourifics"),A=require("./conjugate/inflect"),z=require("./indefinite_article"));var h={it:"PRP",they:"PRP",i:"PRP",them:"PRP",you:"PRP",she:"PRP",me:"PRP",he:"PRP",him:"PRP",her:"PRP",us:"PRP",we:"PRP",thou:"PRP"},j={itself:1,west:1,western:1,east:1,eastern:1,north:1,northern:1,south:1,southern:1,the:1,one:1,your:1,my:1,today:1,yesterday:1,tomorrow:1,era:1,century:1,it:1};return g.is_acronym=function(){var a=g.word;return a.length<=5&&a.match(/^[A-Z]*$/)?!0:a.length>=4&&a.match(/^([A-Z]\.)*$/)?!0:!1},g.is_entity=function(){if(!d)return!1;if(d.normalised.length<3||!d.normalised.match(/[a-z]/i))return!1;if(h[d.normalised])return!1;if(j[d.normalised])return!1;if(d.pos){if("NNA"==d.pos.tag)return!1;if("NNO"==d.pos.tag)return!1;if("NNG"==d.pos.tag)return!1;if("NNP"==d.pos.tag)return!0}return d.noun_capital?!0:d.normalised.match(/ /)?!0:d.normalised.match(/\./)?!0:d.normalised.length<5&&d.text.match(/^[A-Z]*$/)?!0:g.is_acronym()?!0:!1},g.conjugate=function(){return A.inflect(g.word)},g.is_plural=function(){return A.is_plural(g.word)},g.article=function(){return z(g.word)},g.pluralize=function(){return A.pluralize(g.word)},g.singularize=function(){return A.singularize(g.word)},g.is_person=function(){var a,b,c=["center","centre","memorial","school","bridge","university","house","college","square","park","foundation","institute","ss","of","the","for","and","&","co","sons"];for(b=c.length,a=0;b>a;a++)if(g.word.match(new RegExp("\\b"+c[a]+"\\b","i")))return!1;for(b=f.length,a=0;b>a;a++)if(g.word.match(new RegExp("\\b"+f[a]+"\\.?\\b","i")))return!0;var d=g.word.split(" ").map(function(a){return a.toLowerCase()});return l[d[0]]?!0:d.length>2&&l[d[1]]?!0:g.word.match(/[a-z]{3,20} [a-z]\.? [a-z]{3,20}/i)?!0:!1},g.pronoun=function(){if(g.is_person()){var a=g.word.split(" ").map(function(a){return a.toLowerCase()});return"m"===l[a[0]]||"m"==l[a[1]]?"he":"f"===l[a[0]]||"f"==l[a[1]]?"she":g.word.match(/^(mrs|miss|ms|misses|mme|mlle)\.? /,"i")?"she":g.word.match(/\b(mr|mister|sr|jr)\b/,"i")?"he":"a"===l[a[0]]||"a"==l[a[1]]?"they":a[0].match(/[aeiy]$/)?"she":a[0].match(/[ou]$/)?"he":a[0].match(/(nn|ll|tt)/)?"she":"they"}return g.is_plural()?"they":"it"},g.referenced_by=function(){if(d&&"PRP"!==d.pos.tag){var a=g.pronoun(),e=b.tokens.slice(c+1,b.tokens.length);b.next&&(e=e.concat(b.next.tokens));for(var f=[],h=0;hb;b++){a=c[e[b]].length;for(var g=0;a>g;g++)d[c[e[b]][g]]=e[b]}return"undefined"!=typeof module&&module.exports&&(module.exports=d),d}(),F={infinitive:[["(eed)$",{pr:"$1s",g:"$1ing",pa:"$1ed","do":"$1er"}],["(e)(ep)$",{pr:"$1$2s",g:"$1$2ing",pa:"$1pt","do":"$1$2er"}],["(a[tg]|i[zn]|ur|nc|gl|is)e$",{pr:"$1es",g:"$1ing",pa:"$1ed"}],["([i|f|rr])y$",{pr:"$1ies",g:"$1ying",pa:"$1ied"}],["([td]er)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}],["([bd]l)e$",{pr:"$1es",g:"$1ing",pa:"$1ed"}],["(ish|tch|ess)$",{pr:"$1es",g:"$1ing",pa:"$1ed"}],["(ion|end|e[nc]t)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}],["(om)e$",{pr:"$1es",g:"$1ing",pa:"ame"}],["([aeiu])([pt])$",{pr:"$1$2s",g:"$1$2$2ing",pa:"$1$2"}],["(er)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}],["(en)$",{pr:"$1s",g:"$1ing",pa:"$1ed"}]],present:[["(ies)$",{"in":"y",g:"ying",pa:"ied"}],["(tch|sh)es$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["(ss)es$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["([tzlshicgrvdnkmu])es$",{"in":"$1e",g:"$1ing",pa:"$1ed"}],["(n[dtk]|c[kt]|[eo]n|i[nl]|er|a[ytrl])s$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["(ow)s$",{"in":"$1",g:"$1ing",pa:"ew"}],["(op)s$",{"in":"$1",g:"$1ping",pa:"$1ped"}],["([eirs])ts$",{"in":"$1t",g:"$1tting",pa:"$1tted"}],["(ll)s$",{"in":"$1",g:"$1ing",pa:"$1ed"}],["(el)s$",{"in":"$1",g:"$1ling",pa:"$1led"}],["(ip)es$",{"in":"$1e",g:"$1ing",pa:"$1ed"}],["ss$",{"in":"ss",g:"ssing",pa:"ssed"}],["s$",{"in":"",g:"ing",pa:"ed"}]],gerund:[["pping$",{"in":"p",pr:"ps",pa:"pped"}],["lling$",{"in":"ll",pr:"lls",pa:"lled"}],["tting$",{"in":"t",pr:"ts",pa:"t"}],["ssing$",{"in":"ss",pr:"sses",pa:"ssed"}],["gging$",{"in":"g",pr:"gs",pa:"gged"}],["([^aeiou])ying$",{"in":"$1y",pr:"$1ies",pa:"$1ied","do":"$1ier"}],["(i.)ing$",{"in":"$1e",pr:"$1es",pa:"$1ed"}],["(u[rtcb]|[bdtpkg]l|n[cg]|a[gdkvtc]|[ua]s|[dr]g|yz|o[rlsp]|cre)ing$",{"in":"$1e",pr:"$1es",pa:"$1ed"}],["(ch|sh)ing$",{"in":"$1",pr:"$1es",pa:"$1ed"}],["(..)ing$",{"in":"$1",pr:"$1s",pa:"$1ed"}]],past:[["(ued)$",{pr:"ues",g:"uing",pa:"ued","do":"uer"}],["(e|i)lled$",{pr:"$1lls",g:"$1lling",pa:"$1lled","do":"$1ller"}],["(sh|ch)ed$",{"in":"$1",pr:"$1es",g:"$1ing","do":"$1er"}],["(tl|gl)ed$",{"in":"$1e",pr:"$1es",g:"$1ing","do":"$1er"}],["(ss)ed$",{"in":"$1",pr:"$1es",g:"$1ing","do":"$1er"}],["pped$",{"in":"p",pr:"ps",g:"pping","do":"pper"}],["tted$",{"in":"t",pr:"ts",g:"tting","do":"tter"}],["gged$",{"in":"g",pr:"gs",g:"gging","do":"gger"}],["(h|ion|n[dt]|ai.|[cs]t|pp|all|ss|tt|int|ail|ld|en|oo.|er|k|pp|w|ou.|rt|ght|rm)ed$",{"in":"$1",pr:"$1s",g:"$1ing","do":"$1er"}],["(..[^aeiou])ed$",{"in":"$1e",pr:"$1es",g:"$1ing","do":"$1er"}],["ied$",{"in":"y",pr:"ies",g:"ying","do":"ier"}],["(.o)ed$",{"in":"$1o",pr:"$1os",g:"$1oing","do":"$1oer"}],["(.i)ed$",{"in":"$1",pr:"$1s",g:"$1ing","do":"$1er"}],["([rl])ew$",{"in":"$1ow",pr:"$1ows",g:"$1owing"}],["([pl])t$",{"in":"$1t",pr:"$1ts",g:"$1ting"}]]};F=Object.keys(F).reduce(function(a,b){return a[b]=F[b].map(function(a){var b={reg:new RegExp(a[0],"i"),repl:{infinitive:a[1]["in"],present:a[1].pr,past:a[1].pa,gerund:a[1].g}};return a[1]["do"]&&(b.repl.doer=a[1]["do"]),b}),a},{}),"undefined"!=typeof module&&module.exports&&(module.exports=F);var G=function(){var a=function(a){a=a||"";var b={tie:"tier",dream:"dreamer",sail:"sailer",run:"runner",rub:"rubber",begin:"beginner",win:"winner",claim:"claimant",deal:"dealer",spin:"spinner"},c={aid:1,fail:1,appear:1,happen:1,seem:1,"try":1,say:1,marry:1,be:1,forbid:1,understand:1,bet:1},d=[{reg:/e$/i,repl:"er"},{reg:/([aeiou])([mlgp])$/i,repl:"$1$2$2er"},{reg:/([rlf])y$/i,repl:"$1ier"},{reg:/^(.?.[aeiou])t$/i,repl:"$1tter"}];if(c.hasOwnProperty(a))return null;if(b.hasOwnProperty(a))return b[a];for(var e=0;e4?a.replace(/ed$/,""):a.replace(/d$/,"");var c,d,e,f;return a.match(/[^aeiou]$/)?(e=a+"ing",d=a+"ed",c=a.match(/ss$/)?a+"es":a+"s",f=G(b)):(e=a.replace(/[aeiou]$/,"ing"),d=a.replace(/[aeiou]$/,"ed"),c=a.replace(/[aeiou]$/,"es"),f=G(b)),{infinitive:b,present:c,past:d,gerund:e,doer:f,future:"will "+b}},d=function(a,b){return a.infinitive?(a.gerund||(a.gerund=a.infinitive+"ing"),a.doer||(a.doer=G(a.infinitive)),a.present||(a.present=a.infinitive+"s"),a.past||(a.past=a.infinitive+"ed"),b&&Object.keys(a).forEach(function(c){a[c]=b+a[c]}),a.future||(a.future="will "+a.infinitive),a.perfect||(a.perfect="have "+a.past),a.pluperfect||(a.pluperfect="had "+a.past),a.future_perfect||(a.future_perfect="will have "+a.past),a):a},e=function(f){if(void 0===f)return{};var g=new RegExp("^(.*?) (in|out|on|off|behind|way|with|of|do|away|across|ahead|back|over|under|together|apart|up|upon|aback|down|about|before|after|around|to|forth|round|through|along|onto)$","i");if(f.match(" ")&&f.match(g)){var h=f.match(g,""),i=h[1],j=h[2],k=e(i);return delete k.doer,Object.keys(k).forEach(function(a){k[a]&&(k[a]+=" "+j)}),k}f.match(/^had [a-z]/i)&&(f=f.replace(/^had /i,"")),f.match(/^have [a-z]/i)&&(f=f.replace(/^have /i,"")),f.match(/^will have [a-z]/i)&&(f=f.replace(/^will have /i,"")),f=f.replace(/^will /i,"");var l,m,n=(f.match(/^(over|under|re|anti|full)\-?/i)||[])[0],o=f.replace(/^(over|under|re|anti|full)\-?/i,""),p={},q=a.length;for(m=0;q>m;m++)if(l=a[m],o===l.present||o===l.gerund||o===l.past||o===l.infinitive)return p=JSON.parse(JSON.stringify(a[m])),d(p,n);var r=b(f)||"infinitive";q=F[r].length;var s;for(m=0;q>m;m++)if(s=F[r][m],f.match(s.reg))return p[r]=f,Object.keys(s.repl).forEach(function(a){a===r?p[a]=f:p[a]=f.replace(s.reg,s.repl[a])}),d(p);return c(f)};return"undefined"!=typeof module&&module.exports&&(module.exports=e),e}(),I=function(a,b,c){var d,e,f=this;void 0!==b&&void 0!==c&&(d=b.tokens[c],e=b.tokens[c+i]),f.word=a||"","undefined"!=typeof module&&module.exports&&(H=require("./conjugate/conjugate"),v=require("../../data/parts_of_speech"));var g={is:"CP","will be":"CP",will:"CP",are:"CP",was:"CP",were:"CP"},h={can:"MD",may:"MD",could:"MD",might:"MD",will:"MD","ought to":"MD",would:"MD",must:"MD",shall:"MD",should:"MD"},j={past:"VBD",participle:"VBN",infinitive:"VBP",present:"VBZ",gerund:"VBG"};return f.conjugate=function(){return H(f.word)},f.to_past=function(){return"gerund"===f.form?f.word:H(f.word).past},f.to_present=function(){return H(f.word).present},f.to_future=function(){return"will "+H(f.word).infinitive},f.form=function(){for(var a=["past","present","gerund","infinitive"],b=H(f.word),c=0;ci;i++)for(m=o[p[i]],i2=0;i2i;i++)n[p[i]]="CD";for(q=h.length,i=0;q>i;i++)n[h[i]]="JJ";for(q=k.length,i=0;q>i;i++)n[k[i]]="NNAB";for(q=f.length,i=0;q>i;i++)n[f[i]]="NNAB";for(q=g.length,i=0;q>i;i++)n[g[i]]="NN";Object.keys(l).forEach(function(a){n[a]="NNP"}),Object.keys(d).forEach(function(a){n[a]=d[a]}),Object.keys(P).forEach(function(a){n[a]=P[a]});var r;for(q=e.length,i=0;q>i;i++)r=H(e[i]),n[r.infinitive]=n[r.infinitive]||"VBP",n[r.past]=n[r.past]||"VBD",n[r.gerund]=n[r.gerund]||"VBG",n[r.present]=n[r.present]||"VBZ",r.doer&&(n[r.doer]=n[r.doer]||"NNA"),r.participle&&(n[r.participle]=n[r.participle]||"VBN");for(q=a.length,i=0;q>i;i++)r=a[i],n[r.infinitive]=n[r.infinitive]||"VBP",n[r.gerund]=n[r.gerund]||"VBG",n[r.past]=n[r.past]||"VBD",n[r.present]=n[r.present]||"VBZ",r.doer&&(n[r.doer]=n[r.doer]||"NNA"),r.participle&&(n[r.future]=n[r.future]||"VB");var s,t;for(q=c.length,i=0;q>i;i++)n[c[i]]="JJ";for(p=Object.keys(b),q=p.length,i=0;q>i;i++)t=p[i],n[t]="JJ",s=M(t),s&&s!==t&&!n[s]&&(n[s]=n[s]||"RB"),s=K(t),!s||s.match(/^more ./)||s===t||n[s]||(n[s]=n[s]||"JJR"),s=L(t),!s||s.match(/^most ./)||s===t||n[s]||(n[s]=n[s]||"JJS");return"undefined"!=typeof module&&module.exports&&(module.exports=n),n}(),R=function(a){var b=this;b.tokens=a||[];var c=function(a){return a.charAt(0).toUpperCase()+a.slice(1)};return b.tense=function(){var a=b.tokens.filter(function(a){return"verb"===a.pos.parent});return a.map(function(a){return a.analysis.tense})},b.to_past=function(){return b.tokens=b.tokens.map(function(a){return"verb"===a.pos.parent&&(a.text=a.analysis.to_past(),a.normalised=a.text),a}),b},b.to_present=function(){return b.tokens=b.tokens.map(function(a){return"verb"===a.pos.parent&&(a.text=a.analysis.to_present(),a.normalised=a.text),a}),b},b.to_future=function(){return b.tokens=b.tokens.map(function(a){return"verb"===a.pos.parent&&(a.text=a.analysis.to_future(),a.normalised=a.text),a}),b},b.insert=function(a,c){c&&a&&b.tokens.splice(c,0,a)},b.negate=function(){for(var a={everyone:"no one",everybody:"nobody",someone:"no one",somebody:"nobody",always:"never",is:"isn't",are:"aren't",was:"wasn't",will:"won't","didn't":"did","wouldn't":"would","couldn't":"could","shouldn't":"should","can't":"can","won't":"will","mustn't":"must","shan't":"shall",shant:"shall",did:"didn't",would:"wouldn't",could:"couldn't",should:"shouldn't",can:"can't",must:"mustn't"},d=0;d4&&a.match(t[b].reg))return v[t[b].pos]},g=function(a,b,c){var d=c.tokens[b-1],e=c.tokens[b+1],f={the:1,a:1,an:1};return"march"!=a.normalised&&"april"!=a.normalised&&"may"!=a.normalised||!(e&&"CD"==e.pos.tag||d&&"CD"==d.pos.tag)||(a.pos=v.CD,a.pos_reason="may_is_date"),e&&"noun"!==a.pos.parent&&"glue"!==a.pos.parent&&"MD"===e.pos.tag&&(a.pos=v.NN,a.pos_reason="before_modal"),d&&"will"==d.normalised&&!d.punctuated&&"noun"==a.pos.parent&&"PRP"!==a.pos.tag&&(a.pos=v.VB,a.pos_reason="after_will"),d&&"i"==d.normalised&&!d.punctuated&&"noun"==a.pos.parent&&(a.pos=v.VB,a.pos_reason="after_i"),d&&"noun"===a.pos.parent&&"PRP"!==a.pos.tag&&"RB"===d.pos.tag&&!d.start&&(a.pos=v.VB,a.pos_reason="after_adverb"),e&&"adjective"===a.pos.parent&&"adjective"===e.pos.parent&&!a.punctuated&&(a.pos=v.RB,a.pos_reason="consecutive_adjectives"),d&&"verb"===a.pos.parent&&f[d.pos.normalised]&&"CP"!=a.pos.tag&&(a.pos=v.NN,a.pos_reason="determiner-verb"),d&&"CP"===d.pos.tag&&"DT"!==a.pos.tag&&"RB"!==a.pos.tag&&"adjective"!==a.pos.parent&&"value"!==a.pos.parent&&(a.pos=v.JJ,a.pos_reason="copula-adjective"),d&&e&&"CP"===d.pos.tag&&"RB"===a.pos.tag&&"verb"===e.pos.parent&&(c.tokens[b+1].pos=v.JJ,c.tokens[b+1].pos_reason="copula-adverb-adjective"),e&&"PRP"==e.pos.tag&&"noun"==a.pos.parent&&!a.punctuated&&(a.pos=v.VB,a.pos_reason="before_[him|her|it]"),d&&e&&"DT"===d.pos.tag&&"noun"===e.pos.parent&&"verb"===a.pos.parent&&(a.pos=v.JJ,a.pos_reason="determiner-adjective-noun"),a},h=function(a){for(var b,c,d,e={"i'd":["i","would"],"she'd":["she","would"],"he'd":["he","would"],"they'd":["they","would"],"we'd":["we","would"],"i'll":["i","will"],"she'll":["she","will"],"he'll":["he","will"],"they'll":["they","will"],"we'll":["we","will"],"i've":["i","have"],"they've":["they","have"],"we've":["we","have"],"should've":["should","have"],"would've":["would","have"],"could've":["could","have"],"must've":["must","have"],"i'm":["i","am"],"he's":["he","is"],"she's":["she","is"],"we're":["we","are"],"they're":["they","are"],cannot:["can","not"]},f=0;f4){var d=a.normalised.substr(c-4,c-1);if(u.hasOwnProperty(d))return a.pos=v[u[d]],a.pos_reason="wordnet suffix",a}var g=f(a.normalised);return g?(a.pos=g,a.pos_reason="regex suffix",a):parseFloat(a.normalised)?(a.pos=v.CD,a.pos_reason="parsefloat",a):a}),a.tokens=a.tokens.map(function(a,b){return"lexicon"!==a.pos_reason&&a.normalised.match(/.ed$/)&&(a.pos=v.VB,a.pos_reason="ed"),a});var c=null,d="";a.tokens=a.tokens.map(function(b,e){var f=a.tokens[e+1];if(b.pos){if("the"==b.normalised||"a"==b.normalised||"an"==b.normalised||"PP"===b.pos.tag)return c="noun",d=b.pos.name,b;if("PRP"===b.pos.tag||"MD"===b.pos.tag)return c="verb",d=b.pos.name,b}return b.pos&&("verb"==c&&"noun"==b.pos.parent&&(!f||f.pos&&"noun"!=f.pos.parent)&&(f&&f.pos&&f.pos.parent==c||(b.pos=v.VB, +b.pos_reason="signal from "+d,c=null)),"noun"==c&&"verb"==b.pos.parent&&(!f||f.pos&&"verb"!=f.pos.parent)&&(f&&f.pos&&f.pos.parent==c||(b.pos=v.NN,b.pos_reason="signal from "+d,c=null))),c&&!b.pos&&(f&&f.pos&&f.pos.parent==c||(b.pos=v[c],b.pos_reason="signal from "+d,c=null)),"verb"===c&&b.pos&&"verb"===b.pos.parent&&(c=null),"noun"===c&&b.pos&&"noun"===b.pos.parent&&(c=null),b});var i={};a.tokens.forEach(function(a){a.pos&&(i[a.pos.parent]=!0)}),a.tokens=a.tokens.map(function(a,b){if(!a.pos){if(i.adjective&&i.noun&&!i.verb)return a.pos=v.VB,a.pos_reason="need one verb",i.verb=!0,a;a.pos=v.NN,a.pos_reason="noun fallback"}return a}),a.tokens=a.tokens.map(function(b,c){return g(b,c,a)}),a.tokens=a.tokens.map(function(b,c){return g(b,c,a)})}),b.dont_combine||(i=i.map(function(a){return c(a)}),i=i.map(function(a){return d(a)})),i=i.map(function(a){var b=new R(a.tokens);return b.type=a.type,b}),i=i.map(function(a){return a.tokens=a.tokens.map(function(b,c){return b.analysis=O[b.pos.parent](b.normalised,a,c),b}),a}),i=i.map(function(a,b){return a.last=i[b-1],a.next=i[b+1],a}),new S(i)};return"undefined"!=typeof module&&module.exports&&(module.exports=i),i}(),U=function(){"undefined"!=typeof module&&module.exports&&(T=require("./pos"));var a=function(a,b){b=b||{};var c=T(a,b).sentences,d=c.reduce(function(a,c){return a.concat(c.entities(b))},[]),e={};return d=d.filter(function(a){return a.analysis.is_person()&&a.normalised.split(" ").forEach(function(a){e[a]=!0}),e[a.normalised]?!1:!0})};return"undefined"!=typeof module&&module.exports&&(module.exports=a),a}();if("undefined"!=typeof module&&module.exports)var O=require("./src/parents/parents"),m=require("./src/methods/tokenization/sentence").sentences,o=require("./src/methods/tokenization/tokenize").tokenize,n=require("./src/methods/tokenization/ngram").ngram,p=require("./src/methods/transliteration/unicode_normalisation"),q=require("./src/methods/syllables/syllable"),V=require("./src/methods/localization/britishize"),s=V.americanize,r=V.britishize,T=require("./src/pos"),U=require("./src/spot");var W={noun:O.noun,adjective:O.adjective,verb:O.verb,adverb:O.adverb,value:O.value,sentences:m,ngram:n,tokenize:o,americanize:s,britishize:r,syllables:q,normalize:p.normalize,denormalize:p.denormalize,pos:T,spot:U};return"undefined"!=typeof module&&module.exports&&(module.exports=W),W}(); \ No newline at end of file diff --git a/src/parents/noun/index.js b/src/parents/noun/index.js index 18e5c26a7..1a77c87a6 100644 --- a/src/parents/noun/index.js +++ b/src/parents/noun/index.js @@ -244,6 +244,53 @@ var Noun = function(str, sentence, word_i) { return "it" } + //tokens that refer to the same thing. "[obama] is cool, [he] is nice." + the.referenced_by = function() { + //if it's named-noun, look forward for the pronouns pointing to it -> '... he' + if(token && token.pos.tag!=="PRP"){ + var prp=the.pronoun() + //look at rest of sentence + var interested=sentence.tokens.slice(word_i+1, sentence.tokens.length) + //add next sentence too, could go further.. + if(sentence.next){ + interested=interested.concat(sentence.next.tokens) + } + //find the matching pronouns, and break if another noun overwrites it + var matches=[] + for(var i=0; i