Skip to content

Commit

Permalink
more place and org tagging
Browse files Browse the repository at this point in the history
  • Loading branch information
spencermountain committed Jun 8, 2024
1 parent f66569c commit 1a252e1
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 116 deletions.
34 changes: 29 additions & 5 deletions data/lexicon/places/places.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,37 @@ export default [
'east side',
'north side',
'south side',
'brookside',
'brookstone',
'centerville',
'clearwater',
'Maplewood',
'Pinecrest',
'Brookside',
'Riverside',
'Willowbrook',
'cedar grove',
'fairhaven',
'green acres',
'highland',
'highlands',
'hillcrest',
'lakefront',
'lowlands',
'lowland',
'maplewood',
'meadowlands',
'midlands',
'oakmont',
'pinecrest',
'riverbend',
'riverfront',
'riverside',
'springhill',
'springwood',
'willowbrook',
'sunset hills',
'oakwood',
'parkside',
'green valley',
'maple grove',
'pine ridge',
'greenfield',

//misc notable british
'abbotsford',
Expand Down
110 changes: 6 additions & 104 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,110 +4,12 @@ import nlp from './src/three.js'
// nlp.plugin(plg)
// nlp.verbose('tagger')

let doc = nlp(` Maplewood
Pinecrest
Oakville
Brookside
Springdale
Riverside
Willowbrook
Meadowview
Cedar Falls
Lakeview
Sunset Hills
Valley Springs
Mountainview
Greenfield
Parkville
Fairview
Woodland Heights
Harbor Town
Highland Park
Clearwater Junction
Maple Grove
Pine Ridge
Oak Valley
Brookhaven
Spring Valley
Riverdale
Willow Springs
Meadowbrook
Cedar Ridge
Lakeside
Sunset Terrace
Valley View
Mountain Valley
Green Hills
Parkside
Fair Oaks
Woodland Hills
Harbor Springs
Highland Village
Clear Creek
Maple Ridge
Pine Lake
Oakwood
Brookfield
Spring Creek
Riverbank
Willow Creek
Meadowland
Cedar Creek
Lake Shore
Sunset Ridge
Valley Ridge
Mountain Top
Green Valley
Park View
Fairbanks
Woodland Park
Harbor Point
Highlands
Clearview
Maple Lane
Pine Hill
Oakdale
Brooks Crossing
Springhill
Riverside Park
Willow Grove
Meadowlands
Cedar Springs
Lakeside Estates
Sunset View
Valley Heights
Mountain Vista
Green Acres
Park Lane
Fairmont
Woodside
Harbor Heights
High Point
Clearbrook
Maple Falls
Pinecrest Heights
Oakridge
Brookshire
Springville
Riverwood
Willow Lake
Meadow Heights
Cedar Grove
Lakeview Terrace
Sunset Place
Valley Park
Mountain Meadow
Greenfield Estates
Park Place
Fairfield
Woodridge
Harbor Village
Hillcrest
Clearview Heights
`)
.ifNo('#Place')
.debug()
const text_1 = 'There are twenty-four apples and 12000 oranges on the table.'
const doc = nlp(text_1)

doc.values(0).toNumber().debug()

// let doc = nlp(` `).debug()

// -bury
// -ford
Expand Down
7 changes: 7 additions & 0 deletions src/2-two/postTagger/model/nouns/organizations.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,11 @@ export default [
},
// 'toronto fc'
{ match: '#Place+ fc', tag: 'SportsTeam', reason: 'fc-sportsteam' },

// baltimore quilting club
{
match: '#Place+ #Noun{0,2} (club|society|group|team|committee|commission|association|guild|crew)',
tag: 'Organization',
reason: 'place-noun-society',
},
]
7 changes: 6 additions & 1 deletion src/2-two/postTagger/model/nouns/places.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ export default [
reason: 'address-st',
},
// port dover
{ match: '(port|mount) #ProperName', tag: 'Place', reason: 'port-name' },
{ match: '(port|mount|mt) #ProperName', tag: 'Place', reason: 'port-name' },
// generic 'oak ridge' names
// { match: '(oak|maple|spruce|pine|cedar|willow|green|sunset|sunrise) #Place', tag: 'Place', reason: 'tree-name' },
// generic 'sunset view' names
// { match: '() #Place', tag: 'Place', reason: 'tree-name' },

// Sports Arenas and Complexs
// {
// match:
Expand Down
9 changes: 9 additions & 0 deletions src/2-two/preTagger/compute/tagger/3rd-pass/04-placeWords.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ const placeCont = new Set([
'southern',
'state',
'western',
'spring',
'pine',
'sunset',
'view',
'oak',
'maple',
'spruce',
'cedar',
'willow',
])
// center of...
const noBefore = new Set(['center', 'centre', 'way', 'range', 'bar', 'bridge', 'field', 'pit'])
Expand Down
4 changes: 2 additions & 2 deletions src/2-two/preTagger/model/lexicon/_data.js

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/2-two/preTagger/model/patterns/suffixes.js
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,11 @@ export default [
hurst: Place,
stead: Place,
endon: Place,
brook: Place,
shire: Place,
worth: Noun,
field: Prop,
ridge: Place,
},
{
//6-letter
Expand All @@ -242,6 +245,7 @@ export default [
cedent: Sing,
ionary: Sing,
cklist: Sing,
brooke: Place,
keeper: Actor,
logist: Actor,
teenth: 'Value',
Expand All @@ -256,6 +260,7 @@ export default [
},
{
//7-letter
chester: Place,
logists: Actor,
opoulos: Last,
borough: Place,
Expand Down
14 changes: 13 additions & 1 deletion src/2-two/preTagger/model/placeWords.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ export default [
'cove',
'coves',
'crater',
'crossing',
'creek',
'desert',
'dune',
'dunes',
'downs',
'estates',
'escarpment',
'estuary',
'falls',
Expand All @@ -33,10 +35,12 @@ export default [
'glacier',
'gorge',
'gorges',
'grove',
'gulf',
'gully',
'highland',
'heights',
'hollow',
'hill',
'hills',
'inlet',
Expand All @@ -47,6 +51,7 @@ export default [
'knoll',
'lagoon',
'lake',
'lakeshore',
'marsh',
'marshes',
'mount',
Expand All @@ -72,6 +77,7 @@ export default [
'shores',
'strait',
'straits',
'springs',
'stream',
'swamp',
'tombolo',
Expand All @@ -80,11 +86,13 @@ export default [
'trench',
'valley',
'vallies',
'village',
'volcano',
'waterfall',
'watershed',
'wetland',
'woods',
'acres',

// districts
'burough',
Expand Down Expand Up @@ -145,6 +153,7 @@ export default [
'house',
'levee',
'library',
'manor',
'memorial',
'monument',
'museum',
Expand All @@ -171,6 +180,7 @@ export default [
'park',
'parks',
'site',
'ranch',
'raceway',
'sportsplex',

Expand All @@ -182,17 +192,19 @@ export default [
// 'civic centre',

// roads
'ave',
'st',
'street',
'rd',
'road',
'lane',
'landing',
'crescent',
'cr',
'way',
'tr',
'terrace',
'avenue',
'ave',
].reduce((h, str) => {
h[str] = true
return h
Expand Down
1 change: 0 additions & 1 deletion tests/three/people/people.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ test('people positives:', function (t) {
'Abie Malan',
'Christoph Zürcher',
'dmitry medvedev',
'emmeline pankhurst',
'diego maradona',
'dmitry medvedev',
'ebenezer scrooge',
Expand Down
3 changes: 1 addition & 2 deletions tests/two/variables/person-match.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ let arr = [
['Halle Berry', '#Person+'],
['Tom Brady', '#MaleName #LastName'],
['Matthew Broderick', '#MaleName #LastName'],
['Nathan Lane', '#MaleName #LastName'],
['Mel Brooks', '#Person+'],
['Dan Brown', '#MaleName #LastName'],
['Jerry Bruckheimer', '#MaleName #LastName'],
Expand Down Expand Up @@ -101,4 +100,4 @@ test('match:', function (t) {
t.equal(m.text(), doc.text(), here + msg)
})
t.end()
})
})

0 comments on commit 1a252e1

Please sign in to comment.