From 822fe39d45c5a561abd4fbc25096fcc360376f4d Mon Sep 17 00:00:00 2001 From: David Yip Date: Tue, 29 Dec 2015 13:50:52 -0600 Subject: [PATCH] Handle streets where the name overlaps the street-type map. Some addresses like 14168 W RIVER RD COLUMBIA STATION, OH 44028-9430 are interpreted with the street being "W", the street type as "River" (which abbreviates to "riv"), and the city as "RD \nCOLUMBIA STATION". The example addresses are all in Ohio because that's my current dataset, but it's not an Ohio-specific phenomenon: for example, in Illinois, there's a River Road that follows the Des Plaines River. The erroneous parse appears to be from the "100 South Street" special case in the street regexp. This commit adds a second special case with higher precedence, matching [prefix, non-numeric street, street type] sequences. The street match excludes numerics to preserve the existing parse behavior for the "6641 N 2200 W Apt D304 Park City, UT 84098" case. --- lib/street_address.rb | 8 ++++++++ test/street_address_test.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/lib/street_address.rb b/lib/street_address.rb index 7e74eb7..cf0ff37 100644 --- a/lib/street_address.rb +++ b/lib/street_address.rb @@ -581,6 +581,14 @@ class << self # note that expressions like [^,]+ may scan more than you expect self.street_regexp = / (?: + # special case for addresses like 14168 W River Rd and 3301 N Park + # Blvd, where the street name matches one of the street types + (?: + (?<prefix> #{direct_regexp})\W+ + (?<street> [^\d]+)\W+ + (?<street_type> #{street_type_regexp})\b + ) + | # special case for addresses like 100 South Street (?:(?<street> #{direct_regexp})\W+ (?<street_type> 
#{street_type_regexp})\b diff --git a/test/street_address_test.rb b/test/street_address_test.rb index 5611e06..7c4abc4 100644 --- a/test/street_address_test.rb +++ b/test/street_address_test.rb @@ -259,6 +259,36 @@ class StreetAddressUsTest < MiniTest::Test :street_type => 'Rd', :state => 'CO' }, + "14168 W RIVER RD \nCOLUMBIA STATION, OH 44028-9430" => { # overlapping street type and road name + :city => 'Columbia Station', + :postal_code => '44028', + :postal_code_ext => '9430', + :number => '14168', + :street => 'River', + :street_type => 'Rd', + :state => 'OH', + :prefix => 'W' + }, + "555 E LAKE AVE \nBELLEFONTAINE, OH 43311-2509" => { + :city => 'Bellefontaine', + :postal_code => '43311', + :postal_code_ext => '2509', + :number => '555', + :street => 'Lake', + :street_type => 'Ave', + :state => 'OH', + :prefix => 'E' + }, + "19600 N PARK BLVD \nSHAKER HEIGHTS, OH 44122-1825" => { + :city => 'Shaker Heights', + :postal_code => '44122', + :postal_code_ext => '1825', + :number => '19600', + :street => 'Park', + :street_type => 'Blvd', + :state => 'OH', + :prefix => 'N' + }, "1234 COUNTY HWY 60E, Town, CO 12345" => { :city => 'Town', :postal_code => '12345',