Skip to content

Commit

Permalink
search: consolidate on GovernmentIDs for Business, fixup related parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
adamdecaf committed Jan 16, 2025
1 parent 8d2a894 commit 8b061bd
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 176 deletions.
2 changes: 1 addition & 1 deletion internal/integrity/ofac_csl_us_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestIntegrity_OFAC_US_CSL(t *testing.T) {
require.ElementsMatch(t, ofacEntity.Business.AltNames, cslUSEntity.Business.AltNames)
require.Equal(t, ofacEntity.Business.Created, cslUSEntity.Business.Created)
require.Equal(t, ofacEntity.Business.Dissolved, cslUSEntity.Business.Dissolved)
require.ElementsMatch(t, ofacEntity.Business.Identifiers, cslUSEntity.Business.Identifiers)
require.ElementsMatch(t, ofacEntity.Business.GovernmentIDs, cslUSEntity.Business.GovernmentIDs)

// Common Fields
require.Equal(t, ofacEntity.Contact, cslUSEntity.Contact)
Expand Down
28 changes: 7 additions & 21 deletions pkg/csl_us/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ func mapBusiness(src ENHANCED_XML.EntitiesEntity) *search.Business {

// Map business identifiers
if src.IdentityDocuments != nil {
business.Identifiers = mapIdentifiers(src.IdentityDocuments)
business.GovernmentIDs = mapGovernmentIDs(src.IdentityDocuments)
}

return business
Expand Down Expand Up @@ -230,7 +230,7 @@ func mapOrganization(src ENHANCED_XML.EntitiesEntity) *search.Organization {

// Map organization identifiers
if src.IdentityDocuments != nil {
org.Identifiers = mapIdentifiers(src.IdentityDocuments)
org.GovernmentIDs = mapGovernmentIDs(src.IdentityDocuments)
}

return org
Expand Down Expand Up @@ -467,7 +467,9 @@ func mapGovernmentIDs(docs *ENHANCED_XML.EntityIdentityDocuments) []search.Gover
Country: getCountryCode(doc.IssuingCountry),
Identifier: doc.DocumentNumber,
}
ids = append(ids, id)
if string(id.Type) != "" {
ids = append(ids, id)
}
}
return ids
}
Expand All @@ -493,7 +495,8 @@ func mapIDType(ref ENHANCED_XML.ReferenceValueReferenceType) search.GovernmentID
return search.GovernmentIDCUIT
case 1607: // Electoral Registry No.
return search.GovernmentIDElectoral
case 1581: // Business Registration Document #
case 1581, 1585, 91752, 91760, 91761:
// Business Registration Document, etc
return search.GovernmentIDBusinessRegisration
case 1619: // Commercial Registry Number
return search.GovernmentIDCommercialRegistry
Expand All @@ -510,23 +513,6 @@ func mapIDType(ref ENHANCED_XML.ReferenceValueReferenceType) search.GovernmentID
}
}

// mapIdentifiers converts every identity document in docs into a
// search.Identifier, carrying over the document type text as the name, the
// country code derived from the issuing country, and the document number.
//
// A nil docs yields a nil slice, as does a docs value that holds no identity
// documents.
func mapIdentifiers(docs *ENHANCED_XML.EntityIdentityDocuments) []search.Identifier {
	var out []search.Identifier
	if docs != nil {
		for _, doc := range docs.IdentityDocument {
			out = append(out, search.Identifier{
				Name:       doc.Type.Text,
				Country:    getCountryCode(doc.IssuingCountry),
				Identifier: doc.DocumentNumber,
			})
		}
	}
	return out
}

func mapAircraftType(value string) search.AircraftType {
switch strings.ToLower(value) {
case "cargo":
Expand Down
131 changes: 38 additions & 93 deletions pkg/ofac/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,70 +41,9 @@ var (
)

var (
dobPatterns = []string{
"02 Jan 2006", // 01 Apr 1950
"Jan 2006", // Sep 1958
"2006", // 1928
}
dobPatterns = []string{"02 Jan 2006", "Jan 2006", "2006"}
)

// Company Number 05527424 (United Kingdom)
// Company Number IMO 1991835.
// Commercial Registry Number 0411518776478 (Iran)
// Enterprise Number 0430.033.662 (Belgium).
// Tax ID No. 230810605961 (Russia).
// Trade License No. 04110179 (United Kingdom).
// UK Company Number 01019769 (United Kingdom)
// US FEIN 000920912 (United States).

// makeIdentifiers runs makeIdentifier once per needle, in order, and collects
// the non-nil results.
//
// Results are de-duplicated on the pair (Name, Country): only the first
// identifier produced for a given pair is kept. The returned slice is nil when
// nothing was found.
func makeIdentifiers(remarks []string, needles []string) []search.Identifier {
	var out []search.Identifier
	seen := make(map[string]bool)

	for _, needle := range needles {
		id := makeIdentifier(remarks, needle)
		if id == nil {
			continue
		}

		// Name and country together form the uniqueness key.
		key := id.Name + "|" + id.Country
		if seen[key] {
			continue
		}
		seen[key] = true
		out = append(out, *id)
	}
	return out
}

// makeIdentifier builds a single search.Identifier for suffix out of remarks.
//
// It first asks findMatchingRemarks for remarks matching suffix. When that
// finds nothing, it falls back to the first remark that identifierRegex
// matches and labels the captured value with suffix.
// NOTE(review): the fallback does not check suffix itself, so it appears any
// remark matched by identifierRegex can be attributed to suffix — confirm
// against identifierRegex's definition.
//
// Only the first candidate is used. If its value contains a country matched by
// countryParenRegex, the country is split out and stripped from the value.
// Returns nil when no candidate exists.
func makeIdentifier(remarks []string, suffix string) *search.Identifier {
	candidates := findMatchingRemarks(remarks, suffix)
	if len(candidates) == 0 {
		for i := range remarks {
			m := identifierRegex.FindStringSubmatch(remarks[i])
			if len(m) <= 1 {
				continue
			}
			candidates = append(candidates, remark{fullName: suffix, value: m[1]})
			break
		}
	}
	if len(candidates) == 0 {
		return nil
	}

	first := candidates[0]
	id := &search.Identifier{
		Name:       strings.TrimSpace(first.fullName),
		Identifier: first.value,
	}

	// The country is often given in parentheses at the end of the value.
	if m := countryParenRegex.FindStringSubmatch(first.value); len(m) > 1 {
		id.Country = m[1]
		// Drop the country portion so only the identifier itself remains.
		id.Identifier = strings.TrimSpace(countryParenRegex.ReplaceAllString(first.value, ""))
	}
	return id
}

func findDateStamp(matchingRemarks []remark) *time.Time {
return withFirstP(matchingRemarks, func(in remark) *time.Time {
t, err := parseTime(dobPatterns, in.value)
Expand Down Expand Up @@ -191,36 +130,38 @@ func ToEntity(sdn SDN, addresses []Address, comments []SDNComments, altIds []Alt
out.Business.Created = findDateStamp(findMatchingRemarks(remarks, "Organization Established Date"))
// out.Business.Dissolved = findDateStamp(findMatchingRemarks(remarks, "TODO(adam)"))

out.Business.Identifiers = makeIdentifiers(remarks, []string{
"Branch Unit Number",
"Business Number",
"Business Registration Document",
"Business Registration Number",
"Certificate of Incorporation Number",
"Chamber of Commerce Number",
"Chinese Commercial Code",
"Commercial Registry Number",
"Company Number",
"Company ID", // new: e.g., "Company ID: No. 59 531..."
"D-U-N-S Number", // new: e.g., "D-U-N-S Number 33-843-5672"
"Dubai Chamber of Commerce Membership No", // new
"Enterprise Number",
"Fiscal Code", // new: business tax identifiers
"Folio Mercantil No", // new: Mexican business registration
"Legal Entity Number",
"Matricula Mercantil No", // new: Colombian business registration
"Public Registration Number", // new
"Registration Number",
"RIF", // new: Venezuelan tax ID
"RUC", // new: Panama business registration
"Romanian C.R", // new: Romanian Commercial Registry
"Tax ID No.", // new: Important business identifier
"Trade License No", // new
"UK Company Number", // new: Specific UK format
"US FEIN", // new: US Federal Employer ID Number
"United Social Credit Code Certificate", // new: Chinese business ID
"V.A.T. Number", // new: VAT registration numbers
})
out.Business.GovernmentIDs = parseGovernmentIDs(remarks)

// out.Business.Identifiers = makeIdentifiers(remarks, []string{
// "Branch Unit Number",
// "Business Number",
// "Business Registration Document",
// "Business Registration Number",
// "Certificate of Incorporation Number",
// "Chamber of Commerce Number",
// "Chinese Commercial Code",
// "Commercial Registry Number",
// "Company Number",
// "Company ID", // new: e.g., "Company ID: No. 59 531..."
// "D-U-N-S Number", // new: e.g., "D-U-N-S Number 33-843-5672"
// "Dubai Chamber of Commerce Membership No", // new
// "Enterprise Number",
// "Fiscal Code", // new: business tax identifiers
// "Folio Mercantil No", // new: Mexican business registration
// "Legal Entity Number",
// "Matricula Mercantil No", // new: Colombian business registration
// "Public Registration Number", // new
// "Registration Number",
// "RIF", // new: Venezuelan tax ID
// "RUC", // new: Panama business registration
// "Romanian C.R", // new: Romanian Commercial Registry
// "Tax ID No.", // new: Important business identifier
// "Trade License No", // new
// "UK Company Number", // new: Specific UK format
// "US FEIN", // new: US Federal Employer ID Number
// "United Social Credit Code Certificate", // new: Chinese business ID
// "V.A.T. Number", // new: VAT registration numbers
// })

case "individual":
out.Type = search.EntityPerson
Expand Down Expand Up @@ -364,7 +305,9 @@ var (
governmentIDElectoralRegex = regexp.MustCompile(`(?i)Electoral\s+Registry\s+(?:No\.|Number)?\s*([A-Z0-9-]+)`)

// Business Registration
governmentIDBusinessRegistrationRegex = regexp.MustCompile(`(?i)Business\s+Registration\s+(?:No\.|Number|Document)?\s*([A-Z0-9-]+)`)
governmentIDBusinessRegistrationRegex = regexp.MustCompile(`(?i)Business\s+Registration\s+(?:No\.|Number|Document)?\s*([A-Z0-9-\.]+)`)
governmentIDCompanyNumberRegex = regexp.MustCompile(`(?i)Company\s+Number\s+([0-9]+)`)
governmentIDLegalEntityNumberRegex = regexp.MustCompile(`(?i)Legal\s+Entity\s+Number\s+([A-Za-z0-9\-\.]+)`)
governmentIDCommercialRegistryRegex = regexp.MustCompile(`(?i)Commercial\s+Registry\s+(?:No\.|Number)?\s*([A-Z0-9-./]+)`)

// Birth Certificates
Expand Down Expand Up @@ -393,6 +336,8 @@ func parseGovernmentIDs(remarks []string) []search.GovernmentID {
governmentIDCURPRegex: search.GovernmentIDCURP,
governmentIDElectoralRegex: search.GovernmentIDElectoral,
governmentIDBusinessRegistrationRegex: search.GovernmentIDBusinessRegisration,
governmentIDCompanyNumberRegex: search.GovernmentIDBusinessRegisration,
governmentIDLegalEntityNumberRegex: search.GovernmentIDBusinessRegisration,
governmentIDCommercialRegistryRegex: search.GovernmentIDCommercialRegistry,
governmentIDBirthCertRegex: search.GovernmentIDBirthCert,
governmentIDRefugeeRegex: search.GovernmentIDRefugee,
Expand Down
42 changes: 21 additions & 21 deletions pkg/ofac/mapper_business_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ func TestMapperBusiness__FromSource(t *testing.T) {
require.Equal(t, createdAt.Format(time.RFC3339), business.Created.Format(time.RFC3339))
require.Nil(t, business.Dissolved)

expectedIdentifiers := []search.Identifier{
{Name: "Company Number", Country: "Czech Republic", Identifier: "07486049"},
{Name: "Legal Entity Number", Country: "Czech Republic", Identifier: "5299007NTWCC3U23WM81"},
expectedGovernmentIDs := []search.GovernmentID{
{Type: search.GovernmentIDBusinessRegisration, Country: "Czech Republic", Identifier: "07486049"},
{Type: search.GovernmentIDBusinessRegisration, Country: "Czech Republic", Identifier: "5299007NTWCC3U23WM81"},
}
require.ElementsMatch(t, expectedIdentifiers, business.Identifiers)
require.ElementsMatch(t, expectedGovernmentIDs, business.GovernmentIDs)

expectedContact := search.ContactInfo{
Websites: []string{"suex.io"},
Expand Down Expand Up @@ -129,11 +129,11 @@ func TestMapperBusiness__FromSource(t *testing.T) {
require.Equal(t, createdAt.Format(time.RFC3339), business.Created.Format(time.RFC3339))
require.Nil(t, business.Dissolved)

expectedIdentifiers := []search.Identifier{
{Name: "Business Registration Number", Country: "Russia", Identifier: "1207700248030"},
{Name: "Tax ID No.", Country: "Russia", Identifier: "9709063550"},
expectedGovernmentIDs := []search.GovernmentID{
{Type: search.GovernmentIDBusinessRegisration, Country: "Russia", Identifier: "1207700248030"},
{Type: search.GovernmentIDTax, Country: "Russia", Identifier: "9709063550"},
}
require.ElementsMatch(t, expectedIdentifiers, business.Identifiers)
require.ElementsMatch(t, expectedGovernmentIDs, business.GovernmentIDs)

expectedContact := search.ContactInfo{
Websites: []string{"www.dialog.info", "www.dialog-regions.ru"},
Expand Down Expand Up @@ -173,26 +173,26 @@ func TestMapper__CompleteBusiness(t *testing.T) {

require.NotNil(t, e.Business)
require.Equal(t, "ACME CORPORATION", e.Business.Name)
require.Len(t, e.Business.Identifiers, 3)
require.Len(t, e.Business.GovernmentIDs, 3)

// Sort the identifiers to ensure consistent ordering for tests
identifiers := e.Business.Identifiers
sort.Slice(identifiers, func(i, j int) bool {
return identifiers[i].Country < identifiers[j].Country
govIDs := e.Business.GovernmentIDs
sort.Slice(govIDs, func(i, j int) bool {
return govIDs[i].Country < govIDs[j].Country
})

// Verify identifiers
require.Equal(t, "Hong Kong", identifiers[0].Country)
require.Equal(t, "Business Registration Number", identifiers[0].Name)
require.Equal(t, "51566843", identifiers[0].Identifier)
require.Equal(t, "Hong Kong", govIDs[0].Country)
require.Equal(t, search.GovernmentIDBusinessRegisration, govIDs[0].Type)
require.Equal(t, "51566843", govIDs[0].Identifier)

require.Equal(t, "Switzerland", identifiers[1].Country)
require.Equal(t, "Commercial Registry Number", identifiers[1].Name)
require.Equal(t, "CH-020.1.066.499-9", identifiers[1].Identifier)
require.Equal(t, "Switzerland", govIDs[1].Country)
require.Equal(t, search.GovernmentIDCommercialRegistry, govIDs[1].Type)
require.Equal(t, "CH-020.1.066.499-9", govIDs[1].Identifier)

require.Equal(t, "United Kingdom", identifiers[2].Country)
require.Equal(t, "Company Number", identifiers[2].Name)
require.Equal(t, "05527424", identifiers[2].Identifier)
require.Equal(t, "United Kingdom", govIDs[2].Country)
require.Equal(t, search.GovernmentIDBusinessRegisration, govIDs[2].Type)
require.Equal(t, "05527424", govIDs[2].Identifier)

// Verify other entity types are nil
require.Nil(t, e.Person)
Expand Down
30 changes: 10 additions & 20 deletions pkg/search/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,32 +119,22 @@ var (
)

type Business struct {
Name string `json:"name"`
AltNames []string `json:"altNames"`
Created *time.Time `json:"created"`
Dissolved *time.Time `json:"dissolved"`
Identifiers []Identifier `json:"identifiers"`
}

// Identifier
//
// TODO(adam): Look at OpenSanctions for tax ID codes
// https://www.opensanctions.org/reference/#schema.Company
type Identifier struct {
Name string `json:"string"`
Country string `json:"country"` // ISO-3166 // TODO(adam):
Identifier string `json:"value"`
Name string `json:"name"`
AltNames []string `json:"altNames"`
Created *time.Time `json:"created"`
Dissolved *time.Time `json:"dissolved"`
GovernmentIDs []GovernmentID `json:"governmentIDs"`
}

// Organization
//
// TODO(adam): https://www.opensanctions.org/reference/#schema.Organization
type Organization struct {
Name string `json:"name"`
AltNames []string `json:"altNames"`
Created *time.Time `json:"created"`
Dissolved *time.Time `json:"dissolved"`
Identifiers []Identifier `json:"identifier"`
Name string `json:"name"`
AltNames []string `json:"altNames"`
Created *time.Time `json:"created"`
Dissolved *time.Time `json:"dissolved"`
GovernmentIDs []GovernmentID `json:"governmentIDs"`
}

type Aircraft struct {
Expand Down
4 changes: 2 additions & 2 deletions pkg/search/similarity.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ func countBusinessFields(b *Business) int {
if b.Created != nil {
count++
}
if len(b.Identifiers) > 0 {
if len(b.GovernmentIDs) > 0 {
count++
}

Expand All @@ -394,7 +394,7 @@ func countOrganizationFields(o *Organization) int {
if o.Created != nil {
count++
}
if len(o.Identifiers) > 0 {
if len(o.GovernmentIDs) > 0 {
count++
}

Expand Down
Loading

0 comments on commit 8b061bd

Please sign in to comment.