Skip to content

Commit

Permalink
lddbToTrig: remove non-printable characters to make Virtuoso happier
Browse files: browse the repository at this point in the history
  • Loading branch information
andersju committed Oct 17, 2023
1 parent 75d2eee commit f6a271e
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions importers/src/main/groovy/whelk/importer/ImporterMain.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,16 @@ class ImporterMain {
}

private static void filterProblematicData(id, data) {
if (data instanceof Collection) {
data.eachWithIndex { it, index ->
if (it instanceof String) {
// Virtuoso bulk load doesn't like some unusual characters, such as 0x02,
// so remove invisible control characters and unused code points
data[index] = it.replaceAll(/\p{C}/, "")
}
}
}

if (data instanceof Map) {
data.removeAll { entry ->
return entry.key.startsWith("generic") || entry.key.equals("marc:hasGovernmentDocumentClassificationNumber") || (entry.key.equals("encodingLevel") && entry.value instanceof String && entry.value.contains(" "))
Expand Down

0 comments on commit f6a271e

Please sign in to comment.