diff --git a/librisworks/scripts/display-works.groovy b/librisworks/scripts/display-works.groovy index 281ab8ca7a..c26eb0ddf9 100644 --- a/librisworks/scripts/display-works.groovy +++ b/librisworks/scripts/display-works.groovy @@ -2,7 +2,7 @@ import se.kb.libris.mergeworks.Doc import se.kb.libris.mergeworks.Html import se.kb.libris.mergeworks.WorkComparator -import static se.kb.libris.mergeworks.Util.partition +import static se.kb.libris.mergeworks.Util.workClusters htmlReport = getReportWriter('works.html') @@ -28,15 +28,6 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster -> htmlReport.println(Html.END) -Collection> workClusters(Collection docs, WorkComparator c) { - docs.each { it.addComparisonProps() } - - def workClusters = partition(docs, { Doc a, Doc b -> c.sameWork(a, b) }) - .each { work -> work.each { doc -> doc.removeComparisonProps() } } - - return workClusters -} - Doc createNewWork(Map workData) { workData['@id'] = "TEMPID#it" Map data = [ diff --git a/librisworks/scripts/merge-works.groovy b/librisworks/scripts/merge-works.groovy index 2afa00ccbd..6f9fb5bf99 100644 --- a/librisworks/scripts/merge-works.groovy +++ b/librisworks/scripts/merge-works.groovy @@ -2,7 +2,7 @@ import se.kb.libris.mergeworks.Html import se.kb.libris.mergeworks.WorkComparator import se.kb.libris.mergeworks.Doc -import static se.kb.libris.mergeworks.Util.partition +import static se.kb.libris.mergeworks.Util.workClusters maybeDuplicates = getReportWriter("maybe-duplicate-linked-works.tsv") multiWorkReport = getReportWriter("multi-work-clusters.html") @@ -46,7 +46,7 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster -> uniqueWorksAndTheirInstances.add(new Tuple2(linkedWorks.find(), localWorks)) } else { maybeDuplicates.println(linkedWorks.collect { it.shortId() }.join('\t')) - System.err.println("Local works ${localWorks.collect { it.shortId() }} match multiple linked works: ${linkedWorks.collect { it.shortId() }}. Duplicate linked works?") + System.err.println("Local works ${localWorks.collect { it.shortId() }} match multiple linked works: ${linkedWorks.collect { it.shortId() }}. Duplicated linked works?") } } @@ -102,19 +102,6 @@ void saveAndLink(Doc workDoc, Collection instanceDocs = [], boolean existsI } } -Collection> workClusters(Collection docs, WorkComparator c) { - docs.each { - if (it.instanceData) { - it.addComparisonProps() - } - } - - def workClusters = partition(docs, { Doc a, Doc b -> c.sameWork(a, b) }) - .each { work -> work.each { doc -> doc.removeComparisonProps() } } - - return workClusters -} - Doc createNewWork(Map workData) { workData['@id'] = "TEMPID#it" Map data = [ diff --git a/librisworks/scripts/swedish-fiction.groovy b/librisworks/scripts/swedish-fiction.groovy index dacab96ea7..4e6ec15086 100644 --- a/librisworks/scripts/swedish-fiction.groovy +++ b/librisworks/scripts/swedish-fiction.groovy @@ -17,6 +17,7 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) {cluster -> && !doc.isDrama() && !doc.isThesis() && !doc.isInSb17Bibliography() + && !doc.intendedForMarcPreAdolescent() } def swedish = { Doc doc -> diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy index 11bb47af05..88f9e65618 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Doc.groovy @@ -144,6 +144,10 @@ class Doc { asList(workData['genreForm']) } + List intendedAudience() { + asList(workData['intendedAudience']) + } + List publication() { asList(instanceData?.publication) } @@ -211,6 +215,10 @@ class Doc { || hasRelationshipWithContribution() } + boolean intendedForMarcPreAdolescent() { + intendedAudience().contains(['@id': 'https://id.kb.se/marc/PreAdolescent']) + } + boolean hasPart() { workData['hasPart'] || instanceData['hasTitle'].findAll { it['@type'] == 'Title' }.any { it.hasPart?.size() > 1 || it.hasPart?.any { p -> asList(p.partName).size() > 1 || asList(p.partNumber).size() > 1 } diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy index f28a486b59..dfc94ed985 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy @@ -5,6 +5,8 @@ import whelk.Whelk import whelk.util.DocumentUtil import whelk.util.Unicode +import static se.kb.libris.mergeworks.compare.IntendedAudience.preferredComparisonOrder + class Util { static def titleComponents = ['mainTitle', 'titleRemainder', 'subtitle', 'hasPart', 'partNumber', 'partName', 'marc:parallelTitle', 'marc:equalTitle'] @@ -308,4 +310,17 @@ class Util { ? normalize("${agent.givenName} ${agent.familyName}") : agent.name ? normalize("${agent.name}") : null } + + static Collection> workClusters(Collection docs, WorkComparator c) { + docs.each { + if (it.instanceData) { + it.addComparisonProps() + } + }.with { preferredComparisonOrder(it) } + + def workClusters = partition(docs, { Doc a, Doc b -> c.sameWork(a, b) }) + .each { work -> work.each { doc -> doc.removeComparisonProps() } } + + return workClusters + } } \ No newline at end of file diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/WorkComparator.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/WorkComparator.groovy index 4d6ca48c00..0e6e064fed 100644 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/WorkComparator.groovy +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/WorkComparator.groovy @@ -11,10 +11,10 @@ class WorkComparator { Map comparators = [ 'classification' : new Classification(), - 'contentType' : new SameOrEmpty('https://id.kb.se/term/rda/Text'), + 'contentType' : new ContentType(), 'genreForm' : new GenreForm(), 'hasTitle' : new WorkTitle(), - 'intendedAudience': new SameOrEmpty('https://id.kb.se/marc/Juvenile'), + 'intendedAudience': new IntendedAudience(), '_numPages' : new Extent(), 'subject' : new Subject(), 'summary' : new StuffSet(), diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/ContentType.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/ContentType.groovy new file mode 100644 index 0000000000..4d0d3e498b --- /dev/null +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/ContentType.groovy @@ -0,0 +1,12 @@ +package se.kb.libris.mergeworks.compare + +import static se.kb.libris.mergeworks.Util.asList + +class ContentType extends StuffSet { + private static def allowedValues = ['https://id.kb.se/term/rda/StillImage', 'https://id.kb.se/term/rda/Text'] + + @Override + boolean isCompatible(Object a, Object b) { + asList(a).every { it['@id'] in allowedValues } && asList(b).every { it['@id'] in allowedValues } + } +} diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/IntendedAudience.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/IntendedAudience.groovy new file mode 100644 index 0000000000..ce2569f866 --- /dev/null +++ b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/IntendedAudience.groovy @@ -0,0 +1,24 @@ +package se.kb.libris.mergeworks.compare + +import se.kb.libris.mergeworks.Doc + +import static se.kb.libris.mergeworks.Util.asList + +class IntendedAudience extends StuffSet { + private static def GENERAL = ['@id': 'https://id.kb.se/marc/General'] + private static def ADULT = ['@id': 'https://id.kb.se/marc/Adult'] + + @Override + boolean isCompatible(Object a, Object b) { + !a || !b || asList(a) == [GENERAL] || asList(b) == [GENERAL] + || !(asList(a) + asList(b)).findResults { it == ADULT }.containsAll([true, false]) + } + + static void preferredComparisonOrder(Collection docs) { + docs.sort { Doc d -> + d.intendedAudience().with { + it.isEmpty() || it == [GENERAL] || it == [ADULT] + } + }.reverse(true) + } +} diff --git a/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/SameOrEmpty.groovy b/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/SameOrEmpty.groovy deleted file mode 100644 index 10826079b3..0000000000 --- a/librisworks/src/main/groovy/se/kb/libris/mergeworks/compare/SameOrEmpty.groovy +++ /dev/null @@ -1,21 +0,0 @@ -package se.kb.libris.mergeworks.compare - -import static se.kb.libris.mergeworks.Util.asList - -class SameOrEmpty implements FieldHandler { - Object link - - SameOrEmpty(String iri) { - this.link = [['@id': iri]] - } - - @Override - boolean isCompatible(Object a, Object b) { - (!a && asList(b) == link) || (!b && asList(a) == link) - } - - @Override - Object merge(Object a, Object b) { - return a ?: b - } -} \ No newline at end of file