Skip to content

Commit

Permalink
Merge pull request #1302 from libris/feature/softer-work-matching
Browse files Browse the repository at this point in the history
Feature/softer work matching
  • Loading branch information
kwahlin authored Sep 8, 2023
2 parents ee2e597 + 12639b7 commit e7aa105
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 48 deletions.
11 changes: 1 addition & 10 deletions librisworks/scripts/display-works.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import se.kb.libris.mergeworks.Doc
import se.kb.libris.mergeworks.Html
import se.kb.libris.mergeworks.WorkComparator

import static se.kb.libris.mergeworks.Util.partition
import static se.kb.libris.mergeworks.Util.workClusters

htmlReport = getReportWriter('works.html')

Expand All @@ -28,15 +28,6 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster ->

htmlReport.println(Html.END)

/**
 * Groups the given docs into clusters of docs that the comparator regards as the same work.
 * Every doc gets its comparison props added before partitioning and removed again afterwards.
 *
 * @param docs the docs to cluster
 * @param c comparator whose sameWork(a, b) decides whether two docs belong to the same cluster
 * @return the clusters returned by partition
 */
Collection<Collection<Doc>> workClusters(Collection<Doc> docs, WorkComparator c) {
    docs.each { doc -> doc.addComparisonProps() }

    def isSameWork = { Doc a, Doc b -> c.sameWork(a, b) }
    def clusters = partition(docs, isSameWork)

    // Remove the comparison props again once partitioning is done.
    clusters.each { cluster ->
        cluster.each { doc -> doc.removeComparisonProps() }
    }

    return clusters
}

Doc createNewWork(Map workData) {
workData['@id'] = "TEMPID#it"
Map data = [
Expand Down
17 changes: 2 additions & 15 deletions librisworks/scripts/merge-works.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import se.kb.libris.mergeworks.Html
import se.kb.libris.mergeworks.WorkComparator
import se.kb.libris.mergeworks.Doc

import static se.kb.libris.mergeworks.Util.partition
import static se.kb.libris.mergeworks.Util.workClusters

maybeDuplicates = getReportWriter("maybe-duplicate-linked-works.tsv")
multiWorkReport = getReportWriter("multi-work-clusters.html")
Expand Down Expand Up @@ -46,7 +46,7 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) { cluster ->
uniqueWorksAndTheirInstances.add(new Tuple2(linkedWorks.find(), localWorks))
} else {
maybeDuplicates.println(linkedWorks.collect { it.shortId() }.join('\t'))
System.err.println("Local works ${localWorks.collect { it.shortId() }} match multiple linked works: ${linkedWorks.collect { it.shortId() }}. Duplicate linked works?")
System.err.println("Local works ${localWorks.collect { it.shortId() }} match multiple linked works: ${linkedWorks.collect { it.shortId() }}. Duplicated linked works?")
}
}

Expand Down Expand Up @@ -102,19 +102,6 @@ void saveAndLink(Doc workDoc, Collection<Doc> instanceDocs = [], boolean existsI
}
}

/**
 * Groups the given docs into clusters of docs that the comparator regards as the same work.
 * Comparison props are added only to docs that have instanceData; they are removed from
 * every clustered doc once partitioning is done.
 *
 * @param docs the docs to cluster
 * @param c comparator whose sameWork(a, b) decides whether two docs belong to the same cluster
 * @return the clusters returned by partition
 */
Collection<Collection<Doc>> workClusters(Collection<Doc> docs, WorkComparator c) {
    docs.each { doc ->
        // Docs without instanceData are left untouched.
        if (!doc.instanceData) {
            return
        }
        doc.addComparisonProps()
    }

    def isSameWork = { Doc a, Doc b -> c.sameWork(a, b) }
    def clusters = partition(docs, isSameWork)

    clusters.each { cluster ->
        cluster.each { doc -> doc.removeComparisonProps() }
    }

    return clusters
}

Doc createNewWork(Map workData) {
workData['@id'] = "TEMPID#it"
Map data = [
Expand Down
1 change: 1 addition & 0 deletions librisworks/scripts/swedish-fiction.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ new File(System.getProperty('clusters')).splitEachLine(~/[\t ]+/) {cluster ->
&& !doc.isDrama()
&& !doc.isThesis()
&& !doc.isInSb17Bibliography()
&& !doc.intendedForMarcPreAdolescent()
}

def swedish = { Doc doc ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ class Doc {
asList(workData['genreForm'])
}

/**
 * @return the 'intendedAudience' value of workData, passed through asList
 */
List<Map> intendedAudience() {
    def audience = workData['intendedAudience']
    return asList(audience)
}

/**
 * @return the 'publication' value of instanceData (null-safe), passed through asList
 */
List<Map> publication() {
    def publications = instanceData?.publication
    return asList(publications)
}
Expand Down Expand Up @@ -211,6 +215,10 @@ class Doc {
|| hasRelationshipWithContribution()
}

/**
 * @return true if the intended audience contains the marc/PreAdolescent link
 */
boolean intendedForMarcPreAdolescent() {
    def preAdolescent = ['@id': 'https://id.kb.se/marc/PreAdolescent']
    return intendedAudience().contains(preAdolescent)
}

boolean hasPart() {
workData['hasPart'] || instanceData['hasTitle'].findAll { it['@type'] == 'Title' }.any {
it.hasPart?.size() > 1 || it.hasPart?.any { p -> asList(p.partName).size() > 1 || asList(p.partNumber).size() > 1 }
Expand Down
15 changes: 15 additions & 0 deletions librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import whelk.Whelk
import whelk.util.DocumentUtil
import whelk.util.Unicode

import static se.kb.libris.mergeworks.compare.IntendedAudience.preferredComparisonOrder

class Util {
static def titleComponents = ['mainTitle', 'titleRemainder', 'subtitle', 'hasPart', 'partNumber', 'partName', 'marc:parallelTitle', 'marc:equalTitle']

Expand Down Expand Up @@ -298,4 +300,17 @@ class Util {
? normalize("${agent.givenName} ${agent.familyName}")
: agent.name ? normalize("${agent.name}") : null
}

/**
 * Groups the given docs into clusters of docs that the comparator regards as the same work.
 *
 * Comparison props are added to every doc that has instanceData, the docs are put in the
 * preferred comparison order, partitioned, and finally the comparison props are removed
 * from every clustered doc.
 *
 * The ordering is applied to a private copy of docs, so it takes effect for any kind of
 * Collection (not only Lists) and the caller's collection is not reordered.
 *
 * @param docs the docs to cluster
 * @param c comparator whose sameWork(a, b) decides whether two docs belong to the same cluster
 * @return the clusters returned by partition
 */
static Collection<Collection<Doc>> workClusters(Collection<Doc> docs, WorkComparator c) {
    docs.each { doc ->
        if (doc.instanceData) {
            doc.addComparisonProps()
        }
    }

    // preferredComparisonOrder returns nothing and relies on sorting its argument in place,
    // which Groovy only does for Lists. Hand it a List copy so the order is never lost.
    List<Doc> ordered = new ArrayList<Doc>(docs)
    preferredComparisonOrder(ordered)

    def clusters = partition(ordered, { Doc a, Doc b -> c.sameWork(a, b) })

    // Remove the comparison props again once partitioning is done.
    clusters.each { cluster ->
        cluster.each { doc -> doc.removeComparisonProps() }
    }

    return clusters
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class WorkComparator {

Map<String, FieldHandler> comparators = [
'classification' : new Classification(),
'contentType' : new SameOrEmpty('https://id.kb.se/term/rda/Text'),
'contentType' : new ContentType(),
'genreForm' : new GenreForm(),
'hasTitle' : new WorkTitle(),
'intendedAudience': new SameOrEmpty('https://id.kb.se/marc/Juvenile'),
'intendedAudience': new IntendedAudience(),
'_numPages' : new Extent(),
'subject' : new Subject(),
'summary' : new StuffSet(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package se.kb.libris.mergeworks.compare

import static se.kb.libris.mergeworks.Util.asList

/**
 * Field handler for contentType: two values are compatible only if every entry on both
 * sides has an '@id' among the allowed values (rda/StillImage and rda/Text).
 */
class ContentType extends StuffSet {
    private static def allowedValues = ['https://id.kb.se/term/rda/StillImage', 'https://id.kb.se/term/rda/Text']

    /**
     * @param a content type value(s) of one side
     * @param b content type value(s) of the other side
     * @return true if all entries of both a and b have an allowed '@id'
     */
    @Override
    boolean isCompatible(Object a, Object b) {
        boolean firstAllowed = asList(a).every { it['@id'] in allowedValues }
        if (!firstAllowed) {
            return false
        }
        return asList(b).every { it['@id'] in allowedValues }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package se.kb.libris.mergeworks.compare

import se.kb.libris.mergeworks.Doc

import static se.kb.libris.mergeworks.Util.asList

/**
 * Field handler for intendedAudience, plus a helper that puts docs in the order in which
 * they should be compared.
 */
class IntendedAudience extends StuffSet {
    private static def GENERAL = ['@id': 'https://id.kb.se/marc/General']
    private static def ADULT = ['@id': 'https://id.kb.se/marc/Adult']

    /**
     * Two audience values are compatible when either one is empty, either one is exactly
     * [GENERAL], or the combined entries are not a mix of Adult and non-Adult.
     *
     * @param a intended audience value(s) of one side
     * @param b intended audience value(s) of the other side
     * @return true if the two values are considered compatible
     */
    @Override
    boolean isCompatible(Object a, Object b) {
        if (!a || !b) {
            return true
        }
        if (asList(a) == [GENERAL] || asList(b) == [GENERAL]) {
            return true
        }
        // One flag per entry: true for Adult, false for anything else.
        def adultFlags = (asList(a) + asList(b)).findResults { it == ADULT }
        return !adultFlags.containsAll([true, false])
    }

    /**
     * Sorts docs so that those whose intended audience is empty, exactly [GENERAL] or
     * exactly [ADULT] come first. The sort and the reversal work on docs itself when it
     * is a List.
     *
     * @param docs the docs to reorder
     */
    static void preferredComparisonOrder(Collection<Doc> docs) {
        docs.sort { Doc d ->
            // Bind the audience to a local instead of using `with`: inside `with` the list
            // is the closure delegate and names are resolved delegate-first, and property
            // access on a list spreads over its elements. GENERAL and ADULT would then be
            // looked up on the audience entries instead of resolving to the constants above.
            def audience = d.intendedAudience()
            audience.isEmpty() || audience == [GENERAL] || audience == [ADULT]
        }.reverse(true)
    }
}

This file was deleted.

0 comments on commit e7aa105

Please sign in to comment.