From e01ca83ea807069617676d846f4810af95e90afd Mon Sep 17 00:00:00 2001 From: kwahlin Date: Wed, 23 Aug 2023 16:04:03 +0200 Subject: [PATCH 1/6] Add more genre/form terms as criteria for when to keep illustrator on work --- librisworks/scripts/contributions-to-instance.groovy | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index 3d6ce7e82d..d57b804637 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -73,7 +73,8 @@ boolean isStillImage(Map work) { boolean isPictureBook(Map work) { def picBookTerms = [ 'https://id.kb.se/term/barngf/Bilderb%C3%B6cker', - 'https://id.kb.se/term/barngf/Sm%C3%A5barnsbilderb%C3%B6cker' + 'https://id.kb.se/term/barngf/Sm%C3%A5barnsbilderb%C3%B6cker', + 'https://id.kb.se/term/barngf/Pekb%C3%B6cker' ].collect { [(ID_KEY): it] } return asList(work.genreForm).any { it in picBookTerms } @@ -85,7 +86,8 @@ boolean isComics(Map work, Whelk whelk) { 'https://id.kb.se/term/barngf/Tecknade%20serier', 'https://id.kb.se/term/gmgpc/swe/Tecknade%20serier', 'https://id.kb.se/marc/ComicOrGraphicNovel', - 'https://id.kb.se/marc/ComicStrip' + 'https://id.kb.se/marc/ComicStrip', + 'https://id.kb.se/term/barngf/Bildromaner' ].collect { [(ID_KEY): it] } return asList(work.genreForm).any { From 48ec16a35d7902d148ee0237068f7fc74b24316f Mon Sep 17 00:00:00 2001 From: kwahlin Date: Fri, 25 Aug 2023 09:52:40 +0200 Subject: [PATCH 2/6] Decide in advance how to handle illustrators Also the criteria for keeping illustrator on work don't necessarily need to be met when instance type is Electronic. In that case it's sufficient if any of the works in the cluster meet the criteria. --- .../scripts/contributions-to-instance.groovy | 68 +++++++++++++++++-- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index d57b804637..598a388981 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -1,12 +1,18 @@ import whelk.Whelk +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.ConcurrentLinkedQueue + import static se.kb.libris.mergeworks.Util.Relator import static whelk.JsonLd.ID_KEY import static whelk.JsonLd.TYPE_KEY report = getReportWriter('report.tsv') +//mixed = getReportWriter('mixed.tsv') +//keep = getReportWriter('keep.tsv') +//moveFor = getReportWriter('move.tsv') -def ids = new File(System.getProperty('clusters')).collect { it.split('\t').collect { it.trim() } }.flatten() +def clusters = new File(System.getProperty('clusters')).collect { it.split('\t').collect { it.trim() } } def whelk = getWhelk() def instanceRolesByDomain = whelk.resourceCache.relators.findResults { @@ -16,16 +22,66 @@ def instanceRolesByDomain = whelk.resourceCache.relators.findResults { } } def instanceRoles = instanceRolesByDomain + [Relator.ILLUSTRATOR, Relator.AUTHOR_OF_INTRO, Relator.AUTHOR_OF_AFTERWORD].collect { [(ID_KEY): it.iri] } +def ill = [(ID_KEY): Relator.ILLUSTRATOR.iri] + +def keepIllustratorOnWorkForIds = [:] + +clusters.each { c -> + def keepOnWork = new ConcurrentHashMap() + def electronic = new ConcurrentHashMap() +// def move = new ConcurrentLinkedQueue() + + selectByIds(c) { bib -> + def id = bib.doc.shortId + Map instance = bib.graph[1] + Map work = instance.instanceOf + work.contribution?.each { contrib -> + if (asList(contrib.role).contains(ill)) { + def agent = asList(contrib.agent).find() + if (!agent) return + if (isPrimaryContribution(contrib) + || has9pu(contrib) + || isPictureBook(work) + || isComics(work, bib.whelk) + || isStillImage(work) + ) { + keepOnWork.computeIfAbsent(agent, f -> new ConcurrentLinkedQueue()).add(id) + } else if (instance[TYPE_KEY] == 'Electronic') { + electronic.computeIfAbsent(agent, f -> new ConcurrentLinkedQueue()).add(id) + } +// else { +// move.add(id) +// } + } + } + } -selectByIds(ids) { bib -> + keepOnWork.each { agent, ids -> + keepIllustratorOnWorkForIds.computeIfAbsent(agent, f -> [] as Set).with { s -> + s.addAll(ids) + if (electronic[agent]) { + s.addAll(electronic[agent]) + } + } + } + +// if (keepOnWork && move) { +// mixed.println(c.join('\t')) +// } else if (keepOnWork) { +// keep.println(c.join('\t')) +// } else if (move) { +// moveFor.println(c.join('\t')) +// } +} + +selectByIds(clusters.flatten()) { bib -> + def id = bib.doc.shortId Map instance = bib.graph[1] def work = instance.instanceOf def contribution = work?.contribution if (!contribution) return - def ill = [(ID_KEY): Relator.ILLUSTRATOR.iri] - def modified = false contribution.removeAll { c -> @@ -33,7 +89,9 @@ selectByIds(ids) { bib -> def toInstance = asList(c.role).intersect(instanceRoles) if (toInstance.contains(ill)) { - if (has9pu(c) || isPictureBook(work) || isComics(work, bib.whelk) || isStillImage(work)) { + def illustrator = asList(c.agent).find() + if (!illustrator) return + if (id in keepIllustratorOnWorkForIds[illustrator]) { toInstance.remove(ill) } } From 6ff2e40ca1a4219c962eafc9be471f31ce972823 Mon Sep 17 00:00:00 2001 From: kwahlin Date: Thu, 31 Aug 2023 14:52:23 +0200 Subject: [PATCH 3/6] Add https://id.kb.se/term/barngf/Manga to comics term list --- librisworks/scripts/contributions-to-instance.groovy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index 598a388981..5376def1b8 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -145,7 +145,8 @@ boolean isComics(Map work, Whelk whelk) { 'https://id.kb.se/term/gmgpc/swe/Tecknade%20serier', 'https://id.kb.se/marc/ComicOrGraphicNovel', 'https://id.kb.se/marc/ComicStrip', - 'https://id.kb.se/term/barngf/Bildromaner' + 'https://id.kb.se/term/barngf/Bildromaner', + 'https://id.kb.se/term/barngf/Manga' ].collect { [(ID_KEY): it] } return asList(work.genreForm).any { From 7de922d174fde9c76bfe4d0c0f83fedc10ee13bf Mon Sep 17 00:00:00 2001 From: kwahlin Date: Thu, 31 Aug 2023 15:27:34 +0200 Subject: [PATCH 4/6] Update illustrator criteria - 9pu and StillImage no longer sufficient criteria - Always do the same action (move or keep) for the same illustrator within a cluster - Clean up --- .../scripts/contributions-to-instance.groovy | 43 ++++++------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index 5376def1b8..176b4ef832 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -8,9 +8,6 @@ import static whelk.JsonLd.ID_KEY import static whelk.JsonLd.TYPE_KEY report = getReportWriter('report.tsv') -//mixed = getReportWriter('mixed.tsv') -//keep = getReportWriter('keep.tsv') -//moveFor = getReportWriter('move.tsv') def clusters = new File(System.getProperty('clusters')).collect { it.split('\t').collect { it.trim() } } @@ -28,8 +25,7 @@ def keepIllustratorOnWorkForIds = [:] clusters.each { c -> def keepOnWork = new ConcurrentHashMap() - def electronic = new ConcurrentHashMap() -// def move = new ConcurrentLinkedQueue() + def noIndicationOfKeeping = new ConcurrentHashMap() selectByIds(c) { bib -> def id = bib.doc.shortId @@ -40,18 +36,13 @@ clusters.each { c -> def agent = asList(contrib.agent).find() if (!agent) return if (isPrimaryContribution(contrib) - || has9pu(contrib) || isPictureBook(work) || isComics(work, bib.whelk) - || isStillImage(work) ) { keepOnWork.computeIfAbsent(agent, f -> new ConcurrentLinkedQueue()).add(id) - } else if (instance[TYPE_KEY] == 'Electronic') { - electronic.computeIfAbsent(agent, f -> new ConcurrentLinkedQueue()).add(id) + } else { + noIndicationOfKeeping.computeIfAbsent(agent, f -> new ConcurrentLinkedQueue()).add(id) } -// else { -// move.add(id) -// } } } } @@ -59,19 +50,11 @@ clusters.each { c -> keepOnWork.each { agent, ids -> keepIllustratorOnWorkForIds.computeIfAbsent(agent, f -> [] as Set).with { s -> s.addAll(ids) - if (electronic[agent]) { - s.addAll(electronic[agent]) + if (noIndicationOfKeeping[agent]) { + s.addAll(noIndicationOfKeeping[agent]) } } } - -// if (keepOnWork && move) { -// mixed.println(c.join('\t')) -// } else if (keepOnWork) { -// keep.println(c.join('\t')) -// } else if (move) { -// moveFor.println(c.join('\t')) -// } } selectByIds(clusters.flatten()) { bib -> @@ -120,13 +103,13 @@ boolean isPrimaryContribution(Map contribution) { contribution[TYPE_KEY] == 'PrimaryContribution' } -boolean has9pu(Map contribution) { - asList(contribution.role).contains([(ID_KEY): Relator.PRIMARY_RIGHTS_HOLDER.iri]) -} - -boolean isStillImage(Map work) { - asList(work.contentType).contains([(ID_KEY): 'https://id.kb.se/term/rda/StillImage']) -} +//boolean has9pu(Map contribution) { +// asList(contribution.role).contains([(ID_KEY): Relator.PRIMARY_RIGHTS_HOLDER.iri]) +//} +// +//boolean isStillImage(Map work) { +// asList(work.contentType).contains([(ID_KEY): 'https://id.kb.se/term/rda/StillImage']) +//} boolean isPictureBook(Map work) { def picBookTerms = [ @@ -135,7 +118,7 @@ boolean isPictureBook(Map work) { 'https://id.kb.se/term/barngf/Pekb%C3%B6cker' ].collect { [(ID_KEY): it] } - return asList(work.genreForm).any { it in picBookTerms } + return asList(work.genreForm).any { it in picBookTerms } || asList(work.classification).any { it.code == 'Hcf(yb)' } } boolean isComics(Map work, Whelk whelk) { From 3146d53aa7d49445a71c5358dbf91915241f9e22 Mon Sep 17 00:00:00 2001 From: kwahlin Date: Tue, 5 Sep 2023 13:17:16 +0200 Subject: [PATCH 5/6] =?UTF-8?q?Add=20barngf/kapitelb=C3=B6cker=20to=20pict?= =?UTF-8?q?ure=20book=20term=20list?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- librisworks/scripts/contributions-to-instance.groovy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index 176b4ef832..6df118f788 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -115,7 +115,8 @@ boolean isPictureBook(Map work) { def picBookTerms = [ 'https://id.kb.se/term/barngf/Bilderb%C3%B6cker', 'https://id.kb.se/term/barngf/Sm%C3%A5barnsbilderb%C3%B6cker', - 'https://id.kb.se/term/barngf/Pekb%C3%B6cker' + 'https://id.kb.se/term/barngf/Pekb%C3%B6cker', + 'https://id.kb.se/term/barngf/Kapitelb%C3%B6cker' ].collect { [(ID_KEY): it] } return asList(work.genreForm).any { it in picBookTerms } || asList(work.classification).any { it.code == 'Hcf(yb)' } From e14e39f54881587f20af266fb03f307ac6c2db3b Mon Sep 17 00:00:00 2001 From: kwahlin Date: Wed, 6 Sep 2023 14:31:11 +0200 Subject: [PATCH 6/6] =?UTF-8?q?Revert=20"Add=20barngf/kapitelb=C3=B6cker?= =?UTF-8?q?=20to=20picture=20book=20term=20list"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 3146d53aa7d49445a71c5358dbf91915241f9e22. --- librisworks/scripts/contributions-to-instance.groovy | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/librisworks/scripts/contributions-to-instance.groovy b/librisworks/scripts/contributions-to-instance.groovy index 6df118f788..176b4ef832 100644 --- a/librisworks/scripts/contributions-to-instance.groovy +++ b/librisworks/scripts/contributions-to-instance.groovy @@ -115,8 +115,7 @@ boolean isPictureBook(Map work) { def picBookTerms = [ 'https://id.kb.se/term/barngf/Bilderb%C3%B6cker', 'https://id.kb.se/term/barngf/Sm%C3%A5barnsbilderb%C3%B6cker', - 'https://id.kb.se/term/barngf/Pekb%C3%B6cker', - 'https://id.kb.se/term/barngf/Kapitelb%C3%B6cker' + 'https://id.kb.se/term/barngf/Pekb%C3%B6cker' ].collect { [(ID_KEY): it] } return asList(work.genreForm).any { it in picBookTerms } || asList(work.classification).any { it.code == 'Hcf(yb)' }