Skip to content

Commit

Permalink
Merge pull request #1300 from libris/feature/lxl-4183-correct-illustr…
Browse files Browse the repository at this point in the history
…ators-vs-translators

Feature/lxl 4183 correct illustrators vs translators
  • Loading branch information
kwahlin authored Sep 7, 2023
2 parents 34931e8 + d5b3fc7 commit ee2e597
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 27 deletions.
90 changes: 73 additions & 17 deletions librisworks/scripts/add-missing-contribution-data.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ titleMovedToTranslationOf = getReportWriter("title-moved-to-translationOf.tsv")
originalWorkFoundInCluster = getReportWriter("original-work-found-in-cluster.tsv")
originalWorkFoundInCluster.println(['id', 'added translationOf', 'translationOf occurs in (examples)'].join('\t'))

illVsTrl = getReportWriter("ill-vs-trl.tsv")
illVsTrl.println(['id', 'removed/replaced role', 'agent name', 'resp statement'].join('\t'))

def clusters = new File(System.getProperty('clusters')).collect { it.split('\t').collect { it.trim() } }

idToCluster = initIdToCluster(clusters)
Expand All @@ -56,13 +59,20 @@ selectByIds(clusters.flatten()) { bib ->
asList(c.agent).each { Map agent ->
def agentStr = toString(agent)
def loadedAgent = loadIfLink(agent)
if (agent.containsKey('@id')) {
if (agent.containsKey(ID_KEY)) {
agentToLifeSpan.computeIfAbsent(agentStr, f -> lifeSpan(loadedAgent))
}
([loadedAgent] + asList(loadedAgent.hasVariant)).each { a ->
String agentName = name(a)
if (agentName) {
nameToAgents.computeIfAbsent(agentName, f -> new ConcurrentHashMap().newKeySet()).add(agentStr)
def acronym = agentName.split(' ').findAll().with { parts ->
(0..<parts.size() - 1).each {
parts[it] = parts[it][0]
}
parts.join(' ')
}
nameToAgents.computeIfAbsent(acronym, f -> new ConcurrentHashMap().newKeySet()).add(agentStr)
}
}
def roleToIds = agentToRolesToIds.computeIfAbsent(agentStr, f -> new ConcurrentHashMap())
Expand Down Expand Up @@ -127,14 +137,28 @@ selectByIds(clusters.flatten()) { bib ->
def existingNames = contribution.findResults { agentToNames[toString(asList(it.agent).find())] }.flatten()
contributionsInRespStatement.removeAll { String name, List<Relator> roles ->
existingNames.any { similarName(it, name) }
|| roles.collect { [(ID_KEY): it.iri] }.intersect(contribution.collect { it.role }.flatten())
|| roles.collect { toIdMap(it.iri) }.intersect(contribution.collect { it.role }.flatten())
}

// match remaining against local agents in same cluster
modified |= tryAddLocalAgentContributionsFromRespStatement(contribution, contributionsInRespStatement, respStatement, id)
// if still no match, add constructed local Contribution with agent + roles extracted from responsibilityStatement
modified |= addRemainingContributionsFromRespStatement(contribution, contributionsInRespStatement, normalizedNameToName, respStatement, id)

if (modified) {
bib.scheduleSave()
}
}

selectByIds(clusters.flatten()) { bib ->
def id = bib.doc.shortId
def work = bib.graph[1].instanceOf
def contribution = work?.contribution

if (!contribution) return

def modified = false

contribution.each { Map c ->
// add roles from contributions in same cluster with matching agent
modified |= tryAddRole(c, id)
Expand Down Expand Up @@ -232,7 +256,18 @@ boolean tryAddRolesFromRespStatement(Map contribution, Map contributionsInRespSt
def rolesOfInterest = rolesInRespStatement.findResults { Relator relator ->
relator == Relator.IMPLICIT_AUTHOR && !isPrimaryContribution
? null
: [(ID_KEY): relator.iri]
: toIdMap(relator.iri)
}

def modified = false

def incorrectIllOrTrl = findIncorrectIllVsTrl(currentRoles, rolesOfInterest)
if (incorrectIllOrTrl) {
currentRoles.remove(toIdMap(incorrectIllOrTrl))
contribution['role'] = currentRoles
roleToIds[toIdMap(incorrectIllOrTrl)].remove(id)
illVsTrl.println([id, roleShort(incorrectIllOrTrl), name, respStatement].join('\t'))
modified = true
}
def newRoles = rolesOfInterest - currentRoles
if (newRoles) {
Expand All @@ -246,10 +281,10 @@ boolean tryAddRolesFromRespStatement(Map contribution, Map contributionsInRespSt
newRoles.each { r ->
roleToIds.computeIfAbsent(r, f -> new ConcurrentHashMap().newKeySet()).add(id)
}
return true
modified = true
}

return false
return modified
}

boolean tryAddLinkedAgentContributionsFromRespStatement(List<Map> contribution, Map contributionsInRespStatement, String respStatement, String id) {
Expand All @@ -270,10 +305,10 @@ boolean tryAddLinkedAgentContributionsFromRespStatement(List<Map> contribution,
def newContribution =
[
'@type': 'Contribution',
'agent': [(ID_KEY): agentIri]
'agent': toIdMap(agentIri)
]

roles = roles.collect { r -> [(ID_KEY): r.iri] }
roles = roles.collect { r -> toIdMap(r.iri) }

if (roles) {
newContribution['role'] = roles
Expand Down Expand Up @@ -319,7 +354,7 @@ boolean tryAddLocalAgentContributionsFromRespStatement(List<Map> contribution, M
'agent': toMap(localAgent)
]

roles = roles.collect { r -> [(ID_KEY): r.iri] }
roles = roles.collect { r -> toIdMap(r.iri) }

if (roles) {
newContribution['role'] = roles
Expand Down Expand Up @@ -348,7 +383,7 @@ boolean addRemainingContributionsFromRespStatement(List<Map> contribution, Map c
if (contributionsInRespStatement.isEmpty()) return false

return contributionsInRespStatement.removeAll { name, roles ->
def translatorEditor = roles.findResults { r -> r == Relator.TRANSLATOR || r == Relator.EDITOR ? [(ID_KEY): r.iri] : null }
def translatorEditor = roles.findResults { r -> r == Relator.TRANSLATOR || r == Relator.EDITOR ? toIdMap(r.iri) : null }

if (translatorEditor) {
def newContribution =
Expand Down Expand Up @@ -382,7 +417,7 @@ boolean tryAddRole(Map contribution, String id) {
Map roleToIds = agentToRolesToIds[agentStr]
if (!roleToIds) return false

def adapterEditor = [Relator.EDITOR, Relator.ADAPTER].collect { [(ID_KEY): it.iri] }
def adapterEditor = [Relator.EDITOR, Relator.ADAPTER].collect { toIdMap(it.iri) }

def currentRoles = asList(contribution.role)
// find roles in cluster that can be added (certain conditions need to be met)
Expand All @@ -391,12 +426,18 @@ boolean tryAddRole(Map contribution, String id) {
def inClusterWithRole = ids.intersect(idToCluster[id])
return inClusterWithRole
&& !noRole([r])
&& (inClusterWithRole.size() >= inCluster.size()
&& (inClusterWithRole.size() >= inCluster.size() / 2
|| noRole(currentRoles)
|| r == [(ID_KEY): Relator.PRIMARY_RIGHTS_HOLDER.iri]
|| r == toIdMap(Relator.PRIMARY_RIGHTS_HOLDER.iri)
|| (r in adapterEditor && currentRoles.intersect(adapterEditor)))
}.collect { it.key }

def illAndTrl = [toIdMap(Relator.TRANSLATOR.iri), toIdMap(Relator.ILLUSTRATOR.iri)]

if ((currentRoles + rolesInCluster).containsAll(illAndTrl)) {
rolesInCluster -= illAndTrl
}

def newRoles = rolesInCluster - currentRoles
if (newRoles) {
contribution['role'] = noRole(currentRoles) ? newRoles : currentRoles + newRoles
Expand All @@ -415,7 +456,7 @@ boolean tryAddRole(Map contribution, String id) {
}

boolean tryAddMissingTranslationOf(Map work, List<Map> contribution, String id) {
def trl = [(ID_KEY): Relator.TRANSLATOR.iri]
def trl = toIdMap(Relator.TRANSLATOR.iri)
def translators = contribution.findResults { asList(it.role).contains(trl) ? toString(asList(it.agent).find()) : null }

if (!translators || work['translationOf']) return false
Expand Down Expand Up @@ -448,7 +489,7 @@ boolean tryAddMissingTranslationOf(Map work, List<Map> contribution, String id)
}

boolean noRole(List<Map> roles) {
roles.isEmpty() || roles == [[:]] || roles == [[(ID_KEY): Relator.UNSPECIFIED_CONTRIBUTOR.iri]]
roles.isEmpty() || roles == [[:]] || roles == [toIdMap(Relator.UNSPECIFIED_CONTRIBUTOR.iri)]
}

private Map loadIfLink(Map m) {
Expand Down Expand Up @@ -485,7 +526,7 @@ static Map<String, List<Relator>> parseSwedishFictionContribution(String contrib
[
(Relator.TRANSLATOR) : ~/(bemynd(\w+|\.)? )?öf?v(\.|ers(\.|\p{L}+)?)( (till|från) \p{L}+)?|(till svenskan?|från \p{L}+)|svensk text/,
(Relator.AUTHOR) : ~/^(text(e[nr])?|skriven|written)/,
(Relator.ILLUSTRATOR) : ~/\bbild(erStrin)?|ill(\.|ustr(\.|\w+)?)|\bvi(gn|nj)ett(er|ill)?|ritad/,
(Relator.ILLUSTRATOR) : ~/\bbild(er)?|ill(\.|ustr(\.|\w+)?)|\bvi(gn|nj)ett(er|ill)?|ritad|\bteckn(ad|ingar)/,
(Relator.AUTHOR_OF_INTRO) : ~/förord|inl(edn(\.|ing)|edd)/,
(Relator.COVER_DESIGNER) : ~/omslag/,
(Relator.AUTHOR_OF_AFTERWORD): ~/efter(ord|skrift)/,
Expand All @@ -499,7 +540,7 @@ static Map<String, List<Relator>> parseSwedishFictionContribution(String contrib
def namePattern = ~/\p{Lu}:?\p{Ll}+('\p{Ll})?(,? [Jj](r|unior))?/
def betweenNamesPattern = ~/-| |\. ?| ([Dd]e(l| la)?|von|van( de[nr])?|v\.|le|af|du|dos) | [ODdLl]'/
def fullNamePattern = ~/(($initialPattern|$namePattern)($betweenNamesPattern)?)*$namePattern/
def conjPattern = ~/ (och|&|and) /
def conjPattern = ~/(,| och| &| and) /
def roleAfterNamePattern = ~/( ?\(($rolePattern$conjPattern)?$rolePattern\))/
def fullContributionPattern = ~/(($rolePattern($conjPattern|\/))*$rolePattern$followsRolePattern)?$fullNamePattern($conjPattern$fullNamePattern)*$roleAfterNamePattern?/

Expand Down Expand Up @@ -600,5 +641,20 @@ static List<Character> initials(List nameParts) {
}

static List<String> nameParts(String s) {
s.split(/\s+|-/) as List
s.split(' ').findAll()
}

static String findIncorrectIllVsTrl(List currentRoles, List rolesInRespStatement) {
if ((currentRoles + rolesInRespStatement)[ID_KEY].containsAll([Relator.ILLUSTRATOR.iri, Relator.TRANSLATOR.iri])) {
if (!rolesInRespStatement[ID_KEY].contains(Relator.ILLUSTRATOR.iri)) {
return Relator.ILLUSTRATOR.iri
}
if (!rolesInRespStatement[ID_KEY].contains(Relator.TRANSLATOR.iri)) {
return Relator.TRANSLATOR.iri
}
}
}

def toIdMap(String iri) {
[(ID_KEY): iri]
}
10 changes: 0 additions & 10 deletions librisworks/src/main/groovy/se/kb/libris/mergeworks/Util.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -293,16 +293,6 @@ class Util {

static def isTitle = { it.'@type' == 'Title' }

static boolean nameMatch(Object local, Map agent) {
def variants = [agent] + asList(agent.hasVariant)

def localName = local instanceof Map ? name(local) : normalize(local)

localName && variants.any {
name(it) && localName == name(it)
}
}

static String name(Map agent) {
(agent.givenName && agent.familyName)
? normalize("${agent.givenName} ${agent.familyName}")
Expand Down

0 comments on commit ee2e597

Please sign in to comment.