From 84860e56a587dd2fa518526de84a04b6de98053c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olov=20Ylinenp=C3=A4=C3=A4?= Date: Thu, 19 Oct 2023 20:08:29 +0200 Subject: [PATCH 1/3] Add transcription of Bulgarian and Russian (KR76) --- .../main/groovy/whelk/util/Romanizer.groovy | 2 + .../src/main/resources/romanizer/bg-kr76.txt | 58 ++++++ .../main/resources/romanizer/cyrl-kr76.txt | 192 ++++++++++++++++++ .../src/main/resources/romanizer/ru-kr76.txt | 92 +++++++++ .../groovy/whelk/util/RomanizerSpec.groovy | 27 +++ 5 files changed, 371 insertions(+) create mode 100644 whelk-core/src/main/resources/romanizer/bg-kr76.txt create mode 100644 whelk-core/src/main/resources/romanizer/cyrl-kr76.txt create mode 100644 whelk-core/src/main/resources/romanizer/ru-kr76.txt diff --git a/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy b/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy index 60dcbb52c9..cd0e4f22c0 100644 --- a/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy +++ b/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy @@ -23,6 +23,7 @@ class Romanizer { private static final List AUTO = [ auto('be-Cyrl', 'be-Latn-t-be-Cyrl-m0-iso-1968', ['be-iso.txt', 'slavic-iso.txt']), auto('bg-Cyrl', 'bg-Latn-t-bg-Cyrl-m0-iso-1968', ['bg-iso.txt', 'slavic-iso.txt']), + auto('bg-Cyrl', 'bg-Latn-t-bg-Cyrl-x0-kr76', ['cyrl-kr76.txt', 'bg-kr76.txt']), auto('bs-Cyrl', 'bs-Latn-t-bs-Cyrl-m0-iso-1968', ['bs-sr-iso.txt', 'slavic-iso.txt']), auto('el' , 'el-Latn-t-el-Grek-x0-btj', ['el-btj.txt']), auto('grc' , 'grc-Latn-t-grc-Grek-x0-skr-1980', ['grc-skr.txt']), @@ -32,6 +33,7 @@ class Romanizer { auto('mk-Cyrl', 'mk-Latn-t-mk-Cyrl-m0-iso-1968', ['mk-iso.txt', 'slavic-iso.txt']), auto('mn-Cyrl', 'mn-Latn-t-mn-Cyrl-x0-lessing', ['mn-lessing.txt']), auto('ru-Cyrl', 'ru-Latn-t-ru-Cyrl-m0-iso-1968', ['ru-iso.txt', 'slavic-iso.txt']), + auto('ru-Cyrl', 'ru-Latn-t-ru-Cyrl-x0-kr76', ['cyrl-kr76.txt', 'ru-kr76.txt']), auto('sr-Cyrl', 'sr-Latn-t-sr-Cyrl-m0-iso-1968', 
['bs-sr-iso.txt', 'slavic-iso.txt']), auto('uk-Cyrl', 'uk-Latn-t-uk-Cyrl-m0-iso-1968', ['uk-iso.txt', 'slavic-iso.txt']), diff --git a/whelk-core/src/main/resources/romanizer/bg-kr76.txt b/whelk-core/src/main/resources/romanizer/bg-kr76.txt new file mode 100644 index 0000000000..1820ac5e76 --- /dev/null +++ b/whelk-core/src/main/resources/romanizer/bg-kr76.txt @@ -0,0 +1,58 @@ +# ICU transform rules + +# Bulgarian +# See also: cyrl-kr76.txt + +# Е е e +\u0415 > E ; +\u0435 > e ; + +# NOTE not in table +# Ё ё e +\u0401 > E ; +\u0451 > e ; + +# Ж ж zj +\u0416 } [:LowercaseLetter:] > Zj ; +\u0416 > ZJ ; +\u0436 > zj ; + +# И и i +\u0418 > I ; +\u0438 > i ; + +# Х х ch +\u0425 } [:LowercaseLetter:] <> Ch; +\u0425 <> CH ; +\u0445 <> ch ; + +# Ц ц ts +\u0426 } [:LowercaseLetter:] <> Ts ; +\u0426 <> TS ; +\u0446 <> ts ; + +# Ч ч tj +\u0427 } [:LowercaseLetter:] <> Tj ; +\u0427 <> TJ ; +\u0447 <> tj ; + +# Ш ш sj +\u0428 } [:LowercaseLetter:] <> Sj ; +\u0428 <> SJ ; +\u0448 <> sj ; + +# Щ щ sjt +\u0429 } [:LowercaseLetter:] <> Sjt ; +\u0429 <> SJT ; +\u0449 <> sjt ; + +# Ъ ъ ă +# 5) återges ej då den enligt äldre ortografi står i ordslut +\u042A } [:^Letter:] <> ; +\u042A <> \u0102 ; +\u044A } [:^Letter:] <> ; +\u044A <> \u0103 ; + +# Ѣ ѣ e +\u0462 > E ; +\u0463 > e ; \ No newline at end of file diff --git a/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt b/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt new file mode 100644 index 0000000000..291eaaeff4 --- /dev/null +++ b/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt @@ -0,0 +1,192 @@ +# ICU transform rules + +# Common base for transcription of cyrillic script. 
+# "KR76 (Kjellberg) - Transkribering av kyrilliska alfabetet (folkbibliotekens praxis)" +# https://metadatabyran.kb.se/download/18.6945cdaa174b74a2c361604/1601918194487/Transkriberingsschema_Kyrilliska%20alfabet_2015.pdf +# https://metadatabyran.kb.se/beskrivning/specialanvisningar/mangsprak/libris-romaniseringspraxis#h-Kyrilliskskrift + +$vowel = [а у о ы и э я ю ё е А У О Ы И Э Я Ю Ё Е] ; +$s_t = [с т С Т] ; +$soft_hard = [ь ъ Ь Ъ] ; + +# А а a +\u0410 > A ; +\u0430 > a ; + +# Б б b +\u0411 > B ; +\u0431 > b ; + +# В в v +\u0412 > V ; +\u0432 > v ; + +# Г г g +\u0413 > G ; +\u0433 > g ; + +# TODO not in table +# Ѓ ѓ g +\u0403 > G ; +\u0453 > g ; + +# TODO not in table +# Ґ ґ g +\u0490 > G ; +\u0491 > g ; + +# Д д d +\u0414 > D ; +\u0434 > d ; + +# TODO serbocroatian +# Ђ ђ đ +\u0402 > \u0110 ; +\u0452 > \u0111 ; + +# З з z +\u0417 > Z ; +\u0437 > z ; + +# Й й j +\u0419 > J ; +\u0439 > j ; + +# І і і +# 3) tillhör den äldre ortografin +\u0406 > I ; +\u0456 > i ; + +# TODO not in table +# Ї ї ï +\u0407 > \u00CF ; +\u0457 > \u00EF ; + +# TODO no in russian table +# J j j +\u0408 > J ; +\u0458 > j ; + +# К к k +\u041A > K ; +\u043A > k ; + +# TODO not in table +# Ќ ќ ḱ +\u040C > \u1E30 ; +\u045C > \u1E31 ; + +# Л л l +\u041B > L ; +\u043B > l ; + +# TODO not in russian table +# Љ љ lj +\u0409 } [:LowercaseLetter:] > Lj; +\u0409 > LJ ; +\u0459 > lj ; + +# М м m +\u041C > M ; +\u043C > m ; + +# Н н n +\u041D > N ; +\u043D > n ; + +# TODO not in russian table +# Њ њ nj +\u040A } [:LowercaseLetter:] > Nj; +\u040A > NJ ; +\u045A > nj ; + +# О о o +\u041E > O ; +\u043E > o ; + +# П п p +\u041F > P ; +\u043F > p ; + +# Р р r +\u0420 > R ; +\u0440 > r ; + +# С с s +\u0421 > S ; +\u0441 > s ; + +# Т т t +\u0422 > T ; +\u0442 > t ; + +# TODO not in russian table +# Ћ ћ ć +\u040B <> \u0106 ; +\u045B <> \u0107 ; + +# У у u +\u0423 <> U ; +\u0443 <> u ; + +# TODO not in table +# Ў ў ŭ +\u040E <> \u016C ; +\u045E <> \u016D ; + +# Ф ф f +\u0424 <> F ; +\u0444 <> f ; + +# TODO not 
in russian table +# Џ џ dž +\u040F } [:LowercaseLetter:] <> D\u017E; +\u040F <> D\u017D ; +\u045F <> d\u017E ; + +# Ы ы y +\u042B <> Y ; +\u044B <> y ; + +# Ь ь +# 4) återges ej; se dock not 1 +# 6) återges ej +\u042C > ; +\u044C > ; + +# Э э e +\u042D > E ; +\u044D > e ; + +# Ю ю ju +# 8) iu efter с, т och з +[с т з С Т З] { \u042E } [:LowercaseLetter:] <> Iu; +[с т з С Т З] { \u042E <> IU ; +[с т з С Т З] { \u044E <> iu ; +\u042E } [:LowercaseLetter:] <> Ju; +\u042E <> JU ; +\u044E <> ju ; + +# Я я ja +# 9) ia efter с, т och з +[с т з С Т З] { \u042F } [:LowercaseLetter:] <> Ia; +[с т з С Т З] { \u042F <> IA ; +[с т з С Т З] { \u044F <> ia ; +\u042F } [[:LowercaseLetter:][:^Letter:]] <> Ja; +\u042F <> JA ; +\u044F <> ja ; + +# Ѳ ѳ f +# 3) tillhör den äldre ortografin +\u0472 > F ; +\u0473 > f ; + +# Ѵ ѵ i +# 3) tillhör den äldre ortografin +\u0474 > I ; +\u0475 > i ; + +# Ѫ ѫ ă +# 3) tillhör den äldre ortografin +\u046A <> \u0102 ; +\u046B <> \u0103 ; \ No newline at end of file diff --git a/whelk-core/src/main/resources/romanizer/ru-kr76.txt b/whelk-core/src/main/resources/romanizer/ru-kr76.txt new file mode 100644 index 0000000000..435f540c47 --- /dev/null +++ b/whelk-core/src/main/resources/romanizer/ru-kr76.txt @@ -0,0 +1,92 @@ +# ICU transform rules + +# Russian +# See also: cyrl-kr76.txt + +# Е е e +# 1) efter vokal, ь, ъ och i början av ord je; dock ie om ь eller ъ föregås av с eller т +$s_t $soft_hard { \u0415 > IE; +$s_t $soft_hard { \u0435 > ie; + +[$vowel $soft_hard] { \u0415 } [:LowercaseLetter:] > Je; +[$vowel $soft_hard] { \u0415 > JE ; +[$vowel $soft_hard] { \u0435 > je ; + +[:^Letter:] { \u0415 } [:LowercaseLetter:] > Je; +[:^Letter:] { \u0415 } [:Letter:] > JE ; +[:^Letter:] { \u0435 } [:Letter:] > je ; + +\u0415 > E ; +\u0435 > e ; + +# Ё ё e +# 1) efter vokal, ь, ъ och i början av ord je; dock ie om ь eller ъ föregås av с eller т +$s_t $soft_hard { \u0401 > IE; +$s_t $soft_hard { \u0451 > ie; + +[$vowel $soft_hard] { \u0401 } 
[:LowercaseLetter:] > Je; +[$vowel $soft_hard] { \u0401 > JE ; +[$vowel $soft_hard] { \u0451 > je ; + +[:^Letter:] { \u0401 } [:LowercaseLetter:] > Je; +[:^Letter:] { \u0401 } [:Letter:] > JE ; +[:^Letter:] { \u0451 } [:Letter:] > je ; + +\u0401 > E ; +\u0451 > e ; + +# Ж ж zj +\u0416 } [:LowercaseLetter:] > Zj ; +\u0416 > ZJ ; +\u0436 > zj ; + +# И и i +# 2) efter ь ji +[ь Ь] { \u0418 } [:LowercaseLetter:] > Ji ; +[ь Ь] { \u0418 > JI ; +[ь Ь] { \u0438 > ji ; +\u0418 > I ; +\u0438 > i ; + +# Х х ch +\u0425 } [:LowercaseLetter:] <> Ch; +\u0425 <> CH ; +\u0445 <> ch ; + +# Ц ц ts +\u0426 } [:LowercaseLetter:] <> Ts ; +\u0426 <> TS ; +\u0446 <> ts ; + +# Ч ч tj +\u0427 } [:LowercaseLetter:] <> Tj ; +\u0427 <> TJ ; +\u0447 <> tj ; + +# Ш ш sj +\u0428 } [:LowercaseLetter:] <> Sj ; +\u0428 <> SJ ; +\u0448 <> sj ; + +# Щ щ sjtj +\u0429 } [:LowercaseLetter:] <> Sjtj ; +\u0429 <> SJTJ ; +\u0449 <> sjtj ; + +# Ъ ъ +# 4) återges ej; se dock not 1 +\u042A > ; +\u044A > ; + +# Ѣ ѣ e +# 7) efter vokal och i början av ord je +$vowel { \u0462 } [:LowercaseLetter:] > Je; +$vowel { \u0462 > JE ; +$vowel { \u0463 > je ; + +[:^Letter:] { \u0462 } [:LowercaseLetter:] > Je; +[:^Letter:] { \u0462 } [:Letter:] > JE ; +[:^Letter:] { \u0463 } [:Letter:]> je ; + +\u0462 > E ; +\u0463 > e ; \ No newline at end of file diff --git a/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy b/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy index 56063803e7..274117a540 100644 --- a/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy @@ -116,6 +116,22 @@ class RomanizerSpec extends Specification { 'Ё' || 'E' } + def "Russian with KR76"() { + expect: + new Romanizer().romanize(source, 'ru')['ru-Latn-t-ru-Cyrl-x0-kr76'] == target + where: + source || target + // https://libris.kb.se/dr3t6tk7btsm1xpm#it + 'Маша и медведь' || 'Masja i medved' + 'Русские народные сказки' || 'Russkie narodnye 
skazki' + // Hard sign + 'съесть' || 'sest' + 'съ' || 's' + // Yo + 'сёрфингист' || 'serfingist' + 'Ё' || 'E' + } + def "Belarusian with ISO"() { expect: new Romanizer().romanize(source, 'be')['be-Latn-t-be-Cyrl-m0-iso-1968'] == target @@ -154,6 +170,17 @@ class RomanizerSpec extends Specification { "Ще се удавят в сълзите на майките си" || "Šte se udavjat v sălzite na majkite si" } + def "Bulgarian with KR76"() { + expect: + new Romanizer().romanize(source, 'bg')['bg-Latn-t-bg-Cyrl-x0-kr76'] == target + where: + source || target + // https://libris.kb.se/fzr6pkkr2vnc152#it + 'Баба праща поздрави и се извинява' || 'Baba prasjta pozdravi i se izvinjava' + // https://libris.kb.se/jxqb93w0gxhtbf7d#it + "Белия зъб" || "Belija zăb" + } + def "Macedonian with ISO"() { expect: new Romanizer().romanize(source, 'mk')['mk-Latn-t-mk-Cyrl-m0-iso-1968'] == target From 5f21be4df41d3b384e23f8918299faa7c1841df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olov=20Ylinenp=C3=A4=C3=A4?= Date: Thu, 19 Oct 2023 20:31:10 +0200 Subject: [PATCH 2/3] Add transcription of Bosnian and Serbian (KR76) --- .../main/groovy/whelk/util/Romanizer.groovy | 2 + .../main/resources/romanizer/bs-sr-kr76.txt | 46 +++++++++++++++++++ .../main/resources/romanizer/cyrl-kr76.txt | 9 ---- .../groovy/whelk/util/RomanizerSpec.groovy | 20 ++++++++ 4 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt diff --git a/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy b/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy index cd0e4f22c0..b71223f99c 100644 --- a/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy +++ b/whelk-core/src/main/groovy/whelk/util/Romanizer.groovy @@ -25,6 +25,7 @@ class Romanizer { auto('bg-Cyrl', 'bg-Latn-t-bg-Cyrl-m0-iso-1968', ['bg-iso.txt', 'slavic-iso.txt']), auto('bg-Cyrl', 'bg-Latn-t-bg-Cyrl-x0-kr76', ['cyrl-kr76.txt', 'bg-kr76.txt']), auto('bs-Cyrl', 'bs-Latn-t-bs-Cyrl-m0-iso-1968', ['bs-sr-iso.txt', 
'slavic-iso.txt']), + auto('bs-Cyrl', 'bs-Latn-t-bs-Cyrl-x0-kr76', ['cyrl-kr76.txt', 'bs-sr-kr76.txt']), auto('el' , 'el-Latn-t-el-Grek-x0-btj', ['el-btj.txt']), auto('grc' , 'grc-Latn-t-grc-Grek-x0-skr-1980', ['grc-skr.txt']), auto('yi-Hebr', 'yi-Latn-t-yi-Hebr-x0-yivo', ['yi-yivo.txt']), @@ -35,6 +36,7 @@ class Romanizer { auto('ru-Cyrl', 'ru-Latn-t-ru-Cyrl-m0-iso-1968', ['ru-iso.txt', 'slavic-iso.txt']), auto('ru-Cyrl', 'ru-Latn-t-ru-Cyrl-x0-kr76', ['cyrl-kr76.txt', 'ru-kr76.txt']), auto('sr-Cyrl', 'sr-Latn-t-sr-Cyrl-m0-iso-1968', ['bs-sr-iso.txt', 'slavic-iso.txt']), + auto('sr-Cyrl', 'sr-Latn-t-sr-Cyrl-x0-kr76', ['cyrl-kr76.txt', 'bs-sr-kr76.txt']), auto('uk-Cyrl', 'uk-Latn-t-uk-Cyrl-m0-iso-1968', ['uk-iso.txt', 'slavic-iso.txt']), // Converted from LOC mappings diff --git a/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt b/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt new file mode 100644 index 0000000000..da4963e55b --- /dev/null +++ b/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt @@ -0,0 +1,46 @@ +# ICU transform rules + +# Bosnian & Serbian +# See also: cyrl-kr76.txt + +# Е е e +\u0415 <> E ; +\u0435 <> e ; + +# NOTE not in table +# Ё ё e +\u0401 > E ; +\u0451 > e ; + +# Ж ж ž +\u0416 > \u017d ; +\u0436 > \u017e ; + +# И и i +\u0418 > I ; +\u0438 > i ; + +# Х х h +\u0425 <> H ; +\u0445 <> h ; + +# Ц ц c +\u0426 <> C ; +\u0446 <> c ; + +# Ч ч č +\u0427 <> \u010c ; +\u0447 <> \u010d ; + +# Щ щ šč +\u0429 <> \u0160\u010C ; +\u0449 <> \u0161\u010D ; + +# Џ џ dž +\u040F } [:LowercaseLetter:] <> D\u017E; +\u040F <> D\u017D ; +\u045F <> d\u017E ; + +# Ш ш š +\u0428 <> \u0160 ; +\u0448 <> \u0161 ; \ No newline at end of file diff --git a/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt b/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt index 291eaaeff4..47526add0e 100644 --- a/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt +++ b/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt @@ -80,7 +80,6 @@ $soft_hard = [ь ъ Ь Ъ] ; 
\u041B > L ; \u043B > l ; -# TODO not in russian table # Љ љ lj \u0409 } [:LowercaseLetter:] > Lj; \u0409 > LJ ; @@ -94,7 +93,6 @@ $soft_hard = [ь ъ Ь Ъ] ; \u041D > N ; \u043D > n ; -# TODO not in russian table # Њ њ nj \u040A } [:LowercaseLetter:] > Nj; \u040A > NJ ; @@ -120,7 +118,6 @@ $soft_hard = [ь ъ Ь Ъ] ; \u0422 > T ; \u0442 > t ; -# TODO not in russian table # Ћ ћ ć \u040B <> \u0106 ; \u045B <> \u0107 ; @@ -138,12 +135,6 @@ $soft_hard = [ь ъ Ь Ъ] ; \u0424 <> F ; \u0444 <> f ; -# TODO not in russian table -# Џ џ dž -\u040F } [:LowercaseLetter:] <> D\u017E; -\u040F <> D\u017D ; -\u045F <> d\u017E ; - # Ы ы y \u042B <> Y ; \u044B <> y ; diff --git a/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy b/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy index 274117a540..4b514040d0 100644 --- a/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/util/RomanizerSpec.groovy @@ -156,6 +156,15 @@ class RomanizerSpec extends Specification { // https://libris.kb.se/5m0smjhb3d66cj4c#it "Бајке за дјецу" || "Bajke za djecu" // "Bajke za djecu" in record } + + def "Bosnian with KR76"() { + expect: + new Romanizer().romanize(source, 'bs')['bs-Latn-t-bs-Cyrl-x0-kr76'] == target + where: + source || target + // https://libris.kb.se/5m0smjhb3d66cj4c#it + "Бајке за дјецу" || "Bajke za djecu" // "Bajke za djecu" in record + } def "Bulgarian with ISO"() { expect: @@ -204,6 +213,17 @@ class RomanizerSpec extends Specification { 'Тиги, хаjдемо у шетњу' || 'Tigi, hajdemo u šetnju' } + def "Serbian with KR76"() { + expect: + new Romanizer().romanize(source, 'sr')['sr-Latn-t-sr-Cyrl-x0-kr76'] == target + where: + source || target + // https://libris.kb.se/2dbbcc810dxjnk9l#it + "Узбуна у кући белих медведа" || "Uzbuna u kući belih medveda" + // https://libris.kb.se/dqnnpp1sbxm436wp#it + 'Тиги, хаjдемо у шетњу' || 'Tigi, hajdemo u šetnju' + } + def "Ukrainian with ISO"() { expect: new Romanizer().romanize(source, 
'uk')['uk-Latn-t-uk-Cyrl-m0-iso-1968'] == target From bae858b8c43c47f247b4242b96fafdef0620ae2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olov=20Ylinenp=C3=A4=C3=A4?= Date: Thu, 19 Oct 2023 20:49:23 +0200 Subject: [PATCH 3/3] Clean up --- whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt | 4 ++++ whelk-core/src/main/resources/romanizer/cyrl-kr76.txt | 6 ------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt b/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt index da4963e55b..0750746066 100644 --- a/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt +++ b/whelk-core/src/main/resources/romanizer/bs-sr-kr76.txt @@ -12,6 +12,10 @@ \u0401 > E ; \u0451 > e ; +# Ђ ђ đ +\u0402 > \u0110 ; +\u0452 > \u0111 ; + # Ж ж ž \u0416 > \u017d ; \u0436 > \u017e ; diff --git a/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt b/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt index 47526add0e..dccc39dcd9 100644 --- a/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt +++ b/whelk-core/src/main/resources/romanizer/cyrl-kr76.txt @@ -39,11 +39,6 @@ $soft_hard = [ь ъ Ь Ъ] ; \u0414 > D ; \u0434 > d ; -# TODO serbocroatian -# Ђ ђ đ -\u0402 > \u0110 ; -\u0452 > \u0111 ; - # З з z \u0417 > Z ; \u0437 > z ; @@ -62,7 +57,6 @@ $soft_hard = [ь ъ Ь Ъ] ; \u0407 > \u00CF ; \u0457 > \u00EF ; -# TODO no in russian table # J j j \u0408 > J ; \u0458 > j ;