diff --git a/Cargo.toml b/Cargo.toml index 02c8569..eb0a539 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ full = [ "albanian", "belarusian", "bulgarian", + "catalan", "croatian", "czech", "danish", @@ -54,6 +55,7 @@ afrikaans = [] albanian = [] belarusian = [] bulgarian = [] +catalan = [] croatian = [] czech = [] danish = [] diff --git a/README.md b/README.md index 554eb5e..e92e178 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ Each language added individually contributes: | Albanian | 1.4 KiB | | Belarusian | 3.9 KiB | | Bulgarian | 13 KiB | +| Catalan | 1.7 KiB | | Croatian | 2.0 KiB | | Czech | 40 KiB | | Danish | 5.7 KiB | diff --git a/patterns/hyph-ca.tex b/patterns/hyph-ca.tex new file mode 100644 index 0000000..b59d200 --- /dev/null +++ b/patterns/hyph-ca.tex @@ -0,0 +1,305 @@ +% This is a modified version of `cahyph.tex' for use with Typst's hypher. The +% `n` compatibility command has been removed and the ^^ notation has been +% resolved. We also removed non-pattern exceptions. The original file has been +% retrieved from +% https://ftp.uni-bayreuth.de/packages/TeX/languages/hyphenation/cahyph.tex on +% February 27, 2024. The modification has been made by Martin Haug on the same +% day. +% +% Hyphenation patterns for Catalan. +% This is version 1.11 +% Compiled by Gon\c{c}al Badenes and Francina Turon, +% December 1991-January 1995. +% +% Copyright (C) 1991-2003 Gon\c{c}al Badenes +% +% ----------------------------------------------------------------- +% IMPORTANT NOTICE: +% +% This program can be redistributed and/or modified under the terms +% of the LaTeX Project Public License Distributed from CTAN +% archives in directory macros/latex/base/lppl.txt; either +% version 1 of the License, or any later version. +% ----------------------------------------------------------------- +% +%%% ==================================================================== +%%% @TeX-hyphen-file{ +%%% author = "Gon\c{c}al Badenes", +%%% version = "1.11", +%%% date = "15 July 2003", +%%% time = "15:08:12 CET", +%%% filename = "cahyph.tex", +%%% email = "g.badenes@ieee.org", +%%% codetable = "ISO/ASCII", +%%% keywords = "TeX, hyphen, catalan", +%%% supported = "yes", +%%% abstract = "Catalan hyphenation patterns", +%%% docstring = "This file contains the hyphenation patterns +%%% for the catalan language", +%%% } +%%% ==================================================================== +% +% NOTICE: Version 1.11 is identical to version 1.10 (issued on January 17, +% 1995) except for the updated copyright notice above. +% +% The macros used were created for ghyph31.tex by Bernd Raichle (see the +% German hyphenation pattern files for further details) +% +% This patterns have been created using standard, conservative +% hyphenation rules for catalan. The results have refined running them +% through patgen. In that way, the number of hits has been increased. +% +% These rules produce no wrong patterns (Results checked against the +% ``Diccionari Ortogr\`afic i de Pron\'uncia'', Enciclop\`edia +% Catalana. The percentage of valid hyphen misses is lower than 1% +% +% Some of the patterns below represent combinations that never +% happen in Catalan. We have tried to keep them to a minimum. +% +% *** IMPORTANT *** +% \lefthyphenmin and \righthyphenmin should be set to 2 and 2 +% respectively. If you set them below these values incorrect breaks +% will happen (specially at the beginning of foreign words and words +% which begin with some prefixes). +% *** IMPORTANT *** +% +% Please report any problem you might have to the authors!!! +% + +\patterns{% +% Attach vowel groups to left consonant +1ba 1be 1bi 1bo 1bu +1ca 1ce 1ci 1co 1cu +1da 1de 1di 1do 3du +1fa 1fe 1fi 1fo 1fu +1ga 1ge 1gi 1go 1gu +1ha 1he 1hi 1ho 1hu +1ja 1je 1ji 1jo 1ju +1la 1le 1li 1lo 1lu +1ma 1me 1mi 1mo 1mu +1na 1ne 3ni 1no 1nu +1pa 3pe 3pi 3po 1pu +1qu +1ra 1re 1ri 1ro 1ru +1sa 1se 1si 1so 1su +1ta 1te 1ti 1to 1tu +1va 1ve 1vi 1vo 1vu +1xa 1xe 1xi 1xo 1xu +1za 1ze 1zi 1zo 1zu +1bé 1bí 1bó 1bú 1bà 1bè 1bò +1cé 1cí 1có 1cú 1cà 1cè 1cò +1ço 1ça 1çu +1çó 1çú 1çà 1çò +1dé 1dí 1dó 1dú 1dà 1dè 1dò +1fé 1fí 1fó 1fú 1fà 1fè 1fò +1gé 1gí 1gó 1gú 1gà 1gè +1gò 1gü +1hé 1hí 1hó 1hú 1hà 1hè 1hò +1jé 1jí 1jó 1jú 1jà 1jè 1jò +1lé 1lí 1ló 1lú 1là 1lè 1lò +1mé 1mí 1mó 1mú 1mà 1mè 1mò +1né 1ní 1nó 1nú 1nà 1nè 1nò +1pé 1pí 1pó 1pú 1pà 1pè 1pò +1qü +1ré 1rí 1ró 1rú 1rà 1rè 1rò +1sé 1sí 1só 1sú 1sà 1sè 1sò +1té 1tí 1tó 1tú 1tà 1tè 1tò +1vé 1ví 1vó 1vú 1và 1vè 1vò +1xé 1xí 1xó 1xú 1xà 1xè 1xò +1zé 1zí 1zó 1zú 1zà 1zè 1zò +% +% Build legal consonant groups, leave other consonants bound to +% the previous group. This overrides part of the previous pattern +% group. +3l2la 1l2le 1l2li 3l2lo 1l2lu +1b2la 1b2le 1b2li 1b2lo 1b2lu +1b2ra 1b2re 1b2ri 1b2ro 1b2ru +1c2la 1c2le 1c2li 1c2lo 1c2lu +1c2ra 1c2re 1c2ri 1c2ro 1c2ru +1d2ra 1d2re 1d2ri 1d2ro 1d2ru +1f2la 1f2le 1f2li 1f2lo 1f2lu +1f2ra 1f2re 1f2ri 1f2ro 1f2ru +1g2la 1g2le 1g2li 1g2lo 1g2lu +1g2ra 1g2re 1g2ri 1g2ro 1g2ru +1p2la 1p2le 1p2li 1p2lo 1p2lu +1p2ra 1p2re 1p2ri 1p2ro 1p2ru +1t2ra 1t2re 1t2ri 1t2ro 1t2ru +1n2ya 1n2ye 1n2yi 1n2yo 1n2yu +1l2lé 1l2lí 1l2ló 1l2lú 1l2là +1l2lè 1l2lò +1b2lé 1b2lí 1b2ló 1b2lú 1b2là +1b2lè 1b2lò +1b2ré 1b2rí 1b2ró 1b2rú 1b2rà +1b2rè 1b2rò +1c2lé 1c2lí 1c2ló 1c2lú 1c2là +1c2lè 1c2lò +1c2ré 1c2rí 1c2ró 1c2rú 1c2rà +1c2rè 1c2rò +1d2ré 1d2rí 1d2ró 1d2rú 1d2rà +1d2rè 1d2rò +1f2lé 1f2lí 1f2ló 1f2lú 1f2là +1f2lè 1f2lò +1f2ré 1f2rí 1f2ró 1f2rú 1f2rà +1f2rè 1f2rò +1g2lé 1g2lí 1g2ló 1g2lú 1g2là +1g2lè 1g2lò +1g2ré 1g2rí 1g2ró 1g2rú 1g2rà +1g2rè 1g2rò +1p2lé 1p2lí 1p2ló 1p2lú 1p2là +1p2lè 1p2lò +1p2ré 1p2rí 1p2ró 1p2rú 1p2rà +1p2rè 1p2rò +1t2ré 1t2rí 1t2ró 1t2rú 1t2rà +1t2rè 1t2rò +1n2yé 1n2yí 1n2yó 1n2yú 1n2yà +1n2yè 1n2yò +% +% Vowels are kept together by the defaults +% We break here diphthongs and the like +a1a a1e a1o +e1a e1e e1o +i1a i1e i1o +o1a o1e o1o +u1a u1e u1o +a1é a1í a1ó a1ú a1à a1è +a1ò a1ï a1ü +e1é e1í e1ó e1ú e1à e1è +e1ò e1ï e1ü +i1é i1í i1ó i1ú i1à i1è +i1ò i1ï i1ü +o1é o1í o1ó o1ú o1à o1è +o1ò o1ï o1ü +u1é u1í u1ó u1ú u1à u1è +u1ò u1ï u1ü +é1a é1e é1o +é1ï é1ü +í1a í1e í1o +í1ï í1ü +ó1a ó1e ó1o +ó1ï ó1ü +ú1a ú1e ú1o +ú1ï ú1ü +à1a à1e à1o +à1ï à1ü +è1a è1e è1o +è1ï è1ü +ò1a ò1e ò1o +ò1ï ò1ü +ï1a ï1e ï1o ï1é ï1í +ï1ó ï1ú ï1à +ï1è ï1ò ï1i +ü1a ü1e ü1o ü1é ü1í +ü1ó ü1ú ü1à +ü1è ü1ò +% +% We consider here i and u as semiconsonants +a1i2a a1i2e a1i2o a1i2u +a1u2a a1u2e a1u2i a1u2o a1u2u +e1i2a e1i2e e1i2o e1i2u +e1u2a e1u2e e1u2i e1u2o e1u2u +i1i2a i1i2e i1i2o i1i2u +i1u2a i1u2e i1u2i i1u2o i1u2u +o1i2a o1i2e o1i2o o1i2u +o1u2a o1u2e o1u2o o1u2i o1u2u +u1i2a u1i2e u1i2o u1i2u +u1u2a u1u2e u1u2i u1u2o u1u2u +a1i2é a1i2í a1i2ó a1i2ú a1i2à +a1i2è a1i2ò +a1u2é a1u2í a1u2ó a1u2ú a1u2à +a1u2è a1u2ò +e1i2é e1i2í e1i2ó e1i2ú e1i2à +e1i2è e1i2ò +e1u2é e1u2í e1u2ó e1u2ú e1u2à +e1u2è e1u2ò +i1i2é i1i2í i1i2ó i1i2ú i1i2à +i1i2è i1i2ò +i1u2é i1u2í i1u2ó i1u2ú i1u2à +i1u2è i1u2ò +o1i2é o1i2í o1i2ó o1i2ú o1i2à +o1i2è o1i2ò +o1u2é o1u2í o1u2ó o1u2ú o1u2à +o1u2è o1u2ò +u1i2é u1i2í u1i2ó u1i2ú u1i2à +u1i2è u1i2ò +u1u2é u1u2í u1u2ó u1u2ú u1u2à +u1u2è u1u2ò +é1i2a é1i2e é1i2o é1i2u +é1u2a é1u2e é1u2o é1u2i é1u2u +í1i2a í1i2e í1i2o í1i2u +í1u2a í1u2e í1u2o í1u2i í1u2u +ó1i2a ó1i2e ó1i2o ó1i2u +ó1u2a ó1u2e ó1u2o ó1u2i ó1u2u +ú1i2a ú1i2e ú1i2o ú1i2u +ú1u2a ú1u2e ú1u2o ú1u2i ú1u2u +à1i2a à1i2e à1i2o à1i2u +à1u2a à1u2e à1u2o à1u2i à1u2u +è1i2a è1i2e è1i2o è1i2u +è1u2a è1u2e è1u2o è1u2i è1u2u +ò1i2a ò1i2e ò1i2o ò1i2u +ò1u2a ò1u2e ò1u2o ò1u2i ò1u2u +ï1i2a ï1i2e ï1i2o ï1i2é ï1i2í +ï1i2ó +ï1i2ú ï1i2à +ï1i2è ï1i2ò ï1i2u +ï1u2a ï1u2e ï1u2o ï1u2é ï1u2í +ï1u2ó +ï1u2ú ï1u2à +ï1u2è ï1u2ò ï1u2i ï1u2u +ü1i2a ü1i2e ü1i2o ü1i2é ü1i2í +ü1i2ó +ü1i2ú ü1i2à +ü1i2è ü1i2ò ü1i2u +ü1u2a ü1u2e ü1u2o ü1u2é ü1u2í +ü1u2ó +ü1u2ú ü1u2à +ü1u2è ü1u2ò ü1u2i ü1u2u +% +% Semiconsonants at the beginning of word +% .i2a .i2e .i2o .i2u .u2a .u2e .u2i .u2o +.hi2a .hi2e .hi2o .hi2u .hu2a .hu2e .hu2i .hu2o +% .i2é .i2í .i2ó .i2ú .i2à +.i2è .i2ò +% .u2é .u2í .u2ó .u2ú .u2à +.u2è .u2ò +.hi2é .hi2ó .hi2ú .hi2à .hi2è +.hi2ò +.hu2é .hu2í .hu2ó .hu2à .hu2è +.hu2ò +% +% And now the crescent diphtongs +gu2a gu2e gu2i gu2o qu2a qu2e qu2i qu2o +gu2é gu2í gu2ó gu2à gu2è +gu2ò +qu2é qu2í qu2ó qu2à qu2è +qu2ò +gü2e gü2é gü2í gü2è gü2i +qü2e qü2é qü2í qü2è qü2i +% +% We add here some exceptions to the rules for diaeresis +a1isme. e1isme. i1isme. o1isme. u1isme. +a1ista. e1ista. i1ista. o1ista. u1ista. +a1um. e1um. i1um. o1um. u1um. +% a1us. e1us. i1us. o1us. u1us. +% +% disallow hyphenation on possible prefixes +.antihi2 .be2n .be2s .bi2s .ca2p .ce2l .cla2r .co2ll .co2n .co2r .de2s +.di2s .en3a .hipe2r .hiperm2n .in3ac .in3ad .in3ap .in3es .in3o +.inte2r .ma2l .mal1t2hus .pa2n .pe2r .pe3ri .pos2t .psa2l .rebe2s +.re2d .su2b .sub3o .subde2s .supe2r .tran2s +% .a2b .a2n .e2n .e2x .g2h .g2n .i2n .o2b .p2s +% +% Avoid hyphenation on some intra-word groups +g2no p2si p2se p2neu +g2nò p2sí +% +% Avoid wrong hyphenation on some foreign-origin words +.ch2 .th2 +% .c2h2 .d2h2 .k2s2 .m2n2 .r2h2 .s2 .t2h2 .t2s2 .t2x2 .w2h2 +ein1s2tein ru1t2herford ni2etz1sc2he +% +% Add some good patterns found by patgen +3exp 3nef 3nei 3pr 3ser a3ne a3ri bi3se des3ag des3ar des3av des3enc +e3ism e3le e3rio e3ris es3aco es3af es3ap es3arr es3as es3int +ig3n in3ex n3si o3ro qui3e s3emp s3esp sub3a ui3et +o3gnò +} diff --git a/src/lang.rs b/src/lang.rs index dbe5681..36bffaa 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -21,6 +21,9 @@ pub enum Lang { /// Hyphenation for _Bulgarian._ (Code: `bg`, Script, `Cyrl`, Feature: `bulgarian`) #[cfg(feature = "bulgarian")] Bulgarian, + /// Hyphenation for _Catalan._ (Code: `ca`, Script, `Latn`, Feature: `catalan`) + #[cfg(feature = "catalan")] + Catalan, /// Hyphenation for _Croatian._ (Code: `hr`, Script, `Latn`, Feature: `croatian`) #[cfg(feature = "croatian")] Croatian, @@ -125,6 +128,8 @@ impl Lang { b"be" => Some(Self::Belarusian), #[cfg(feature = "bulgarian")] b"bg" => Some(Self::Bulgarian), + #[cfg(feature = "catalan")] + b"ca" => Some(Self::Catalan), #[cfg(feature = "croatian")] b"hr" => Some(Self::Croatian), #[cfg(feature = "czech")] @@ -207,6 +212,8 @@ impl Lang { Self::Belarusian => (2, 2), #[cfg(feature = "bulgarian")] Self::Bulgarian => (2, 2), + #[cfg(feature = "catalan")] + Self::Catalan => (2, 2), #[cfg(feature = "croatian")] Self::Croatian => (2, 2), #[cfg(feature = "czech")] @@ -280,6 +287,8 @@ impl Lang { Self::Belarusian => State::root(include_bytes!("../tries/be.bin")), #[cfg(feature = "bulgarian")] Self::Bulgarian => State::root(include_bytes!("../tries/bg.bin")), + #[cfg(feature = "catalan")] + Self::Catalan => State::root(include_bytes!("../tries/ca.bin")), #[cfg(feature = "croatian")] Self::Croatian => State::root(include_bytes!("../tries/hr.bin")), #[cfg(feature = "czech")] diff --git a/tests/generate.rs b/tests/generate.rs index 488186e..9c4b7b1 100644 --- a/tests/generate.rs +++ b/tests/generate.rs @@ -7,10 +7,11 @@ use std::path::Path; #[test] fn generate_code() { - let mut languages: [(&str, &str, &[&str], &str, &str, u8, u8); 34] = [ + let mut languages: [(&str, &str, &[&str], &str, &str, u8, u8); 35] = [ ("Afrikaans", "af", &[], "Latn", "hyph-af.tex", 1, 2), ("Belarusian", "be", &[], "Cyrl", "hyph-be.tex", 2, 2), ("Bulgarian", "bg", &[], "Cyrl", "hyph-bg.tex", 2, 2), + ("Catalan", "ca", &[], "Latn", "hyph-ca.tex", 2, 2), ("Czech", "cs", &[], "Latn", "hyph-cs-sojka.tex", 2, 2), ("Danish", "da", &[], "Latn", "hyph-da.tex", 2, 2), ("German", "de", &[], "Latn", "hyph-de-1996.tex", 2, 2), diff --git a/tries/ca.bin b/tries/ca.bin new file mode 100644 index 0000000..9628d25 Binary files /dev/null and b/tries/ca.bin differ