Skip to content

Commit 6bf7b35

Browse files
authored
Handle Ukrainian numbers with apostrophe (#95)
1 parent 2a204b4 commit 6bf7b35

File tree

4 files changed

+31
-4
lines changed

4 files changed

+31
-4
lines changed

number_parser/data/uk.py

+5
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,14 @@
8282
"чотириста": 400,
8383
"п'ятсот": 500,
8484
"пʼятсот": 500,
85+
"п'ятисот": 500,
86+
"пʼятисот": 500,
87+
"пятсот": 500,
88+
"пятисот": 500,
8589
"шістсот": 600,
8690
"сімсот": 700,
8791
"вісімсот": 800,
92+
"девятсот": 900,
8893
"дев'ятсот": 900,
8994
"девʼятсот": 900
9095
},

number_parser/parser.py

+4
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,10 @@ def parse_number(input_string, language=None):
305305

306306
lang_data = LanguageData(language)
307307

308+
# Normalize the input string by removing apostrophes (ASCII ' and typographic ’)
309+
input_string = input_string.replace("'", "")
310+
input_string = input_string.replace("’", "")
311+
308312
tokens = _tokenize(input_string, language)
309313
normalized_tokens = _normalize_tokens(tokens)
310314
for index, token in enumerate(normalized_tokens):

pytest.ini

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
[pytest]
2-
flake8-max-line-length = 119
3-
flake8-ignore =
1+
[flake8]
2+
max-line-length = 119
3+
ignore =
44
# This rule goes against the PEP 8 recommended style and it's incompatible
55
# with W504
66
W503
@@ -9,7 +9,6 @@ flake8-ignore =
99
# E501: Line too long
1010
number-parser/number_parser/data/* E501
1111

12-
1312
# Exclude files that are meant to provide top-level imports
1413
# F401: Module imported but unused
1514
number-parser/number_parser/__init__.py F401

tests/test_language_uk.py

+19
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,25 @@
107107
("ундецільйон", 1_000_000_000_000_000_000_000_000_000_000_000_000),
108108
("дуодецільйон", 1_000_000_000_000_000_000_000_000_000_000_000_000_000),
109109
("тредецільйон", 1_000_000_000_000_000_000_000_000_000_000_000_000_000_000),
110+
# Test cases with apostrophes (both ASCII ' and typographic ’ variants)
111+
("п'ять", 5),
112+
("п’ять", 5),
113+
("п'ятдесят", 50),
114+
("п’ятдесят", 50),
115+
("п'ятисот", 500),
116+
("п’ятисот", 500),
117+
("п'ятнадцять", 15),
118+
("п’ятнадцять", 15),
119+
("п'ятдесят тисяч", 50_000),
120+
("п’ятдесят тисяч", 50_000),
121+
("дев'ять", 9),
122+
("дев’ять", 9),
123+
("дев'ятнадцять", 19),
124+
("дев’ятнадцять", 19),
125+
("дев'ятсот", 900),
126+
("дев’ятсот", 900),
127+
("дев'ятсот тисяч", 900_000),
128+
("дев’ятсот тисяч", 900_000),
110129
],
111130
)
112131
def test_parse_number(expected, test_input):

0 commit comments

Comments
 (0)