forked from charlesmadere/CynanBotCommon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjishoHelper.py
143 lines (108 loc) · 5.88 KB
/
jishoHelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import locale
import urllib
from typing import List
import requests
from lxml import html
from requests import ConnectionError, HTTPError, Timeout
from urllib3.exceptions import MaxRetryError, NewConnectionError
import CynanBotCommon.utils as utils
class JishoResult():
    """One dictionary entry produced by a Jisho lookup.

    Bundles the matched word, its optional furigana, the list of
    definition strings and the URL the entry was read from.
    """

    def __init__(
        self,
        definitions: List[str],
        furigana: str,
        url: str,
        word: str
    ):
        """Validate and store the parts of a result.

        definitions, url and word are checked (in that order) with the
        utils validators; the first one that fails raises ValueError.
        furigana is stored exactly as given, without validation.
        """
        # Guard clauses: each check raises, so the order of the checks is
        # also the order in which problems are reported.
        if not utils.hasItems(definitions):
            raise ValueError(f'definitions argument is malformed: \"{definitions}\"')
        if not utils.isValidUrl(url):
            raise ValueError(f'url argument is malformed: \"{url}\"')
        if not utils.isValidStr(word):
            raise ValueError(f'word argument is malformed: \"{word}\"')

        self.__definitions = definitions
        self.__furigana = furigana
        self.__url = url
        self.__word = word

    def getDefinitions(self) -> List[str]:
        """Return the stored list of definition strings."""
        return self.__definitions

    def getFurigana(self) -> str:
        """Return the stored furigana (whatever was passed to __init__)."""
        return self.__furigana

    def getUrl(self) -> str:
        """Return the URL this result was created with."""
        return self.__url

    def getWord(self) -> str:
        """Return the matched word."""
        return self.__word

    def hasFurigana(self) -> bool:
        """Return whether the stored furigana passes utils.isValidStr."""
        return utils.isValidStr(self.__furigana)

    def toStr(self, definitionDelimiter: str = ' ') -> str:
        """Render the result as a single line of text.

        The output is "(furigana) word — definitions", where the
        parenthesised furigana prefix is omitted when hasFurigana() is
        false and the definitions are joined with definitionDelimiter.

        Raises ValueError if definitionDelimiter is None.
        """
        if definitionDelimiter is None:
            raise ValueError(f'definitionDelimiter argument is malformed: \"{definitionDelimiter}\"')

        readingPrefix = f'({self.__furigana}) ' if self.hasFurigana() else ''
        joinedDefinitions = definitionDelimiter.join(self.__definitions)

        return f'{readingPrefix}{self.__word} — {joinedDefinitions}'
class JishoHelper():
    """Looks words up on jisho.org by scraping its search results page."""

    def __init__(self, definitionsMaxSize: int = 3):
        """Create a helper.

        definitionsMaxSize is the maximum number of definitions that
        search() collects for a result. Raises ValueError if it fails
        utils.isValidNum or is smaller than 1.
        """
        if not utils.isValidNum(definitionsMaxSize) or definitionsMaxSize < 1:
            raise ValueError(f'definitionsMaxSize argument is malformed: \"{definitionsMaxSize}\"')

        self.__definitionsMaxSize = definitionsMaxSize

    def search(self, query: str) -> JishoResult:
        """Search Jisho for query and return the scraped result.

        The query is stripped, URL-quoted and appended to
        https://jisho.org/search/. The word, up to definitionsMaxSize
        definitions and (if present) the furigana are then read from the
        returned HTML.

        Raises ValueError if query is not a valid string, or if the
        expected word / definition elements cannot be found in the page.
        Raises RuntimeError if the HTTP request fails or the response body
        cannot be parsed into an HTML tree.
        """
        if not utils.isValidStr(query):
            raise ValueError(f'query argument is malformed: \"{query}\"')

        query = query.strip()
        print(f'Looking up \"{query}\"... ({utils.getNowTimeText()})')

        encodedQuery = urllib.parse.quote(query)
        requestUrl = f'https://jisho.org/search/{encodedQuery}'

        htmlTree = self.__fetchHtmlTree(query, requestUrl)
        word = self.__parseWord(htmlTree, query)
        definitions = self.__parseDefinitions(htmlTree, query)

        # Furigana is optional: use the first 'furigana' element on the
        # page if there is one, otherwise leave it as None.
        furigana = None
        furiganaElements = htmlTree.find_class('furigana')
        if utils.hasItems(furiganaElements):
            furigana = utils.cleanStr(furiganaElements[0].text_content())

        return JishoResult(
            definitions = definitions,
            furigana = furigana,
            url = requestUrl,
            word = word
        )

    def __fetchHtmlTree(self, query: str, requestUrl: str):
        """Download requestUrl and parse the body into an lxml HTML tree.

        Raises RuntimeError on a network failure or an unparseable body.
        """
        # Local import: only this method needs it, and the rest of the
        # file imports nothing else from lxml.etree.
        from lxml.etree import ParserError

        # NOTE(review): the HTTP status code is not checked here, and
        # requests only raises HTTPError from raise_for_status(), so an
        # error page is parsed like any other response. Left as-is because
        # changing it could alter which exception type callers see.
        try:
            rawResponse = requests.get(url = requestUrl, timeout = utils.getDefaultTimeout())
        except (ConnectionError, HTTPError, MaxRetryError, NewConnectionError, Timeout) as e:
            message = f'Exception occurred when attempting to search Jisho for \"{query}\": {e}'
            print(message)
            raise RuntimeError(message) from e

        decodeMessage = f'Exception occurred when attempting to decode Jisho\'s response for \"{query}\" into HTML tree'

        # lxml signals an empty / unparseable document by raising
        # ParserError rather than returning None, so translate that into
        # the same RuntimeError as the None check below.
        try:
            htmlTree = html.fromstring(rawResponse.content)
        except ParserError as e:
            print(decodeMessage)
            raise RuntimeError(decodeMessage) from e

        if htmlTree is None:
            print(decodeMessage)
            raise RuntimeError(decodeMessage)

        return htmlTree

    def __parseWord(self, htmlTree, query: str) -> str:
        """Read the word from the first 'concept_light-representation' element.

        Raises ValueError if that element, its single 'text' child, or a
        valid cleaned word cannot be found.
        """
        parentElements = htmlTree.find_class('concept_light-representation')
        if not utils.hasItems(parentElements):
            message = f'Exception occurred when attempting to find parent elements in Jisho\'s HTML tree in query for \"{query}\"'
            print(message)
            raise ValueError(message)

        # Exactly one 'text' element is expected inside the first entry.
        textElements = parentElements[0].find_class('text')
        if textElements is None or len(textElements) != 1:
            message = f'Exception occurred when attempting to find text elements in Jisho\'s HTML tree in query for \"{query}\"'
            print(message)
            raise ValueError(message)

        word = utils.cleanStr(textElements[0].text_content())
        if not utils.isValidStr(word):
            message = f'Exception occurred when checking that Jisho\'s word is valid in query for \"{query}\"'
            print(message)
            raise ValueError(message)

        return word

    def __parseDefinitions(self, htmlTree, query: str) -> List[str]:
        """Collect up to definitionsMaxSize numbered definitions.

        Each kept definition is formatted as "#<n> <text>", where n counts
        the kept definitions starting at 1. Raises ValueError if the page
        has no 'meaning-meaning' elements or none of them is usable.
        """
        # NOTE(review): 'meaning-meaning' is searched on the whole page,
        # not only inside the first entry, so this may pick up meanings
        # belonging to later entries -- confirm against Jisho's markup.
        definitionElements = htmlTree.find_class('meaning-meaning')
        if not utils.hasItems(definitionElements):
            message = f'Exception occurred when attempting to find definition elements in Jisho\'s HTML tree in query for \"{query}\"'
            print(message)
            raise ValueError(message)

        definitions = list()

        for definitionElement in definitionElements:
            # Skip any meaning that contains a 'break-unit' element.
            breakUnitElements = definitionElement.find_class('break-unit')
            if breakUnitElements is None or len(breakUnitElements) != 0:
                continue

            definition = utils.cleanStr(definitionElement.text_content())
            if not utils.isValidStr(definition):
                continue

            number = locale.format_string("%d", len(definitions) + 1, grouping = True)
            definitions.append(f'#{number} {definition}')

            if len(definitions) >= self.__definitionsMaxSize:
                # keep from adding tons of definitions
                break

        if not utils.hasItems(definitions):
            message = f'Unable to find any viable definitions for \"{query}\"'
            print(message)
            raise ValueError(message)

        return definitions