From 8254fd332352438f439b7c4f678974f419c80040 Mon Sep 17 00:00:00 2001 From: pizzamx Date: Thu, 29 Dec 2022 11:52:04 +0800 Subject: [PATCH 1/4] [script.subtitles.zimukux] 0.2.0 --- script.subtitles.zimukux/addon.xml | 6 +- .../resource.language.en_GB/strings.po | 4 + .../resource.language.zh_CN/strings.po | 4 + .../resources/lib/sub_provider_service.py | 7 +- .../resources/lib/zimuku_agent.py | 92 ++++++++++++++++--- .../resources/settings.xml | 12 ++- 6 files changed, 108 insertions(+), 17 deletions(-) diff --git a/script.subtitles.zimukux/addon.xml b/script.subtitles.zimukux/addon.xml index 7b1c3d5540..3b405dae79 100644 --- a/script.subtitles.zimukux/addon.xml +++ b/script.subtitles.zimukux/addon.xml @@ -1,5 +1,5 @@  - + @@ -20,6 +20,10 @@ resources/fanart.png +v0.2.0(2023/5/30) +- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/18 (big thanks to lm317379829 for solving the captcha issue) +- Let's bump version a bit + v0.1.9(2022/12/23) - Merged https://github.com/pizzamx/zimuku_for_kodi/pull/14 - New setting allow showing only language suffix of sub file name to prevent the UI from scrolling (slowly!) diff --git a/script.subtitles.zimukux/resources/language/resource.language.en_GB/strings.po b/script.subtitles.zimukux/resources/language/resource.language.en_GB/strings.po index 8646eb193e..1d191b1c47 100644 --- a/script.subtitles.zimukux/resources/language/resource.language.en_GB/strings.po +++ b/script.subtitles.zimukux/resources/language/resource.language.en_GB/strings.po @@ -9,6 +9,10 @@ msgctxt "#30101" msgid "Site URL" msgstr "" +msgctxt "#301011" +msgid "OCR URL" +msgstr "OCR API" + msgctxt "#30200" msgid "Sub preference" msgstr "" diff --git a/script.subtitles.zimukux/resources/language/resource.language.zh_CN/strings.po b/script.subtitles.zimukux/resources/language/resource.language.zh_CN/strings.po index 9c9765737f..46f9184fd1 100644 --- a/script.subtitles.zimukux/resources/language/resource.language.zh_CN/strings.po +++ b/script.subtitles.zimukux/resources/language/resource.language.zh_CN/strings.po @@ -9,6 +9,10 @@ msgctxt "#30101" msgid "Site URL" msgstr "网址" +msgctxt "#301011" +msgid "OCR URL" +msgstr "OCR API 的地址(请勿随意修改)" + msgctxt "#30200" msgid "Sub preference" msgstr "字幕下载偏好" diff --git a/script.subtitles.zimukux/resources/lib/sub_provider_service.py b/script.subtitles.zimukux/resources/lib/sub_provider_service.py index 7a7dc5117e..5673382ddc 100644 --- a/script.subtitles.zimukux/resources/lib/sub_provider_service.py +++ b/script.subtitles.zimukux/resources/lib/sub_provider_service.py @@ -213,11 +213,13 @@ def handle_params(params): def run(): global agent, logger + # 获取参数 params = get_params() logger = Logger() logger.log(sys._getframe().f_code.co_name, "HANDLE PARAMS:%s" % params) + # 获取url zimuku_base_url = __addon__.getSetting("ZiMuKuUrl") tpe = __addon__.getSetting("subtype") lang = __addon__.getSetting("sublang") @@ -227,8 +229,11 @@ def run(): else __addon__.getSetting("proxy_server")) os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = proxy + ocrUrl= __addon__.getSetting("ocr_url") + + # 查询 agent = zmkagnt.Zimuku_Agent(zimuku_base_url, __temp__, logger, Unpacker(), - {'subtype': tpe, 'sublang': lang}) + {'subtype': tpe, 'sublang': lang}, ocrUrl) handle_params(params) xbmcplugin.endOfDirectory(int(sys.argv[1])) diff --git a/script.subtitles.zimukux/resources/lib/zimuku_agent.py b/script.subtitles.zimukux/resources/lib/zimuku_agent.py index 75eb949ca6..3be819be7d 100644 --- a/script.subtitles.zimukux/resources/lib/zimuku_agent.py +++ b/script.subtitles.zimukux/resources/lib/zimuku_agent.py @@ -17,18 +17,19 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ -from ast import expr_context + import os import sys import time +import json +import base64 import urllib - import requests from bs4 import BeautifulSoup class Zimuku_Agent: - def __init__(self, base_url, dl_location, logger, unpacker, settings): + def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl='https://ddddocr.lm317379829.repl.co/'): self.ua = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)' self.ZIMUKU_BASE = base_url self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830' @@ -43,6 +44,7 @@ def __init__(self, base_url, dl_location, logger, unpacker, settings): self.plugin_settings = settings self.session = requests.Session() self.vertoken = '' + self.ocrUrl = ocrUrl # 一次性调用,获取那个vertoken。目测这东西会过期,不过不管那么多了,感觉过两天验证机制又要变 # self.init_site() @@ -56,7 +58,6 @@ def init_site(self): 'srcurl', '68747470733a2f2f7a696d756b752e6f72672f') self.get_page(self.ZIMUKU_BASE) - self.get_page(self.INIT_PAGE) _, resp = self.get_page(self.ZIMUKU_BASE) self.get_vertoken(resp) @@ -105,6 +106,58 @@ def get_page(self, url, **kwargs): return headers, http_body + def verify(self, url, append): + headers = None + http_body = None + s = self.session + try: + request_headers = {'User-Agent': self.ua} + + a = requests.adapters.HTTPAdapter(max_retries=3) + s.mount('https://', a) + + self.logger.log(sys._getframe().f_code.co_name, + '[CHALLENGE VERI-CODE] requests GET [%s]' % (url), level=3) + + http_response = s.get(url, headers=request_headers) + + if http_response.status_code != 200: + soup = BeautifulSoup(http_response.content, 'html.parser') + content = soup.find_all(attrs={'class': 'verifyimg'})[ + 0].get('src') + if content is not None: + # 处理编码 + ocrurl = self.ocrUrl + payload = {'imgdata': content} + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36' + } + response = requests.request( + "POST", ocrurl, headers=headers, json=payload) + result_json = json.loads(response.text) + text = '' + if result_json['code'] == 1: + text = result_json['result'] + str1 = '' + i = 0 + for ch in text: + if str1 == '': + str1 = hex(ord(text[i])) + else: + str1 += hex(ord(text[i])) + i = i + 1 + + # 使用带验证码的访问 + get_cookie_url = '%s%s&%s' % ( + url, append, 'security_verify_img=' + str1.replace('0x', '')) + http_response = s.get( + get_cookie_url, headers=request_headers) + a = 1 + + except Exception as e: + self.logger.log(sys._getframe().f_code.co_name, + "ERROR CHALLENGING VERI-CODE(target URL: %s): %s" % (url, e), level=3) + def extract_sub_info(self, sub, lang_info_mode): """ 从 html 块中解析出字幕信息 @@ -169,7 +222,7 @@ def extract_sub_info(self, sub, lang_info_mode): if rating not in ["0", "1", "2", "3", "4", "5"]: self.logger.log( sys._getframe().f_code.co_name, "NO RATING AVAILABLE IN (%s), URL: %s" % - (rating_div_str, link), + (rating_div_str, link), 2) rating = "0" except: @@ -200,7 +253,7 @@ def get_vertoken(self, resp): self.logger.log(sys._getframe().f_code.co_name, "Fetching new vertoken form home page") try: - headers, data = self.get_page(self.ZIMUKU_BASE+'/') + headers, data = self.get_page(self.ZIMUKU_BASE + '/') hsoup = BeautifulSoup(resp, 'html.parser') vertoken = hsoup.find( 'input', attrs={'name': 'vertoken'}).attrs.get('value', '') @@ -233,17 +286,20 @@ def search(self, title, items): # vertoken = self.get_vertoken() - get_cookie_url = '%s&%s' % (self.ZIMUKU_API % - (urllib.parse.quote(title)), self.TOKEN_PARAM) url = self.ZIMUKU_API % urllib.parse.quote(title) try: # 10/10/22: 变成搜索要先拿 cookie - self.get_page(url) - self.get_page(get_cookie_url) + # self.get_page(url) + # self.get_page(get_cookie_url) + + # 处理验证码逻辑 + self.verify(url, '&chost=zimuku.org') # 真正的搜索 self.logger.log(sys._getframe().f_code.co_name, "Search API url: %s" % (url)) + + url += '&chost=zimuku.org' _, data = self.get_page(url) soup = BeautifulSoup(data, 'html.parser') except Exception as e: @@ -451,6 +507,9 @@ def download(self, url): supported_archive_exts = (".zip", ".7z", ".tar", ".bz2", ".rar", ".gz", ".xz", ".iso", ".tgz", ".tbz2", ".cbr") try: + # 处理验证码逻辑 + self.verify(url, '?') + # Subtitle detail page. headers, data = self.get_page(url) soup = BeautifulSoup(data, 'html.parser') @@ -458,8 +517,9 @@ def download(self, url): if not (url.startswith(('http://', 'https://'))): url = urllib.parse.urljoin(self.ZIMUKU_BASE, url) - self.logger.log(sys._getframe().f_code.co_name, - "GET SUB DETAIL PAGE: %s" % (url)) + + # 处理验证码逻辑 + self.verify(url, '?') # Subtitle download-list page. headers, data = self.get_page(url) @@ -576,6 +636,10 @@ def download_links(self, links, referer): try: self.logger.log(sys._getframe().f_code.co_name, "DOWNLOAD SUBTITLE: %s" % (url)) + + # 处理验证码逻辑 + self.verify(url, '?') + # Download subtitle one by one until success. headers, data = self.get_page(url, Referer=referer) @@ -606,13 +670,13 @@ def download_links(self, links, referer): else: self.logger.log( sys._getframe().f_code.co_name, 'File received but too small: %s %d bytes' % - (filename, len(data)), + (filename, len(data)), level=2) return '', '' else: self.logger.log( sys._getframe().f_code.co_name, 'Failed to download subtitle from all links: %s' % - (referer), + (referer), level=2) return '', '' diff --git a/script.subtitles.zimukux/resources/settings.xml b/script.subtitles.zimukux/resources/settings.xml index cea3d900c9..58a5a57339 100644 --- a/script.subtitles.zimukux/resources/settings.xml +++ b/script.subtitles.zimukux/resources/settings.xml @@ -9,7 +9,17 @@ 30101 - http://zimuku.org + https://so.zimuku.org + + false + + + + 0 + + 301011 + + https://ddddocr.lm317379829.repl.co/ false From 17f98900d120d0b5c4cf9ac650eeb48bc4283d86 Mon Sep 17 00:00:00 2001 From: pizzamx Date: Sat, 3 Jun 2023 11:31:54 +0800 Subject: [PATCH 2/4] [script.subtitles.zimukux] 0.2.0 --- script.subtitles.zimukux/addon.xml | 14 +------------- .../resources/lib/zimuku_agent.py | 7 +++---- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/script.subtitles.zimukux/addon.xml b/script.subtitles.zimukux/addon.xml index 3b405dae79..22d7bd10e5 100644 --- a/script.subtitles.zimukux/addon.xml +++ b/script.subtitles.zimukux/addon.xml @@ -45,19 +45,7 @@ v0.1.5(2022/4/22) - Misc. bug fix and code tweaks - Is COVID-19 gonna last forever? -v0.1.1(2021/6/21) -- Rename addon to script.subtitles.zimukux - -V0.1.0 (2021/6/14) -- Episode match logic gets more flexibility -- You can now specify your preferred sub type (srt, ass) and language (be cautious setting this parameter) -- A bit of refactoring to separate crawl logic from Kodi GUI and local file r/w interaction -- Some unit tests - -V0.0.3 (2021/5/18) -- Fork from service.subtitles.zimuku V2.0.3 -- Episode match for drama to locate the subtitle you need faster -- add support to change site URL on the fly +CHECK https://github.com/pizzamx/zimuku_for_kodi/releases FOR EARLIER RELEASE NOTES... diff --git a/script.subtitles.zimukux/resources/lib/zimuku_agent.py b/script.subtitles.zimukux/resources/lib/zimuku_agent.py index 3be819be7d..704b877697 100644 --- a/script.subtitles.zimukux/resources/lib/zimuku_agent.py +++ b/script.subtitles.zimukux/resources/lib/zimuku_agent.py @@ -32,9 +32,7 @@ class Zimuku_Agent: def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl='https://ddddocr.lm317379829.repl.co/'): self.ua = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)' self.ZIMUKU_BASE = base_url - self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830' # self.ZIMUKU_API = '%s/search?q=%%s&vertoken=%%s' % base_url - self.TOKEN_PARAM = 'security_verify_data=313932302c31303830' self.ZIMUKU_API = '%s/search?q=%%s' % base_url self.DOWNLOAD_LOCATION = dl_location self.FILE_MIN_SIZE = 1024 @@ -324,8 +322,9 @@ def search(self, title, items): return self.double_filter(subtitle_list, items) # 2. 直接找不到,看是否存在同一季的链接,进去找 - season_name_chn = ('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五')[ - int(items['season']) - 1] if s_e != 'N/A' else 'N/A' + season_name_chn = \ + ('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五')[ + int(items['season']) - 1] if s_e != 'N/A' else 'N/A' season_list = soup.find_all("div", class_="item prel clearfix") page_list = soup.find('div', class_='pagination') From 0fca7e137417ae2565a0d90800cc74cd1d99d3e2 Mon Sep 17 00:00:00 2001 From: pizzamx Date: Mon, 11 Dec 2023 21:07:27 +0800 Subject: [PATCH 3/4] [script.subtitles.zimukux] 0.2.1 --- script.subtitles.zimukux/addon.xml | 5 ++- .../resources/lib/zimuku_agent.py | 34 +++++++++++-------- .../resources/settings.xml | 4 +-- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/script.subtitles.zimukux/addon.xml b/script.subtitles.zimukux/addon.xml index 22d7bd10e5..49803dd9b1 100644 --- a/script.subtitles.zimukux/addon.xml +++ b/script.subtitles.zimukux/addon.xml @@ -1,5 +1,5 @@  - + @@ -20,6 +20,9 @@ resources/fanart.png +v0.2.1(2023/12/11) +- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/23(thanks @jiangpengcheng for solving the cookie issue) + v0.2.0(2023/5/30) - Merged https://github.com/pizzamx/zimuku_for_kodi/pull/18 (big thanks to lm317379829 for solving the captcha issue) - Let's bump version a bit diff --git a/script.subtitles.zimukux/resources/lib/zimuku_agent.py b/script.subtitles.zimukux/resources/lib/zimuku_agent.py index 704b877697..025edbea92 100644 --- a/script.subtitles.zimukux/resources/lib/zimuku_agent.py +++ b/script.subtitles.zimukux/resources/lib/zimuku_agent.py @@ -34,6 +34,7 @@ def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl='ht self.ZIMUKU_BASE = base_url # self.ZIMUKU_API = '%s/search?q=%%s&vertoken=%%s' % base_url self.ZIMUKU_API = '%s/search?q=%%s' % base_url + self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830' self.DOWNLOAD_LOCATION = dl_location self.FILE_MIN_SIZE = 1024 @@ -44,20 +45,16 @@ def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl='ht self.vertoken = '' self.ocrUrl = ocrUrl - # 一次性调用,获取那个vertoken。目测这东西会过期,不过不管那么多了,感觉过两天验证机制又要变 - # self.init_site() + # 一次性调用,获取必需的cookies,验证机制可能之后会变 + self.init_site() def set_setting(self, settings): # for unittestting purpose self.plugin_settings = settings def init_site(self): - self.session.cookies.set( - 'srcurl', '68747470733a2f2f7a696d756b752e6f72672f') - self.get_page(self.ZIMUKU_BASE) - - _, resp = self.get_page(self.ZIMUKU_BASE) - self.get_vertoken(resp) + self.get_page(self.INIT_PAGE) + self.get_page(self.INIT_PAGE) def get_page(self, url, **kwargs): """ @@ -291,7 +288,7 @@ def search(self, title, items): # self.get_page(get_cookie_url) # 处理验证码逻辑 - self.verify(url, '&chost=zimuku.org') + # self.verify(url, '&chost=zimuku.org') # 真正的搜索 self.logger.log(sys._getframe().f_code.co_name, @@ -360,12 +357,19 @@ def search(self, title, items): 'Error getting sub page', level=3) return [] subs = soup.tbody.find_all("tr") + unfiltered_sub_list = [] for sub in reversed(subs): + subtitle = self.extract_sub_info(sub, 2) + unfiltered_sub_list.append(subtitle) sub_name = sub.a.text if s_e in sub_name.upper(): - subtitle_list.append(self.extract_sub_info(sub, 2)) + subtitle_list.append(subtitle) # 如果匹配到了季,那就得返回了,没有就是没有 - return self.double_filter(subtitle_list, items) + # 如果没有匹配到,可能整季度的字幕被打包到一个文件中了,那就把所有的结果都返回让用户自己选择 + if len(subtitle_list) > 0: + return self.double_filter(subtitle_list, items) + else: + return unfiltered_sub_list # 精确查找没找到,那就返回所有 subtitle_list = [] @@ -493,7 +497,7 @@ def download(self, url): 下载并返回字幕文件列表 Params: - url 字幕详情页面,如 http://zimuku.org/detail/155262.html + url 字幕详情页面,如 https://srtku.com/detail/155262.html Return: [], [], [] 返回 3 个列表 @@ -507,7 +511,7 @@ def download(self, url): ".gz", ".xz", ".iso", ".tgz", ".tbz2", ".cbr") try: # 处理验证码逻辑 - self.verify(url, '?') + # self.verify(url, '?') # Subtitle detail page. headers, data = self.get_page(url) @@ -518,7 +522,7 @@ def download(self, url): url = urllib.parse.urljoin(self.ZIMUKU_BASE, url) # 处理验证码逻辑 - self.verify(url, '?') + # self.verify(url, '?') # Subtitle download-list page. headers, data = self.get_page(url) @@ -637,7 +641,7 @@ def download_links(self, links, referer): "DOWNLOAD SUBTITLE: %s" % (url)) # 处理验证码逻辑 - self.verify(url, '?') + # self.verify(url, '?') # Download subtitle one by one until success. headers, data = self.get_page(url, Referer=referer) diff --git a/script.subtitles.zimukux/resources/settings.xml b/script.subtitles.zimukux/resources/settings.xml index 58a5a57339..bc64d3ad85 100644 --- a/script.subtitles.zimukux/resources/settings.xml +++ b/script.subtitles.zimukux/resources/settings.xml @@ -9,7 +9,7 @@ 30101 - https://so.zimuku.org + https://srtku.com false @@ -19,7 +19,7 @@ 301011 - https://ddddocr.lm317379829.repl.co/ + false From 9a58fcb93474f6ed310593c0368eb12821c9f5fe Mon Sep 17 00:00:00 2001 From: pizzamx Date: Mon, 11 Dec 2023 21:14:12 +0800 Subject: [PATCH 4/4] [script.subtitles.zimukux] 0.2.1 --- script.subtitles.zimukux/resources/lib/zimuku_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script.subtitles.zimukux/resources/lib/zimuku_agent.py b/script.subtitles.zimukux/resources/lib/zimuku_agent.py index 025edbea92..9e37e37dab 100644 --- a/script.subtitles.zimukux/resources/lib/zimuku_agent.py +++ b/script.subtitles.zimukux/resources/lib/zimuku_agent.py @@ -29,7 +29,7 @@ class Zimuku_Agent: - def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl='https://ddddocr.lm317379829.repl.co/'): + def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl=''): self.ua = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)' self.ZIMUKU_BASE = base_url # self.ZIMUKU_API = '%s/search?q=%%s&vertoken=%%s' % base_url