[script.subtitles.zimukux] 0.2.1 #2560

Merged (4 commits, Jan 1, 2024)
23 changes: 9 additions & 14 deletions script.subtitles.zimukux/addon.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<addon id="script.subtitles.zimukux" name="ZiMuKuX" version="0.1.9" provider-name="pizzamx">
<addon id="script.subtitles.zimukux" name="ZiMuKuX" version="0.2.1" provider-name="pizzamx">
<requires>
<import addon="xbmc.python" version="3.0.0"/>
<import addon="script.module.beautifulsoup4" version="4.6.2"/>
@@ -20,6 +20,13 @@
<fanart>resources/fanart.png</fanart>
</assets>
<news>
v0.2.1 (2023/12/11)
- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/23 (thanks @jiangpengcheng for solving the cookie issue)

v0.2.0 (2023/5/30)
- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/18 (big thanks to lm317379829 for solving the captcha issue)
- Let's bump the version a bit

v0.1.9 (2022/12/23)
- Merged https://github.com/pizzamx/zimuku_for_kodi/pull/14
- New setting allows showing only the language suffix of a sub file name, to keep the UI from scrolling (slowly!)
@@ -41,19 +48,7 @@ v0.1.5(2022/4/22)
- Misc. bug fixes and code tweaks
- Is COVID-19 gonna last forever?

v0.1.1(2021/6/21)
- Rename addon to script.subtitles.zimukux

V0.1.0 (2021/6/14)
- Episode match logic gets more flexibility
- You can now specify your preferred sub type (srt, ass) and language (be cautious setting this parameter)
- A bit of refactoring to separate crawl logic from Kodi GUI and local file r/w interaction
- Some unit tests

V0.0.3 (2021/5/18)
- Fork from service.subtitles.zimuku V2.0.3
- Episode match for drama to locate the subtitle you need faster
- add support to change site URL on the fly
CHECK https://github.com/pizzamx/zimuku_for_kodi/releases FOR EARLIER RELEASE NOTES...
</news>
</extension>
</addon>
strings.po (English)
@@ -9,6 +9,10 @@ msgctxt "#30101"
msgid "Site URL"
msgstr ""

msgctxt "#301011"
msgid "OCR URL"
msgstr "OCR API"

msgctxt "#30200"
msgid "Sub preference"
msgstr ""
strings.po (Chinese)
@@ -9,6 +9,10 @@ msgctxt "#30101"
msgid "Site URL"
msgstr "网址"

msgctxt "#301011"
msgid "OCR URL"
msgstr "OCR API 的地址(请勿随意修改)"

msgctxt "#30200"
msgid "Sub preference"
msgstr "字幕下载偏好"
(addon entry script)
@@ -213,11 +213,13 @@ def handle_params(params):
def run():
global agent, logger

# Get the call parameters
params = get_params()

logger = Logger()
logger.log(sys._getframe().f_code.co_name, "HANDLE PARAMS:%s" % params)

# Get the site URL
zimuku_base_url = __addon__.getSetting("ZiMuKuUrl")
tpe = __addon__.getSetting("subtype")
lang = __addon__.getSetting("sublang")
@@ -227,8 +229,11 @@ def run():
else __addon__.getSetting("proxy_server"))
os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = proxy

ocrUrl = __addon__.getSetting("ocr_url")

# Run the search
agent = zmkagnt.Zimuku_Agent(zimuku_base_url, __temp__, logger, Unpacker(),
{'subtype': tpe, 'sublang': lang})
{'subtype': tpe, 'sublang': lang}, ocrUrl)

handle_params(params)
xbmcplugin.endOfDirectory(int(sys.argv[1]))
123 changes: 95 additions & 28 deletions script.subtitles.zimukux/resources/lib/zimuku_agent.py
@@ -17,24 +17,24 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""

from ast import expr_context

import os
import sys
import time
import json
import base64
import urllib

import requests
from bs4 import BeautifulSoup


class Zimuku_Agent:
def __init__(self, base_url, dl_location, logger, unpacker, settings):
def __init__(self, base_url, dl_location, logger, unpacker, settings, ocrUrl=''):
self.ua = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
self.ZIMUKU_BASE = base_url
self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830'
# self.ZIMUKU_API = '%s/search?q=%%s&vertoken=%%s' % base_url
self.TOKEN_PARAM = 'security_verify_data=313932302c31303830'
self.ZIMUKU_API = '%s/search?q=%%s' % base_url
self.INIT_PAGE = base_url + '/?security_verify_data=313932302c31303830'
self.DOWNLOAD_LOCATION = dl_location
self.FILE_MIN_SIZE = 1024

@@ -43,22 +43,18 @@ def __init__(self, base_url, dl_location, logger, unpacker, settings):
self.plugin_settings = settings
self.session = requests.Session()
self.vertoken = ''
self.ocrUrl = ocrUrl

# One-off call to fetch that vertoken. It probably expires, but never mind; the verification scheme will likely change again in a few days anyway
# self.init_site()
# One-off call to fetch the required cookies; the verification scheme may change later
self.init_site()

def set_setting(self, settings):
# for unit-testing purposes
self.plugin_settings = settings

def init_site(self):
self.session.cookies.set(
'srcurl', '68747470733a2f2f7a696d756b752e6f72672f')
self.get_page(self.ZIMUKU_BASE)

self.get_page(self.INIT_PAGE)
_, resp = self.get_page(self.ZIMUKU_BASE)
self.get_vertoken(resp)
self.get_page(self.INIT_PAGE)
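
A side note on the magic strings used by init_site(): both hex blobs are plain ASCII hex. A standalone decode (an illustrative sketch, not addon code) shows what the site is being told; the token decodes to "1920,1080", presumably a screen resolution, and the old srcurl cookie decodes to the site URL:

```python
# Decode the hex-encoded verification parameters used above.
assert bytes.fromhex('313932302c31303830').decode('ascii') == '1920,1080'
assert bytes.fromhex('68747470733a2f2f7a696d756b752e6f72672f').decode('ascii') == 'https://zimuku.org/'
```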

def get_page(self, url, **kwargs):
"""
@@ -105,6 +101,58 @@ def get_page(self, url, **kwargs):

return headers, http_body

def verify(self, url, append):
# Solve the site's image captcha: fetch the challenge image, run it through
# the external OCR service, then retry the request with the decoded answer.
headers = None
http_body = None
s = self.session
try:
request_headers = {'User-Agent': self.ua}

a = requests.adapters.HTTPAdapter(max_retries=3)
s.mount('https://', a)

self.logger.log(sys._getframe().f_code.co_name,
'[CHALLENGE VERI-CODE] requests GET [%s]' % (url), level=3)

http_response = s.get(url, headers=request_headers)

if http_response.status_code != 200:
soup = BeautifulSoup(http_response.content, 'html.parser')
content = soup.find_all(attrs={'class': 'verifyimg'})[
0].get('src')
if content is not None:
# Send the encoded captcha image to the OCR service
ocrurl = self.ocrUrl
payload = {'imgdata': content}
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'
}
response = requests.request(
"POST", ocrurl, headers=headers, json=payload)
result_json = json.loads(response.text)
text = ''
if result_json['code'] == 1:
text = result_json['result']
str1 = ''
i = 0
for ch in text:
if str1 == '':
str1 = hex(ord(text[i]))
else:
str1 += hex(ord(text[i]))
i = i + 1

# Retry the request with the captcha answer appended
get_cookie_url = '%s%s&%s' % (
url, append, 'security_verify_img=' + str1.replace('0x', ''))
http_response = s.get(
get_cookie_url, headers=request_headers)

except Exception as e:
self.logger.log(sys._getframe().f_code.co_name,
"ERROR CHALLENGING VERI-CODE(target URL: %s): %s" % (url, e), level=3)

def extract_sub_info(self, sub, lang_info_mode):
"""
Parse subtitle info out of an HTML block
@@ -169,7 +217,7 @@ def extract_sub_info(self, sub, lang_info_mode):
if rating not in ["0", "1", "2", "3", "4", "5"]:
self.logger.log(
sys._getframe().f_code.co_name, "NO RATING AVAILABLE IN (%s), URL: %s" %
(rating_div_str, link),
(rating_div_str, link),
2)
rating = "0"
except:
@@ -200,7 +248,7 @@ def get_vertoken(self, resp):
self.logger.log(sys._getframe().f_code.co_name,
"Fetching new vertoken form home page")
try:
headers, data = self.get_page(self.ZIMUKU_BASE+'/')
headers, data = self.get_page(self.ZIMUKU_BASE + '/')
hsoup = BeautifulSoup(resp, 'html.parser')
vertoken = hsoup.find(
'input', attrs={'name': 'vertoken'}).attrs.get('value', '')
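
The token extraction above can be exercised in isolation; the HTML fragment below is hypothetical, but the BeautifulSoup calls are the ones get_vertoken() uses:

```python
from bs4 import BeautifulSoup

# Hypothetical snippet of the search form on the home page.
html = '<form action="/search"><input type="hidden" name="vertoken" value="abc123"></form>'
soup = BeautifulSoup(html, 'html.parser')
vertoken = soup.find('input', attrs={'name': 'vertoken'}).attrs.get('value', '')
assert vertoken == 'abc123'
```
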
@@ -233,17 +281,20 @@ def search(self, title, items):

# vertoken = self.get_vertoken()

get_cookie_url = '%s&%s' % (self.ZIMUKU_API %
(urllib.parse.quote(title)), self.TOKEN_PARAM)
url = self.ZIMUKU_API % urllib.parse.quote(title)
try:
# 10/10/22: the site now requires fetching a cookie before searching
self.get_page(url)
self.get_page(get_cookie_url)
# self.get_page(url)
# self.get_page(get_cookie_url)

# Captcha handling
# self.verify(url, '&chost=zimuku.org')

# The actual search
self.logger.log(sys._getframe().f_code.co_name,
"Search API url: %s" % (url))

url += '&chost=zimuku.org'
_, data = self.get_page(url)
soup = BeautifulSoup(data, 'html.parser')
except Exception as e:
@@ -268,8 +319,9 @@ def search(self, title, items):
return self.double_filter(subtitle_list, items)

# 2. No direct hit; check whether there is a link for the same season and search inside it
season_name_chn = ('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五')[
int(items['season']) - 1] if s_e != 'N/A' else 'N/A'
season_name_chn = \
('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二', '十三', '十四', '十五')[
int(items['season']) - 1] if s_e != 'N/A' else 'N/A'
season_list = soup.find_all("div", class_="item prel clearfix")

page_list = soup.find('div', class_='pagination')
@@ -305,12 +357,19 @@ def search(self, title, items):
'Error getting sub page', level=3)
return []
subs = soup.tbody.find_all("tr")
unfiltered_sub_list = []
for sub in reversed(subs):
subtitle = self.extract_sub_info(sub, 2)
unfiltered_sub_list.append(subtitle)
sub_name = sub.a.text
if s_e in sub_name.upper():
subtitle_list.append(self.extract_sub_info(sub, 2))
subtitle_list.append(subtitle)
# If the season was matched we have to return here; no match means there is none
return self.double_filter(subtitle_list, items)
# If nothing matched, the whole season's subs may be packed into a single file, so return all results and let the user choose
if len(subtitle_list) > 0:
return self.double_filter(subtitle_list, items)
else:
return unfiltered_sub_list
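
On the season lookup a few lines up: Kodi supplies the season as a 1-based number, and the tuple maps it to the Chinese numeral used in the site's season links (seasons 1 to 15). A minimal sketch; the "第二季" example link text is an assumption, not taken from this diff:

```python
SEASON_NUMERALS = ('一', '二', '三', '四', '五', '六', '七', '八', '九', '十',
                   '十一', '十二', '十三', '十四', '十五')

def season_name_chn(season):
    # season is Kodi's season number as a string, e.g. '2' -> '二'
    return SEASON_NUMERALS[int(season) - 1]

assert season_name_chn('2') == '二'  # would match link text like "第二季"
```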

# Exact search found nothing, so return everything
subtitle_list = []
@@ -438,7 +497,7 @@ def download(self, url):
Download subtitles and return the list of files

Params:
url: the subtitle detail page, e.g. http://zimuku.org/detail/155262.html
url: the subtitle detail page, e.g. https://srtku.com/detail/155262.html

Return:
[], [], [] (three lists are returned)
@@ -451,15 +510,19 @@ def download(self, url):
supported_archive_exts = (".zip", ".7z", ".tar", ".bz2", ".rar",
".gz", ".xz", ".iso", ".tgz", ".tbz2", ".cbr")
try:
# Captcha handling
# self.verify(url, '?')

# Subtitle detail page.
headers, data = self.get_page(url)
soup = BeautifulSoup(data, 'html.parser')
url = soup.find("li", class_="dlsub").a.get('href')

if not (url.startswith(('http://', 'https://'))):
url = urllib.parse.urljoin(self.ZIMUKU_BASE, url)
self.logger.log(sys._getframe().f_code.co_name,
"GET SUB DETAIL PAGE: %s" % (url))

# Captcha handling
# self.verify(url, '?')

# Subtitle download-list page.
headers, data = self.get_page(url)
@@ -576,6 +639,10 @@ def download_links(self, links, referer):
try:
self.logger.log(sys._getframe().f_code.co_name,
"DOWNLOAD SUBTITLE: %s" % (url))

# Captcha handling
# self.verify(url, '?')

# Download subtitle one by one until success.
headers, data = self.get_page(url, Referer=referer)

@@ -606,13 +673,13 @@ def download_links(self, links, referer):
else:
self.logger.log(
sys._getframe().f_code.co_name, 'File received but too small: %s %d bytes' %
(filename, len(data)),
(filename, len(data)),
level=2)
return '', ''
else:
self.logger.log(
sys._getframe().f_code.co_name, 'Failed to download subtitle from all links: %s' %
(referer),
(referer),
level=2)
return '', ''

12 changes: 11 additions & 1 deletion script.subtitles.zimukux/resources/settings.xml
@@ -9,7 +9,17 @@
<control type="edit" format="string">
<heading>30101</heading>
</control>
<default>http://zimuku.org</default>
<default>https://srtku.com</default>
<constraints>
<allowempty>false</allowempty>
</constraints>
</setting>
<setting id="ocr_url" type="string" label="301011" help="">
<level>0</level>
<control type="edit" format="string">
<heading>301011</heading>
</control>
<default></default>
<constraints>
<allowempty>false</allowempty>
</constraints>