
Commit 6576834

feat: demo url whitelist (Guovin#470)

1 parent 0746ea1 commit 6576834

File tree: 4 files changed, +91 -38 lines changed

main.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -130,7 +130,7 @@ def pbar_update(self, name=""):
     def get_urls_len(self, filter=False):
         data = copy.deepcopy(self.channel_data)
         if filter:
-            process_nested_dict(data, seen=set(), flag=r"cache:(.*)")
+            process_nested_dict(data, seen=set(), flag=r"cache:(.*)", force_str="!")
         processed_urls = set(
             url_info[0]
             for channel_obj in data.values()
```
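In effect, `get_urls_len(filter=True)` now excludes whitelisted URLs from the count, because `force_str="!"` drops any entry whose `$`-suffix starts with `!` (see the new `remove_duplicates_from_tuple_list` in utils/tools.py below). A minimal sketch, assuming it runs from the repo root, with hypothetical channel data:

```python
from utils.tools import process_nested_dict

data = {
    "Movies": {
        "CCTV-1": [
            ("http://one.example.com/live$!", None, None, "important"),  # whitelisted
            ("http://two.example.com/live", None, None, None),
        ]
    }
}
process_nested_dict(data, seen=set(), flag=r"cache:(.*)", force_str="!")
# The "$!" entry was dropped from the copy, so only the plain URL is counted:
print(data["Movies"]["CCTV-1"])
# [('http://two.example.com/live', None, None, None)]
```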

utils/channel.py

Lines changed: 66 additions & 25 deletions
```diff
@@ -92,9 +92,13 @@ def get_channel_data_from_file(channels, file, use_old):
             if name not in category_dict:
                 category_dict[name] = []
             if use_old and url:
-                info = (url, None, None, None)
-                if info[0] and info not in category_dict[name]:
-                    category_dict[name].append(info)
+                info = url.partition("$")[2]
+                origin = None
+                if info and info.startswith("!"):
+                    origin = "important"
+                data = (url, None, None, origin)
+                if data not in category_dict[name]:
+                    category_dict[name].append(data)
     return channels


```
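The whitelist marker is read from the text after the first `$` in a source line. A minimal sketch of the new parsing (the URL is hypothetical):

```python
url = "http://example.com/stream.m3u8$!"  # hypothetical whitelisted source line
info = url.partition("$")[2]              # -> "!"
origin = "important" if info and info.startswith("!") else None
print((url, None, None, origin))
# ('http://example.com/stream.m3u8$!', None, None, 'important')
```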

```diff
@@ -119,10 +123,17 @@ def get_channel_items():
     for cate, data in channels.items():
         if cate in old_result:
             for name, info_list in data.items():
+                urls = [
+                    item[0].partition("$")[0]
+                    for item in info_list
+                    if item[0]
+                ]
                 if name in old_result[cate]:
                     for info in old_result[cate][name]:
-                        if info not in info_list:
-                            channels[cate][name].append(info)
+                        if info:
+                            pure_url = info[0].partition("$")[0]
+                            if pure_url not in urls:
+                                channels[cate][name].append(info)
     return channels


```
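Old-result entries are now deduplicated against the template by the `$`-stripped URL rather than by the whole tuple, so the same stream carrying a different suffix is not appended twice. A sketch with hypothetical values:

```python
info_list = [("http://example.com/live$!", None, None, "important")]
urls = [item[0].partition("$")[0] for item in info_list if item[0]]

old_info = ("http://example.com/live$cache:abc", None, None, None)
pure_url = old_info[0].partition("$")[0]
print(pure_url not in urls)  # False -> the old entry is skipped
```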

```diff
@@ -473,7 +484,9 @@ def init_info_data(data, cate, name):
     data[cate][name] = []


-def append_data_to_info_data(info_data, cate, name, data, origin=None, check=True):
+def append_data_to_info_data(
+    info_data, cate, name, data, origin=None, check=True, insert=False
+):
     """
     Append channel data to total info data
     """
```
```diff
@@ -482,13 +495,25 @@ def append_data_to_info_data(info_data, cate, name, data, origin=None, check=True):
     for item in data:
         try:
             url, date, resolution, *rest = item
-            origin = origin or (rest[0] if rest else None)
+            url_origin = origin or (rest[0] if rest else None)
             if url:
                 pure_url = url.partition("$")[0]
-                if pure_url not in urls and (
-                    not check or (check and check_url_by_patterns(pure_url))
+                if pure_url in urls:
+                    continue
+                if (
+                    url_origin == "important"
+                    or (not check)
+                    or (check and check_url_by_patterns(pure_url))
                 ):
-                    info_data[cate][name].append((url, date, resolution, origin))
+                    if insert:
+                        info_data[cate][name].insert(
+                            0, (url, date, resolution, url_origin)
+                        )
+                    else:
+                        info_data[cate][name].append(
+                            (url, date, resolution, url_origin)
+                        )
+                    urls.append(pure_url)
         except:
             continue

```
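`append_data_to_info_data` now skips the URL pattern check for `important` (whitelisted) entries and, with `insert=True`, places them at the head of the channel's list. A minimal sketch, assuming it runs from the repo root and that the surrounding code initializes the bucket as in the repo (data hypothetical):

```python
from utils.channel import append_data_to_info_data

info_data = {
    "Movies": {"CCTV-1": [("http://old.example.com/live", None, None, "subscribe")]}
}
append_data_to_info_data(
    info_data,
    "Movies",
    "CCTV-1",
    [("http://vip.example.com/live$!", None, None, "important")],
    insert=True,  # whitelisted entry goes to index 0
)
print(info_data["Movies"]["CCTV-1"][0][0])
# http://vip.example.com/live$!
```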

```diff
@@ -504,19 +529,15 @@ def get_origin_method_name(method):

 def append_old_data_to_info_data(info_data, cate, name, data):
     """
-    Append old channel data to total info data
+    Append history channel data to total info data
     """
     append_data_to_info_data(
         info_data,
         cate,
         name,
         data,
     )
-    print(name, "old:", len(data), end=", ")
-    print(
-        "total:",
-        len(info_data.get(cate, {}).get(name, [])),
-    )
+    print("History:", len(data), end=", ")


 def append_total_data(
```
```diff
@@ -542,6 +563,8 @@ def append_total_data(
     for cate, channel_obj in items:
         for name, old_info_list in channel_obj.items():
             print(f"{name}:", end=" ")
+            if constants.open_use_old_result and old_info_list:
+                append_old_data_to_info_data(data, cate, name, old_info_list)
             for method, result in total_result:
                 if constants.open_method[method]:
                     origin_method = get_origin_method_name(method)
```
```diff
@@ -552,8 +575,10 @@ def append_total_data(
                         data, cate, name, name_results, origin=origin_method
                     )
                     print(f"{method.capitalize()}:", len(name_results), end=", ")
-            if constants.open_use_old_result:
-                append_old_data_to_info_data(data, cate, name, old_info_list)
+            print(
+                "total:",
+                len(data.get(cate, {}).get(name, [])),
+            )
     if constants.open_keep_all:
         extra_cate = "📥其它频道"
         for method, result in total_result:
```
```diff
@@ -565,15 +590,20 @@ def append_total_data(
                     if name in names:
                         continue
                     print(f"{name}:", end=" ")
+                    if constants.open_use_old_result:
+                        old_info_list = channel_obj.get(name, [])
+                        if old_info_list:
+                            append_old_data_to_info_data(
+                                data, extra_cate, name, old_info_list
+                            )
                     append_data_to_info_data(
                         data, extra_cate, name, urls, origin=origin_method
                     )
                     print(name, f"{method.capitalize()}:", len(urls), end=", ")
-                    if constants.open_use_old_result:
-                        old_info_list = channel_obj.get(name, [])
-                        append_old_data_to_info_data(
-                            data, extra_cate, name, old_info_list
-                        )
+                    print(
+                        "total:",
+                        len(data.get(cate, {}).get(name, [])),
+                    )


 async def sort_channel_list(
```
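Taken together, the history merge now happens before the per-method results and the running total is printed last, so a channel's log line reads in processing order, e.g. `CCTV-1: History: 3, Subscribe: 12, total: 15` (counts hypothetical).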
```diff
@@ -629,7 +659,7 @@ async def process_sort_channel_list(data, ipv6=False, callback=None):
     is_ffmpeg = constants.open_ffmpeg and ffmpeg_installed
     semaphore = asyncio.Semaphore(5)
     need_sort_data = copy.deepcopy(data)
-    process_nested_dict(need_sort_data, seen=set(), flag=r"cache:(.*)")
+    process_nested_dict(need_sort_data, seen=set(), flag=r"cache:(.*)", force_str="!")
     tasks = [
         asyncio.create_task(
             sort_channel_list(
```
```diff
@@ -663,7 +693,18 @@ async def process_sort_channel_list(data, ipv6=False, callback=None):
             }
             for url, date, resolution, origin in info_list:
                 if "$" in url:
-                    matcher = re.search(r"cache:(.*)", url)
+                    info = url.partition("$")[2]
+                    if info and info.startswith("!"):
+                        append_data_to_info_data(
+                            sort_data,
+                            cate,
+                            name,
+                            [(url, date, resolution, origin)],
+                            check=False,
+                            insert=True,
+                        )
+                        continue
+                    matcher = re.search(r"cache:(.*)", info)
                     if matcher:
                         cache_key = matcher.group(1)
                         if not cache_key:
```
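During sorting, a URL whose `$`-suffix starts with `!` is therefore re-inserted at the top of the sorted results with `check=False`, instead of being speed-tested or cache-matched. A sketch of the branch condition (URL hypothetical):

```python
url = "http://example.com/live$!backup"
info = url.partition("$")[2]  # "!backup"
print(info.startswith("!"))   # True -> no speed test; inserted at index 0
```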

utils/constants.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -192,7 +192,7 @@ def get_resolution_value(resolution_str):

 log_path = os.path.join(log_dir, log_file)

-url_pattern = r"\b((https?):\/\/)?(\[[0-9a-fA-F:]+\]|([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?\b"
+url_pattern = r"((https?):\/\/)?(\[[0-9a-fA-F:]+\]|([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?(\$[^\s]+)?"

 rtp_pattern = r"^([^,,]+)(?:[,,])?(rtp://.*)$"
```
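The reworked `url_pattern` drops the trailing `\b` (which stopped the match before a `$!` suffix, since `!` is not a word character) and adds an optional `(\$[^\s]+)?` group, so the whitelist marker survives parsing. A quick check (the URL is hypothetical):

```python
import re

url_pattern = r"((https?):\/\/)?(\[[0-9a-fA-F:]+\]|([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?(\$[^\s]+)?"
m = re.search(url_pattern, "CCTV-1,http://example.com/live.m3u8$!")
print(m.group())  # http://example.com/live.m3u8$!
```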

utils/tools.py

Lines changed: 23 additions & 11 deletions
```diff
@@ -122,13 +122,20 @@ def get_total_urls_from_info_list(infoList, ipv6=False):
         origin: {"ipv4": [], "ipv6": []} for origin in origin_type_prefer
     }

+    total_urls = []
     for url, _, resolution, origin in infoList:
+        if origin == "important":
+            pure_url, _, info = url.partition("$")
+            new_info = info.partition("!")[2]
+            total_urls.append(f"{pure_url}${new_info}" if new_info else pure_url)
+            continue
+
         if constants.open_filter_resolution and resolution:
             resolution_value = get_resolution_value(resolution)
             if resolution_value < constants.min_resolution_value:
                 continue

-        if not origin or (origin.lower() not in origin_type_prefer):
+        if not origin or (origin not in origin_type_prefer):
             continue

         if origin == "subscribe" and "/rtp/" in url:
```
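`important` entries bypass the resolution and origin-preference filters entirely; the `!` marker is stripped before the URL is emitted, and any text after it is kept as the ordinary `$` suffix. A sketch with hypothetical values:

```python
url = "http://example.com/live$!ipv6"  # hypothetical whitelisted entry
pure_url, _, info = url.partition("$")  # info = "!ipv6"
new_info = info.partition("!")[2]       # "ipv6"
print(f"{pure_url}${new_info}" if new_info else pure_url)
# http://example.com/live$ipv6
```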
```diff
@@ -142,7 +149,7 @@ def get_total_urls_from_info_list(infoList, ipv6=False):
                 categorized_urls[origin]["ipv6"].append(url)
             else:
                 categorized_urls[origin]["ipv4"].append(url)
-    total_urls = []
+
     ipv_num = {
         "ipv4": 0,
         "ipv6": 0,
```
```diff
@@ -380,32 +387,37 @@ def get_result_file_content(show_result=False):
     )


-def remove_duplicates_from_tuple_list(tuple_list, seen, flag=None):
+def remove_duplicates_from_tuple_list(tuple_list, seen, flag=None, force_str=None):
     """
     Remove duplicates from tuple list
     """
     unique_list = []
     for item in tuple_list:
-        if flag:
-            matcher = re.search(flag, item[0])
-            part = matcher.group(1) if matcher else item[0]
-        else:
-            part = item[0]
+        item_first = item[0]
+        part = item_first
+        if force_str:
+            info = item_first.partition("$")[2]
+            if info and info.startswith(force_str):
+                continue
+        elif flag:
+            matcher = re.search(flag, item_first)
+            if matcher:
+                part = matcher.group(1)
         if part not in seen:
             seen.add(part)
             unique_list.append(item)
     return unique_list


-def process_nested_dict(data, seen, flag=None):
+def process_nested_dict(data, seen, flag=None, force_str=None):
     """
     Process nested dict
     """
     for key, value in data.items():
         if isinstance(value, dict):
-            process_nested_dict(value, seen, flag)
+            process_nested_dict(value, seen, flag, force_str)
         elif isinstance(value, list):
-            data[key] = remove_duplicates_from_tuple_list(value, seen, flag)
+            data[key] = remove_duplicates_from_tuple_list(value, seen, flag, force_str)


 url_domain_pattern = re.compile(
```
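Note the `elif`: when `force_str` is given, whitelisted entries are filtered out and the `flag` grouping is not applied to the remainder. A minimal sketch, assuming it runs from the repo root (tuples hypothetical):

```python
from utils.tools import remove_duplicates_from_tuple_list

items = [
    ("http://a.example.com/live$!", None, None, "important"),
    ("http://b.example.com/live", None, None, None),
    ("http://b.example.com/live", None, None, None),
]
print(remove_duplicates_from_tuple_list(items, seen=set(), force_str="!"))
# [('http://b.example.com/live', None, None, None)]
# the "$!" entry is dropped (it skips testing) and the duplicate collapses to one
```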
