Skip to content

Commit 32e1e68

Browse files
committed
紧急修复解析详情页的正则表达式
1 parent e96aa74 commit 32e1e68

File tree

2 files changed

+707
-1924
lines changed

2 files changed

+707
-1924
lines changed

app/src/main/java/com/hippo/ehviewer/client/parser/GalleryDetailParser.java

+68-62
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,18 @@ public class GalleryDetailParser {
7878
private static final Pattern PATTERN_PAGES = Pattern.compile("<tr><td[^<>]*>Length:</td><td[^<>]*>([\\d,]+) pages</td></tr>");
7979
private static final Pattern PATTERN_PREVIEW_PAGES = Pattern.compile("<td[^>]+><a[^>]+>([\\d,]+)</a></td><td[^>]+>(?:<a[^>]+>)?&gt;(?:</a>)?</td>");
8080
private static final Pattern PATTERN_NORMAL_PREVIEW = Pattern.compile("<div class=\"gdtm\"[^<>]*><div[^<>]*width:(\\d+)[^<>]*height:(\\d+)[^<>]*\\((.+?)\\)[^<>]*-(\\d+)px[^<>]*><a[^<>]*href=\"(.+?)\"[^<>]*><img alt=\"([\\d,]+)\"");
81+
private static final Pattern PATTERN_NORMAL_PREVIEW_NEW = Pattern.compile("<a href=\"(.+?)\">[^<>]*<[^<>]*title=\"Page (\\d+):[^<>]*width:(\\d+)[^<>]*height:(\\d+)[^<>]*\\((.+?)\\)[^<>]*\"></div>[^<>]*</a>");
8182
private static final Pattern PATTERN_LARGE_PREVIEW = Pattern.compile("<div class=\"gdtl\".+?<a href=\"(.+?)\"><img alt=\"([\\d,]+)\".+?src=\"(.+?)\"");
83+
private static final Pattern PATTERN_LARGE_PREVIEW_NEW = Pattern.compile("<a href=\"(.+?)\">[^<>]*<div title=\"Page (\\d+):[^<>]*\\((.+?)\\)[^<>]*0 0[^<>]*>");
8284
private static final Pattern PATTERN_ARCHIVE_DOWNLOAD = Pattern.compile("onclick=\"return popUp('(.*)',480,320)\">Archive Download</a>");
8385

8486
private static final GalleryTagGroup[] EMPTY_GALLERY_TAG_GROUP_ARRAY = new GalleryTagGroup[0];
8587
private static final GalleryCommentList EMPTY_GALLERY_COMMENT_ARRAY = new GalleryCommentList(new GalleryComment[0], false);
8688

8789
private static final DateFormat WEB_COMMENT_DATE_FORMAT = new SimpleDateFormat("dd MMMMM yyyy, HH:mm", Locale.US);
8890

91+
private static Integer EhSite;
92+
8993
static {
9094
WEB_COMMENT_DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
9195
}
@@ -516,17 +520,17 @@ public static GalleryComment parseComment(Element element) {
516520
// time
517521
Element c3 = JsoupUtils.getElementByClass(element, "c3");
518522
String temp = c3.ownText();
519-
if (temp.contains(" by:")){
523+
if (temp.contains(" by:")) {
520524
temp = temp.substring("Posted on ".length(), temp.length() - " by:".length());
521-
}else {
525+
} else {
522526
temp = temp.substring("Posted on ".length());
523527
}
524528

525529
comment.time = WEB_COMMENT_DATE_FORMAT.parse(temp).getTime();
526530
// user
527-
if (c3.children().isEmpty()){
531+
if (c3.children().isEmpty()) {
528532
comment.user = c4.text();
529-
}else {
533+
} else {
530534
comment.user = c3.child(0).text();
531535
}
532536

@@ -664,14 +668,29 @@ public static int parsePages(String body) throws ParseException {
664668
}
665669

666670
public static PreviewSet parsePreviewSet(Document d, String body) throws ParseException {
671+
if (null == EhSite) {
672+
EhSite = Settings.getGallerySite();
673+
}
674+
String previewClass;
675+
switch (EhSite) {
676+
case 0:
677+
previewClass = body;
678+
break;
679+
case 1:
680+
previewClass = d.getElementsByClass("gt200").html();
681+
break;
682+
default:
683+
previewClass = "";
684+
break;
685+
}
667686
PreviewSet previewSet;
668687
try {
669-
previewSet = parseLargePreviewSet(d, body);
670-
if (previewSet == null) {
671-
previewSet = parseNormalPreviewSet(body);
688+
previewSet = parseNormalPreviewSet(previewClass);
689+
if (previewSet.size() == 0) {
690+
previewSet = parseLargePreviewSet(previewClass.isEmpty() ? body : previewClass);
672691
}
673-
if (previewSet == null) {
674-
throw new ParseException("加载预览图失败", body);
692+
if (previewSet.size() == 0) {
693+
throw new ParseException("加载预览图失败", previewClass);
675694
}
676695
return previewSet;
677696
// return parseLargePreviewSet(d, body);
@@ -683,52 +702,26 @@ public static PreviewSet parsePreviewSet(Document d, String body) throws ParseEx
683702
}
684703

685704
public static PreviewSet parsePreviewSet(String body) throws ParseException {
686-
try {
687-
return parseLargePreviewSet(body);
688-
} catch (ParseException e) {
689-
return parseNormalPreviewSet(body);
690-
}
705+
return parsePreviewSet(Jsoup.parse(body), body);
691706
}
692707

693708
/**
694709
* Parse large previews with regular expressions
695710
*/
696-
private static LargePreviewSet parseLargePreviewSet(Document d, String body) throws ParseException {
697-
try {
698-
LargePreviewSet largePreviewSet = new LargePreviewSet();
699-
Element gdt = d.getElementById("gdt");
700-
Elements gdtls = gdt.getElementsByClass("gdtl");
701-
int n = gdtls.size();
702-
if (n <= 0) {
703-
return null;
704-
// throw new ParseException("Can't parse large preview", body);
705-
}
706-
for (int i = 0; i < n; i++) {
707-
Element element = gdtls.get(i).child(0);
708-
String pageUrl = element.attr("href");
709-
element = element.child(0);
710-
String imageUrl = element.attr("src");
711-
if (Settings.getFixThumbUrl()) {
712-
imageUrl = EhUrl.getFixedPreviewThumbUrl(imageUrl);
713-
}
714-
int index = Integer.parseInt(element.attr("alt")) - 1;
715-
largePreviewSet.addItem(index, imageUrl, pageUrl);
716-
}
717-
return largePreviewSet;
718-
} catch (Throwable e) {
719-
ExceptionUtils.throwIfFatal(e);
720-
e.printStackTrace();
721-
throw new ParseException("Can't parse large preview", body);
711+
private static LargePreviewSet parseLargePreviewSet(String body) {
712+
Matcher m = PATTERN_LARGE_PREVIEW_NEW.matcher(body);
713+
LargePreviewSet largePreviewSet = new LargePreviewSet();
714+
715+
find(m, largePreviewSet);
716+
717+
if (largePreviewSet.size() == 0) {
718+
m = PATTERN_LARGE_PREVIEW.matcher(body);
719+
find(m, largePreviewSet);
722720
}
721+
return largePreviewSet;
723722
}
724723

725-
/**
726-
* Parse large previews with regular expressions
727-
*/
728-
private static LargePreviewSet parseLargePreviewSet(String body) throws ParseException {
729-
Matcher m = PATTERN_LARGE_PREVIEW.matcher(body);
730-
LargePreviewSet largePreviewSet = new LargePreviewSet();
731-
724+
private static void find(Matcher m, LargePreviewSet largePreviewSet) {
732725
while (m.find()) {
733726
int index = ParserUtils.parseInt(m.group(2), 0) - 1;
734727
if (index < 0) {
@@ -741,42 +734,55 @@ private static LargePreviewSet parseLargePreviewSet(String body) throws ParseExc
741734
}
742735
largePreviewSet.addItem(index, imageUrl, pageUrl);
743736
}
744-
745-
if (largePreviewSet.size() == 0) {
746-
throw new ParseException("Can't parse large preview", body);
747-
}
748-
749-
return largePreviewSet;
750737
}
751738

752739
/**
753740
* Parse normal previews with regular expressions
754741
*/
755742
private static NormalPreviewSet parseNormalPreviewSet(String body) throws ParseException {
756-
Matcher m = PATTERN_NORMAL_PREVIEW.matcher(body);
743+
744+
Matcher m = PATTERN_NORMAL_PREVIEW_NEW.matcher(body);
757745
NormalPreviewSet normalPreviewSet = new NormalPreviewSet();
758746
while (m.find()) {
759-
int position = ParserUtils.parseInt(m.group(6), 0) - 1;
747+
int position = ParserUtils.parseInt(m.group(2), 0) - 1;
760748
if (position < 0) {
761749
continue;
762750
}
763-
String imageUrl = ParserUtils.trim(m.group(3));
764-
int xOffset = ParserUtils.parseInt(m.group(4), 0);
751+
String imageUrl = ParserUtils.trim(m.group(5));
752+
int xOffset = 0;
765753
int yOffset = 0;
766-
int width = ParserUtils.parseInt(m.group(1), 0);
754+
int width = ParserUtils.parseInt(m.group(3), 0);
767755
if (width <= 0) {
768756
continue;
769757
}
770-
int height = ParserUtils.parseInt(m.group(2), 0);
758+
int height = ParserUtils.parseInt(m.group(4), 0);
771759
if (height <= 0) {
772760
continue;
773761
}
774-
String pageUrl = ParserUtils.trim(m.group(5));
762+
String pageUrl = ParserUtils.trim(m.group(1));
775763
normalPreviewSet.addItem(position, imageUrl, xOffset, yOffset, width, height, pageUrl);
776764
}
777-
778765
if (normalPreviewSet.size() == 0) {
779-
throw new ParseException("Can't parse normal preview", body);
766+
m = PATTERN_NORMAL_PREVIEW.matcher(body);
767+
while (m.find()) {
768+
int position = ParserUtils.parseInt(m.group(6), 0) - 1;
769+
if (position < 0) {
770+
continue;
771+
}
772+
String imageUrl = ParserUtils.trim(m.group(3));
773+
int xOffset = ParserUtils.parseInt(m.group(4), 0);
774+
int yOffset = 0;
775+
int width = ParserUtils.parseInt(m.group(1), 0);
776+
if (width <= 0) {
777+
continue;
778+
}
779+
int height = ParserUtils.parseInt(m.group(2), 0);
780+
if (height <= 0) {
781+
continue;
782+
}
783+
String pageUrl = ParserUtils.trim(m.group(5));
784+
normalPreviewSet.addItem(position, imageUrl, xOffset, yOffset, width, height, pageUrl);
785+
}
780786
}
781787

782788
return normalPreviewSet;

0 commit comments

Comments
 (0)