Skip to content

Commit

Permalink
this never ends
Browse files Browse the repository at this point in the history
  • Loading branch information
deedeeh committed Sep 9, 2024
1 parent 66cdef1 commit 0af29a0
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 38 deletions.
2 changes: 1 addition & 1 deletion fixtures/downloaded/downloaded
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Thu, 15 Aug 2024 16:56:55 GMT
Mon, 09 Sep 2024 15:39:05 GMT
11 changes: 10 additions & 1 deletion fixtures/downloaded/matomo-org.json
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,9 @@
"Quora Link Preview/1.0 (http://www.quora.com)",
"Mozilla/5.0 (compatible; Qwantify/2.2w; +https://www.qwant.com/)",
"Mozilla/5.0 (compatible; Qwantify-prod34997/1.0; +https://help.qwant.com/bot/)",
"Mozilla/5.0 (compatible; Qwantbot-prod12345/1.0; [email protected])",
"Mozilla/5.0 (compatible; Qwantbot-news/2.0; +https://www.qwant.com/)",
"Mozilla/5.0 (compatible; Qwantbot-dev12345/1.0; [email protected])",
"ROI Hunter; https://api-dev.roihunter.com",
"RSSRadio (Push Notification Scanner;[email protected])",
"Rainmeter WebParser plugin",
Expand Down Expand Up @@ -699,6 +702,7 @@
"Mozilla/5.0 (Morningscore Bot/1.0)",
"Uptime-Kuma/1.21.2",
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot",
"BrightEdge Crawler/1.0 ([email protected])",
"sfFeedReader/0.9",
"HatenaBookmark/4.0 (Hatena::Bookmark; Analyzer)",
Expand Down Expand Up @@ -1008,6 +1012,11 @@
"Mozilla/5.0 (compatible; um-FC/1.0; mailto: [email protected])",
"Mozilla/5.0 (compatible; um-CC/1.0; mailto: [email protected])",
"Mozilla/5.0 (compatible; CyberFind Crawler; +https://cyberfind.net/bot.html)/Nutch-1.20",
"Mozilla/5.0 (compatible; CyberFindCrawler; +https://cyberfind.net/bot.html)/1.0 (https://cyberfind.net/bot.html)",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/124.0.6367.207 Safari/537.36 WordPress.com mShots",
"wp.com feedbot/1.0 (+https://wp.com)"
"wp.com feedbot/1.0 (+https://wp.com)",
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) https://deepcrawl.com/bot",
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) https://deepcrawl.com/testing",
"Mozilla/7.0 (compatible; Golfe/1.1; +http://www.goo-olfe.ae/bot.html)",
"Mozilla/5.0 (compatible; SpiderLing; +https://nlp.fi.muni.cz/projects/biwec/)"
]
6 changes: 4 additions & 2 deletions fixtures/downloaded/monperrus.json
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@
"Mozilla/5.0 (compatible; sukibot_heritrix/3.1.1 +http://suki.ling.helsinki.fi/eng/webmasters.html)",
"Mozilla/5.0 (compatible; MSIE 7.0 +http://www.europarchive.org)",
"Mozilla/5.0 (compatible; NerdByNature.Bot; http://www.nerdbynature.net/bot)",
"Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/)",
"Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)",
"Mozilla/5.0 (compatible; AhrefsSiteAudit/6.1; +http://ahrefs.com/robot/)",
"Mozilla/5.0 (compatible; AhrefsBot/5.2; News; +http://ahrefs.com/robot/)",
Expand Down Expand Up @@ -877,7 +878,6 @@
"WordupInfoSearch/1.0",
"Mozilla/5.0 (compatible; WebDataStats/1.0 ; +https://webdatastats.com/policy.html)",
"Jersey/2.25.1 (HttpUrlConnection 1.8.0_141)",
"Mozilla/5.0 (compatible; Seekport Crawler; http://seekport.com/)",
"ZoomBot (Linkbot 1.0 http://suite.seozoom.it/bot.html)",
"VelenPublicWebCrawler (velen.io)",
"MoodleBot/1.0",
Expand Down Expand Up @@ -1063,6 +1063,7 @@
"IonCrawl (https://www.ionos.de/terms-gtc/faq-crawler-en/)",
"Uptime-Kuma/1.18.0",
"Mozilla/5.0 (compatible; SeekportBot; +https://bot.seekport.com)",
"Mozilla/5.0 (compatible; Seekport Crawler; http://seekport.com/)",
"FreshpingBot/1.0 (+https://freshping.io/)",
"Feedbin feed-id:2005098 - 2 subscribers",
"CriteoBot/0.1 (+https://www.criteo.com/criteo-crawler/)",
Expand Down Expand Up @@ -1148,5 +1149,6 @@
"claudebot",
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; [email protected])",
"Mozilla/5.0 (compatible; Monsidobot/2.2; +http://monsido.com/bot.html; [email protected])",
"GroupMeBot/1.0"
"GroupMeBot/1.0",
"Vercelbot (+https://vercel.com)"
]
Loading

0 comments on commit 0af29a0

Please sign in to comment.