Skip to content

Commit

Permalink
feat: Improve Amazon product sanitizer (#180)
Browse files Browse the repository at this point in the history
  • Loading branch information
svenjacobs authored May 22, 2023
1 parent 701d285 commit f9fe760
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,33 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.amazon

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId

class AmazonProductSanitizer : RegexSanitizer(
regex = Regex("ref=[^?&]+|[?&][^=]+=.[^&]*"),
) {
class AmazonProductSanitizer : Sanitizer {

override val id = SanitizerId("amazon")

/**
 * Builds the metadata describing this sanitizer.
 *
 * @param context used to resolve the sanitizer's name from string resources
 * @return metadata whose name is the `sanitizer_amazon_product_name` string resource
 */
override fun getMetadata(context: Context): Sanitizer.Metadata {
    val displayName = context.getString(R.string.sanitizer_amazon_product_name)
    return Sanitizer.Metadata(name = displayName)
}

override fun matchesDomain(input: String) =
input.matchesDomain("amazon\\..+/dp/[0-9A-Z]+", isRegex = true)
override fun matchesDomain(input: String) = REGEX.containsMatchIn(input)

/**
 * Reduces an Amazon product URL to its short `/dp/...` form.
 *
 * Everything between the top level domain and the `/dp/` argument (first group of [REGEX]) is
 * removed, then every match of [RegexFactory.AllParameters] is replaced with an empty string.
 *
 * @param input URL to sanitize
 * @return the sanitized URL, or [input] unchanged when [REGEX] does not match it
 */
override fun invoke(input: String): String {
    val match = REGEX.find(input) ?: return input

    // First group contains everything between top level domain and /dp/ argument. The group does
    // not participate in the match when /dp/ directly follows the domain; such a URL can still
    // carry parameters, so it must go through parameter removal instead of being returned as is.
    val withoutPrefix = match.groups[1]
        ?.let { prefix -> input.removeRange(prefix.range) }
        ?: input

    return RegexFactory.AllParameters.replace(
        input = withoutPrefix,
        replacement = "",
    )
}

private companion object {
// Pattern for an Amazon product URL: "amazon." followed by a lazily matched domain suffix, an
// optional capture group (index 1) holding any path between the domain and "/dp/", then "/dp/"
// and its argument (one or more non-slash characters). Group 1 is absent when "/dp/" follows
// the domain directly.
private val REGEX = Regex("amazon\\..+?(/.*)?/dp/[^/]+")
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2022 Sven Jacobs
* Copyright (C) 2023 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -23,21 +23,50 @@ import io.kotest.matchers.shouldBe

class AmazonProductSanitizerTest : WordSpec(
{
val sanitizer = AmazonProductSanitizer()

"invoke" should {

"remove various Amazon parameters" {
val sanitizer = AmazonProductSanitizer()
val result = sanitizer(
"remove everything except /dp/* path argument" {
var result = sanitizer(
"https://www.amazon.de/Xiaomi-Aktivit%C3%A4tstracker-Trainings-Puls%C3%" +
"BCberwachung-Akkulaufzeit/dp/B091G3FLL7/?_encoding=UTF8&pd_rd_w=xDcJP&pf" +
"_rd_p=bf172aca-3277-41f6-babb-6ce7fc34cf7f&pf_rd_r=ZC6FZ5G6W9K8DEZTPBYW&" +
"pd_rd_r=11b3ec4e-047c-4f37-8302-62dedb8f502b&pd_rd_wg=Ozi90&ref_=pd_gw_c" +
"i_mcx_mr_hp_atf_m",
)

result shouldBe "https://www.amazon.de/Xiaomi-Aktivit%C3%A4tstracker-Trainings-Pu" +
"ls%C3%BCberwachung-Akkulaufzeit/dp/B091G3FLL7/"
result shouldBe "https://www.amazon.de/dp/B091G3FLL7/"

result = sanitizer(
"https://www.amazon.co.uk/Xiaomi-Aktivit%C3%A4tstracker-Trainings-Puls%C3%" +
"BCberwachung-Akkulaufzeit/dp/B091G3FLL7/?_encoding=UTF8&pd_rd_w=xDcJP&pf" +
"_rd_p=bf172aca-3277-41f6-babb-6ce7fc34cf7f&pf_rd_r=ZC6FZ5G6W9K8DEZTPBYW&" +
"pd_rd_r=11b3ec4e-047c-4f37-8302-62dedb8f502b&pd_rd_wg=Ozi90&ref_=pd_gw_c" +
"i_mcx_mr_hp_atf_m",
)

result shouldBe "https://www.amazon.co.uk/dp/B091G3FLL7/"
}

"keep already cleaned Amazon URL" {
sanitizer("https://www.amazon.com/dp/B091G3FLL7/") shouldBe
"https://www.amazon.com/dp/B091G3FLL7/"
}
}

"matchesDomain" should {

"match for Amazon product domain" {
sanitizer.matchesDomain(
"https://www.amazon.de/Xiaomi-Aktivit%C3%A4tstracke" +
"r-Trainings-Puls%C3%BCberwachung-Akkulaufzeit/dp/B091G3FLL7/",
) shouldBe true

sanitizer.matchesDomain(
"https://www.amazon.co.uk/Xiaomi-Aktivit%C3%A4tstracke" +
"r-Trainings-Puls%C3%BCberwachung-Akkulaufzeit/dp/B091G3FLL7/",
) shouldBe true
}
}
},
Expand Down

0 comments on commit f9fe760

Please sign in to comment.