Skip to content

Commit

Permalink
feat: set referer header when making a request
Browse files Browse the repository at this point in the history
  • Loading branch information
michalczmiel committed Jan 22, 2024
1 parent 8eba8eb commit 1eec08a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 3 deletions.
11 changes: 8 additions & 3 deletions internal/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ func getRandomUserAgent() string {
return userAgents[i]
}

func request(url, userAgent string) (*http.Response, error) {
// DefaultReferer is the Referer header value passed to request when fetching
// an HTML document via GetHtmlDocFromUrl.
const DefaultReferer = "https://www.google.com"

func request(url, userAgent, referer string) (*http.Response, error) {
client := &http.Client{
Timeout: 15 * time.Second,
}
Expand All @@ -44,6 +46,7 @@ func request(url, userAgent string) (*http.Response, error) {
}

request.Header.Set("User-Agent", userAgent)
request.Header.Set("Referer", referer)

response, err := client.Do(request)
if err != nil {
Expand All @@ -58,7 +61,9 @@ func request(url, userAgent string) (*http.Response, error) {
}

func DownloadImageFromUrl(url, filePath string, parameters Parameters) error {
response, err := request(url, parameters.UserAgent)
rootUrl := getRootUrl(url)

response, err := request(url, parameters.UserAgent, rootUrl)
if err != nil {
return err
}
Expand Down Expand Up @@ -87,7 +92,7 @@ func DownloadImageFromUrl(url, filePath string, parameters Parameters) error {
}

func GetHtmlDocFromUrl(url string, userAgent string) (*html.Node, error) {
response, err := request(url, userAgent)
response, err := request(url, userAgent, DefaultReferer)
if err != nil {
return nil, err
}
Expand Down
6 changes: 6 additions & 0 deletions internal/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ func RemoveDuplicates(original []string) []string {
return output
}

// getRootUrl returns the root of u in the form "scheme://host" (the host
// includes the port when one is present), dropping any path, query string
// and fragment.
//
// It returns an empty string when u cannot be parsed or when it has no
// scheme or no host (for example a relative path), so callers never receive
// a meaningless "://" value.
func getRootUrl(u string) string {
	parsedUrl, err := url.Parse(u)
	// url.Parse returns a nil *URL on failure; dereferencing it would panic.
	if err != nil || parsedUrl.Scheme == "" || parsedUrl.Host == "" {
		return ""
	}

	return parsedUrl.Scheme + "://" + parsedUrl.Host
}

func ProcessLinks(url string, rawLinks []string) []string {
var processedLinks []string

Expand Down
21 changes: 21 additions & 0 deletions internal/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,24 @@ func TestRemoveDuplicates(t *testing.T) {
t.Errorf("want %v got %v", expected, actual)
}
}

// TestGetRootUrl verifies that getRootUrl reduces a URL to its scheme and
// host, discarding any path and query string.
func TestGetRootUrl(t *testing.T) {
	type rootUrlCase struct {
		url      string
		expected string
	}

	cases := []rootUrlCase{
		{url: "https://example.com", expected: "https://example.com"},
		{url: "http://example.com", expected: "http://example.com"},
		{url: "https://example.com/images/image.jpg", expected: "https://example.com"},
		{url: "https://example.com/images/image2.jpg?w=1919&h=1280", expected: "https://example.com"},
		{url: "https://example.com?example=query", expected: "https://example.com"},
	}

	for i := range cases {
		c := cases[i]
		if got := getRootUrl(c.url); got != c.expected {
			t.Errorf("want %s got %s", c.expected, got)
		}
	}
}

0 comments on commit 1eec08a

Please sign in to comment.