This repository has been archived by the owner on Jun 7, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetcher.go
84 lines (77 loc) · 2.17 KB
/
fetcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package main
import (
"context"
"encoding/json"
"fmt"
"net/http"
"github.com/mmcdole/gofeed"
"github.com/peterbourgon/diskv"
log "github.com/sirupsen/logrus"
"github.com/gosimple/slug"
)
// Fetcher downloads feeds over HTTP and keeps a JSON copy of each parsed feed
// in an on-disk item cache so that later requests can be answered from disk.
type Fetcher struct {
// client issues both the HEAD probe and the GET request in Fetch.
client *http.Client
// sema is used as a semaphore: Fetch sends on it before each client.Do call
// and receives from it afterwards. Presumably it is a buffered channel whose
// capacity bounds in-flight requests; the constructor is not shown here.
sema chan struct{}
// itemCache stores JSON-encoded feeds keyed by encodeFunc(url).
itemCache *diskv.Diskv
}
// encodeFunc turns a feed URL into the key under which the feed is stored in
// the item cache. It is a package-level variable bound to slug.Make.
var encodeFunc = slug.Make
// Fetch returns the feed located at url as a LinkedFeed.
//
// It first issues a HEAD request. If the response carries the header
// "X-From-Cache: 1" (presumably set by a caching transport configured on
// f.client) and the item cache holds an entry for the URL, the cached JSON is
// decoded and returned without downloading the feed again. Otherwise the feed
// is fetched with GET, parsed, written to the item cache and returned.
//
// Every call to f.client.Do is bracketed by a send on and a receive from
// f.sema, and the slot is released on every path, including errors.
//
// A non-nil error is returned when a request cannot be built or executed, when
// the item cache cannot be read, decoded or written, or when the feed cannot
// be parsed or marshalled. The returned LinkedFeed is the zero value then.
func (f *Fetcher) Fetch(ctx context.Context, url string) (LinkedFeed, error) {
	// HEAD, see if from cache
	req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
	if err != nil {
		return LinkedFeed{}, fmt.Errorf("Error creating request for %s : %w", url, err)
	}

	f.sema <- struct{}{}
	resp, err := f.client.Do(req)
	<-f.sema
	if err != nil {
		return LinkedFeed{}, fmt.Errorf("Error fetching %s : %w", url, err)
	}
	// A HEAD response has no payload, but its body must still be closed so the
	// underlying connection is handed back to the transport. Only the headers
	// are used below, and they remain valid after Close.
	resp.Body.Close()

	fromCache := resp.Header["X-From-Cache"]
	if len(fromCache) == 1 && fromCache[0] == "1" {
		// lookup in itemCache
		keyName := encodeFunc(url)
		if f.itemCache.Has(keyName) {
			raw, err := f.itemCache.Read(keyName)
			if err != nil {
				return LinkedFeed{}, err
			}
			var cached *gofeed.Feed
			if err := json.Unmarshal(raw, &cached); err != nil {
				return LinkedFeed{}, err
			}
			return LinkFeed(cached), nil
		}
	}

	log.Infof("get on %s", url)
	req, err = http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return LinkedFeed{}, fmt.Errorf("Error creating request for %s : %w", url, err)
	}

	f.sema <- struct{}{}
	resp, err = f.client.Do(req)
	if err != nil {
		// Release the slot on failure too; otherwise every failed GET would
		// permanently consume one and Fetch would eventually block forever.
		<-f.sema
		return LinkedFeed{}, fmt.Errorf("Error fetching %s : %w", url, err)
	}
	// A fresh parser per call avoids sharing parser state between goroutines.
	parser := gofeed.NewParser()
	feed, err := parser.Parse(resp.Body)
	resp.Body.Close()
	// The slot is held until the body has been consumed, because the download
	// actually happens while the parser reads from resp.Body.
	<-f.sema
	if err != nil {
		return LinkedFeed{}, fmt.Errorf("Error parsing %s : %w", url, err)
	}

	jsonBuf, err := json.Marshal(feed)
	if err != nil {
		return LinkedFeed{}, fmt.Errorf("Error marshalling %s : %w", url, err)
	}
	if err := f.itemCache.Write(encodeFunc(url), jsonBuf); err != nil {
		return LinkedFeed{}, fmt.Errorf("Error writing to item cache %s : %w", url, err)
	}

	linkedFeed := LinkFeed(feed)
	if linkedFeed.Feed.FeedLink != url {
		log.Debugf("feed request url and self-reference url mismatch: requested %s, got %s", url, linkedFeed.Feed.FeedLink)
	}
	return linkedFeed, nil
}