Skip to content

Commit

Permalink
Merge pull request #88 from cmars/feat/vu-service-scraper
Browse files Browse the repository at this point in the history
feat: implement VU service scraper
  • Loading branch information
cmars authored Dec 6, 2021
2 parents c43ee26 + 221374e commit 406e244
Show file tree
Hide file tree
Showing 15 changed files with 641 additions and 22 deletions.
14 changes: 14 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ jobs:
- run:
name: Run tests
command: go test ./... -count=1
- run:
command: make -C vervet-underground test

lint:
docker:
Expand All @@ -43,6 +45,16 @@ jobs:
- run:
command: golangci-lint run -v ./...

lint-vu:
docker:
- image: golangci/golangci-lint:v1.42.1
steps:
- checkout
- attach_workspace:
at: ~/vervet/vervet-underground
- run:
command: golangci-lint run -v ./...

workflows:
version: 2
test:
Expand All @@ -53,3 +65,5 @@ workflows:
jobs:
- lint:
name: Lint
- lint-vu:
name: Lint VU
2 changes: 1 addition & 1 deletion vervet-underground/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ lint-docker:
tidy:
$(GOMOD) tidy -v
test:
go test ./... -count=1 -ginkgo.failFast
go test ./... -count=1 -race

build:
go build server.go
Expand Down
17 changes: 12 additions & 5 deletions vervet-underground/config/config.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
// Package config supports configuring the Vervet Underground service.
package config

import (
"encoding/json"
"os"

"vervet-underground/lib"
)

func Load(configPath string) (*lib.ServerConfig, error) {
// ServerConfig defines the configuration options for the Vervet Underground service.
type ServerConfig struct {
Host string `json:"host"`
Services []string `json:"services"`
}

// Load returns a ServerConfig instance loaded from the given path to a JSON
// config file.
func Load(configPath string) (*ServerConfig, error) {
file, err := os.Open(configPath)
var config lib.ServerConfig
var config ServerConfig
if err != nil {
return nil, err
}
Expand All @@ -19,4 +26,4 @@ func Load(configPath string) (*lib.ServerConfig, error) {
return nil, err
}
return &config, nil
}
}
2 changes: 2 additions & 0 deletions vervet-underground/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ require (
github.com/aws/aws-sdk-go-v2/service/sso v1.6.1 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.11.0 // indirect
github.com/aws/smithy-go v1.9.0 // indirect
github.com/frankban/quicktest v1.14.0 // indirect
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/nxadm/tail v1.4.8 // indirect
go.uber.org/multierr v1.7.0 // indirect
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d // indirect
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect
golang.org/x/text v0.3.6 // indirect
Expand Down
22 changes: 22 additions & 0 deletions vervet-underground/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.11.0/go.mod h1:+BmlPeQ1Y+PuIho93MMKD
github.com/aws/smithy-go v1.9.0 h1:c7FUdEqrQA1/UVKKCNDFQPNKGp4FQg3YW4Ck5SLTG58=
github.com/aws/smithy-go v1.9.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss=
github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
Expand Down Expand Up @@ -58,6 +61,13 @@ github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB7
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
Expand All @@ -73,13 +83,21 @@ github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAl
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.26.0 h1:ORM4ibhEZeTeQlCojCK2kPz1ogAY4bGs4tD+SaAdGaE=
github.com/rs/zerolog v1.26.0/go.mod h1:yBiM87lvSqX8h0Ww4sdzNSkVYZ8dL2xjZJG1lAuGZEo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec=
go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
Expand Down Expand Up @@ -135,6 +153,8 @@ google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/l
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
Expand All @@ -144,3 +164,5 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
245 changes: 245 additions & 0 deletions vervet-underground/internal/scraper/scraper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
// Package scraper provides support for scraping OpenAPI versions from
// services.
package scraper

import (
"context"
"encoding/json"
"io/ioutil"
"net/http"
"net/url"
"sync"
"time"

"github.com/pkg/errors"
"go.uber.org/multierr"

"vervet-underground/config"
"vervet-underground/internal/storage"
)

// Storage defines the storage functionality needed in order to store service
// API version spec snapshots.
type Storage interface {
// NotifyVersions tells the storage which versions are currently available.
// This is the primary mechanism by which the storage layer discovers and
// processes versions which are removed post-sunset.
NotifyVersions(ctx context.Context, name string, versions []string, scrapeTime time.Time) error

// HasVersion returns whether the storage has already stored the service
// API spec version at the given content digest.
HasVersion(ctx context.Context, name string, version string, digest string) (bool, error)

// NotifyVersion tells the storage to store the given version contents at
// the scrapeTime. The storage implementation must detect and ignore
// duplicate version contents, as some services may not provide content
// digest headers in their responses.
NotifyVersion(ctx context.Context, name string, version string, contents []byte, scrapeTime time.Time) error
}

// Scraper gets OpenAPI specs from a collection of services and updates storage
// accordingly.
type Scraper struct {
storage Storage
services []service
http *http.Client
timeNow func() time.Time
}

type service struct {
base string
url *url.URL
}

// Option defines an option that may be specified when creating a new Scraper.
type Option func(*Scraper) error

// New returns a new Scraper instance.
func New(cfg *config.ServerConfig, storage Storage, options ...Option) (*Scraper, error) {
s := &Scraper{
storage: storage,
http: &http.Client{},
timeNow: time.Now,
}
s.services = make([]service, len(cfg.Services))
for i := range cfg.Services {
u, err := url.Parse(cfg.Services[i] + "/openapi")
if err != nil {
return nil, errors.Wrapf(err, "invalid service %q", cfg.Services[i])
}
s.services[i] = service{base: cfg.Services[i], url: u}
}
for i := range options {
err := options[i](s)
if err != nil {
return nil, err
}
}
return s, nil
}

// HTTPClient is a Scraper constructor Option that allows providing an
// *http.Client instance. This may be used to configure the transport and
// timeouts on the HTTP client.
func HTTPClient(cl *http.Client) Option {
return func(s *Scraper) error {
s.http = cl
return nil
}
}

// Clock is a Scraper constructor Option that allows providing an alternative
// clock used to determine the scrape timestamps used to record changes in
// service spec versions.
func Clock(c func() time.Time) Option {
return func(s *Scraper) error {
s.timeNow = c
return nil
}
}

// Run executes the OpenAPI version scraping on all configured services.
func (s *Scraper) Run(ctx context.Context) error {
scrapeTime := s.timeNow().UTC()
var wg sync.WaitGroup
errCh := make(chan error)
for i := range s.services {
svc := s.services[i]
wg.Add(1)
go func() {
defer wg.Done()
errCh <- s.scrape(ctx, scrapeTime, svc)
}()
}
go func() {
wg.Wait()
close(errCh)
}()
var errs error
for err := range errCh {
errs = multierr.Append(errs, err)
}
return errs
}

func (s *Scraper) scrape(ctx context.Context, scrapeTime time.Time, svc service) error {
versions, err := s.getVersions(ctx, svc)
if err != nil {
return errors.WithStack(err)
}
err = s.storage.NotifyVersions(ctx, svc.base, versions, scrapeTime)
if err != nil {
return errors.WithStack(err)
}
for i := range versions {
// TODO: we might run this concurrently per live service pod if/when
// we're more k8s aware, but we won't do that yet.
contents, isNew, err := s.getNewVersion(ctx, svc, versions[i])
if err != nil {
return errors.WithStack(err)
}
if !isNew {
continue
}
err = s.storage.NotifyVersion(ctx, svc.base, versions[i], contents, scrapeTime)
if err != nil {
return errors.WithStack(err)
}
}
return nil
}

func (s *Scraper) getVersions(ctx context.Context, svc service) ([]string, error) {
req, err := http.NewRequestWithContext(ctx, "GET", svc.url.String(), nil)
if err != nil {
return nil, errors.Wrap(err, "failed to create request")
}
resp, err := s.http.Do(req)
if err != nil {
return nil, errors.Wrap(err, "request failed")
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, httpError(resp)
}
var versions []string
err = json.NewDecoder(resp.Body).Decode(&versions)
if err != nil {
return nil, errors.WithStack(err)
}
return versions, nil
}

func httpError(r *http.Response) error {
if contents, err := ioutil.ReadAll(r.Body); err == nil {
return errors.Errorf("request failed: HTTP %d: %s", r.StatusCode, string(contents))
}
return errors.Errorf("request failed: HTTP %d", r.StatusCode)
}

func (s *Scraper) getNewVersion(ctx context.Context, svc service, version string) ([]byte, bool, error) {
isNew, err := s.hasNewVersion(ctx, svc, version)
if err != nil {
return nil, false, errors.WithStack(err)
}
if !isNew {
return nil, false, nil
}

req, err := http.NewRequestWithContext(ctx, "GET", svc.url.String()+"/"+version, nil)
if err != nil {
return nil, false, errors.Wrap(err, "failed to create request")
}
resp, err := s.http.Do(req)
if err != nil {
return nil, false, errors.Wrap(err, "request failed")
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, false, httpError(resp)
}
if ct := resp.Header.Get("Content-Type"); ct != "application/json" {
return nil, false, errors.Errorf("unexpected content type: %s", ct)
}
respContents, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, false, errors.WithStack(err)
}
// For now, let's just see if the response can be unmarshaled
// TODO: Load w/kin-openapi and validate it?
var doc map[string]interface{}
err = json.Unmarshal(respContents, &doc)
if err != nil {
return nil, false, errors.WithStack(err)
}
return respContents, true, nil
}

func (s *Scraper) hasNewVersion(ctx context.Context, svc service, version string) (bool, error) {
// Check Digest to see if there's a new version
req, err := http.NewRequestWithContext(ctx, "HEAD", svc.url.String()+"/"+version, nil)
if err != nil {
return false, errors.Wrap(err, "failed to create request")
}
resp, err := s.http.Do(req)
if err != nil {
return false, errors.Wrap(err, "request failed")
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusMethodNotAllowed {
// Not supporting HEAD is fine, we'll just come back with a GET
return true, nil
}
if resp.StatusCode != http.StatusOK {
return false, httpError(resp)
}
if ct := resp.Header.Get("Content-Type"); ct != "application/json" {
return false, errors.Errorf("unexpected content type: %s", ct)
}
digest := storage.DigestHeader(resp.Header.Get("Digest"))
if digest == "" {
// Not providing a digest is fine, we'll just come back with a GET
return true, nil
}
return s.storage.HasVersion(ctx, svc.base, version, digest)
}
Loading

0 comments on commit 406e244

Please sign in to comment.