From 221374eae43775c49bea0b4b604df6882f53211f Mon Sep 17 00:00:00 2001 From: Casey Marshall Date: Fri, 3 Dec 2021 17:11:31 -0600 Subject: [PATCH] feat: implement VU service scraper Add a scraper which fetches all the OpenAPI versions from each service and updates them in VU storage. Add an in-memory storage implementation to serve as a functional reference. Drive-by: consolidated config package, added godocs, enable CircleCI --- .circleci/config.yml | 14 + vervet-underground/Makefile | 2 +- vervet-underground/config/config.go | 17 +- vervet-underground/go.mod | 2 + vervet-underground/go.sum | 22 ++ .../internal/scraper/scraper.go | 245 ++++++++++++++++++ .../internal/scraper/scraper_test.go | 153 +++++++++++ vervet-underground/internal/storage/digest.go | 40 +++ .../internal/storage/mem/mem.go | 86 ++++++ .../internal/storage/mem/mem_test.go | 56 ++++ .../storage/s3}/aws_s3_client.go | 4 +- .../storage/s3}/aws_s3_client_suite_test.go | 4 +- .../storage/s3}/aws_s3_client_test.go | 4 +- vervet-underground/lib/types.go | 6 - vervet-underground/server.go | 8 +- 15 files changed, 641 insertions(+), 22 deletions(-) create mode 100644 vervet-underground/internal/scraper/scraper.go create mode 100644 vervet-underground/internal/scraper/scraper_test.go create mode 100644 vervet-underground/internal/storage/digest.go create mode 100644 vervet-underground/internal/storage/mem/mem.go create mode 100644 vervet-underground/internal/storage/mem/mem_test.go rename vervet-underground/{storage => internal/storage/s3}/aws_s3_client.go (90%) rename vervet-underground/{storage => internal/storage/s3}/aws_s3_client_suite_test.go (88%) rename vervet-underground/{storage => internal/storage/s3}/aws_s3_client_test.go (95%) delete mode 100644 vervet-underground/lib/types.go diff --git a/.circleci/config.yml b/.circleci/config.yml index 8b6b2a97..f51b3bf5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,6 +32,8 @@ jobs: - run: name: Run tests command: go test ./... -count=1 + - run: + command: make -C vervet-underground test lint: docker: @@ -43,6 +45,16 @@ jobs: - run: command: golangci-lint run -v ./... + lint-vu: + docker: + - image: golangci/golangci-lint:v1.42.1 + steps: + - checkout + - attach_workspace: + at: ~/vervet/vervet-underground + - run: + command: golangci-lint run -v ./... + workflows: version: 2 test: @@ -53,3 +65,5 @@ workflows: jobs: - lint: name: Lint + - lint-vu: + name: Lint VU diff --git a/vervet-underground/Makefile b/vervet-underground/Makefile index 5e081463..40108fd8 100644 --- a/vervet-underground/Makefile +++ b/vervet-underground/Makefile @@ -18,7 +18,7 @@ lint-docker: tidy: $(GOMOD) tidy -v test: - go test ./... -count=1 -ginkgo.failFast + go test ./... -count=1 -race build: go build server.go diff --git a/vervet-underground/config/config.go b/vervet-underground/config/config.go index 7aeb634d..c4a0ce08 100644 --- a/vervet-underground/config/config.go +++ b/vervet-underground/config/config.go @@ -1,15 +1,22 @@ +// Package config supports configuring the Vervet Underground service. package config import ( "encoding/json" "os" - - "vervet-underground/lib" ) -func Load(configPath string) (*lib.ServerConfig, error) { +// ServerConfig defines the configuration options for the Vervet Underground service. +type ServerConfig struct { + Host string `json:"host"` + Services []string `json:"services"` +} + +// Load returns a ServerConfig instance loaded from the given path to a JSON +// config file. +func Load(configPath string) (*ServerConfig, error) { file, err := os.Open(configPath) - var config lib.ServerConfig + var config ServerConfig if err != nil { return nil, err } @@ -19,4 +26,4 @@ func Load(configPath string) (*lib.ServerConfig, error) { return nil, err } return &config, nil -} \ No newline at end of file +} diff --git a/vervet-underground/go.mod b/vervet-underground/go.mod index aa5e17f5..1390769d 100644 --- a/vervet-underground/go.mod +++ b/vervet-underground/go.mod @@ -26,8 +26,10 @@ require ( github.com/aws/aws-sdk-go-v2/service/sso v1.6.1 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.11.0 // indirect github.com/aws/smithy-go v1.9.0 // indirect + github.com/frankban/quicktest v1.14.0 // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/nxadm/tail v1.4.8 // indirect + go.uber.org/multierr v1.7.0 // indirect golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d // indirect golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect golang.org/x/text v0.3.6 // indirect diff --git a/vervet-underground/go.sum b/vervet-underground/go.sum index 59f20483..548a7564 100644 --- a/vervet-underground/go.sum +++ b/vervet-underground/go.sum @@ -29,8 +29,11 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.11.0/go.mod h1:+BmlPeQ1Y+PuIho93MMKD github.com/aws/smithy-go v1.9.0 h1:c7FUdEqrQA1/UVKKCNDFQPNKGp4FQg3YW4Ck5SLTG58= github.com/aws/smithy-go v1.9.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= +github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= @@ -58,6 +61,13 @@ github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB7 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= @@ -73,13 +83,21 @@ github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAl github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.26.0 h1:ORM4ibhEZeTeQlCojCK2kPz1ogAY4bGs4tD+SaAdGaE= github.com/rs/zerolog v1.26.0/go.mod h1:yBiM87lvSqX8h0Ww4sdzNSkVYZ8dL2xjZJG1lAuGZEo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec= +go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -135,6 +153,8 @@ google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/l google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= @@ -144,3 +164,5 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/vervet-underground/internal/scraper/scraper.go b/vervet-underground/internal/scraper/scraper.go new file mode 100644 index 00000000..3aca089b --- /dev/null +++ b/vervet-underground/internal/scraper/scraper.go @@ -0,0 +1,245 @@ +// Package scraper provides support for scraping OpenAPI versions from +// services. +package scraper + +import ( + "context" + "encoding/json" + "io/ioutil" + "net/http" + "net/url" + "sync" + "time" + + "github.com/pkg/errors" + "go.uber.org/multierr" + + "vervet-underground/config" + "vervet-underground/internal/storage" +) + +// Storage defines the storage functionality needed in order to store service +// API version spec snapshots. +type Storage interface { + // NotifyVersions tells the storage which versions are currently available. + // This is the primary mechanism by which the storage layer discovers and + // processes versions which are removed post-sunset. + NotifyVersions(ctx context.Context, name string, versions []string, scrapeTime time.Time) error + + // HasVersion returns whether the storage has already stored the service + // API spec version at the given content digest. + HasVersion(ctx context.Context, name string, version string, digest string) (bool, error) + + // NotifyVersion tells the storage to store the given version contents at + // the scrapeTime. The storage implementation must detect and ignore + // duplicate version contents, as some services may not provide content + // digest headers in their responses. + NotifyVersion(ctx context.Context, name string, version string, contents []byte, scrapeTime time.Time) error +} + +// Scraper gets OpenAPI specs from a collection of services and updates storage +// accordingly. +type Scraper struct { + storage Storage + services []service + http *http.Client + timeNow func() time.Time +} + +type service struct { + base string + url *url.URL +} + +// Option defines an option that may be specified when creating a new Scraper. +type Option func(*Scraper) error + +// New returns a new Scraper instance. +func New(cfg *config.ServerConfig, storage Storage, options ...Option) (*Scraper, error) { + s := &Scraper{ + storage: storage, + http: &http.Client{}, + timeNow: time.Now, + } + s.services = make([]service, len(cfg.Services)) + for i := range cfg.Services { + u, err := url.Parse(cfg.Services[i] + "/openapi") + if err != nil { + return nil, errors.Wrapf(err, "invalid service %q", cfg.Services[i]) + } + s.services[i] = service{base: cfg.Services[i], url: u} + } + for i := range options { + err := options[i](s) + if err != nil { + return nil, err + } + } + return s, nil +} + +// HTTPClient is a Scraper constructor Option that allows providing an +// *http.Client instance. This may be used to configure the transport and +// timeouts on the HTTP client. +func HTTPClient(cl *http.Client) Option { + return func(s *Scraper) error { + s.http = cl + return nil + } +} + +// Clock is a Scraper constructor Option that allows providing an alternative +// clock used to determine the scrape timestamps used to record changes in +// service spec versions. +func Clock(c func() time.Time) Option { + return func(s *Scraper) error { + s.timeNow = c + return nil + } +} + +// Run executes the OpenAPI version scraping on all configured services. +func (s *Scraper) Run(ctx context.Context) error { + scrapeTime := s.timeNow().UTC() + var wg sync.WaitGroup + errCh := make(chan error) + for i := range s.services { + svc := s.services[i] + wg.Add(1) + go func() { + defer wg.Done() + errCh <- s.scrape(ctx, scrapeTime, svc) + }() + } + go func() { + wg.Wait() + close(errCh) + }() + var errs error + for err := range errCh { + errs = multierr.Append(errs, err) + } + return errs +} + +func (s *Scraper) scrape(ctx context.Context, scrapeTime time.Time, svc service) error { + versions, err := s.getVersions(ctx, svc) + if err != nil { + return errors.WithStack(err) + } + err = s.storage.NotifyVersions(ctx, svc.base, versions, scrapeTime) + if err != nil { + return errors.WithStack(err) + } + for i := range versions { + // TODO: we might run this concurrently per live service pod if/when + // we're more k8s aware, but we won't do that yet. + contents, isNew, err := s.getNewVersion(ctx, svc, versions[i]) + if err != nil { + return errors.WithStack(err) + } + if !isNew { + continue + } + err = s.storage.NotifyVersion(ctx, svc.base, versions[i], contents, scrapeTime) + if err != nil { + return errors.WithStack(err) + } + } + return nil +} + +func (s *Scraper) getVersions(ctx context.Context, svc service) ([]string, error) { + req, err := http.NewRequestWithContext(ctx, "GET", svc.url.String(), nil) + if err != nil { + return nil, errors.Wrap(err, "failed to create request") + } + resp, err := s.http.Do(req) + if err != nil { + return nil, errors.Wrap(err, "request failed") + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, httpError(resp) + } + var versions []string + err = json.NewDecoder(resp.Body).Decode(&versions) + if err != nil { + return nil, errors.WithStack(err) + } + return versions, nil +} + +func httpError(r *http.Response) error { + if contents, err := ioutil.ReadAll(r.Body); err == nil { + return errors.Errorf("request failed: HTTP %d: %s", r.StatusCode, string(contents)) + } + return errors.Errorf("request failed: HTTP %d", r.StatusCode) +} + +func (s *Scraper) getNewVersion(ctx context.Context, svc service, version string) ([]byte, bool, error) { + isNew, err := s.hasNewVersion(ctx, svc, version) + if err != nil { + return nil, false, errors.WithStack(err) + } + if !isNew { + return nil, false, nil + } + + req, err := http.NewRequestWithContext(ctx, "GET", svc.url.String()+"/"+version, nil) + if err != nil { + return nil, false, errors.Wrap(err, "failed to create request") + } + resp, err := s.http.Do(req) + if err != nil { + return nil, false, errors.Wrap(err, "request failed") + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, false, httpError(resp) + } + if ct := resp.Header.Get("Content-Type"); ct != "application/json" { + return nil, false, errors.Errorf("unexpected content type: %s", ct) + } + respContents, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, false, errors.WithStack(err) + } + // For now, let's just see if the response can be unmarshaled + // TODO: Load w/kin-openapi and validate it? + var doc map[string]interface{} + err = json.Unmarshal(respContents, &doc) + if err != nil { + return nil, false, errors.WithStack(err) + } + return respContents, true, nil +} + +func (s *Scraper) hasNewVersion(ctx context.Context, svc service, version string) (bool, error) { + // Check Digest to see if there's a new version + req, err := http.NewRequestWithContext(ctx, "HEAD", svc.url.String()+"/"+version, nil) + if err != nil { + return false, errors.Wrap(err, "failed to create request") + } + resp, err := s.http.Do(req) + if err != nil { + return false, errors.Wrap(err, "request failed") + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusMethodNotAllowed { + // Not supporting HEAD is fine, we'll just come back with a GET + return true, nil + } + if resp.StatusCode != http.StatusOK { + return false, httpError(resp) + } + if ct := resp.Header.Get("Content-Type"); ct != "application/json" { + return false, errors.Errorf("unexpected content type: %s", ct) + } + digest := storage.DigestHeader(resp.Header.Get("Digest")) + if digest == "" { + // Not providing a digest is fine, we'll just come back with a GET + return true, nil + } + return s.storage.HasVersion(ctx, svc.base, version, digest) +} diff --git a/vervet-underground/internal/scraper/scraper_test.go b/vervet-underground/internal/scraper/scraper_test.go new file mode 100644 index 00000000..a1b0b2d8 --- /dev/null +++ b/vervet-underground/internal/scraper/scraper_test.go @@ -0,0 +1,153 @@ +package scraper_test + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + + qt "github.com/frankban/quicktest" + "github.com/gorilla/mux" + + "vervet-underground/config" + "vervet-underground/internal/scraper" + "vervet-underground/internal/storage/mem" +) + +var t0 = time.Date(2021, time.December, 3, 20, 49, 51, 0, time.UTC) + +type testService struct { + versions []string + contents map[string]string +} + +func (t *testService) Handler() http.Handler { + r := mux.NewRouter() + r.HandleFunc("/openapi", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + err := json.NewEncoder(w).Encode(&t.versions) + if err != nil { + panic(err) + } + }) + r.HandleFunc("/openapi/{version}", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, err := w.Write([]byte(t.contents[mux.Vars(r)["version"]])) + if err != nil { + panic(err) + } + }) + return r +} + +func TestScraper(t *testing.T) { + c := qt.New(t) + petfood := &testService{ + versions: []string{"2021-09-01", "2021-09-16"}, + contents: map[string]string{ + "2021-09-01": `{"paths":{"/crickets": {}}}`, + "2021-09-16": `{"paths":{"/crickets": {}, "/kibble": {}}}`, + }, + } + petfoodService := httptest.NewServer(petfood.Handler()) + c.Cleanup(petfoodService.Close) + + animals := &testService{ + versions: []string{"2021-10-01", "2021-10-16"}, + contents: map[string]string{ + "2021-10-01": `{"paths":{"/geckos": {}}}`, + "2021-10-16": `{"paths":{"/geckos": {}, "/puppies": {}}}`, + }, + } + animalsService := httptest.NewServer(animals.Handler()) + c.Cleanup(animalsService.Close) + + tests := []struct { + service, version, digest string + }{ + {petfoodService.URL, "2021-09-01", "sha256:I20cAQ3VEjDrY7O0B678yq+0pYN2h3sxQy7vmdlo4+w="}, + {animalsService.URL, "2021-10-16", "sha256:P1FEFvnhtxJSqXr/p6fMNKE+HYwN6iwKccBGHIVZbyg="}, + } + + cfg := &config.ServerConfig{ + Services: []string{ + petfoodService.URL, + animalsService.URL, + }, + } + st := mem.New() + sc, err := scraper.New(cfg, st, scraper.Clock(func() time.Time { return t0 })) + c.Assert(err, qt.IsNil) + + // Cancel the scrape context after a timeout so we don't hang the test + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + c.Cleanup(cancel) + + // No version digests should be known + for _, test := range tests { + ok, err := st.HasVersion(ctx, test.service, test.version, test.digest) + c.Assert(err, qt.IsNil) + c.Assert(ok, qt.IsFalse) + } + + // Run the scrape + err = sc.Run(ctx) + c.Assert(err, qt.IsNil) + + // Version digests now known to storage + for _, test := range tests { + ok, err := st.HasVersion(ctx, test.service, test.version, test.digest) + c.Assert(err, qt.IsNil) + c.Assert(ok, qt.IsTrue) + } +} + +func TestEmptyScrape(t *testing.T) { + c := qt.New(t) + cfg := &config.ServerConfig{ + Services: nil, + } + st := mem.New() + sc, err := scraper.New(cfg, st, scraper.Clock(func() time.Time { return t0 })) + c.Assert(err, qt.IsNil) + + // Cancel after a short timeout so we don't hang the test + ctx, cancel := context.WithTimeout(context.Background(), time.Second*1) + c.Cleanup(cancel) + + // Run the scrape + err = sc.Run(ctx) + c.Assert(err, qt.IsNil) +} + +func TestScrapeClientError(t *testing.T) { + c := qt.New(t) + cfg := &config.ServerConfig{ + Services: []string{"http://example.com/nope"}, + } + st := mem.New() + sc, err := scraper.New(cfg, st, + scraper.Clock(func() time.Time { return t0 }), + scraper.HTTPClient(&http.Client{ + Transport: &errorTransport{}, + }), + ) + c.Assert(err, qt.IsNil) + + // Cancel after a short timeout so we don't hang the test + ctx, cancel := context.WithTimeout(context.Background(), time.Second*1) + c.Cleanup(cancel) + + // Run the scrape + err = sc.Run(ctx) + c.Assert(err, qt.ErrorMatches, `.*: bad wolf`) +} + +type errorTransport struct{} + +func (*errorTransport) RoundTrip(*http.Request) (*http.Response, error) { + return nil, fmt.Errorf("bad wolf") +} diff --git a/vervet-underground/internal/storage/digest.go b/vervet-underground/internal/storage/digest.go new file mode 100644 index 00000000..5cef3163 --- /dev/null +++ b/vervet-underground/internal/storage/digest.go @@ -0,0 +1,40 @@ +// Package storage provides common functionality supporting Vervet Underground +// storage. +package storage + +import ( + "crypto/sha256" + "encoding/base64" + "strings" +) + +// DigestHeader returns a content digest parsed from a Digest HTTP response +// header as defined in +// https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-digest-headers-05#section-3. +// The returned digest is algorithm-prefixed so that other digest schemes may +// be supported later if needed. +// +// Returns "" if no digest is available. +func DigestHeader(value string) string { + digests := strings.Split(value, ",") + for i := range digests { + digests[i] = strings.TrimSpace(digests[i]) + kv := strings.SplitN(digests[i], "=", 2) + if len(kv) < 2 { + continue + } + if kv[0] == "id-sha-256" || kv[0] == "sha-256" { + // Use the no-encoding digest if specified, otherwise assume no + // encoding as a fallback. HTTP compression is likely to be handled + // transparently. + return "sha256:" + kv[1] + } + } + return "" +} + +// Digest returns the digest of the given contents. +func Digest(contents []byte) string { + buf := sha256.Sum256(contents) + return "sha256:" + base64.StdEncoding.EncodeToString(buf[:]) +} diff --git a/vervet-underground/internal/storage/mem/mem.go b/vervet-underground/internal/storage/mem/mem.go new file mode 100644 index 00000000..c4e3fb80 --- /dev/null +++ b/vervet-underground/internal/storage/mem/mem.go @@ -0,0 +1,86 @@ +// Package mem provides an in-memory implementation of the storage used in +// Vervet Underground. It's not intended for production use, but as a +// functionally complete reference implementation that can be used to validate +// the other parts of the VU system. +package mem + +import ( + "context" + "sync" + "time" + + "vervet-underground/internal/storage" +) + +type serviceVersion struct { + service string + version string +} + +type contentRevision struct { + timestamp time.Time + digest string + + // TODO: store the sunset time when a version is removed + //sunset *time.Time +} + +type serviceVersions map[serviceVersion][]contentRevision + +type contents map[serviceVersion]map[string][]byte + +// Storage provides an in-memory implementation of Vervet Underground storage. +type Storage struct { + mu sync.RWMutex + serviceVersions serviceVersions + contents contents +} + +// New returns a new Storage instance. +func New() *Storage { + return &Storage{ + serviceVersions: serviceVersions{}, + contents: contents{}, + } +} + +// NotifyVersions implements scraper.Storage. +func (s *Storage) NotifyVersions(ctx context.Context, name string, versions []string, scrapeTime time.Time) error { + s.mu.Lock() + defer s.mu.Unlock() + // TODO: implement notify versions; update sunset when versions are removed + return nil +} + +// HasVersion implements scraper.Storage. +func (s *Storage) HasVersion(ctx context.Context, name string, version string, digest string) (bool, error) { + s.mu.RLock() + defer s.mu.RUnlock() + digests, ok := s.contents[serviceVersion{service: name, version: version}] + if !ok { + return false, nil + } + _, ok = digests[digest] + return ok, nil +} + +// NotifyVersion implements scraper.Storage. +func (s *Storage) NotifyVersion(ctx context.Context, name string, version string, contents []byte, scrapeTime time.Time) error { + s.mu.Lock() + defer s.mu.Unlock() + k := serviceVersion{service: name, version: version} + digest := storage.Digest(contents) + if digests, ok := s.contents[k]; ok { + if _, ok := digests[digest]; ok { + return nil + } + } else { + s.contents[k] = map[string][]byte{} + } + s.contents[k][digest] = contents + s.serviceVersions[k] = append(s.serviceVersions[k], contentRevision{ + timestamp: scrapeTime, + digest: digest, + }) + return nil +} diff --git a/vervet-underground/internal/storage/mem/mem_test.go b/vervet-underground/internal/storage/mem/mem_test.go new file mode 100644 index 00000000..1b0ae373 --- /dev/null +++ b/vervet-underground/internal/storage/mem/mem_test.go @@ -0,0 +1,56 @@ +package mem + +import ( + "context" + "testing" + "time" + + qt "github.com/frankban/quicktest" +) + +var t0 = time.Date(2021, time.December, 3, 20, 49, 51, 0, time.UTC) + +func TestNotifyVersions(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + s := New() + err := s.NotifyVersions(ctx, "petfood", []string{"2021-09-01", "2021-09-16"}, t0) + c.Assert(err, qt.IsNil) + // TODO: verify side-effects when there are some... +} + +func TestHasVersion(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + s := New() + + const cricketsDigest = "sha256:mWpHX0/hIZS9mVd8eobfHWm6OkUsKZLiqd6ShRnNzA4=" + const geckosDigest = "sha256:c5JD7m0g4DVhoaX4z8HFcTP8S/yUOEsjgP8ECkuEHqM=" + for _, digest := range []string{cricketsDigest, geckosDigest} { + ok, err := s.HasVersion(ctx, "petfood", "2021-09-16", digest) + c.Assert(err, qt.IsNil) + c.Assert(ok, qt.IsFalse) + } + err := s.NotifyVersion(ctx, "petfood", "2021-09-16", []byte("crickets"), t0) + c.Assert(err, qt.IsNil) + err = s.NotifyVersion(ctx, "animals", "2021-09-16", []byte("geckos"), t0) + c.Assert(err, qt.IsNil) + + tests := []struct { + service, version, digest string + shouldHave bool + }{ + {"petfood", "2021-09-16", cricketsDigest, true}, + {"animals", "2021-09-16", geckosDigest, true}, + {"petfood", "2021-09-16", geckosDigest, false}, + {"animals", "2021-09-16", cricketsDigest, false}, + {"petfood", "2021-10-16", cricketsDigest, false}, + {"animals", "2021-09-17", geckosDigest, false}, + } + for i, t := range tests { + c.Logf("test#%d: %v", i, t) + ok, err := s.HasVersion(ctx, t.service, t.version, t.digest) + c.Assert(err, qt.IsNil) + c.Assert(ok, qt.Equals, t.shouldHave) + } +} diff --git a/vervet-underground/storage/aws_s3_client.go b/vervet-underground/internal/storage/s3/aws_s3_client.go similarity index 90% rename from vervet-underground/storage/aws_s3_client.go rename to vervet-underground/internal/storage/s3/aws_s3_client.go index ae1bf11a..bfa84938 100644 --- a/vervet-underground/storage/aws_s3_client.go +++ b/vervet-underground/internal/storage/s3/aws_s3_client.go @@ -1,4 +1,6 @@ -package storage +// Package s3 provides an implementation of Vervet Underground storage backed +// by Amazon S3. +package s3 import ( "context" diff --git a/vervet-underground/storage/aws_s3_client_suite_test.go b/vervet-underground/internal/storage/s3/aws_s3_client_suite_test.go similarity index 88% rename from vervet-underground/storage/aws_s3_client_suite_test.go rename to vervet-underground/internal/storage/s3/aws_s3_client_suite_test.go index e99a2266..04d98d73 100644 --- a/vervet-underground/storage/aws_s3_client_suite_test.go +++ b/vervet-underground/internal/storage/s3/aws_s3_client_suite_test.go @@ -1,4 +1,4 @@ -package storage_test +package s3_test import ( "testing" @@ -10,4 +10,4 @@ import ( func TestOasRouter(t *testing.T) { gomega.RegisterFailHandler(ginkgo.Fail) ginkgo.RunSpecs(t, "") -} \ No newline at end of file +} diff --git a/vervet-underground/storage/aws_s3_client_test.go b/vervet-underground/internal/storage/s3/aws_s3_client_test.go similarity index 95% rename from vervet-underground/storage/aws_s3_client_test.go rename to vervet-underground/internal/storage/s3/aws_s3_client_test.go index ad68dcc5..147a1969 100644 --- a/vervet-underground/storage/aws_s3_client_test.go +++ b/vervet-underground/internal/storage/s3/aws_s3_client_test.go @@ -1,4 +1,4 @@ -package storage +package s3 import ( "github.com/onsi/ginkgo" @@ -14,4 +14,4 @@ var _ = ginkgo.Describe("Aws S3 Client Initialization", func() { }) }) }) -}) \ No newline at end of file +}) diff --git a/vervet-underground/lib/types.go b/vervet-underground/lib/types.go deleted file mode 100644 index f3dcc085..00000000 --- a/vervet-underground/lib/types.go +++ /dev/null @@ -1,6 +0,0 @@ -package lib - -type ServerConfig struct { - Host string `json:"host"` - Services []string `json:"services"` -} diff --git a/vervet-underground/server.go b/vervet-underground/server.go index 5347f03a..ec9c3063 100644 --- a/vervet-underground/server.go +++ b/vervet-underground/server.go @@ -9,25 +9,23 @@ import ( "os/signal" "time" - gorillaMux "github.com/gorilla/mux" + "github.com/gorilla/mux" "github.com/pkg/errors" "github.com/rs/zerolog" "github.com/rs/zerolog/log" "vervet-underground/config" - "vervet-underground/lib" ) func main() { - var wait time.Duration flag.DurationVar(&wait, "graceful-timeout", time.Second*15, "the duration for which the server gracefully wait for existing connections to finish - e.g. 15s or 1m") flag.Parse() zerolog.TimeFieldFormat = zerolog.TimeFormatUnix zerolog.SetGlobalLevel(zerolog.DebugLevel) - router := gorillaMux.NewRouter() - var cfg *lib.ServerConfig + router := mux.NewRouter() + var cfg *config.ServerConfig var err error if cfg, err = config.Load("config.default.json"); err != nil { logError(err)