From 78998c13d46bc420cafadfa31496382f899be68e Mon Sep 17 00:00:00 2001
From: Akihiro Suda
Date: Wed, 26 Jul 2023 00:00:39 +0900
Subject: [PATCH 1/2] pkg/cache: record basenames

Fix issue 29

Signed-off-by: Akihiro Suda
---
 pkg/cache/cache.go           | 116 +++++++++++++++++++++++------------
 pkg/cache/cache_test.go      |  12 +++-
 pkg/distro/alpine/alpine.go  |   5 +-
 pkg/distro/arch/arch.go      |   5 +-
 pkg/distro/fedora/fedora.go  |   5 +-
 pkg/downloader/downloader.go |   9 ++-
 6 files changed, 103 insertions(+), 49 deletions(-)

diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
index 36cbef6..be384df 100644
--- a/pkg/cache/cache.go
+++ b/pkg/cache/cache.go
@@ -6,19 +6,19 @@
 //
 // - blobs/sha256/: verified blobs
 //
-// - urls/sha256/ : URL of the blob (optional)
+// - metadata/sha256/ : metadata of the blob (optional)
 //
-// - digests/by-url-sha256/ : digest of the blob (optional)
+// - digests/by-url-sha256/ : digest of the blob (optional, note that URL is not always unique)
 package cache
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net/url"
 	"os"
-	"path"
 	"path/filepath"
 	"strings"
 
@@ -30,10 +30,24 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
+type Metadata struct {
+	Basename string
+}
+
+func ValidateMetadata(m *Metadata) error {
+	if m == nil {
+		return nil
+	}
+	if filepath.Base(m.Basename) != m.Basename {
+		return fmt.Errorf("invalid basename: %q", m.Basename)
+	}
+	return nil
+}
+
 const (
-	BlobsSHA256RelPath = "blobs/sha256"
-	URLsSHA256RelPath  = "urls/sha256"
-	ReverseURLRelPath  = "digests/by-url-sha256"
+	BlobsSHA256RelPath    = "blobs/sha256"
+	MetadataSHA256RelPath = "metadata/sha256"
+	ReverseURLRelPath     = "digests/by-url-sha256"
 )
 
 func New(dir string) (*Cache, error) {
@@ -43,7 +57,7 @@ func New(dir string) (*Cache, error) {
 	if err := os.MkdirAll(dir, 0755); err != nil {
 		return nil, err
 	}
-	for _, f := range []string{BlobsSHA256RelPath, URLsSHA256RelPath, ReverseURLRelPath} {
+	for _, f := range []string{BlobsSHA256RelPath, MetadataSHA256RelPath, ReverseURLRelPath} {
 		subDir := filepath.Join(dir, f) // no need to use securejoin (const)
 		if err := os.MkdirAll(subDir, 0755); err != nil {
 			return nil, err
 		}
@@ -84,15 +98,15 @@ func (c *Cache) BlobAbsPath(sha256sum string) (string, error) {
 	return filepath.Join(c.dir, rel), nil // no need to use securejoin (rel is verified)
 }
 
-func (c *Cache) URLFileRelPath(sha256sum string) (string, error) {
+func (c *Cache) MetadataFileRelPath(sha256sum string) (string, error) {
 	if err := digest.SHA256.Validate(sha256sum); err != nil {
 		return "", err
 	}
-	return securejoin.SecureJoin(URLsSHA256RelPath, sha256sum)
+	return securejoin.SecureJoin(MetadataSHA256RelPath, sha256sum)
 }
 
-func (c *Cache) URLFileAbsPath(sha256sum string) (string, error) {
-	rel, err := c.URLFileRelPath(sha256sum)
+func (c *Cache) MetadataFileAbsPath(sha256sum string) (string, error) {
+	rel, err := c.MetadataFileRelPath(sha256sum)
 	if err != nil {
 		return "", err
 	}
@@ -127,7 +141,10 @@ func (c *Cache) Cached(sha256sum string) (bool, error) {
 	return true, nil
 }
 
-func (c *Cache) Ensure(ctx context.Context, u *url.URL, sha256sum string) error {
+func (c *Cache) Ensure(ctx context.Context, u *url.URL, sha256sum string, m *Metadata) error {
+	if err := ValidateMetadata(m); err != nil {
+		return err
+	}
 	blob, err := c.BlobAbsPath(sha256sum) // also verifies sha256sum string representation
 	if err != nil {
 		return err
 	}
@@ -183,7 +200,7 @@ func (c *Cache) Ensure(ctx context.Context, u *url.URL, sha256sum string) error
 	if err = os.Rename(tmpW.Name(), blob); err != nil {
 		return err
 	}
-	if err := c.writeURLFiles(sha256sum, u); err != nil {
+	if err := c.writeMetadataFiles(sha256sum, u, m); err != nil {
 		return err
 	}
 	return nil
@@ -212,10 +229,10 @@ func (c *Cache) Export(dir string) (map[string]string, error) {
 		}
 		cpSrc := filepath.Join(c.dir, BlobsSHA256RelPath, sha256sum) // no need to use securejoin (sha256sum is verified)
 		basename := "UNKNOWN-" + sha256sum
-		if u, err := c.OriginURLBySHA256(sha256sum); err == nil {
-			basename = path.Base(u.Path)
+		if m, err := c.MetadataBySHA256(sha256sum); err == nil && m.Basename != "" {
+			basename = filepath.Base(m.Basename)
 		} else {
-			logrus.WithError(err).Warnf("Failed to get the origin URL of %s", sha256sum)
+			logrus.WithError(err).Warnf("Failed to get the original basename of %s", sha256sum)
 		}
 		cpDst, err := securejoin.SecureJoin(dir, basename)
 		if err != nil {
@@ -306,11 +323,14 @@ func (c *Cache) importFile(nameFull string) (sha256sum string, err error) {
 	if err != nil {
 		return "", err
 	}
-	return c.ImportWithURL(u)
+	m := &Metadata{
+		Basename: filepath.Base(nameFull),
+	}
+	return c.ImportWithURL(u, m)
 }
 
 // ImportWithReader imports from the reader.
-// Does not create the URL file.
+// Does not create the metadata files.
 func (c *Cache) ImportWithReader(r io.Reader) (sha256sum string, err error) {
 	blobsSHA256Dir := filepath.Join(c.dir, BlobsSHA256RelPath) // no need to use securejoin (const)
 	tmpW, err := os.CreateTemp(blobsSHA256Dir, ".import-*.tmp")
@@ -344,7 +364,10 @@ func (c *Cache) ImportWithReader(r io.Reader) (sha256sum string, err error) {
 	return sha256sum, nil
 }
 
-func (c *Cache) ImportWithURL(u *url.URL) (sha256sum string, err error) {
+func (c *Cache) ImportWithURL(u *url.URL, m *Metadata) (sha256sum string, err error) {
+	if err := ValidateMetadata(m); err != nil {
+		return "", err
+	}
 	r, _, err := c.urlOpener.Open(context.TODO(), u, "")
 	if err != nil {
 		return "", err
 	}
@@ -354,47 +377,60 @@ func (c *Cache) ImportWithURL(u *url.URL) (sha256sum string, err error) {
 	if err != nil {
 		return "", err
 	}
-	err = c.writeURLFiles(sha256sum, u)
+	err = c.writeMetadataFiles(sha256sum, u, m)
 	return sha256sum, err
 }
 
-// writeURLFiles writes URL files.
+// writeMetadataFiles writes metadata files and reverse URL files.
+// Note that a URL is not unique.
 // Existing files are overwritten.
-func (c *Cache) writeURLFiles(sha256sum string, u *url.URL) error {
-	urlFileAbs, err := c.URLFileAbsPath(sha256sum)
-	if err != nil {
-		return err
-	}
-	if err = os.WriteFile(urlFileAbs, []byte(u.Redacted()), 0644); err != nil {
-		return fmt.Errorf("failed to create %q: %w", urlFileAbs, err)
-	}
-	revURLFileAbs, err := c.ReverseURLFileAbsPath(u)
-	if err != nil {
-		return err
+func (c *Cache) writeMetadataFiles(sha256sum string, u *url.URL, m *Metadata) error {
+	if m != nil {
+		j, err := json.Marshal(m)
+		if err != nil {
+			return err
+		}
+		metadataFileAbs, err := c.MetadataFileAbsPath(sha256sum)
+		if err != nil {
+			return err
+		}
+		if err = os.WriteFile(metadataFileAbs, j, 0644); err != nil {
+			return fmt.Errorf("failed to create %q: %w", metadataFileAbs, err)
+		}
 	}
-	if err = os.WriteFile(revURLFileAbs, []byte("sha256:"+sha256sum), 0644); err != nil {
-		return fmt.Errorf("failed to create %q: %w", revURLFileAbs, err)
+	if u != nil {
+		revURLFileAbs, err := c.ReverseURLFileAbsPath(u)
+		if err != nil {
+			return err
+		}
+		if err = os.WriteFile(revURLFileAbs, []byte("sha256:"+sha256sum), 0644); err != nil {
+			return fmt.Errorf("failed to create %q: %w", revURLFileAbs, err)
+		}
 	}
 	return nil
 }
 
-// OriginURLBySHA256 returns the origin of the blob.
+// MetadataBySHA256 returns the metadata for the blob.
 // Not always available.
-func (c *Cache) OriginURLBySHA256(sha256sum string) (*url.URL, error) {
-	urlFileAbs, err := c.URLFileAbsPath(sha256sum)
+func (c *Cache) MetadataBySHA256(sha256sum string) (*Metadata, error) {
+	metadataFileAbs, err := c.MetadataFileAbsPath(sha256sum)
 	if err != nil {
 		return nil, err
 	}
-	b, err := os.ReadFile(urlFileAbs)
+	b, err := os.ReadFile(metadataFileAbs)
 	if err != nil {
 		return nil, err
 	}
-	s := strings.TrimSpace(string(b))
-	return url.Parse(s)
+	var m Metadata
+	if err = json.Unmarshal(b, &m); err != nil {
+		return nil, err
+	}
+	return &m, nil
 }
 
 // SHA256ByOriginURL returns the sha256sum by the origin URL.
 // Not always available.
+// Do not use this unless you are sure that the URL is unique.
 func (c *Cache) SHA256ByOriginURL(u *url.URL) (string, error) {
 	revUrlFileAbs, err := c.ReverseURLFileAbsPath(u)
 	if err != nil {
diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go
index b63a530..dd2bf47 100644
--- a/pkg/cache/cache_test.go
+++ b/pkg/cache/cache_test.go
@@ -142,13 +142,16 @@ func testCacheEnsure(t testing.TB, blobsBySHA256 map[string]*testBlob, newBlobUR
 	ctx := context.TODO()
 	for _, blob := range blobsBySHA256 {
 		u := newBlobURL(blob)
+		m := &Metadata{
+			Basename: filepath.Base(u.Path),
+		}
 		for i := 0; i < 2; i++ { // run twice to test idempotency
-			assert.NilError(t, cache.Ensure(ctx, u, blob.sha256))
+			assert.NilError(t, cache.Ensure(ctx, u, blob.sha256, m))
 			ok, err := cache.Cached(blob.sha256)
 			assert.NilError(t, err)
 			assert.Equal(t, true, ok)
 		}
-		assert.Check(t, cache.Ensure(ctx, u, digest.SHA256.FromString("wrong").Encoded()) != nil)
+		assert.Check(t, cache.Ensure(ctx, u, digest.SHA256.FromString("wrong").Encoded(), m) != nil)
 	}
 	testCacheDir(t, cache, blobsBySHA256)
 
@@ -211,7 +214,10 @@ func TestCacheExportImport(t *testing.T) {
 	defer testServer.Close()
 	for _, blob := range blobsBySHA256 {
 		u := testServer.basenameURL(blob)
-		assert.NilError(t, cache.Ensure(ctx, u, blob.sha256))
+		m := &Metadata{
+			Basename: filepath.Base(u.Path),
+		}
+		assert.NilError(t, cache.Ensure(ctx, u, blob.sha256, m))
 	}
 
 	mapByBasename := make(map[string]string)
diff --git a/pkg/distro/alpine/alpine.go b/pkg/distro/alpine/alpine.go
index fdb1e3d..3ad9f04 100644
--- a/pkg/distro/alpine/alpine.go
+++ b/pkg/distro/alpine/alpine.go
@@ -117,7 +117,10 @@ func (d *alpine) generateHashWithURL(ctx context.Context, hw distro.HashWriter,
 	return fmt.Errorf("failed to check the cached sha256 by URL %q: %w", u.Redacted(), err)
 	}
 	logrus.Debugf("%q: downloading from %q", basename, u.Redacted())
-	sha256sum, err := c.ImportWithURL(u)
+	m := &cache.Metadata{
+		Basename: basename,
+	}
+	sha256sum, err := c.ImportWithURL(u, m)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/distro/arch/arch.go b/pkg/distro/arch/arch.go
index 1835315..e5a28f4 100644
--- a/pkg/distro/arch/arch.go
+++ b/pkg/distro/arch/arch.go
@@ -120,7 +120,10 @@ func (d *arch) generateHash1(ctx context.Context, hw distro.HashWriter, c *cache
 	return fmt.Errorf("failed to check the cached sha256 by URL %q: %w", u.Redacted(), err)
 	}
 	logrus.Debugf("%q: downloading from %q", basename, u.Redacted())
-	sha256sum, err := c.ImportWithURL(u)
+	m := &cache.Metadata{
+		Basename: basename,
+	}
+	sha256sum, err := c.ImportWithURL(u, m)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/distro/fedora/fedora.go b/pkg/distro/fedora/fedora.go
index aed28a3..1ea1816 100644
--- a/pkg/distro/fedora/fedora.go
+++ b/pkg/distro/fedora/fedora.go
@@ -135,7 +135,10 @@ func (d *fedora) generateHash1(ctx context.Context, hw distro.HashWriter, c *cac
 	return fmt.Errorf("failed to check the cached sha256 by URL %q: %w", u.Redacted(), err)
 	}
 	logrus.Debugf("%q: downloading from %q", basename, u.Redacted())
-	sha256sum, err := c.ImportWithURL(u)
+	m := &cache.Metadata{
+		Basename: basename,
+	}
+	sha256sum, err := c.ImportWithURL(u, m)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/downloader/downloader.go b/pkg/downloader/downloader.go
index fd091d6..e93f8a4 100644
--- a/pkg/downloader/downloader.go
+++ b/pkg/downloader/downloader.go
@@ -7,7 +7,7 @@ import (
 	"sort"
 
 	"github.com/fatih/color"
-	"github.com/reproducible-containers/repro-get/pkg/cache"
+	pkgcache "github.com/reproducible-containers/repro-get/pkg/cache"
 	"github.com/reproducible-containers/repro-get/pkg/distro"
 	"github.com/reproducible-containers/repro-get/pkg/filespec"
 	"github.com/sirupsen/logrus"
@@ -32,7 +32,7 @@ type Opts struct {
 	SkipInstalled bool
 }
 
-func Download(ctx context.Context, d distro.Distro, cache *cache.Cache, fileSpecs map[string]*filespec.FileSpec, opts Opts) (*Result, error) {
+func Download(ctx context.Context, d distro.Distro, cache *pkgcache.Cache, fileSpecs map[string]*filespec.FileSpec, opts Opts) (*Result, error) {
 	if d == nil {
 		return nil, errors.New("distro driver needs to be specified")
 	}
@@ -106,7 +106,10 @@ func Download(ctx context.Context, d distro.Distro, cache *cache.Cache, fileSpec
 				return nil, fmt.Errorf("failed to determine the URL of %v with the provider %q: %w", sp, provider, err)
 			}
 			printPackageStatus("Downloading from %s", u.Redacted())
-			if err = cache.Ensure(ctx, u, sp.SHA256); err != nil {
+			m := &pkgcache.Metadata{
+				Basename: sp.Basename,
+			}
+			if err = cache.Ensure(ctx, u, sp.SHA256, m); err != nil {
 				if j != len(providers)-1 {
 					logrus.WithError(err).Warnf("Failed to download %s (%s), trying the next provider", sp.Basename, u.Redacted())
 				} else {

From eb41d6b36f742f1ce87bf655ff36f1fe59de6427 Mon Sep 17 00:00:00 2001
From: Akihiro Suda
Date: Wed, 26 Jul 2023 00:25:44 +0900
Subject: [PATCH 2/2] pkg/cache: skip creating reverse URL files for oci

Signed-off-by: Akihiro Suda
---
 pkg/cache/cache.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
index be384df..58e271d 100644
--- a/pkg/cache/cache.go
+++ b/pkg/cache/cache.go
@@ -398,7 +398,7 @@ func (c *Cache) writeMetadataFiles(sha256sum string, u *url.URL, m *Metadata) er
 			return fmt.Errorf("failed to create %q: %w", metadataFileAbs, err)
 		}
 	}
-	if u != nil {
+	if u != nil && u.Scheme != "oci" && !strings.HasPrefix(u.Scheme, "oci+") {
 		revURLFileAbs, err := c.ReverseURLFileAbsPath(u)
 		if err != nil {
 			return err
@@ -432,6 +432,9 @@ func (c *Cache) MetadataBySHA256(sha256sum string) (*Metadata, error) {
 // Not always available.
 // Do not use this unless you are sure that the URL is unique.
 func (c *Cache) SHA256ByOriginURL(u *url.URL) (string, error) {
+	if u.Scheme == "oci" || strings.HasPrefix(u.Scheme, "oci+") {
+		return "", fmt.Errorf("oci URL scheme is not supported")
+	}
 	revUrlFileAbs, err := c.ReverseURLFileAbsPath(u)
 	if err != nil {
 		return "", err
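
Usage sketch (illustrative only, not part of the series above): after these patches, callers pass the desired basename alongside the URL and digest, and can later resolve a blob's basename from its digest rather than from the possibly non-unique origin URL. The URL, digest, and cache directory below are made-up values.

package main

import (
	"context"
	"fmt"
	"net/url"
	"path/filepath"

	"github.com/reproducible-containers/repro-get/pkg/cache"
)

func main() {
	// Made-up URL and digest, for illustration only.
	u, _ := url.Parse("https://example.com/pool/hello_1.0_amd64.deb")
	sha256sum := "0000000000000000000000000000000000000000000000000000000000000000"

	c, err := cache.New("/tmp/repro-get-cache") // hypothetical cache dir
	if err != nil {
		panic(err)
	}

	// The basename is recorded explicitly in the metadata file.
	m := &cache.Metadata{Basename: filepath.Base(u.Path)}
	if err := c.Ensure(context.TODO(), u, sha256sum, m); err != nil {
		panic(err) // download or digest verification failed
	}

	// Export and other callers can resolve the basename from the digest.
	if got, err := c.MetadataBySHA256(sha256sum); err == nil {
		fmt.Println("basename:", got.Basename)
	}
}

Note that Ensure still verifies the downloaded blob against sha256sum, so with a made-up digest this sketch would fail at download time; it only illustrates the call shape of the new API.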