Skip to content

Commit df6be88

Browse files
Recover corrupted cache (#1381)
* add failing test to restorer Signed-off-by: Joey Brown <[email protected]> * restorer and exporter working as expected Signed-off-by: Joey Brown <[email protected]> * lint Signed-off-by: Joey Brown <[email protected]> * Update phase/restorer.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update phase/cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update cache/image_cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update cache/volume_cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update cache/volume_cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update cache/volume_cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update cache/volume_cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * Update cache/volume_cache.go Co-authored-by: Natalie Arellano <[email protected]> Signed-off-by: Joey Brown <[email protected]> * update based on feedback Signed-off-by: Joey Brown <[email protected]> * fix log * temp fix * this does not work as is. I think we need to modify imgutil. imgutil should fail with a layer-not-found error in both ReuseLayer & GetLayer. For GetLayer, when there is a missing blob, it returns an unexpected EOF error. For ReuseLayer, when there is a missing blob, it's not returning an error but it should. * add eof check * add not exist check * reuse layer test * fix test regression --------- Signed-off-by: Joey Brown <[email protected]> Co-authored-by: Joey Brown <[email protected]>
1 parent a87e12e commit df6be88

18 files changed

+269
-97
lines changed

cache/caching_image_test.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
"github.com/sclevine/spec"
1414
"github.com/sclevine/spec/report"
1515

16+
"github.com/buildpacks/lifecycle/cmd"
17+
1618
"github.com/buildpacks/lifecycle/cache"
1719
h "github.com/buildpacks/lifecycle/testhelpers"
1820
)
@@ -37,7 +39,7 @@ func testCachingImage(t *testing.T, when spec.G, it spec.S) {
3739
fakeImage = fakes.NewImage("some-image", "", nil)
3840
tmpDir, err = os.MkdirTemp("", "")
3941
h.AssertNil(t, err)
40-
volumeCache, err = cache.NewVolumeCache(tmpDir)
42+
volumeCache, err = cache.NewVolumeCache(tmpDir, cmd.DefaultLogger)
4143
h.AssertNil(t, err)
4244
subject = cache.NewCachingImage(fakeImage, volumeCache)
4345
layerPath, layerSHA, layerData = h.RandomLayer(t, tmpDir)

cache/common.go

+22
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,25 @@ import (
55
)
66

77
var errCacheCommitted = errors.New("cache cannot be modified after commit")
8+
9+
// ReadErr is an error type for filesystem read errors.
//
// It carries only a human-readable message. Use IsReadErr to tell a cache
// read failure apart from other errors.
type ReadErr struct {
	msg string
}

// NewReadErr creates a new ReadErr with the given message.
func NewReadErr(msg string) ReadErr {
	return ReadErr{msg: msg}
}

// Error returns the error message.
func (e ReadErr) Error() string {
	return e.msg
}

// IsReadErr reports whether err, or any error in its chain, is a ReadErr.
//
// On a match it returns true and a pointer to a copy of the matched ReadErr.
// Otherwise it returns false and a nil pointer, so callers can never mistake
// an empty zero-value ReadErr for a real one.
func IsReadErr(err error) (bool, *ReadErr) {
	var e ReadErr
	if !errors.As(err, &e) {
		return false, nil
	}
	return true, &e
}

cache/image_cache.go

+31-2
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,44 @@ func (c *ImageCache) AddLayerFile(tarPath string, diffID string) error {
9999
return c.newImage.AddLayerWithDiffID(tarPath, diffID)
100100
}
101101

102+
// isLayerNotFound checks if the error is a layer not found error
103+
//
104+
// FIXME: we should not have to rely on trapping ErrUnexpectedEOF.
105+
// If a blob is not present in the registry, we should get imgutil.ErrLayerNotFound,
106+
// but we do not and instead get io.ErrUnexpectedEOF
107+
func isLayerNotFound(err error) bool {
108+
var e imgutil.ErrLayerNotFound
109+
return errors.As(err, &e) || errors.Is(err, io.ErrUnexpectedEOF)
110+
}
111+
102112
func (c *ImageCache) ReuseLayer(diffID string) error {
103113
if c.committed {
104114
return errCacheCommitted
105115
}
106-
return c.newImage.ReuseLayer(diffID)
116+
err := c.newImage.ReuseLayer(diffID)
117+
if err != nil {
118+
// FIXME: this path is not currently executed.
119+
// If a blob is not present in the registry, we should get imgutil.ErrLayerNotFound.
120+
// We should then skip attempting to reuse the layer.
121+
// However, we do not get imgutil.ErrLayerNotFound when the blob is not present.
122+
if isLayerNotFound(err) {
123+
return NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
124+
}
125+
return fmt.Errorf("failed to reuse cache layer with SHA '%s'", diffID)
126+
}
127+
return nil
107128
}
108129

130+
// RetrieveLayer retrieves a layer from the cache
109131
func (c *ImageCache) RetrieveLayer(diffID string) (io.ReadCloser, error) {
110-
return c.origImage.GetLayer(diffID)
132+
closer, err := c.origImage.GetLayer(diffID)
133+
if err != nil {
134+
if isLayerNotFound(err) {
135+
return nil, NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
136+
}
137+
return nil, fmt.Errorf("failed to get cache layer with SHA '%s'", diffID)
138+
}
139+
return closer, nil
111140
}
112141

113142
func (c *ImageCache) Commit() error {

cache/image_cache_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ func testImageCache(t *testing.T, when spec.G, it spec.S) {
146146
when("layer does not exist", func() {
147147
it("returns an error", func() {
148148
_, err := subject.RetrieveLayer("some_nonexistent_sha")
149-
h.AssertError(t, err, "failed to get layer with sha 'some_nonexistent_sha'")
149+
h.AssertError(t, err, "failed to get cache layer with SHA 'some_nonexistent_sha'")
150150
})
151151
})
152152
})
@@ -236,7 +236,7 @@ func testImageCache(t *testing.T, when spec.G, it spec.S) {
236236
h.AssertNil(t, subject.AddLayerFile(testLayerTarPath, testLayerSHA))
237237

238238
_, err := subject.RetrieveLayer(testLayerSHA)
239-
h.AssertError(t, err, fmt.Sprintf("failed to get layer with sha '%s'", testLayerSHA))
239+
h.AssertError(t, err, fmt.Sprintf("failed to get cache layer with SHA '%s'", testLayerSHA))
240240
})
241241
})
242242
})

cache/image_deleter.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
// ImageDeleter defines the methods available to delete and compare cached images
1313
type ImageDeleter interface {
1414
// DeleteOrigImageIfDifferentFromNewImage deletes origImage when it is not the same image as newImage.
DeleteOrigImageIfDifferentFromNewImage(origImage, newImage imgutil.Image)
15+
// DeleteImage deletes the given image.
DeleteImage(image imgutil.Image)
1516
}
1617

1718
// ImageDeleterImpl is a component to manage cache image deletion
@@ -35,13 +36,13 @@ func (c *ImageDeleterImpl) DeleteOrigImageIfDifferentFromNewImage(origImage, new
3536
}
3637

3738
if !same {
38-
c.deleteImage(origImage)
39+
c.DeleteImage(origImage)
3940
}
4041
}
4142
}
4243

43-
// deleteImage deletes an image
44-
func (c *ImageDeleterImpl) deleteImage(image imgutil.Image) {
44+
// DeleteImage deletes an image
45+
func (c *ImageDeleterImpl) DeleteImage(image imgutil.Image) {
4546
if c.deletionEnabled {
4647
if err := image.Delete(); err != nil {
4748
c.logger.Warnf("Unable to delete cache image: %v", err.Error())

cache/volume_cache.go

+29-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package cache
22

33
import (
44
"encoding/json"
5+
"fmt"
56
"io"
67
"os"
78
"path/filepath"
@@ -10,6 +11,8 @@ import (
1011

1112
"github.com/pkg/errors"
1213

14+
"github.com/buildpacks/lifecycle/log"
15+
1316
"github.com/buildpacks/lifecycle/internal/fsutil"
1417
"github.com/buildpacks/lifecycle/platform"
1518
)
@@ -20,9 +23,11 @@ type VolumeCache struct {
2023
backupDir string
2124
stagingDir string
2225
committedDir string
26+
logger log.Logger
2327
}
2428

25-
func NewVolumeCache(dir string) (*VolumeCache, error) {
29+
// NewVolumeCache creates a new VolumeCache
30+
func NewVolumeCache(dir string, logger log.Logger) (*VolumeCache, error) {
2631
if _, err := os.Stat(dir); err != nil {
2732
return nil, err
2833
}
@@ -32,6 +37,7 @@ func NewVolumeCache(dir string) (*VolumeCache, error) {
3237
backupDir: filepath.Join(dir, "committed-backup"),
3338
stagingDir: filepath.Join(dir, "staging"),
3439
committedDir: filepath.Join(dir, "committed"),
40+
logger: logger,
3541
}
3642

3743
if err := c.setupStagingDir(); err != nil {
@@ -133,7 +139,20 @@ func (c *VolumeCache) ReuseLayer(diffID string) error {
133139
if c.committed {
134140
return errCacheCommitted
135141
}
136-
if err := os.Link(diffIDPath(c.committedDir, diffID), diffIDPath(c.stagingDir, diffID)); err != nil && !os.IsExist(err) {
142+
committedPath := diffIDPath(c.committedDir, diffID)
143+
stagingPath := diffIDPath(c.stagingDir, diffID)
144+
145+
if _, err := os.Stat(committedPath); err != nil {
146+
if os.IsNotExist(err) {
147+
return NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
148+
}
149+
if os.IsPermission(err) {
150+
return NewReadErr(fmt.Sprintf("failed to read cache layer with SHA '%s' due to insufficient permissions", diffID))
151+
}
152+
return fmt.Errorf("failed to re-use cache layer with SHA '%s': %w", diffID, err)
153+
}
154+
155+
if err := os.Link(committedPath, stagingPath); err != nil && !os.IsExist(err) {
137156
return errors.Wrapf(err, "reusing layer (%s)", diffID)
138157
}
139158
return nil
@@ -146,7 +165,13 @@ func (c *VolumeCache) RetrieveLayer(diffID string) (io.ReadCloser, error) {
146165
}
147166
file, err := os.Open(path)
148167
if err != nil {
149-
return nil, errors.Wrapf(err, "opening layer with SHA '%s'", diffID)
168+
if os.IsPermission(err) {
169+
return nil, NewReadErr(fmt.Sprintf("failed to read cache layer with SHA '%s' due to insufficient permissions", diffID))
170+
}
171+
if os.IsNotExist(err) {
172+
return nil, NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
173+
}
174+
return nil, fmt.Errorf("failed to get cache layer with SHA '%s'", diffID)
150175
}
151176
return file, nil
152177
}
@@ -165,7 +190,7 @@ func (c *VolumeCache) RetrieveLayerFile(diffID string) (string, error) {
165190
path := diffIDPath(c.committedDir, diffID)
166191
if _, err := os.Stat(path); err != nil {
167192
if os.IsNotExist(err) {
168-
return "", errors.Wrapf(err, "layer with SHA '%s' not found", diffID)
193+
return "", NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
169194
}
170195
return "", errors.Wrapf(err, "retrieving layer with SHA '%s'", diffID)
171196
}

cache/volume_cache_test.go

+30-10
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ import (
1010
"github.com/sclevine/spec"
1111
"github.com/sclevine/spec/report"
1212

13+
"github.com/buildpacks/lifecycle/cmd"
14+
"github.com/buildpacks/lifecycle/log"
15+
1316
"github.com/buildpacks/lifecycle/buildpack"
1417
"github.com/buildpacks/lifecycle/cache"
1518
"github.com/buildpacks/lifecycle/platform"
@@ -28,6 +31,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
2831
backupDir string
2932
stagingDir string
3033
committedDir string
34+
testLogger log.Logger
3135
)
3236

3337
it.Before(func() {
@@ -42,6 +46,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
4246
backupDir = filepath.Join(volumeDir, "committed-backup")
4347
stagingDir = filepath.Join(volumeDir, "staging")
4448
committedDir = filepath.Join(volumeDir, "committed")
49+
testLogger = cmd.DefaultLogger
4550
})
4651

4752
it.After(func() {
@@ -50,7 +55,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
5055

5156
when("#NewVolumeCache", func() {
5257
it("returns an error when the volume path does not exist", func() {
53-
_, err := cache.NewVolumeCache(filepath.Join(tmpDir, "does_not_exist"))
58+
_, err := cache.NewVolumeCache(filepath.Join(tmpDir, "does_not_exist"), testLogger)
5459
if err == nil {
5560
t.Fatal("expected NewVolumeCache to fail because volume path does not exist")
5661
}
@@ -66,7 +71,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
6671
it("clears staging", func() {
6772
var err error
6873

69-
subject, err = cache.NewVolumeCache(volumeDir)
74+
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
7075
h.AssertNil(t, err)
7176

7277
_, err = os.Stat(filepath.Join(stagingDir, "some-layer.tar"))
@@ -80,7 +85,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
8085
it("creates staging dir", func() {
8186
var err error
8287

83-
subject, err = cache.NewVolumeCache(volumeDir)
88+
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
8489
h.AssertNil(t, err)
8590

8691
_, err = os.Stat(stagingDir)
@@ -92,7 +97,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
9297
it("creates committed dir", func() {
9398
var err error
9499

95-
subject, err = cache.NewVolumeCache(volumeDir)
100+
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
96101
h.AssertNil(t, err)
97102

98103
_, err = os.Stat(committedDir)
@@ -109,7 +114,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
109114
it("clears the backup dir", func() {
110115
var err error
111116

112-
subject, err = cache.NewVolumeCache(volumeDir)
117+
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
113118
h.AssertNil(t, err)
114119

115120
_, err = os.Stat(filepath.Join(backupDir, "some-layer.tar"))
@@ -124,7 +129,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
124129
it.Before(func() {
125130
var err error
126131

127-
subject, err = cache.NewVolumeCache(volumeDir)
132+
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
128133
h.AssertNil(t, err)
129134
})
130135

@@ -206,7 +211,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
206211
when("layer does not exist", func() {
207212
it("returns an error", func() {
208213
_, err := subject.RetrieveLayer("some_nonexistent_sha")
209-
h.AssertError(t, err, "layer with SHA 'some_nonexistent_sha' not found")
214+
h.AssertError(t, err, "failed to find cache layer with SHA 'some_nonexistent_sha'")
210215
})
211216
})
212217
})
@@ -230,7 +235,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
230235
when("layer does not exist", func() {
231236
it("returns an error", func() {
232237
_, err := subject.RetrieveLayerFile("some_nonexistent_sha")
233-
h.AssertError(t, err, "layer with SHA 'some_nonexistent_sha' not found")
238+
h.AssertError(t, err, "failed to find cache layer with SHA 'some_nonexistent_sha'")
234239
})
235240
})
236241
})
@@ -340,7 +345,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
340345
h.AssertNil(t, subject.AddLayerFile(tarPath, "some_sha"))
341346

342347
_, err := subject.RetrieveLayer("some_sha")
343-
h.AssertError(t, err, "layer with SHA 'some_sha' not found")
348+
h.AssertError(t, err, "failed to find cache layer with SHA 'some_sha'")
344349
})
345350
})
346351

@@ -415,7 +420,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
415420
h.AssertNil(t, subject.AddLayer(layerReader, layerSha))
416421

417422
_, err := subject.RetrieveLayer(layerSha)
418-
h.AssertError(t, err, fmt.Sprintf("layer with SHA '%s' not found", layerSha))
423+
h.AssertError(t, err, fmt.Sprintf("failed to find cache layer with SHA '%s'", layerSha))
419424
})
420425
})
421426

@@ -507,6 +512,21 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
507512
h.AssertEq(t, string(bytes), "existing data")
508513
})
509514
})
515+
516+
when("the layer does not exist", func() {
517+
it("fails with a read error", func() {
518+
err := subject.ReuseLayer("some_nonexistent_sha")
519+
isReadErr, _ := cache.IsReadErr(err)
520+
h.AssertEq(t, isReadErr, true)
521+
522+
err = subject.Commit()
523+
h.AssertNil(t, err)
524+
525+
_, err = subject.RetrieveLayer("some_sha")
526+
isReadErr, _ = cache.IsReadErr(err)
527+
h.AssertEq(t, isReadErr, true)
528+
})
529+
})
510530
})
511531

512532
when("attempting to commit more than once", func() {

cmd/lifecycle/exporter.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import (
1919
"github.com/pkg/errors"
2020
"golang.org/x/sync/errgroup"
2121

22+
"github.com/buildpacks/lifecycle/log"
23+
2224
"github.com/buildpacks/lifecycle/auth"
2325
"github.com/buildpacks/lifecycle/buildpack"
2426
"github.com/buildpacks/lifecycle/cache"
@@ -200,7 +202,7 @@ func (e *exportCmd) export(group buildpack.Group, cacheStore phase.Cache, analyz
200202
case e.UseLayout:
201203
appImage, runImageID, err = e.initLayoutAppImage(analyzedMD)
202204
case e.UseDaemon:
203-
appImage, runImageID, err = e.initDaemonAppImage(analyzedMD)
205+
appImage, runImageID, err = e.initDaemonAppImage(analyzedMD, cmd.DefaultLogger)
204206
default:
205207
appImage, runImageID, err = e.initRemoteAppImage(analyzedMD)
206208
}
@@ -258,7 +260,7 @@ func (e *exportCmd) export(group buildpack.Group, cacheStore phase.Cache, analyz
258260
return nil
259261
}
260262

261-
func (e *exportCmd) initDaemonAppImage(analyzedMD files.Analyzed) (imgutil.Image, string, error) {
263+
func (e *exportCmd) initDaemonAppImage(analyzedMD files.Analyzed, logger log.Logger) (imgutil.Image, string, error) {
262264
var opts = []imgutil.ImageOption{
263265
local.FromBaseImage(e.RunImageRef),
264266
}
@@ -301,7 +303,7 @@ func (e *exportCmd) initDaemonAppImage(analyzedMD files.Analyzed) (imgutil.Image
301303
}
302304

303305
if e.LaunchCacheDir != "" {
304-
volumeCache, err := cache.NewVolumeCache(e.LaunchCacheDir)
306+
volumeCache, err := cache.NewVolumeCache(e.LaunchCacheDir, logger)
305307
if err != nil {
306308
return nil, "", cmd.FailErr(err, "create launch cache")
307309
}

0 commit comments

Comments
 (0)