From 5494513f2eb1e5c8d209c8a325fccb2fe3846f42 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 25 May 2021 14:42:26 +0200 Subject: [PATCH] chunked: fix build on other platforms fix package build on !linux platforms Signed-off-by: Giuseppe Scrivano --- pkg/chunked/storage.go | 870 +--------------------------- pkg/chunked/storage_linux.go | 874 +++++++++++++++++++++++++++++ pkg/chunked/storage_unsupported.go | 16 + pkg/chunked/zstdchunked_test.go | 2 + 4 files changed, 893 insertions(+), 869 deletions(-) create mode 100644 pkg/chunked/storage_linux.go create mode 100644 pkg/chunked/storage_unsupported.go diff --git a/pkg/chunked/storage.go b/pkg/chunked/storage.go index 67ed50b78e..9212cbbcff 100644 --- a/pkg/chunked/storage.go +++ b/pkg/chunked/storage.go @@ -1,39 +1,8 @@ package chunked import ( - archivetar "archive/tar" - "context" - "encoding/base64" - "encoding/json" "fmt" "io" - "io/ioutil" - "os" - "path/filepath" - "sort" - "strings" - "syscall" - "time" - - storage "github.com/containers/storage" - graphdriver "github.com/containers/storage/drivers" - driversCopy "github.com/containers/storage/drivers/copy" - "github.com/containers/storage/pkg/archive" - "github.com/containers/storage/pkg/idtools" - "github.com/containers/storage/types" - "github.com/klauspost/compress/zstd" - digest "github.com/opencontainers/go-digest" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/vbatts/tar-split/archive/tar" - "golang.org/x/sys/unix" -) - -const ( - maxNumberMissingChunks = 1024 - newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_WRONLY | unix.O_EXCL) - containersOverrideXattr = "user.containers.override_stat" - bigDataKey = "zstd-chunked-manifest" ) // ImageSourceChunk is a portion of a blob. @@ -48,847 +17,10 @@ type ImageSourceSeekable interface { GetBlobAt([]ImageSourceChunk) (chan io.ReadCloser, chan error, error) } -type chunkedZstdDiffer struct { - stream ImageSourceSeekable - manifest []byte - layersMetadata map[string][]zstdFileMetadata - layersTarget map[string]string -} - // ErrBadRequest is returned when the request is not valid type ErrBadRequest struct { } func (e ErrBadRequest) Error() string { - return fmt.Sprintf("http bad request") -} - -func timeToTimespec(time time.Time) (ts unix.Timespec) { - if time.IsZero() { - // Return UTIME_OMIT special value - ts.Sec = 0 - ts.Nsec = ((1 << 30) - 2) - return - } - return unix.NsecToTimespec(time.UnixNano()) -} - -func copyFileContent(src, destFile, root string, dirfd int, missingDirsMode, mode os.FileMode) (*os.File, int64, error) { - st, err := os.Stat(src) - if err != nil { - return nil, -1, err - } - - copyWithFileRange, copyWithFileClone := true, true - - // If the destination file already exists, we shouldn't blow it away - dstFile, err := openFileUnderRoot(destFile, root, dirfd, newFileFlags, mode) - if err != nil { - return nil, -1, err - } - - err = driversCopy.CopyRegularToFile(src, dstFile, st, ©WithFileRange, ©WithFileClone) - if err != nil { - dstFile.Close() - return nil, -1, err - } - return dstFile, st.Size(), err -} - -func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[string]map[string]*zstdFileMetadata { - maps := make(map[string]map[string]*zstdFileMetadata) - - for layerID, v := range layersMetadata { - r := make(map[string]*zstdFileMetadata) - for i := range v { - r[v[i].Digest] = &v[i] - } - maps[layerID] = r - } - return maps -} - -func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[string]string, error) { - allLayers, err := store.Layers() - if err != nil { - return nil, nil, err - } - - layersMetadata := make(map[string][]zstdFileMetadata) - layersTarget := make(map[string]string) - for _, r := range allLayers { - manifestReader, err := store.LayerBigData(r.ID, bigDataKey) - if err != nil { - continue - } - defer manifestReader.Close() - manifest, err := ioutil.ReadAll(manifestReader) - if err != nil { - return nil, nil, err - } - var toc zstdTOC - if err := json.Unmarshal(manifest, &toc); err != nil { - continue - } - layersMetadata[r.ID] = toc.Entries - target, err := store.DifferTarget(r.ID) - if err != nil { - return nil, nil, err - } - layersTarget[r.ID] = target - } - - return layersMetadata, layersTarget, nil -} - -// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. -func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) { - if _, ok := annotations[manifestChecksumKey]; ok { - return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss) - } - return nil, errors.New("blob type not supported for partial retrieval") -} - -func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedZstdDiffer, error) { - manifest, err := readZstdChunkedManifest(iss, blobSize, annotations) - if err != nil { - return nil, err - } - layersMetadata, layersTarget, err := getLayersCache(store) - if err != nil { - return nil, err - } - - return &chunkedZstdDiffer{ - stream: iss, - manifest: manifest, - layersMetadata: layersMetadata, - layersTarget: layersTarget, - }, nil -} - -func findFileInOtherLayers(file zstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*zstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) { - // this is ugly, needs to be indexed - for layerID, checksums := range layersMetadata { - m, found := checksums[file.Digest] - if !found { - continue - } - - source, ok := layersTarget[layerID] - if !ok { - continue - } - - srcDirfd, err := unix.Open(root, unix.O_RDONLY, 0) - if err != nil { - continue - } - defer unix.Close(srcDirfd) - - srcFile, err := openFileUnderRoot(m.Name, source, srcDirfd, unix.O_RDONLY, 0) - if err != nil { - continue - } - defer srcFile.Close() - - srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFile.Fd()) - - dstFile, written, err := copyFileContent(srcPath, file.Name, root, dirfd, missingDirsMode, 0) - if err != nil { - continue - } - return dstFile, written, nil - } - return nil, 0, nil -} - -func getFileDigest(f *os.File) (digest.Digest, error) { - digester := digest.Canonical.Digester() - if _, err := io.Copy(digester.Hash(), f); err != nil { - return "", err - } - return digester.Digest(), nil -} - -// findFileOnTheHost checks whether the requested file already exist on the host and copies the file content from there if possible. -// It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different -// paths. -func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) { - sourceFile := filepath.Clean(filepath.Join("/", file.Name)) - if !strings.HasPrefix(sourceFile, "/usr/") { - // limit host deduplication to files under /usr. - return nil, 0, nil - } - - st, err := os.Stat(sourceFile) - if err != nil || !st.Mode().IsRegular() { - return nil, 0, nil - } - - if st.Size() != file.Size { - return nil, 0, nil - } - - fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0) - if err != nil { - return nil, 0, nil - } - - f := os.NewFile(uintptr(fd), "fd") - defer f.Close() - - manifestChecksum, err := digest.Parse(file.Digest) - if err != nil { - return nil, 0, err - } - - checksum, err := getFileDigest(f) - if err != nil { - return nil, 0, err - } - - if checksum != manifestChecksum { - return nil, 0, nil - } - - dstFile, written, err := copyFileContent(fmt.Sprintf("/proc/self/fd/%d", fd), file.Name, root, dirfd, missingDirsMode, 0) - if err != nil { - return nil, 0, nil - } - - // calculate the checksum again to make sure the file wasn't modified while it was copied - if _, err := f.Seek(0, 0); err != nil { - return nil, 0, err - } - checksum, err = getFileDigest(f) - if err != nil { - return nil, 0, err - } - if checksum != manifestChecksum { - return nil, 0, nil - } - return dstFile, written, nil -} - -func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) error { - if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 { - return nil - } - - idMappings := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) - - for i := range manifest { - if options.ChownOpts != nil { - manifest[i].UID = options.ChownOpts.UID - manifest[i].GID = options.ChownOpts.GID - } else { - pair := idtools.IDPair{ - UID: manifest[i].UID, - GID: manifest[i].GID, - } - var err error - manifest[i].UID, manifest[i].GID, err = idMappings.ToContainer(pair) - if err != nil { - return err - } - } - } - return nil -} - -type missingFile struct { - File *zstdFileMetadata - Gap int64 -} - -func (m missingFile) Length() int64 { - return m.File.EndOffset - m.File.Offset -} - -type missingChunk struct { - RawChunk ImageSourceChunk - Files []missingFile -} - -func setFileAttrs(file *os.File, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { - if file == nil || file.Fd() < 0 { - return errors.Errorf("invalid file") - } - fd := int(file.Fd()) - - t, err := typeToTarType(metadata.Type) - if err != nil { - return err - } - if t == tar.TypeSymlink { - return nil - } - - if err := unix.Fchown(fd, metadata.UID, metadata.GID); err != nil { - if !options.IgnoreChownErrors { - return err - } - } - - for k, v := range metadata.Xattrs { - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - if err := unix.Fsetxattr(fd, k, data, 0); err != nil { - return err - } - } - - ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)} - if err := unix.UtimesNanoAt(fd, "", ts, 0); err != nil && errors.Is(err, unix.ENOSYS) { - return err - } - - if err := unix.Fchmod(fd, uint32(mode)); err != nil { - return err - } - return nil -} - -func openFileUnderRoot(name, root string, dirfd int, flags uint64, mode os.FileMode) (*os.File, error) { - how := unix.OpenHow{ - Flags: flags, - Mode: uint64(mode & 07777), - Resolve: unix.RESOLVE_IN_ROOT, - } - - fd, err := unix.Openat2(dirfd, name, &how) - if err != nil { - return nil, err - } - return os.NewFile(uintptr(fd), name), nil -} - -func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) (err error) { - file, err := openFileUnderRoot(metadata.Name, dest, dirfd, newFileFlags, 0) - if err != nil { - return err - } - defer func() { - err2 := file.Close() - if err == nil { - err = err2 - } - }() - - z, err := zstd.NewReader(reader) - if err != nil { - return err - } - defer z.Close() - - digester := digest.Canonical.Digester() - checksum := digester.Hash() - _, err = z.WriteTo(io.MultiWriter(file, checksum)) - if err != nil { - return err - } - manifestChecksum, err := digest.Parse(metadata.Digest) - if err != nil { - return err - } - if digester.Digest() != manifestChecksum { - return fmt.Errorf("checksum mismatch for %q", dest) - } - return setFileAttrs(file, mode, metadata, options) -} - -func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { - for mc := 0; ; mc++ { - var part io.ReadCloser - select { - case p := <-streams: - part = p - case err := <-errs: - return err - } - if part == nil { - if mc == len(missingChunks) { - break - } - return errors.Errorf("invalid stream returned %d %d", mc, len(missingChunks)) - } - if mc == len(missingChunks) { - return errors.Errorf("too many chunks returned") - } - - for _, mf := range missingChunks[mc].Files { - if mf.Gap > 0 { - limitReader := io.LimitReader(part, mf.Gap) - _, err := io.Copy(ioutil.Discard, limitReader) - if err != nil { - return err - } - continue - } - - limitReader := io.LimitReader(part, mf.Length()) - - if err := createFileFromZstdStream(dest, dirfd, limitReader, missingDirsMode, os.FileMode(mf.File.Mode), mf.File, options); err != nil { - part.Close() - return err - } - } - part.Close() - } - return nil -} - -func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk { - if len(missingChunks) <= target { - return missingChunks - } - - getGap := func(missingChunks []missingChunk, i int) int { - prev := missingChunks[i-1].RawChunk.Offset + missingChunks[i-1].RawChunk.Length - return int(missingChunks[i].RawChunk.Offset - prev) - } - - // this implementation doesn't account for duplicates, so it could merge - // more than necessary to reach the specified target. Since target itself - // is a heuristic value, it doesn't matter. - var gaps []int - for i := 1; i < len(missingChunks); i++ { - gaps = append(gaps, getGap(missingChunks, i)) - } - sort.Ints(gaps) - - toShrink := len(missingChunks) - target - targetValue := gaps[toShrink-1] - - newMissingChunks := missingChunks[0:1] - for i := 1; i < len(missingChunks); i++ { - gap := getGap(missingChunks, i) - if gap > targetValue { - newMissingChunks = append(newMissingChunks, missingChunks[i]) - } else { - prev := &newMissingChunks[len(newMissingChunks)-1] - gapFile := missingFile{ - Gap: int64(gap), - } - prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length - prev.Files = append(append(prev.Files, gapFile), missingChunks[i].Files...) - } - } - - return newMissingChunks -} - -func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { - var chunksToRequest []ImageSourceChunk - for _, c := range missingChunks { - chunksToRequest = append(chunksToRequest, c.RawChunk) - } - - // There are some missing files. Prepare a multirange request for the missing chunks. - var streams chan io.ReadCloser - var err error - var errs chan error - for { - streams, errs, err = input.stream.GetBlobAt(chunksToRequest) - if err == nil { - break - } - - if _, ok := err.(ErrBadRequest); ok { - requested := len(missingChunks) - // If the server cannot handle at least 64 chunks in a single request, just give up. - if requested < 64 { - return err - } - - // Merge more chunks to request - missingChunks = mergeMissingChunks(missingChunks, requested/2) - continue - } - return err - } - - if err := storeMissingFiles(streams, errs, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { - return err - } - return nil -} - -func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { - parent := filepath.Dir(metadata.Name) - base := filepath.Base(metadata.Name) - - parentFd := dirfd - if parent != "." { - parentFile, err := openFileUnderRoot(parent, target, dirfd, unix.O_DIRECTORY|unix.O_PATH|unix.O_RDONLY, 0) - if err != nil { - return err - } - defer parentFile.Close() - parentFd = int(parentFile.Fd()) - } - - if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil { - if !os.IsExist(err) { - return err - } - } - - file, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_RDONLY, 0) - if err != nil { - return err - } - defer file.Close() - - return setFileAttrs(file, mode, metadata, options) -} - -func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { - sourceFile, err := openFileUnderRoot(metadata.Linkname, target, dirfd, unix.O_RDONLY, 0) - if err != nil { - return err - } - defer sourceFile.Close() - - destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) - destDirFd := dirfd - if destDir != "." { - f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) - if err != nil { - return err - } - defer f.Close() - destDirFd = int(f.Fd()) - } - - err = unix.Linkat(int(sourceFile.Fd()), "", destDirFd, destBase, unix.AT_EMPTY_PATH) - if err != nil { - return err - } - - newFile, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_WRONLY, 0) - if err != nil { - return err - } - defer newFile.Close() - - return setFileAttrs(newFile, mode, metadata, options) -} - -func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { - destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) - destDirFd := dirfd - if destDir != "." { - f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) - if err != nil { - return err - } - defer f.Close() - destDirFd = int(f.Fd()) - } - - return unix.Symlinkat(metadata.Linkname, destDirFd, destBase) -} - -type whiteoutHandler struct { - Dirfd int - Root string -} - -func (d whiteoutHandler) Setxattr(path, name string, value []byte) error { - file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_RDONLY, 0) - if err != nil { - return err - } - defer file.Close() - - return unix.Fsetxattr(int(file.Fd()), name, value, 0) -} - -func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error { - dir := filepath.Dir(path) - base := filepath.Base(path) - - dirfd := d.Dirfd - if dir != "" { - dir, err := openFileUnderRoot(dir, d.Root, d.Dirfd, unix.O_RDONLY, 0) - if err != nil { - return err - } - defer dir.Close() - - dirfd = int(dir.Fd()) - } - - return unix.Mknodat(dirfd, base, mode, dev) -} - -func checkChownErr(err error, name string, uid, gid int) error { - if errors.Is(err, syscall.EINVAL) { - return errors.Wrapf(err, "potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid", uid, gid, name) - } - return err -} - -func (d whiteoutHandler) Chown(path string, uid, gid int) error { - file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_PATH, 0) - if err != nil { - return err - } - defer file.Close() - - if err := unix.Fchownat(int(file.Fd()), "", uid, gid, unix.AT_EMPTY_PATH); err != nil { - var stat unix.Stat_t - if unix.Fstat(int(file.Fd()), &stat) == nil { - if stat.Uid == uint32(uid) && stat.Gid == uint32(gid) { - return nil - } - } - return checkChownErr(err, path, uid, gid) - } - return nil -} - -type hardLinkToCreate struct { - dest string - dirfd int - mode os.FileMode - metadata *zstdFileMetadata -} - -func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) { - bigData := map[string][]byte{ - bigDataKey: d.manifest, - } - output := graphdriver.DriverWithDifferOutput{ - Differ: d, - BigData: bigData, - } - - storeOpts, err := types.DefaultStoreOptionsAutoDetectUID() - if err != nil { - return output, err - } - - enableHostDedup := false - if value := storeOpts.PullOptions["enable_host_deduplication"]; strings.ToLower(value) == "true" { - enableHostDedup = true - } - - // Generate the manifest - var toc zstdTOC - if err := json.Unmarshal(d.manifest, &toc); err != nil { - return output, err - } - - whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData) - - var missingChunks []missingChunk - var mergedEntries []zstdFileMetadata - - if err := maybeDoIDRemap(toc.Entries, options); err != nil { - return output, err - } - - for _, e := range toc.Entries { - if e.Type == TypeChunk { - l := len(mergedEntries) - if l == 0 || mergedEntries[l-1].Type != TypeReg { - return output, errors.New("chunk type without a regular file") - } - mergedEntries[l-1].EndOffset = e.EndOffset - continue - } - mergedEntries = append(mergedEntries, e) - } - - if options.ForceMask != nil { - uid, gid, mode, err := archive.GetFileOwner(dest) - if err == nil { - value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode) - if err := unix.Setxattr(dest, containersOverrideXattr, []byte(value), 0); err != nil { - return output, err - } - } - } - - dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH, 0) - if err != nil { - return output, err - } - defer unix.Close(dirfd) - - otherLayersCache := prepareOtherLayersCache(d.layersMetadata) - - missingDirsMode := os.FileMode(0700) - if options.ForceMask != nil { - missingDirsMode = *options.ForceMask - } - - // hardlinks can point to missing files. So create them after all files - // are retrieved - var hardLinks []hardLinkToCreate - - missingChunksSize, totalChunksSize := int64(0), int64(0) - for i, r := range mergedEntries { - if options.ForceMask != nil { - value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&07777) - r.Xattrs[containersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value)) - r.Mode = int64(*options.ForceMask) - } - - mode := os.FileMode(r.Mode) - - r.Name = filepath.Clean(r.Name) - r.Linkname = filepath.Clean(r.Linkname) - - t, err := typeToTarType(r.Type) - if err != nil { - return output, err - } - if whiteoutConverter != nil { - hdr := archivetar.Header{ - Typeflag: t, - Name: r.Name, - Linkname: r.Linkname, - Size: r.Size, - Mode: r.Mode, - Uid: r.UID, - Gid: r.GID, - } - handler := whiteoutHandler{ - Dirfd: dirfd, - Root: dest, - } - writeFile, err := whiteoutConverter.ConvertReadWithHandler(&hdr, r.Name, &handler) - if err != nil { - return output, err - } - if !writeFile { - continue - } - } - switch t { - case tar.TypeReg: - // Create directly empty files. - if r.Size == 0 { - // Used to have a scope for cleanup. - createEmptyFile := func() error { - file, err := openFileUnderRoot(r.Name, dest, dirfd, newFileFlags, 0) - if err != nil { - return err - } - defer file.Close() - if err := setFileAttrs(file, mode, &r, options); err != nil { - return err - } - return nil - } - if err := createEmptyFile(); err != nil { - return output, err - } - continue - } - - case tar.TypeDir: - if err := safeMkdir(dest, dirfd, mode, &r, options); err != nil { - return output, err - } - continue - - case tar.TypeLink: - dest := dest - dirfd := dirfd - mode := mode - r := r - hardLinks = append(hardLinks, hardLinkToCreate{ - dest: dest, - dirfd: dirfd, - mode: mode, - metadata: &r, - }) - continue - - case tar.TypeSymlink: - if err := safeSymlink(dest, dirfd, mode, &r, options); err != nil { - return output, err - } - continue - - case tar.TypeChar: - case tar.TypeBlock: - case tar.TypeFifo: - /* Ignore. */ - default: - return output, fmt.Errorf("invalid type %q", t) - } - - totalChunksSize += r.Size - - dstFile, _, err := findFileInOtherLayers(r, dest, dirfd, otherLayersCache, d.layersTarget, missingDirsMode) - if err != nil { - return output, err - } - if dstFile != nil { - if err := setFileAttrs(dstFile, mode, &r, options); err != nil { - dstFile.Close() - return output, err - } - dstFile.Close() - continue - } - - if enableHostDedup { - dstFile, _, err = findFileOnTheHost(r, dest, dirfd, missingDirsMode) - if err != nil { - return output, err - } - if dstFile != nil { - if err := setFileAttrs(dstFile, mode, &r, options); err != nil { - dstFile.Close() - return output, err - } - dstFile.Close() - continue - } - } - - missingChunksSize += r.Size - if t == tar.TypeReg { - rawChunk := ImageSourceChunk{ - Offset: uint64(r.Offset), - Length: uint64(r.EndOffset - r.Offset), - } - file := missingFile{ - File: &toc.Entries[i], - } - missingChunks = append(missingChunks, missingChunk{ - RawChunk: rawChunk, - Files: []missingFile{ - file, - }, - }) - } - } - // There are some missing files. Prepare a multirange request for the missing chunks. - if len(missingChunks) > 0 { - missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks) - if err := retrieveMissingFiles(d, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { - return output, err - } - } - - for _, m := range hardLinks { - if err := safeLink(m.dest, m.dirfd, m.mode, m.metadata, options); err != nil { - return output, err - } - } - - if totalChunksSize > 0 { - logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingChunksSize, totalChunksSize, float32(missingChunksSize*100.0)/float32(totalChunksSize)) - } - return output, nil + return fmt.Sprintf("bad request") } diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go new file mode 100644 index 0000000000..7002d6e9ed --- /dev/null +++ b/pkg/chunked/storage_linux.go @@ -0,0 +1,874 @@ +package chunked + +import ( + archivetar "archive/tar" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sort" + "strings" + "syscall" + "time" + + storage "github.com/containers/storage" + graphdriver "github.com/containers/storage/drivers" + driversCopy "github.com/containers/storage/drivers/copy" + "github.com/containers/storage/pkg/archive" + "github.com/containers/storage/pkg/idtools" + "github.com/containers/storage/types" + "github.com/klauspost/compress/zstd" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/vbatts/tar-split/archive/tar" + "golang.org/x/sys/unix" +) + +const ( + maxNumberMissingChunks = 1024 + newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_WRONLY | unix.O_EXCL) + containersOverrideXattr = "user.containers.override_stat" + bigDataKey = "zstd-chunked-manifest" +) + +type chunkedZstdDiffer struct { + stream ImageSourceSeekable + manifest []byte + layersMetadata map[string][]zstdFileMetadata + layersTarget map[string]string +} + +func timeToTimespec(time time.Time) (ts unix.Timespec) { + if time.IsZero() { + // Return UTIME_OMIT special value + ts.Sec = 0 + ts.Nsec = ((1 << 30) - 2) + return + } + return unix.NsecToTimespec(time.UnixNano()) +} + +func copyFileContent(src, destFile, root string, dirfd int, missingDirsMode, mode os.FileMode) (*os.File, int64, error) { + st, err := os.Stat(src) + if err != nil { + return nil, -1, err + } + + copyWithFileRange, copyWithFileClone := true, true + + // If the destination file already exists, we shouldn't blow it away + dstFile, err := openFileUnderRoot(destFile, root, dirfd, newFileFlags, mode) + if err != nil { + return nil, -1, err + } + + err = driversCopy.CopyRegularToFile(src, dstFile, st, ©WithFileRange, ©WithFileClone) + if err != nil { + dstFile.Close() + return nil, -1, err + } + return dstFile, st.Size(), err +} + +func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[string]map[string]*zstdFileMetadata { + maps := make(map[string]map[string]*zstdFileMetadata) + + for layerID, v := range layersMetadata { + r := make(map[string]*zstdFileMetadata) + for i := range v { + r[v[i].Digest] = &v[i] + } + maps[layerID] = r + } + return maps +} + +func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[string]string, error) { + allLayers, err := store.Layers() + if err != nil { + return nil, nil, err + } + + layersMetadata := make(map[string][]zstdFileMetadata) + layersTarget := make(map[string]string) + for _, r := range allLayers { + manifestReader, err := store.LayerBigData(r.ID, bigDataKey) + if err != nil { + continue + } + defer manifestReader.Close() + manifest, err := ioutil.ReadAll(manifestReader) + if err != nil { + return nil, nil, err + } + var toc zstdTOC + if err := json.Unmarshal(manifest, &toc); err != nil { + continue + } + layersMetadata[r.ID] = toc.Entries + target, err := store.DifferTarget(r.ID) + if err != nil { + return nil, nil, err + } + layersTarget[r.ID] = target + } + + return layersMetadata, layersTarget, nil +} + +// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. +func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) { + if _, ok := annotations[manifestChecksumKey]; ok { + return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss) + } + return nil, errors.New("blob type not supported for partial retrieval") +} + +func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedZstdDiffer, error) { + manifest, err := readZstdChunkedManifest(iss, blobSize, annotations) + if err != nil { + return nil, err + } + layersMetadata, layersTarget, err := getLayersCache(store) + if err != nil { + return nil, err + } + + return &chunkedZstdDiffer{ + stream: iss, + manifest: manifest, + layersMetadata: layersMetadata, + layersTarget: layersTarget, + }, nil +} + +func findFileInOtherLayers(file zstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*zstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) { + // this is ugly, needs to be indexed + for layerID, checksums := range layersMetadata { + m, found := checksums[file.Digest] + if !found { + continue + } + + source, ok := layersTarget[layerID] + if !ok { + continue + } + + srcDirfd, err := unix.Open(root, unix.O_RDONLY, 0) + if err != nil { + continue + } + defer unix.Close(srcDirfd) + + srcFile, err := openFileUnderRoot(m.Name, source, srcDirfd, unix.O_RDONLY, 0) + if err != nil { + continue + } + defer srcFile.Close() + + srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFile.Fd()) + + dstFile, written, err := copyFileContent(srcPath, file.Name, root, dirfd, missingDirsMode, 0) + if err != nil { + continue + } + return dstFile, written, nil + } + return nil, 0, nil +} + +func getFileDigest(f *os.File) (digest.Digest, error) { + digester := digest.Canonical.Digester() + if _, err := io.Copy(digester.Hash(), f); err != nil { + return "", err + } + return digester.Digest(), nil +} + +// findFileOnTheHost checks whether the requested file already exist on the host and copies the file content from there if possible. +// It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different +// paths. +func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) { + sourceFile := filepath.Clean(filepath.Join("/", file.Name)) + if !strings.HasPrefix(sourceFile, "/usr/") { + // limit host deduplication to files under /usr. + return nil, 0, nil + } + + st, err := os.Stat(sourceFile) + if err != nil || !st.Mode().IsRegular() { + return nil, 0, nil + } + + if st.Size() != file.Size { + return nil, 0, nil + } + + fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0) + if err != nil { + return nil, 0, nil + } + + f := os.NewFile(uintptr(fd), "fd") + defer f.Close() + + manifestChecksum, err := digest.Parse(file.Digest) + if err != nil { + return nil, 0, err + } + + checksum, err := getFileDigest(f) + if err != nil { + return nil, 0, err + } + + if checksum != manifestChecksum { + return nil, 0, nil + } + + dstFile, written, err := copyFileContent(fmt.Sprintf("/proc/self/fd/%d", fd), file.Name, root, dirfd, missingDirsMode, 0) + if err != nil { + return nil, 0, nil + } + + // calculate the checksum again to make sure the file wasn't modified while it was copied + if _, err := f.Seek(0, 0); err != nil { + return nil, 0, err + } + checksum, err = getFileDigest(f) + if err != nil { + return nil, 0, err + } + if checksum != manifestChecksum { + return nil, 0, nil + } + return dstFile, written, nil +} + +func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) error { + if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 { + return nil + } + + idMappings := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) + + for i := range manifest { + if options.ChownOpts != nil { + manifest[i].UID = options.ChownOpts.UID + manifest[i].GID = options.ChownOpts.GID + } else { + pair := idtools.IDPair{ + UID: manifest[i].UID, + GID: manifest[i].GID, + } + var err error + manifest[i].UID, manifest[i].GID, err = idMappings.ToContainer(pair) + if err != nil { + return err + } + } + } + return nil +} + +type missingFile struct { + File *zstdFileMetadata + Gap int64 +} + +func (m missingFile) Length() int64 { + return m.File.EndOffset - m.File.Offset +} + +type missingChunk struct { + RawChunk ImageSourceChunk + Files []missingFile +} + +func setFileAttrs(file *os.File, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + if file == nil || file.Fd() < 0 { + return errors.Errorf("invalid file") + } + fd := int(file.Fd()) + + t, err := typeToTarType(metadata.Type) + if err != nil { + return err + } + if t == tar.TypeSymlink { + return nil + } + + if err := unix.Fchown(fd, metadata.UID, metadata.GID); err != nil { + if !options.IgnoreChownErrors { + return err + } + } + + for k, v := range metadata.Xattrs { + data, err := base64.StdEncoding.DecodeString(v) + if err != nil { + return err + } + if err := unix.Fsetxattr(fd, k, data, 0); err != nil { + return err + } + } + + ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)} + if err := unix.UtimesNanoAt(fd, "", ts, 0); err != nil && errors.Is(err, unix.ENOSYS) { + return err + } + + if err := unix.Fchmod(fd, uint32(mode)); err != nil { + return err + } + return nil +} + +func openFileUnderRoot(name, root string, dirfd int, flags uint64, mode os.FileMode) (*os.File, error) { + how := unix.OpenHow{ + Flags: flags, + Mode: uint64(mode & 07777), + Resolve: unix.RESOLVE_IN_ROOT, + } + + fd, err := unix.Openat2(dirfd, name, &how) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(fd), name), nil +} + +func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) (err error) { + file, err := openFileUnderRoot(metadata.Name, dest, dirfd, newFileFlags, 0) + if err != nil { + return err + } + defer func() { + err2 := file.Close() + if err == nil { + err = err2 + } + }() + + z, err := zstd.NewReader(reader) + if err != nil { + return err + } + defer z.Close() + + digester := digest.Canonical.Digester() + checksum := digester.Hash() + _, err = z.WriteTo(io.MultiWriter(file, checksum)) + if err != nil { + return err + } + manifestChecksum, err := digest.Parse(metadata.Digest) + if err != nil { + return err + } + if digester.Digest() != manifestChecksum { + return fmt.Errorf("checksum mismatch for %q", dest) + } + return setFileAttrs(file, mode, metadata, options) +} + +func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { + for mc := 0; ; mc++ { + var part io.ReadCloser + select { + case p := <-streams: + part = p + case err := <-errs: + return err + } + if part == nil { + if mc == len(missingChunks) { + break + } + return errors.Errorf("invalid stream returned %d %d", mc, len(missingChunks)) + } + if mc == len(missingChunks) { + return errors.Errorf("too many chunks returned") + } + + for _, mf := range missingChunks[mc].Files { + if mf.Gap > 0 { + limitReader := io.LimitReader(part, mf.Gap) + _, err := io.Copy(ioutil.Discard, limitReader) + if err != nil { + return err + } + continue + } + + limitReader := io.LimitReader(part, mf.Length()) + + if err := createFileFromZstdStream(dest, dirfd, limitReader, missingDirsMode, os.FileMode(mf.File.Mode), mf.File, options); err != nil { + part.Close() + return err + } + } + part.Close() + } + return nil +} + +func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk { + if len(missingChunks) <= target { + return missingChunks + } + + getGap := func(missingChunks []missingChunk, i int) int { + prev := missingChunks[i-1].RawChunk.Offset + missingChunks[i-1].RawChunk.Length + return int(missingChunks[i].RawChunk.Offset - prev) + } + + // this implementation doesn't account for duplicates, so it could merge + // more than necessary to reach the specified target. Since target itself + // is a heuristic value, it doesn't matter. + var gaps []int + for i := 1; i < len(missingChunks); i++ { + gaps = append(gaps, getGap(missingChunks, i)) + } + sort.Ints(gaps) + + toShrink := len(missingChunks) - target + targetValue := gaps[toShrink-1] + + newMissingChunks := missingChunks[0:1] + for i := 1; i < len(missingChunks); i++ { + gap := getGap(missingChunks, i) + if gap > targetValue { + newMissingChunks = append(newMissingChunks, missingChunks[i]) + } else { + prev := &newMissingChunks[len(newMissingChunks)-1] + gapFile := missingFile{ + Gap: int64(gap), + } + prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length + prev.Files = append(append(prev.Files, gapFile), missingChunks[i].Files...) + } + } + + return newMissingChunks +} + +func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { + var chunksToRequest []ImageSourceChunk + for _, c := range missingChunks { + chunksToRequest = append(chunksToRequest, c.RawChunk) + } + + // There are some missing files. Prepare a multirange request for the missing chunks. + var streams chan io.ReadCloser + var err error + var errs chan error + for { + streams, errs, err = input.stream.GetBlobAt(chunksToRequest) + if err == nil { + break + } + + if _, ok := err.(ErrBadRequest); ok { + requested := len(missingChunks) + // If the server cannot handle at least 64 chunks in a single request, just give up. + if requested < 64 { + return err + } + + // Merge more chunks to request + missingChunks = mergeMissingChunks(missingChunks, requested/2) + continue + } + return err + } + + if err := storeMissingFiles(streams, errs, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { + return err + } + return nil +} + +func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + parent := filepath.Dir(metadata.Name) + base := filepath.Base(metadata.Name) + + parentFd := dirfd + if parent != "." { + parentFile, err := openFileUnderRoot(parent, target, dirfd, unix.O_DIRECTORY|unix.O_PATH|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer parentFile.Close() + parentFd = int(parentFile.Fd()) + } + + if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil { + if !os.IsExist(err) { + return err + } + } + + file, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer file.Close() + + return setFileAttrs(file, mode, metadata, options) +} + +func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + sourceFile, err := openFileUnderRoot(metadata.Linkname, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer sourceFile.Close() + + destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) + destDirFd := dirfd + if destDir != "." { + f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + destDirFd = int(f.Fd()) + } + + err = unix.Linkat(int(sourceFile.Fd()), "", destDirFd, destBase, unix.AT_EMPTY_PATH) + if err != nil { + return err + } + + newFile, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_WRONLY, 0) + if err != nil { + return err + } + defer newFile.Close() + + return setFileAttrs(newFile, mode, metadata, options) +} + +func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { + destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) + destDirFd := dirfd + if destDir != "." { + f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + destDirFd = int(f.Fd()) + } + + return unix.Symlinkat(metadata.Linkname, destDirFd, destBase) +} + +type whiteoutHandler struct { + Dirfd int + Root string +} + +func (d whiteoutHandler) Setxattr(path, name string, value []byte) error { + file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer file.Close() + + return unix.Fsetxattr(int(file.Fd()), name, value, 0) +} + +func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error { + dir := filepath.Dir(path) + base := filepath.Base(path) + + dirfd := d.Dirfd + if dir != "" { + dir, err := openFileUnderRoot(dir, d.Root, d.Dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer dir.Close() + + dirfd = int(dir.Fd()) + } + + return unix.Mknodat(dirfd, base, mode, dev) +} + +func checkChownErr(err error, name string, uid, gid int) error { + if errors.Is(err, syscall.EINVAL) { + return errors.Wrapf(err, "potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid", uid, gid, name) + } + return err +} + +func (d whiteoutHandler) Chown(path string, uid, gid int) error { + file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_PATH, 0) + if err != nil { + return err + } + defer file.Close() + + if err := unix.Fchownat(int(file.Fd()), "", uid, gid, unix.AT_EMPTY_PATH); err != nil { + var stat unix.Stat_t + if unix.Fstat(int(file.Fd()), &stat) == nil { + if stat.Uid == uint32(uid) && stat.Gid == uint32(gid) { + return nil + } + } + return checkChownErr(err, path, uid, gid) + } + return nil +} + +type hardLinkToCreate struct { + dest string + dirfd int + mode os.FileMode + metadata *zstdFileMetadata +} + +func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) { + bigData := map[string][]byte{ + bigDataKey: d.manifest, + } + output := graphdriver.DriverWithDifferOutput{ + Differ: d, + BigData: bigData, + } + + storeOpts, err := types.DefaultStoreOptionsAutoDetectUID() + if err != nil { + return output, err + } + + enableHostDedup := false + if value := storeOpts.PullOptions["enable_host_deduplication"]; strings.ToLower(value) == "true" { + enableHostDedup = true + } + + // Generate the manifest + var toc zstdTOC + if err := json.Unmarshal(d.manifest, &toc); err != nil { + return output, err + } + + whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData) + + var missingChunks []missingChunk + var mergedEntries []zstdFileMetadata + + if err := maybeDoIDRemap(toc.Entries, options); err != nil { + return output, err + } + + for _, e := range toc.Entries { + if e.Type == TypeChunk { + l := len(mergedEntries) + if l == 0 || mergedEntries[l-1].Type != TypeReg { + return output, errors.New("chunk type without a regular file") + } + mergedEntries[l-1].EndOffset = e.EndOffset + continue + } + mergedEntries = append(mergedEntries, e) + } + + if options.ForceMask != nil { + uid, gid, mode, err := archive.GetFileOwner(dest) + if err == nil { + value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode) + if err := unix.Setxattr(dest, containersOverrideXattr, []byte(value), 0); err != nil { + return output, err + } + } + } + + dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH, 0) + if err != nil { + return output, err + } + defer unix.Close(dirfd) + + otherLayersCache := prepareOtherLayersCache(d.layersMetadata) + + missingDirsMode := os.FileMode(0700) + if options.ForceMask != nil { + missingDirsMode = *options.ForceMask + } + + // hardlinks can point to missing files. So create them after all files + // are retrieved + var hardLinks []hardLinkToCreate + + missingChunksSize, totalChunksSize := int64(0), int64(0) + for i, r := range mergedEntries { + if options.ForceMask != nil { + value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&07777) + r.Xattrs[containersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value)) + r.Mode = int64(*options.ForceMask) + } + + mode := os.FileMode(r.Mode) + + r.Name = filepath.Clean(r.Name) + r.Linkname = filepath.Clean(r.Linkname) + + t, err := typeToTarType(r.Type) + if err != nil { + return output, err + } + if whiteoutConverter != nil { + hdr := archivetar.Header{ + Typeflag: t, + Name: r.Name, + Linkname: r.Linkname, + Size: r.Size, + Mode: r.Mode, + Uid: r.UID, + Gid: r.GID, + } + handler := whiteoutHandler{ + Dirfd: dirfd, + Root: dest, + } + writeFile, err := whiteoutConverter.ConvertReadWithHandler(&hdr, r.Name, &handler) + if err != nil { + return output, err + } + if !writeFile { + continue + } + } + switch t { + case tar.TypeReg: + // Create directly empty files. + if r.Size == 0 { + // Used to have a scope for cleanup. + createEmptyFile := func() error { + file, err := openFileUnderRoot(r.Name, dest, dirfd, newFileFlags, 0) + if err != nil { + return err + } + defer file.Close() + if err := setFileAttrs(file, mode, &r, options); err != nil { + return err + } + return nil + } + if err := createEmptyFile(); err != nil { + return output, err + } + continue + } + + case tar.TypeDir: + if err := safeMkdir(dest, dirfd, mode, &r, options); err != nil { + return output, err + } + continue + + case tar.TypeLink: + dest := dest + dirfd := dirfd + mode := mode + r := r + hardLinks = append(hardLinks, hardLinkToCreate{ + dest: dest, + dirfd: dirfd, + mode: mode, + metadata: &r, + }) + continue + + case tar.TypeSymlink: + if err := safeSymlink(dest, dirfd, mode, &r, options); err != nil { + return output, err + } + continue + + case tar.TypeChar: + case tar.TypeBlock: + case tar.TypeFifo: + /* Ignore. */ + default: + return output, fmt.Errorf("invalid type %q", t) + } + + totalChunksSize += r.Size + + dstFile, _, err := findFileInOtherLayers(r, dest, dirfd, otherLayersCache, d.layersTarget, missingDirsMode) + if err != nil { + return output, err + } + if dstFile != nil { + if err := setFileAttrs(dstFile, mode, &r, options); err != nil { + dstFile.Close() + return output, err + } + dstFile.Close() + continue + } + + if enableHostDedup { + dstFile, _, err = findFileOnTheHost(r, dest, dirfd, missingDirsMode) + if err != nil { + return output, err + } + if dstFile != nil { + if err := setFileAttrs(dstFile, mode, &r, options); err != nil { + dstFile.Close() + return output, err + } + dstFile.Close() + continue + } + } + + missingChunksSize += r.Size + if t == tar.TypeReg { + rawChunk := ImageSourceChunk{ + Offset: uint64(r.Offset), + Length: uint64(r.EndOffset - r.Offset), + } + file := missingFile{ + File: &toc.Entries[i], + } + missingChunks = append(missingChunks, missingChunk{ + RawChunk: rawChunk, + Files: []missingFile{ + file, + }, + }) + } + } + // There are some missing files. Prepare a multirange request for the missing chunks. + if len(missingChunks) > 0 { + missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks) + if err := retrieveMissingFiles(d, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { + return output, err + } + } + + for _, m := range hardLinks { + if err := safeLink(m.dest, m.dirfd, m.mode, m.metadata, options); err != nil { + return output, err + } + } + + if totalChunksSize > 0 { + logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingChunksSize, totalChunksSize, float32(missingChunksSize*100.0)/float32(totalChunksSize)) + } + return output, nil +} diff --git a/pkg/chunked/storage_unsupported.go b/pkg/chunked/storage_unsupported.go new file mode 100644 index 0000000000..3a406ba786 --- /dev/null +++ b/pkg/chunked/storage_unsupported.go @@ -0,0 +1,16 @@ +// +build !linux + +package chunked + +import ( + "context" + + storage "github.com/containers/storage" + graphdriver "github.com/containers/storage/drivers" + "github.com/pkg/errors" +) + +// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. +func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) { + return nil, errors.New("format not supported on this architecture") +} diff --git a/pkg/chunked/zstdchunked_test.go b/pkg/chunked/zstdchunked_test.go index 59892a3f4f..a26eb19f06 100644 --- a/pkg/chunked/zstdchunked_test.go +++ b/pkg/chunked/zstdchunked_test.go @@ -1,3 +1,5 @@ +// +build linux + package chunked import (