Skip to content

Commit 8fc8f0b

Browse files
committed
introduce a blob manager which handles chunk downloads
Signed-off-by: Soule BA <bah.soule@gmail.com>
1 parent f6d0201 commit 8fc8f0b

File tree

3 files changed

+392
-183
lines changed

3 files changed

+392
-183
lines changed

oci/client/client.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,13 @@ import (
2121

2222
"github.com/google/go-containerregistry/pkg/crane"
2323
"github.com/google/go-containerregistry/pkg/v1/remote"
24-
"github.com/hashicorp/go-retryablehttp"
2524

2625
"github.com/fluxcd/pkg/oci"
2726
)
2827

2928
// Client holds the options for accessing remote OCI registries.
3029
type Client struct {
31-
options []crane.Option
32-
httpClient *retryablehttp.Client
30+
options []crane.Option
3331
}
3432

3533
// NewClient returns an OCI client configured with the given crane options.

oci/client/download.go

Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
/*
2+
Copyright 2024 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package client
18+
19+
import (
20+
"context"
21+
"crypto/sha256"
22+
"errors"
23+
"fmt"
24+
"io"
25+
"net/http"
26+
"net/url"
27+
"os"
28+
"syscall"
29+
"time"
30+
31+
"github.com/google/go-containerregistry/pkg/authn"
32+
"github.com/google/go-containerregistry/pkg/name"
33+
v1 "github.com/google/go-containerregistry/pkg/v1"
34+
"github.com/google/go-containerregistry/pkg/v1/remote"
35+
"github.com/google/go-containerregistry/pkg/v1/remote/transport"
36+
"github.com/hashicorp/go-retryablehttp"
37+
"golang.org/x/sync/errgroup"
38+
)
39+
40+
// Sizing parameters used when a blob is split into byte ranges.
const (
	// minChunkSize is the lower bound applied to the computed chunk size (100MB).
	minChunkSize = 100 << 20
	// maxChunkSize is the upper bound applied to the computed chunk size (1GB).
	maxChunkSize = 1024 * 1024 * 1024
	// defaultNumberOfChunks is the chunk count used when no option overrides it.
	defaultNumberOfChunks = 50
)
45+
46+
// Sentinel errors used by the chunked downloader.
var (
	// errRangeRequestNotSupported is returned when the registry does not support range requests.
	errRangeRequestNotSupported = errors.New("range requests are not supported by the registry")
	// errCopyFailed is the sentinel matched with errors.Is by the chunk retry
	// loop to decide that a chunk download should be attempted again.
	errCopyFailed = errors.New("copy failed")
)
51+
52+
var (
53+
retries = 3
54+
defaultRetryBackoff = remote.Backoff{
55+
Duration: 1.0 * time.Second,
56+
Factor: 3.0,
57+
Jitter: 0.1,
58+
Steps: retries,
59+
}
60+
)
61+
62+
type downloadOption func(*downloadOptions)
63+
64+
type downloadOptions struct {
65+
transport http.RoundTripper
66+
auth authn.Authenticator
67+
keychain authn.Keychain
68+
numberOfChunks int
69+
}
70+
71+
type blobManager struct {
72+
name name.Reference
73+
c *retryablehttp.Client
74+
layer v1.Layer
75+
path string
76+
digest v1.Hash
77+
size int64
78+
downloadOptions
79+
}
80+
81+
func withTransport(t http.RoundTripper) downloadOption {
82+
return func(o *downloadOptions) {
83+
o.transport = t
84+
}
85+
}
86+
87+
func withAuth(auth authn.Authenticator) downloadOption {
88+
return func(o *downloadOptions) {
89+
o.auth = auth
90+
}
91+
}
92+
93+
func withKeychain(k authn.Keychain) downloadOption {
94+
return func(o *downloadOptions) {
95+
o.keychain = k
96+
}
97+
}
98+
99+
func withNumberOfChunks(n int) downloadOption {
100+
return func(o *downloadOptions) {
101+
o.numberOfChunks = n
102+
}
103+
}
104+
105+
type chunk struct {
106+
n int
107+
offset int64
108+
size int64
109+
writeCounter
110+
}
111+
112+
func makeChunk(n int, offset, size int64) *chunk {
113+
return &chunk{
114+
n: n,
115+
offset: offset,
116+
size: size,
117+
writeCounter: writeCounter{},
118+
}
119+
}
120+
121+
// newDownloader returns a new blobManager with the given options.
122+
func newDownloader(name name.Reference, path string, layer v1.Layer, opts ...downloadOption) *blobManager {
123+
o := &downloadOptions{
124+
numberOfChunks: defaultNumberOfChunks,
125+
keychain: authn.DefaultKeychain,
126+
transport: remote.DefaultTransport.(*http.Transport).Clone(),
127+
}
128+
d := &blobManager{
129+
layer: layer,
130+
name: name,
131+
path: path,
132+
downloadOptions: *o,
133+
}
134+
for _, opt := range opts {
135+
opt(&d.downloadOptions)
136+
}
137+
138+
return d
139+
}
140+
141+
func (d *blobManager) download(ctx context.Context) error {
142+
digest, err := d.layer.Digest()
143+
if err != nil {
144+
return fmt.Errorf("failed to get layer digest: %w", err)
145+
}
146+
d.digest = digest
147+
148+
size, err := d.layer.Size()
149+
if err != nil {
150+
return fmt.Errorf("failed to get layer size: %w", err)
151+
}
152+
d.size = size
153+
154+
if d.c == nil {
155+
h, err := makeHttpClient(ctx, d.name.Context(), &d.downloadOptions)
156+
if err != nil {
157+
return fmt.Errorf("failed to create HTTP client: %w", err)
158+
}
159+
d.c = h
160+
}
161+
162+
ok, err := d.isRangeRequestEnabled(ctx)
163+
if err != nil {
164+
return fmt.Errorf("failed to check range request support: %w", err)
165+
}
166+
167+
if !ok {
168+
return errRangeRequestNotSupported
169+
}
170+
171+
if err := d.downloadChunks(ctx); err != nil {
172+
return fmt.Errorf("failed to download layer in chunks: %w", err)
173+
}
174+
175+
if err := d.verifyDigest(); err != nil {
176+
return fmt.Errorf("failed to verify layer digest: %w", err)
177+
}
178+
179+
return nil
180+
}
181+
182+
func (d *blobManager) downloadChunks(ctx context.Context) error {
183+
u := makeUrl(d.name, d.digest)
184+
185+
file, err := os.OpenFile(d.path+".tmp", os.O_CREATE|os.O_WRONLY, 0644)
186+
if err != nil {
187+
return fmt.Errorf("failed to create layer file: %w", err)
188+
}
189+
defer file.Close()
190+
191+
chunkSize := d.size / int64(d.numberOfChunks)
192+
if chunkSize < minChunkSize {
193+
chunkSize = minChunkSize
194+
} else if chunkSize > maxChunkSize {
195+
chunkSize = maxChunkSize
196+
}
197+
198+
var (
199+
chunks []*chunk
200+
n int
201+
)
202+
203+
for offset := int64(0); offset < d.size; offset += chunkSize {
204+
if offset+chunkSize > d.size {
205+
chunkSize = d.size - offset
206+
}
207+
chunk := makeChunk(n, offset, chunkSize)
208+
chunks = append(chunks, chunk)
209+
n++
210+
}
211+
212+
g, ctx := errgroup.WithContext(ctx)
213+
g.SetLimit(d.numberOfChunks)
214+
for _, chunk := range chunks {
215+
chunk := chunk
216+
g.Go(func() error {
217+
b := defaultRetryBackoff
218+
for i := 0; i < retries; i++ {
219+
w := io.NewOffsetWriter(file, chunk.offset)
220+
err := chunk.download(ctx, d.c, w, u)
221+
switch {
222+
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
223+
return err
224+
case errors.Is(err, errCopyFailed):
225+
time.Sleep(b.Step())
226+
continue
227+
default:
228+
return nil
229+
}
230+
}
231+
return fmt.Errorf("failed to download chunk %d: %w", n, err)
232+
})
233+
}
234+
235+
err = g.Wait()
236+
if err != nil {
237+
return fmt.Errorf("failed to download layer in chunks: %w", err)
238+
}
239+
240+
if err := os.Rename(file.Name(), d.path); err != nil {
241+
return err
242+
}
243+
244+
return nil
245+
246+
}
247+
248+
func (c *chunk) download(ctx context.Context, client *retryablehttp.Client, w io.Writer, u url.URL) error {
249+
req, err := retryablehttp.NewRequest(http.MethodGet, u.String(), nil)
250+
if err != nil {
251+
return err
252+
}
253+
254+
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", c.offset, c.offset+c.size-1))
255+
resp, err := client.Do(req.WithContext(ctx))
256+
if err != nil {
257+
return err
258+
}
259+
260+
if err := transport.CheckError(resp, http.StatusPartialContent); err != nil {
261+
return err
262+
}
263+
264+
_, err = io.Copy(w, io.TeeReader(resp.Body, &c.writeCounter))
265+
if err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, io.ErrUnexpectedEOF) {
266+
// TODO: if the download was interrupted, we can resume it
267+
return fmt.Errorf("failed to download chunk %d: %w", c.n, err)
268+
}
269+
270+
return err
271+
}
272+
273+
func (d *blobManager) isRangeRequestEnabled(ctx context.Context) (bool, error) {
274+
u := makeUrl(d.name, d.digest)
275+
req, err := retryablehttp.NewRequest(http.MethodHead, u.String(), nil)
276+
if err != nil {
277+
return false, err
278+
}
279+
280+
resp, err := d.c.Do(req.WithContext(ctx))
281+
if err != nil {
282+
return false, err
283+
}
284+
285+
if err := transport.CheckError(resp, http.StatusOK); err != nil {
286+
return false, err
287+
}
288+
289+
if rangeUnit := resp.Header.Get("Accept-Ranges"); rangeUnit == "bytes" {
290+
return true, nil
291+
}
292+
293+
return false, nil
294+
}
295+
296+
func (d *blobManager) verifyDigest() error {
297+
f, err := os.Open(d.path)
298+
if err != nil {
299+
return fmt.Errorf("failed to open layer file: %w", err)
300+
}
301+
defer f.Close()
302+
303+
h := sha256.New()
304+
_, err = io.Copy(h, f)
305+
if err != nil {
306+
return fmt.Errorf("failed to hash layer: %w", err)
307+
}
308+
309+
newDigest := h.Sum(nil)
310+
if d.digest.String() != fmt.Sprintf("sha256:%x", newDigest) {
311+
return fmt.Errorf("layer digest does not match: %s != sha256:%x", d.digest.String(), newDigest)
312+
}
313+
return nil
314+
}
315+
316+
func makeUrl(name name.Reference, digest v1.Hash) url.URL {
317+
return url.URL{
318+
Scheme: name.Context().Scheme(),
319+
Host: name.Context().RegistryStr(),
320+
Path: fmt.Sprintf("/v2/%s/blobs/%s", name.Context().RepositoryStr(), digest.String()),
321+
}
322+
}
323+
324+
type resource interface {
325+
Scheme() string
326+
RegistryStr() string
327+
Scope(string) string
328+
329+
authn.Resource
330+
}
331+
332+
func makeHttpClient(ctx context.Context, target resource, o *downloadOptions) (*retryablehttp.Client, error) {
333+
auth := o.auth
334+
if o.keychain != nil {
335+
kauth, err := o.keychain.Resolve(target)
336+
if err != nil {
337+
return nil, err
338+
}
339+
auth = kauth
340+
}
341+
342+
reg, ok := target.(name.Registry)
343+
if !ok {
344+
repo, ok := target.(name.Repository)
345+
if !ok {
346+
return nil, fmt.Errorf("unexpected resource: %T", target)
347+
}
348+
reg = repo.Registry
349+
}
350+
351+
tr, err := transport.NewWithContext(ctx, reg, auth, o.transport, []string{target.Scope(transport.PullScope)})
352+
if err != nil {
353+
return nil, err
354+
}
355+
356+
h := retryablehttp.NewClient()
357+
h.HTTPClient = &http.Client{Transport: tr}
358+
return h, nil
359+
}

0 commit comments

Comments
 (0)