From 616c407f9248ca915ad586d7a8c293cf3ab79785 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Wed, 27 Aug 2025 07:55:03 +0300 Subject: [PATCH 1/3] test: block put/get with escaped characters --- pkg/block/blocktest/basic_suite.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/block/blocktest/basic_suite.go b/pkg/block/blocktest/basic_suite.go index 9e8e85c9dbc..62096499644 100644 --- a/pkg/block/blocktest/basic_suite.go +++ b/pkg/block/blocktest/basic_suite.go @@ -32,6 +32,8 @@ func testAdapterPutGet(t *testing.T, adapter block.Adapter, storageNamespace, ex }{ {"identifier_relative", block.IdentifierTypeRelative, "test_file"}, {"identifier_full", block.IdentifierTypeFull, externalPath + "/" + "test_file"}, + {"identifier_relative_escaped", block.IdentifierTypeRelative, "special%3Atest_file"}, + {"identifier_full_escaped", block.IdentifierTypeFull, externalPath + "/" + "special%3Atest_file"}, {"identifier_unknown_relative", block.IdentifierTypeUnknownDeprecated, "test_file"}, //nolint:staticcheck {"identifier_unknown_full", block.IdentifierTypeUnknownDeprecated, externalPath + "/" + "test_file"}, //nolint:staticcheck } From 6d827e3b60e55cfb9d55283c0f49969145b712ea Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Wed, 27 Aug 2025 14:28:25 +0300 Subject: [PATCH 2/3] Update storage URI handling to use RawPathFromURI for consistency. Use parsed URI's RawPath with fallback to Path. --- pkg/block/azure/walker.go | 3 ++- pkg/block/gs/walker.go | 3 ++- pkg/block/mem/walker.go | 3 ++- pkg/block/namespace.go | 15 ++++++++++++--- pkg/block/namespace_test.go | 22 ++++++++++++++++++---- pkg/block/s3/walker.go | 3 ++- 6 files changed, 38 insertions(+), 11 deletions(-) diff --git a/pkg/block/azure/walker.go b/pkg/block/azure/walker.go index f24d2376a0e..8886b466a9e 100644 --- a/pkg/block/azure/walker.go +++ b/pkg/block/azure/walker.go @@ -21,7 +21,8 @@ var ErrAzureInvalidURL = errors.New("invalid Azure storage URL") // extractAzurePrefix takes a URL that looks like this: https://storageaccount.blob.core.windows.net/container/prefix // and return the URL for the container and a prefix, if one exists func extractAzurePrefix(storageURI *url.URL) (*url.URL, string, error) { - path := strings.TrimLeft(storageURI.Path, "/") + rawPath := block.RawPathFromURI(storageURI) + path := strings.TrimLeft(rawPath, "/") if len(path) == 0 { return nil, "", fmt.Errorf("%w: could not parse container URL: %s", ErrAzureInvalidURL, storageURI) } diff --git a/pkg/block/gs/walker.go b/pkg/block/gs/walker.go index 5a50741e7b1..67d24b279f0 100644 --- a/pkg/block/gs/walker.go +++ b/pkg/block/gs/walker.go @@ -23,7 +23,8 @@ func NewGCSWalker(client *storage.Client) *GCSWalker { } func (w *GCSWalker) Walk(ctx context.Context, storageURI *url.URL, op block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error { - prefix := strings.TrimLeft(storageURI.Path, "/") + rawPath := block.RawPathFromURI(storageURI) + prefix := strings.TrimLeft(rawPath, "/") var basePath string if idx := strings.LastIndex(prefix, "/"); idx != -1 { basePath = prefix[:idx+1] diff --git a/pkg/block/mem/walker.go b/pkg/block/mem/walker.go index 750946de7f6..3e07acd58b5 100644 --- a/pkg/block/mem/walker.go +++ b/pkg/block/mem/walker.go @@ -32,8 +32,9 @@ func (w *Walker) Walk(_ context.Context, storageURI *url.URL, op block.WalkOptio defer w.adapter.mutex.RUnlock() // Extract the prefix from the storageURI + rawPath := block.RawPathFromURI(storageURI) const schemePrefix = block.BlockstoreTypeMem + "://" - prefix := schemePrefix + storageURI.Host + "/" + strings.TrimLeft(storageURI.Path, "/") + prefix := schemePrefix + storageURI.Host + "/" + strings.TrimLeft(rawPath, "/") // basePath is the path relative to which the walk is done var basePath string diff --git a/pkg/block/namespace.go b/pkg/block/namespace.go index 24e921103a8..d9099132321 100644 --- a/pkg/block/namespace.go +++ b/pkg/block/namespace.go @@ -145,13 +145,22 @@ func resolveFull(key string) (CommonQualifiedKey, error) { if err != nil { return CommonQualifiedKey{}, err } + + rawPath := RawPathFromURI(parsedKey) return CommonQualifiedKey{ StorageType: storageType, StorageNamespace: parsedKey.Host, - Key: formatPathWithNamespace("", parsedKey.Path), + Key: formatPathWithNamespace("", rawPath), }, nil } +func RawPathFromURI(u *url.URL) string { + if u.RawPath != "" { + return u.RawPath + } + return u.Path +} + func resolveRelative(defaultNamespace, key string) (CommonQualifiedKey, error) { // is not fully qualified, treat as key only // if we don't have a trailing slash for the namespace, add it. @@ -164,9 +173,10 @@ func resolveRelative(defaultNamespace, key string) (CommonQualifiedKey, error) { return CommonQualifiedKey{}, fmt.Errorf("no storage type for %s: %w", parsedNS, err) } + rawPath := RawPathFromURI(parsedNS) return CommonQualifiedKey{ StorageType: storageType, - StorageNamespace: strings.TrimSuffix(parsedNS.Host+parsedNS.Path, "/"), + StorageNamespace: strings.TrimSuffix(parsedNS.Host+rawPath, "/"), Key: key, }, nil } @@ -176,7 +186,6 @@ func resolveNamespaceUnknown(defaultNamespace, key string) (CommonQualifiedKey, if qk, err := resolveFull(key); err == nil { return qk, nil } - // else, treat it as a relative path return resolveRelative(defaultNamespace, key) } diff --git a/pkg/block/namespace_test.go b/pkg/block/namespace_test.go index 941c3fe2874..d9b69aa0bf8 100644 --- a/pkg/block/namespace_test.go +++ b/pkg/block/namespace_test.go @@ -3,9 +3,9 @@ package block_test import ( "errors" "fmt" - "reflect" "testing" + "github.com/go-test/deep" "github.com/treeverse/lakefs/pkg/block" ) @@ -102,6 +102,18 @@ func TestResolveNamespace(t *testing.T) { Key: "bar/baz", }, }, + { + Name: "valid_fq_key_encoded", + DefaultNamespace: "mem://foo/", + Key: "s3://example/bar/foo%3Abaz", + Type: block.IdentifierTypeFull, + ExpectedErr: nil, + Expected: block.CommonQualifiedKey{ + StorageType: block.StorageTypeS3, + StorageNamespace: "example", + Key: "bar/foo%3Abaz", + }, + }, { Name: "invalid_namespace_wrong_scheme", DefaultNamespace: "memzzzz://foo/", @@ -153,10 +165,12 @@ func TestResolveNamespace(t *testing.T) { t.Run(fmt.Sprintf("%s/%s", cas.Name, relativeName), func(t *testing.T) { resolved, err := block.DefaultResolveNamespace(cas.DefaultNamespace, cas.Key, r) if err != nil && !errors.Is(err, cas.ExpectedErr) { - t.Fatalf("got unexpected error :%v - expected %v", err, cas.ExpectedErr) + t.Fatalf("got unexpected error: %v - expected %v", err, cas.ExpectedErr) } - if cas.ExpectedErr == nil && !reflect.DeepEqual(resolved, cas.Expected) { - t.Fatalf("expected %v got %v", cas.Expected, resolved) + if cas.ExpectedErr == nil { + if diff := deep.Equal(resolved, cas.Expected); diff != nil { + t.Fatalf("mismatch in resolved namespace: %s", diff) + } } }) } diff --git a/pkg/block/s3/walker.go b/pkg/block/s3/walker.go index 8de012c8cc2..7d9e1cb84c6 100644 --- a/pkg/block/s3/walker.go +++ b/pkg/block/s3/walker.go @@ -26,7 +26,8 @@ func NewS3Walker(client *s3.Client) *Walker { func (s *Walker) Walk(ctx context.Context, storageURI *url.URL, op block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error { var continuation *string const maxKeys = 1000 - prefix := strings.TrimLeft(storageURI.Path, "/") + rawPath := block.RawPathFromURI(storageURI) + prefix := strings.TrimLeft(rawPath, "/") // basePath is the path relative to which the walk is done. The key of the resulting entries will be relative to this path. // As the original prefix might not end with a separator, it cannot be used for the From 5aacfc70183342d9fecd7a8cdb62dd5c36fa3c7f Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Thu, 28 Aug 2025 10:33:57 +0300 Subject: [PATCH 3/3] test: check also resolve namespace with relative path encoded --- pkg/block/namespace_test.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pkg/block/namespace_test.go b/pkg/block/namespace_test.go index d9b69aa0bf8..54af9967e99 100644 --- a/pkg/block/namespace_test.go +++ b/pkg/block/namespace_test.go @@ -104,7 +104,7 @@ func TestResolveNamespace(t *testing.T) { }, { Name: "valid_fq_key_encoded", - DefaultNamespace: "mem://foo/", + DefaultNamespace: "s3://foo/", Key: "s3://example/bar/foo%3Abaz", Type: block.IdentifierTypeFull, ExpectedErr: nil, @@ -114,6 +114,18 @@ func TestResolveNamespace(t *testing.T) { Key: "bar/foo%3Abaz", }, }, + { + Name: "valid_relative_encoded", + DefaultNamespace: "s3://foo/", + Key: "bar/foo%3Abaz", + Type: block.IdentifierTypeRelative, + ExpectedErr: nil, + Expected: block.CommonQualifiedKey{ + StorageType: block.StorageTypeS3, + StorageNamespace: "foo", + Key: "bar/foo%3Abaz", + }, + }, { Name: "invalid_namespace_wrong_scheme", DefaultNamespace: "memzzzz://foo/",