Skip to content

Commit

Permalink
feat(gw): support UnixFS 1.5 on deserialized responses as Last-Modifi…
Browse files Browse the repository at this point in the history
…ed (#659)

bare minimum support for Last-Modified and If-Modified-Since
if mtime present in dag-pb
  • Loading branch information
lidel committed Aug 28, 2024
1 parent 3cd3857 commit 08f200a
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The following emojis are used to highlight certain changes:
### Changed

- `chunker` refactored to reduce overall memory use by reducing heap fragmentation [#649](https://github.com/ipfs/boxo/pull/649)
- `gateway` deserialized responses will have `Last-Modified` set to value from optional UnixFS 1.5 modification time field (if present in DAG) and a matching `If-Modified-Since` will return `304 Not Modified` (UnixFS 1.5 files only) [#659](https://github.com/ipfs/boxo/pull/659)

### Removed

Expand Down
12 changes: 12 additions & 0 deletions gateway/backend_blocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ func (bb *BlocksBackend) Get(ctx context.Context, path path.ImmutablePath, range
return md, nil, err
}

// Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5)
mtime := f.ModTime()
if !mtime.IsZero() {
md.ModTime = mtime
}

if d, ok := f.(files.Directory); ok {
dir, err := uio.NewDirectoryFromNode(bb.dagService, nd)
if err != nil {
Expand Down Expand Up @@ -231,6 +237,12 @@ func (bb *BlocksBackend) Head(ctx context.Context, path path.ImmutablePath) (Con
return ContentPathMetadata{}, nil, err
}

// Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5)
mtime := fileNode.ModTime()
if !mtime.IsZero() {
md.ModTime = mtime
}

sz, err := fileNode.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
Expand Down
3 changes: 2 additions & 1 deletion gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ type ContentPathMetadata struct {
PathSegmentRoots []cid.Cid
LastSegment path.ImmutablePath
LastSegmentRemainder []string
ContentType string // Only used for UnixFS requests
ContentType string // Only used for UnixFS requests
ModTime time.Time // Optional, non-zero values may be present in UnixFS 1.5 DAGs
}

// ByteRange describes a range request within a UnixFS file. "From" and "To" mostly
Expand Down
160 changes: 160 additions & 0 deletions gateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,60 @@ func TestHeaders(t *testing.T) {
test(dagCborResponseFormat, dagCborPath)
})

// We have UnixFS1.5 tests in TestHeadersUnixFSModeModTime, here we test default behavior (DAG without modtime)
t.Run("If-Modified-Since is noop against DAG without optional UnixFS 1.5 mtime", func(t *testing.T) {
test := func(responseFormat string, path string) {
t.Run(responseFormat, func(t *testing.T) {
// Make regular request and read Last-Modified
url := ts.URL + path
req := mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
res := mustDoWithoutRedirect(t, req)
_, err := io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
lastModified := res.Header.Get("Last-Modified")
require.Empty(t, lastModified)

// Make second request with If-Modified-Since far in past and expect normal response
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", "Mon, 13 Jun 2000 22:18:32 GMT")
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
})
}

test("", dirPath)
test("text/html", dirPath)
test(carResponseFormat, dirPath)
test(rawResponseFormat, dirPath)
test(tarResponseFormat, dirPath)

test("", hamtFilePath)
test("text/html", hamtFilePath)
test(carResponseFormat, hamtFilePath)
test(rawResponseFormat, hamtFilePath)
test(tarResponseFormat, hamtFilePath)

test("", filePath)
test("text/html", filePath)
test(carResponseFormat, filePath)
test(rawResponseFormat, filePath)
test(tarResponseFormat, filePath)

test("", dagCborPath)
test("text/html", dagCborPath+"/")
test(carResponseFormat, dagCborPath)
test(rawResponseFormat, dagCborPath)
test(dagJsonResponseFormat, dagCborPath)
test(dagCborResponseFormat, dagCborPath)
})

t.Run("X-Ipfs-Roots contains expected values", func(t *testing.T) {
test := func(responseFormat string, path string, roots string) {
t.Run(responseFormat, func(t *testing.T) {
Expand Down Expand Up @@ -495,6 +549,112 @@ func TestHeaders(t *testing.T) {
})
}

// Testing a DAG with (optional) UnixFS1.5 modification time
func TestHeadersUnixFSModeModTime(t *testing.T) {
t.Parallel()

ts, _, root := newTestServerAndNode(t, "unixfs-dir-with-mode-mtime.car")
var (
rootCID = root.String() // "bafybeidbcy4u6y55gsemlubd64zk53xoxs73ifd6rieejxcr7xy46mjvky"
filePath = "/ipfs/" + rootCID + "/file1"
dirPath = "/ipfs/" + rootCID + "/dir1/"
)

t.Run("If-Modified-Since matching UnixFS 1.5 modtime returns Not Modified", func(t *testing.T) {
test := func(responseFormat string, path string, entityType string, supported bool) {
t.Run(fmt.Sprintf("%s/%s support=%t", responseFormat, entityType, supported), func(t *testing.T) {
// Make regular request and read Last-Modified
url := ts.URL + path
req := mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
res := mustDoWithoutRedirect(t, req)
_, err := io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
lastModified := res.Header.Get("Last-Modified")
if supported {
assert.NotEmpty(t, lastModified)
} else {
assert.Empty(t, lastModified)
lastModified = "Mon, 13 Jun 2022 22:18:32 GMT" // manually set value for use in next steps
}

ifModifiedSinceTime, err := time.Parse(time.RFC1123, lastModified)
require.NoError(t, err)
oneHourBefore := ifModifiedSinceTime.Add(-1 * time.Hour).Truncate(time.Second)
oneHourAfter := ifModifiedSinceTime.Add(1 * time.Hour).Truncate(time.Second)
oneHourBeforeStr := oneHourBefore.Format(time.RFC1123)
oneHourAfterStr := oneHourAfter.Format(time.RFC1123)
lastModifiedStr := ifModifiedSinceTime.Format(time.RFC1123)

// Make second request with If-Modified-Since and value read from response to first request
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", lastModifiedStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
if supported {
// 304 on exact match, can skip body
assert.Equal(t, http.StatusNotModified, res.StatusCode)
} else {
assert.Equal(t, http.StatusOK, res.StatusCode)
}

// Make third request with If-Modified-Since 1h before value read from response to first request
// and expect HTTP 200
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", oneHourBeforeStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
// always return 200 with body because mtime from unixfs is after value from If-Modified-Since
// so it counts as an update
assert.Equal(t, http.StatusOK, res.StatusCode)

// Make third request with If-Modified-Since 1h after value read from response to first request
// and expect HTTP 200
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", oneHourAfterStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
if supported {
// 304 because mtime from unixfs is before value from If-Modified-Since
// so no update, can skip body
assert.Equal(t, http.StatusNotModified, res.StatusCode)
} else {
assert.Equal(t, http.StatusOK, res.StatusCode)
}
})
}

file, dir := "file", "directory"
// supported on file-based web responses
test("", filePath, file, true)
test("text/html", filePath, file, true)

// not supported on other formats
// we may implement support for If-Modified-Since for below request types
// if users raise the need, but If-None-Match is way better
test(carResponseFormat, filePath, file, false)
test(rawResponseFormat, filePath, file, false)
test(tarResponseFormat, filePath, file, false)

test("", dirPath, dir, false)
test("text/html", dirPath, dir, false)
test(carResponseFormat, dirPath, dir, false)
test(rawResponseFormat, dirPath, dir, false)
test(tarResponseFormat, dirPath, dir, false)
})
}

func TestGoGetSupport(t *testing.T) {
ts, _, root := newTestServerAndNode(t, "fixtures.car")

Expand Down
79 changes: 75 additions & 4 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,11 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) {
return
}

// Detect when If-Modified-Since HTTP header + UnixFS 1.5 allow returning HTTP 304 Not Modified.
if i.handleIfModifiedSince(w, r, rq) {
return
}

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "", jsonResponseFormat, cborResponseFormat:
Expand Down Expand Up @@ -410,18 +415,25 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath
}

if lastMod.IsZero() {
// Otherwise, we set Last-Modified to the current time to leverage caching heuristics
// If no lastMod, set Last-Modified to the current time to leverage caching heuristics
// built into modern browsers: https://github.com/ipfs/kubo/pull/8074#pullrequestreview-645196768
modtime = time.Now()
} else {
// set Last-Modified to a meaningful value e.g. one read from dag-pb (UnixFS 1.5, mtime field)
// or the last time DNSLink / IPNS Record was modified / resoved or cache
modtime = lastMod
}

} else {
w.Header().Set("Cache-Control", immutableCacheControl)
modtime = noModtime // disable Last-Modified

// TODO: consider setting Last-Modified if UnixFS V1.5 ever gets released
// with metadata: https://github.com/ipfs/kubo/issues/6920
if lastMod.IsZero() {
// (noop) skip Last-Modified on immutable response
modtime = noModtime
} else {
// set Last-Modified to value read from dag-pb (UnixFS 1.5, mtime field)
modtime = lastMod
}
}

return modtime
Expand Down Expand Up @@ -507,6 +519,21 @@ func setIpfsRootsHeader(w http.ResponseWriter, rq *requestData, md *ContentPathM
w.Header().Set("X-Ipfs-Roots", rootCidList)
}

// lastModifiedMatch returns true if we can respond with HTTP 304 Not Modified
// It compares If-Modified-Since with logical modification time read from DAG
// (e.g. UnixFS 1.5 modtime, if present)
func lastModifiedMatch(ifModifiedSinceHeader string, lastModified time.Time) bool {
if ifModifiedSinceHeader == "" || lastModified.IsZero() {
return false
}
ifModifiedSinceTime, err := time.Parse(time.RFC1123, ifModifiedSinceHeader)
if err != nil {
return false
}
// ignoring fractional seconds (as HTTP dates don't include fractional seconds)
return !lastModified.Truncate(time.Second).After(ifModifiedSinceTime)
}

// etagMatch evaluates if we can respond with HTTP 304 Not Modified
// It supports multiple weak and strong etags passed in If-None-Match string
// including the wildcard one.
Expand Down Expand Up @@ -745,6 +772,50 @@ func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, rq *
return false
}

func (i *handler) handleIfModifiedSince(w http.ResponseWriter, r *http.Request, rq *requestData) bool {
// Detect when If-Modified-Since HTTP header allows returning HTTP 304 Not Modified
ifModifiedSince := r.Header.Get("If-Modified-Since")
if ifModifiedSince == "" {
return false
}

// Resolve path to be able to read pathMetadata.ModTime
pathMetadata, err := i.backend.ResolvePath(r.Context(), rq.immutablePath)
if err != nil {
var forwardedPath path.ImmutablePath
var continueProcessing bool
if isWebRequest(rq.responseFormat) {
forwardedPath, continueProcessing = i.handleWebRequestErrors(w, r, rq.mostlyResolvedPath(), rq.immutablePath, rq.contentPath, err, rq.logger)
if continueProcessing {
pathMetadata, err = i.backend.ResolvePath(r.Context(), forwardedPath)
}
}
if !continueProcessing || err != nil {
err = fmt.Errorf("failed to resolve %s: %w", debugStr(rq.contentPath.String()), err)
i.webError(w, r, err, http.StatusInternalServerError)
return true
}
}

// Currently we only care about optional mtime from UnixFS 1.5 (dag-pb)
// but other sources of this metadata could be added in the future
lastModified := pathMetadata.ModTime
if lastModifiedMatch(ifModifiedSince, lastModified) {
w.WriteHeader(http.StatusNotModified)
return true
}

// Check if the resolvedPath is an immutable path.
_, err = path.NewImmutablePath(pathMetadata.LastSegment)
if err != nil {
i.webError(w, r, err, http.StatusInternalServerError)
return true
}

rq.pathMetadata = &pathMetadata
return false
}

// check if request was for one of known explicit formats,
// or should use the default, implicit Web+UnixFS behaviors.
func isWebRequest(responseFormat string) bool {
Expand Down
8 changes: 8 additions & 0 deletions gateway/handler_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h

setIpfsRootsHeader(w, rq, &pathMetadata)

// On deserialized responses, we prefer Last-Modified from pathMetadata
// (mtime in UnixFS 1.5 DAG). This also applies to /ipns/, because value
// from dag-pb, if present, is more meaningful than lastMod inferred from
// namesys.
if !pathMetadata.ModTime.IsZero() {
rq.lastMod = pathMetadata.ModTime
}

resolvedPath := pathMetadata.LastSegment
switch mc.Code(resolvedPath.RootCid().Prefix().Codec) {
case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor:
Expand Down
Binary file added gateway/testdata/unixfs-dir-with-mode-mtime.car
Binary file not shown.

0 comments on commit 08f200a

Please sign in to comment.