Skip to content

Commit

Permalink
fix: url decoding in ic-certified-assets (#3767)
Browse files Browse the repository at this point in the history
Co-authored-by: Severin Siffert <[email protected]>
  • Loading branch information
raymondk and sesi200 authored Jun 3, 2024
1 parent 455de36 commit e28a878
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 66 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@ different replica version or different replica options.

It doesn't apply to `--pocketic` because PocketIC does not yet persist any data.

## Dependencies

### Frontend canister

**fix!: URL decoding follows the whatwg standard**

Previously, the frontend canister used custom logic to decode URLs.
The logic was replaced with a dependency that follows https://url.spec.whatwg.org/#percent-decode, which is what JavaScript's `new Request("https://example.com/% $").url` also uses.
This also drops support for decoding `%%` to `%`. `%` does no longer need to be encoded.

URLs that contain invalid encodings now return `400 Bad Request` instead of `500 Internal Server Error`

- Module hash: 2cc4ec4381dee231379270a08403c984986c9fc0c2eaadb64488b704a3104cc0
- https://github.com/dfinity/sdk/pull/3767

# 0.20.2

### fix: `dfx canister delete` fails
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 6 additions & 14 deletions e2e/tests-dfx/assetscanister.bash
Original file line number Diff line number Diff line change
Expand Up @@ -710,20 +710,15 @@ check_permission_failure() {
dfx canister call --query e2e_project_frontend list '(record {})'

# decode as expected
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%e6";headers=vec{};method="GET";body=vec{}})'
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%c3%a6";headers=vec{};method="GET";body=vec{}})'
assert_match "filename is an ae symbol" # candid looks like blob "filename is \c3\a6\0a"

ID=$(dfx canister id e2e_project_frontend)
PORT=$(get_webserver_port)

# fails with Err(InvalidExpressionPath)
assert_command_fail curl --fail -vv http://localhost:"$PORT"/%c3%a6?canisterId="$ID"

# fails with Err(Utf8ConversionError(FromUtf8Error { bytes: [47, 230], error: Utf8Error { valid_up_to: 1, error_len: None } }))
# fails with because %e6 is not valid utf-8 percent encoding
assert_command_fail curl --fail -vv http://localhost:"$PORT"/%e6?canisterId="$ID"
# assert_match "200 OK" "$stderr"
# assert_match "filename is an ae symbol"
assert_contains "500 Internal Server Error"
assert_contains "400 Bad Request"
}

@test "http_request percent-decodes urls" {
Expand Down Expand Up @@ -751,7 +746,7 @@ check_permission_failure() {
assert_match "contents of file with plus in filename"
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/has%25percent.txt";headers=vec{};method="GET";body=vec{}})'
assert_match "contents of file with percent in filename"
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%e6";headers=vec{};method="GET";body=vec{}})'
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%c3%a6";headers=vec{};method="GET";body=vec{}})'
assert_match "filename is an ae symbol" # candid looks like blob "filename is \c3\a6\0a"
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%25";headers=vec{};method="GET";body=vec{}})'
assert_match "filename is percent"
Expand Down Expand Up @@ -792,11 +787,8 @@ check_permission_failure() {
assert_match "contents of file with percent in filename"

assert_command_fail curl --fail -vv http://localhost:"$PORT"/%e6?canisterId="$ID"
# see https://dfinity.atlassian.net/browse/SDK-1247
# fails with Err(Utf8ConversionError(FromUtf8Error { bytes: [47, 230], error: Utf8Error { valid_up_to: 1, error_len: None } }))
assert_contains "500 Internal Server Error"
# assert_match "200 OK" "$stderr"
# assert_match "filename is an ae symbol"
# fails because %e6 is not valid utf-8 percent encoding
assert_contains "400 Bad Request"

assert_command curl --fail -vv http://localhost:"$PORT"/%25?canisterId="$ID"
assert_match "200 OK" "$stderr"
Expand Down
1 change: 1 addition & 0 deletions src/canisters/frontend/ic-certified-assets/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ serde.workspace = true
serde_bytes.workspace = true
serde_cbor.workspace = true
sha2.workspace = true
percent-encoding = "2.3.1"

[dev-dependencies]
ic-http-certification = "2.3.0"
Expand Down
27 changes: 19 additions & 8 deletions src/canisters/frontend/ic-certified-assets/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1077,21 +1077,32 @@ fn supports_max_age_headers() {

#[test]
fn check_url_decode() {
assert_eq!(url_decode("/%"), Ok("/%".to_string()));
assert_eq!(url_decode("/%%"), Ok("/%%".to_string()));
assert_eq!(url_decode("/%e%"), Ok("/%e%".to_string()));

assert_eq!(url_decode("/%20%a"), Ok("/ %a".to_string()));
assert_eq!(url_decode("/%%+a%20+%@"), Ok("/%%+a +%@".to_string()));
assert_eq!(
url_decode("/%"),
Err(UrlDecodeError::InvalidPercentEncoding)
url_decode("/has%percent.txt"),
Ok("/has%percent.txt".to_string())
);
assert_eq!(url_decode("/%%"), Ok("/%".to_string()));
assert_eq!(url_decode("/%20a"), Ok("/ a".to_string()));

assert_eq!(url_decode("/%%2"), Ok("/%%2".to_string()));
assert_eq!(url_decode("/%C3%A6"), Ok("/æ".to_string()));
assert_eq!(url_decode("/%c3%a6"), Ok("/æ".to_string()));

assert_eq!(url_decode("/a+b+c%20d"), Ok("/a+b+c d".to_string()));

assert_eq!(
url_decode("/%%+a%20+%@"),
Err(UrlDecodeError::InvalidPercentEncoding)
url_decode("/capture-d%E2%80%99e%CC%81cran-2023-10-26-a%CC%80.txt"),
Ok("/capture-d’écran-2023-10-26-à.txt".to_string())
);

assert_eq!(
url_decode("/has%percent.txt"),
url_decode("/%FF%FF"),
Err(UrlDecodeError::InvalidPercentEncoding)
);
assert_eq!(url_decode("/%e6"), Ok("/æ".to_string()));
}

#[test]
Expand Down
59 changes: 15 additions & 44 deletions src/canisters/frontend/ic-certified-assets/src/url_decode.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,6 @@
use std::fmt;

/// An iterator-like structure that decode a URL.
struct UrlDecode<'a> {
bytes: std::slice::Iter<'a, u8>,
}

fn convert_percent(iter: &mut std::slice::Iter<u8>) -> Option<u8> {
let mut cloned_iter = iter.clone();
let result = match cloned_iter.next()? {
b'%' => b'%',
h => {
let h = char::from(*h).to_digit(16)?;
let l = char::from(*cloned_iter.next()?).to_digit(16)?;
h as u8 * 0x10 + l as u8
}
};
// Update this if we make it this far, otherwise "reset" the iterator.
*iter = cloned_iter;
Some(result)
}
use percent_encoding::percent_decode_str;

#[derive(Debug, PartialEq, Eq)]
pub enum UrlDecodeError {
Expand All @@ -33,31 +15,20 @@ impl fmt::Display for UrlDecodeError {
}
}

impl<'a> Iterator for UrlDecode<'a> {
type Item = Result<char, UrlDecodeError>;

fn next(&mut self) -> Option<Self::Item> {
let b = self.bytes.next()?;
match b {
b'%' => Some(
convert_percent(&mut self.bytes)
.map(char::from)
.ok_or(UrlDecodeError::InvalidPercentEncoding),
),
b'+' => Some(Ok(' ')),
x => Some(Ok(char::from(*x))),
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
let bytes = self.bytes.len();
(bytes / 3, Some(bytes))
}
}

/// Decodes a percent encoded string according to https://url.spec.whatwg.org/#percent-decode
///
/// This is a wrapper around the percent-encoding crate.
///
/// The rules that it follow by are:
/// - Start with an empty sequence of bytes of the output
/// - Convert the input to a sequence of bytes
/// - if the byte is `%` and the next two bytes are hex, convet the hex value to a byte
/// and add it to the output, otherwise add the byte to the output
/// - convert the output byte sequence to a UTF-8 string and return it. If the conversion
/// fails return an error.
pub fn url_decode(url: &str) -> Result<String, UrlDecodeError> {
UrlDecode {
bytes: url.as_bytes().iter(),
match percent_decode_str(url).decode_utf8() {
Ok(result) => Ok(result.to_string()),
Err(_) => Err(UrlDecodeError::InvalidPercentEncoding),
}
.collect()
}
Binary file modified src/distributed/assetstorage.wasm.gz
Binary file not shown.

0 comments on commit e28a878

Please sign in to comment.