From 16e4ee40e0056438eff9ad3e694f05012a7fc8ef Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 19 Oct 2023 10:28:09 +1100 Subject: [PATCH] Enforce maximum string length BIP-173 states that a bech32 string must not exceed 90 characters. However BOLT-11 states that the string limit may be exceeded. This puts us, architecturally speaking in a conundrum - we want to support lightning but this crate pretty heavily documents itself as an implementation of BIP-173 and BIP-350. The solution we choose is to enforce the string limit in the segwit modules and not in the functions in `lib.rs`. We document this decision in the crate level docs as well as on the individual functions. FTR in `bech32 v0.9.0` the lengths were not enforced either. --- src/lib.rs | 109 ++++++++++++++++++++++++++++++++++++++- src/primitives/decode.rs | 15 +++++- src/primitives/mod.rs | 5 ++ src/segwit.rs | 85 ++++++++++++++++++++++-------- 4 files changed, 190 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9b8fcff3d..488772be2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,9 @@ //! a data part. A checksum at the end of the string provides error detection to prevent mistakes //! when the string is written off or read out loud. //! +//! Please note, so as to support lighting ([BOLT-11]) we explicitly do not do string length checks +//! in the top level API. We do however enforce the 90 character limit within the `segwit` modules. +//! //! # Usage //! //! - If you are doing segwit stuff you likely want to use the [`segwit`] API. @@ -113,6 +116,8 @@ //! //! # } //! ``` +//! +//! [BOLT-11]: #![cfg_attr(all(not(feature = "std"), not(test)), no_std)] // Experimental features we need. @@ -166,7 +171,10 @@ pub use { /// If this function succeeds the input string was found to be well formed (hrp, separator, bech32 /// characters), and to have either a valid bech32m checksum or a valid bech32 checksum. /// -/// If your input string has no checksum use the [`CheckedHrpstring`] constructor, which allows selecting the checksum algorithm explicitly. +/// If your input string has no checksum use the [`CheckedHrpstring`] constructor, which allows +/// selecting the checksum algorithm explicitly. +/// +/// Note: this function does not enforce any restrictions on the total length of the input string. /// /// # Returns /// @@ -213,6 +221,15 @@ pub fn decode(s: &str) -> Result<(Hrp, Vec), DecodeError> { /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "alloc")] #[inline] pub fn encode(hrp: &Hrp, data: &[u8]) -> Result { @@ -223,6 +240,15 @@ pub fn encode(hrp: &Hrp, data: &[u8]) -> Result +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "alloc")] #[inline] pub fn encode_lower(hrp: &Hrp, data: &[u8]) -> Result { @@ -235,6 +261,15 @@ pub fn encode_lower(hrp: &Hrp, data: &[u8]) -> Result +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "alloc")] #[inline] pub fn encode_upper(hrp: &Hrp, data: &[u8]) -> Result { @@ -247,6 +282,15 @@ pub fn encode_upper(hrp: &Hrp, data: &[u8]) -> Result +/// [BIP-350]: +/// [BOLT-11]: #[inline] pub fn encode_to_fmt( fmt: &mut W, @@ -260,6 +304,15 @@ pub fn encode_to_fmt( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[inline] pub fn encode_lower_to_fmt( fmt: &mut W, @@ -278,6 +331,15 @@ pub fn encode_lower_to_fmt( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[inline] pub fn encode_upper_to_fmt( fmt: &mut W, @@ -296,6 +358,15 @@ pub fn encode_upper_to_fmt( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "std")] #[inline] pub fn encode_to_writer( @@ -310,6 +381,15 @@ pub fn encode_to_writer( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "std")] #[inline] pub fn encode_lower_to_writer( @@ -329,6 +409,15 @@ pub fn encode_lower_to_writer( /// /// Encoded string will be prefixed with the `hrp` and have a checksum appended as specified by the /// `Ck` algorithm (`NoChecksum` to exclude checksum all together). +/// +/// ## Deviation from spec (BIP-173) +/// +/// In order to support [BOLT-11] this function does not restrict the total length of the returned +/// string. To encode [BIP-173] / [BIP-350] compliant segwit addresses use [`segwit::encode`]. +/// +/// [BIP-173]: +/// [BIP-350]: +/// [BOLT-11]: #[cfg(feature = "std")] #[inline] pub fn encode_upper_to_writer( @@ -492,4 +581,22 @@ mod tests { assert_eq!(got, want); } + + #[test] + fn can_encode_really_long_string() { + // Encode around the bech32 limit, mainly here as documentation. + let tcs = vec![ + // Also shows there are no limit checks on the data slice (since segwit limits this to 40 bytes). + ([0_u8; 50], Hrp::parse_unchecked("abc")), // Encodes to 90 characters. + ([0_u8; 50], Hrp::parse_unchecked("abcd")), // Encodes to 91 characters. + ]; + for (data, hrp) in tcs { + assert!(encode::(&hrp, &data).is_ok()); + } + + // Encode something arbitrarily long. + let data = [0_u8; 1024]; + let hrp = Hrp::parse_unchecked("abc"); + assert!(encode::(&hrp, &data).is_ok()); + } } diff --git a/src/primitives/decode.rs b/src/primitives/decode.rs index 3210d8eb6..13d316dc8 100644 --- a/src/primitives/decode.rs +++ b/src/primitives/decode.rs @@ -81,6 +81,7 @@ use crate::primitives::gf32::Fe32; use crate::primitives::hrp::{self, Hrp}; use crate::primitives::iter::{Fe32IterExt, FesToBytes}; use crate::primitives::segwit::{self, WitnessLengthError, VERSION_0}; +use crate::primitives::MAX_STRING_LENGTH; use crate::{Bech32, Bech32m}; /// Separator between the hrp and payload (as defined by BIP-173). @@ -127,6 +128,11 @@ impl<'s> UncheckedHrpstring<'s> { /// Checks for valid ASCII values, does not validate the checksum. #[inline] pub fn new(s: &'s str) -> Result { + let len = s.len(); + if len > MAX_STRING_LENGTH { + return Err(UncheckedHrpstringError::TooLong(len)); + } + let sep_pos = check_characters(s)?; let (hrp, data) = s.split_at(sep_pos); @@ -658,6 +664,8 @@ impl From for CheckedHrpstringError { #[derive(Debug, Clone, PartialEq, Eq)] #[non_exhaustive] pub enum UncheckedHrpstringError { + /// String exceeds maximum allowed length. + TooLong(usize), /// An error with the characters of the input string. Char(CharError), /// The human-readable part is invalid. @@ -669,6 +677,8 @@ impl fmt::Display for UncheckedHrpstringError { use UncheckedHrpstringError::*; match *self { + TooLong(len) => + write!(f, "string exceeds spec limit of {} chars: {}", MAX_STRING_LENGTH, len), Char(ref e) => write_err!(f, "character error"; e), Hrp(ref e) => write_err!(f, "invalid human-readable part"; e), } @@ -681,6 +691,7 @@ impl std::error::Error for UncheckedHrpstringError { use UncheckedHrpstringError::*; match *self { + TooLong(_) => None, Char(ref e) => Some(e), Hrp(ref e) => Some(e), } @@ -827,7 +838,7 @@ mod tests { ("\u{80}1eym55h", Hrp(hrp::Error::NonAsciiChar('\u{80}'))), ("an84characterslonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11d6pts4", - Hrp(hrp::Error::TooLong(84))), + TooLong(91)), ("pzry9x0s0muk", Char(CharError::MissingSeparator)), ("1pzry9x0s0muk", @@ -880,7 +891,7 @@ mod tests { ("\u{80}1g6xzxy", Hrp(hrp::Error::NonAsciiChar('\u{80}'))), ("an84characterslonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1569pvx", - Hrp(hrp::Error::TooLong(84))), + TooLong(91)), ("qyrz8wqd2c9m", Char(CharError::MissingSeparator)), ("1qyrz8wqd2c9m", diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index 1d496ba2b..297292991 100644 --- a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -12,6 +12,11 @@ pub mod segwit; use checksum::{Checksum, PackedNull}; +/// The maximum allowed length of a bech32 string (see [`BIP-173`]). +/// +/// [`BIP-173`]: +pub const MAX_STRING_LENGTH: usize = 90; + /// The "null checksum" used on bech32 strings for which we want to do no checksum checking. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum NoChecksum {} diff --git a/src/segwit.rs b/src/segwit.rs index 17a2598bf..101ec0662 100644 --- a/src/segwit.rs +++ b/src/segwit.rs @@ -53,7 +53,7 @@ use crate::primitives::iter::{ByteIterExt, Fe32IterExt}; #[cfg(feature = "alloc")] use crate::primitives::segwit; use crate::primitives::segwit::{InvalidWitnessVersionError, WitnessLengthError}; -use crate::primitives::{Bech32, Bech32m}; +use crate::primitives::{Bech32, Bech32m, MAX_STRING_LENGTH}; #[rustfmt::skip] // Keep public re-exports separate. #[doc(inline)] @@ -79,7 +79,8 @@ pub fn decode(s: &str) -> Result<(Hrp, Fe32, Vec), DecodeError> { /// Encodes a segwit address. /// -/// Does validity checks on the `witness_version` and length checks on the `witness_program`. +/// Does validity checks on the `witness_version`, length checks on the `witness_program`, and +/// checks the total encoded string length. /// /// As specified by [`BIP-350`] we use the [`Bech32m`] checksum algorithm for witness versions 1 and /// above, and for witness version 0 we use the original ([`BIP-173`]) [`Bech32`] checksum @@ -101,12 +102,20 @@ pub fn encode( segwit::validate_witness_version(witness_version)?; segwit::validate_witness_program_length(witness_program.len(), witness_version)?; + let encoded_length = encoded_length(hrp, witness_version, witness_program); + if encoded_length > MAX_STRING_LENGTH { + return Err(EncodeError::TooLong(encoded_length)); + } + let mut buf = String::new(); encode_to_fmt_unchecked(&mut buf, hrp, witness_version, witness_program)?; Ok(buf) } /// Encodes a segwit version 0 address. +/// +/// Does validity checks on the `witness_version`, length checks on the `witness_program`, and +/// checks the total encoded string length. #[cfg(feature = "alloc")] #[inline] pub fn encode_v0(hrp: &Hrp, witness_program: &[u8]) -> Result { @@ -114,6 +123,9 @@ pub fn encode_v0(hrp: &Hrp, witness_program: &[u8]) -> Result Result { @@ -122,8 +134,8 @@ pub fn encode_v1(hrp: &Hrp, witness_program: &[u8]) -> Result( fmt: &mut W, @@ -136,8 +148,8 @@ pub fn encode_to_fmt_unchecked( /// Encodes a segwit address to a writer ([`fmt::Write`]) using lowercase characters. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. pub fn encode_lower_to_fmt_unchecked( fmt: &mut W, hrp: &Hrp, @@ -164,8 +176,8 @@ pub fn encode_lower_to_fmt_unchecked( /// /// This is provided for use when creating QR codes. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[inline] pub fn encode_upper_to_fmt_unchecked( fmt: &mut W, @@ -192,8 +204,8 @@ pub fn encode_upper_to_fmt_unchecked( /// Encodes a segwit address to a writer ([`std::io::Write`]) using lowercase characters. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[cfg(feature = "std")] #[inline] pub fn encode_to_writer_unchecked( @@ -207,8 +219,8 @@ pub fn encode_to_writer_unchecked( /// Encodes a segwit address to a writer ([`std::io::Write`]) using lowercase characters. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[cfg(feature = "std")] #[inline] pub fn encode_lower_to_writer_unchecked( @@ -237,8 +249,8 @@ pub fn encode_lower_to_writer_unchecked( /// /// This is provided for use when creating QR codes. /// -/// Does not check the validity of the witness version and witness program lengths (see -/// the [`crate::primitives::segwit`] module for validation functions). +/// There are no guarantees that the written string is a valid segwit address unless all the +/// parameters are valid, see the body of `encode()` to see the validity checks are required. #[cfg(feature = "std")] #[inline] pub fn encode_upper_to_writer_unchecked( @@ -265,13 +277,6 @@ pub fn encode_upper_to_writer_unchecked( } /// Returns the length of the bech32 string after encoding HRP, witness version and program. -/// -/// # Returns -/// -/// Returns the encoded length, ether as `Ok(len)` if valid or as `Err(EncodedLengthError(len))` if -/// invalid (exceeds the maximum of 90 characters as defined in [BIP-173]). -/// -/// [`BIP-173`]: pub fn encoded_length( hrp: &Hrp, _witness_version: Fe32, // Emphasize that this is only for segwit. @@ -313,6 +318,8 @@ pub enum EncodeError { WitnessVersion(InvalidWitnessVersionError), /// Invalid witness length. WitnessLength(WitnessLengthError), + /// Encoding HRP, witver, and program into a bech32 string exceeds maximum allowed. + TooLong(usize), /// Writing to formatter failed. Fmt(fmt::Error), } @@ -324,6 +331,8 @@ impl fmt::Display for EncodeError { match *self { WitnessVersion(ref e) => write_err!(f, "witness version"; e), WitnessLength(ref e) => write_err!(f, "witness length"; e), + TooLong(len) => + write!(f, "encoded length {} exceeds spec limit {} chars", len, MAX_STRING_LENGTH), Fmt(ref e) => write_err!(f, "writing to formatter failed"; e), } } @@ -337,6 +346,7 @@ impl std::error::Error for EncodeError { match *self { WitnessVersion(ref e) => Some(e), WitnessLength(ref e) => Some(e), + TooLong(_) => None, Fmt(ref e) => Some(e), } } @@ -451,4 +461,37 @@ mod tests { assert_eq!(got, want); } } + + #[test] + fn cannot_encode_address_too_long() { + let tcs = vec![ + ("anhrpthatis19charsx", 91), + ("anhrpthatisthemaximumallowedlengthofeightythreebytesxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 155) + ]; + + for (hrp, len) in tcs { + let program = [0_u8; 40]; // Maximum witness program length. + let hrp = Hrp::parse_unchecked(hrp); + match encode(&hrp, VERSION_1, &program) { + Err(e) => assert_eq!(e, EncodeError::TooLong(len)), + Ok(address) => println!("len: {}", address.len()), + } + } + } + + #[test] + fn decode_long_address() { + use crate::primitives::decode::{SegwitHrpstringError, UncheckedHrpstringError}; + + // 90 characters long, maximum allowed. + let address = "anhrpthatisnineteen1pqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqghfyyfz"; + assert!(decode(address).is_ok()); + + // 91 characters long, one over xmaximum allowed. + let address = "anhrpthatistwentychs1pqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqgqfrwjz"; + assert_eq!( + decode(address).unwrap_err(), + DecodeError(SegwitHrpstringError::Unchecked(UncheckedHrpstringError::TooLong(91))) + ); + } }