Skip to content

Commit

Permalink
Canonical doc and test improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
swooster committed Oct 24, 2023
1 parent c6818fb commit deb913b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ impl<I> ExactSizeIterator for Canonical<I> where
///
/// This returns a sequence of [`Nucleotide`]s that is:
/// * Isomorphic to the original sequence; that is, the bases can be remapped to convert between
/// the original and forward-canonnical sequences. So `ATA` and `GAG` have the same
/// the original and forward-canonical sequences. So `ATA` and `GAG` have the same
/// forward-canonical sequence but `TAA` and `GAG` do not.
/// * Lexically "before" all other [`Nucleotide`] sequences that are isomorphic to it. Note that
/// (at the time of this writing) the order of [`Nucleotide`]s is
Expand Down Expand Up @@ -425,6 +425,10 @@ mod test {
canonical2 == canonical
}

// Property: a sequence and its reverse complement must canonicalize to the
// same sequence. Returns `true` when the property holds for `dna`.
fn canonicalization_is_unaffected_by_reverse_complement(dna: DnaSequenceStrict) -> bool {
    // Canonical form of the sequence exactly as given.
    let from_original = dna.canonical();
    // Canonical form after taking the reverse complement first.
    let from_reverse_complement = dna.reverse_complement().canonical();
    from_original == from_reverse_complement
}

fn lexical_min_is_equivalent_to_vec_min(vec1: Vec<Nucleotide>, vec2: Vec<Nucleotide>) -> bool {
let lmin = LexicalMin::new(vec1.iter(), vec2.iter());
let vmin = vec1.clone().min(vec2.clone());
Expand Down
24 changes: 24 additions & 0 deletions src/rust_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,31 @@ impl<T: NucleotideLike> FromStr for DnaSequence<T> {
impl DnaSequence<Nucleotide> {
/// Return canonical isomorphic DNA sequence.
///
/// This returns the lexical minimum of all sequences isomorphic to the original or its
/// reverse. In other words, two sequences have the same canonical sequence if and only if
/// they are isomorphic (or one is isomorphic to the reverse of the other).
/// See [`Canonical`] for details.
///
/// # Caveat
///
/// We define "lexical minimum" in terms of the ordering of [`Nucleotide`] which is currently
/// [`A`](Nucleotide::A) [`T`](Nucleotide::T) [`C`](Nucleotide::C) [`G`](Nucleotide::G),
/// _not_ alphabetical.
///
/// # Examples
///
/// ```
/// use quickdna::DnaSequenceStrict;
///
/// let dna: DnaSequenceStrict = "CATTAG".parse().unwrap();
/// let expected: DnaSequenceStrict = "ATCCTG".parse().unwrap();
/// assert_eq!(dna.canonical(), expected);
///
/// let dna: DnaSequenceStrict = "TAGACGTACGTAGTACGTTAGCTGAGCTGAGTACG".parse().unwrap();
/// // Reverse complement does not change the canonical sequence: complementing
/// // is just a remapping of bases, so the reverse complement is isomorphic to
/// // the reverse of the original.
/// assert_eq!(dna.canonical(), dna.reverse_complement().canonical());
/// ```
pub fn canonical(&self) -> Self {
let canonical = Canonical::new(self.as_slice().iter().copied()).collect();
Self::new(canonical)
Expand Down

0 comments on commit deb913b

Please sign in to comment.